Diffstat (limited to 'net')
484 files changed, 16888 insertions, 9090 deletions
diff --git a/net/802/fddi.c b/net/802/fddi.c
index 7533ce26ba..888379ae35 100644
--- a/net/802/fddi.c
+++ b/net/802/fddi.c
@@ -175,4 +175,5 @@ struct net_device *alloc_fddidev(int sizeof_priv)
 }
 EXPORT_SYMBOL(alloc_fddidev);
 
+MODULE_DESCRIPTION("Core routines for FDDI network devices");
 MODULE_LICENSE("GPL");
diff --git a/net/802/garp.c b/net/802/garp.c
index ab24b21fbb..6a743d0043 100644
--- a/net/802/garp.c
+++ b/net/802/garp.c
@@ -21,6 +21,7 @@ static unsigned int garp_join_time __read_mostly = 200;
 module_param(garp_join_time, uint, 0644);
 MODULE_PARM_DESC(garp_join_time, "Join time in ms (default 200ms)");
 
+MODULE_DESCRIPTION("IEEE 802.1D Generic Attribute Registration Protocol (GARP)");
 MODULE_LICENSE("GPL");
 
 static const struct garp_state_trans {
diff --git a/net/802/mrp.c b/net/802/mrp.c
index eafc21ecc2..3154d74094 100644
--- a/net/802/mrp.c
+++ b/net/802/mrp.c
@@ -26,6 +26,7 @@ static unsigned int mrp_periodic_time __read_mostly = 1000;
 module_param(mrp_periodic_time, uint, 0644);
 MODULE_PARM_DESC(mrp_periodic_time, "Periodic time in ms (default 1s)");
 
+MODULE_DESCRIPTION("IEEE 802.1Q Multiple Registration Protocol (MRP)");
 MODULE_LICENSE("GPL");
 
 static const u8
diff --git a/net/802/p8022.c b/net/802/p8022.c
index 79c2317311..78c25168d7 100644
--- a/net/802/p8022.c
+++ b/net/802/p8022.c
@@ -60,4 +60,5 @@ void unregister_8022_client(struct datalink_proto *proto)
 EXPORT_SYMBOL(register_8022_client);
 EXPORT_SYMBOL(unregister_8022_client);
 
+MODULE_DESCRIPTION("Support for 802.2 demultiplexing off Ethernet");
 MODULE_LICENSE("GPL");
diff --git a/net/802/psnap.c b/net/802/psnap.c
index 1406bfdbda..fca9d45490 100644
--- a/net/802/psnap.c
+++ b/net/802/psnap.c
@@ -160,4 +160,5 @@ void unregister_snap_client(struct datalink_proto *proto)
 	kfree(proto);
 }
 
+MODULE_DESCRIPTION("SNAP data link layer. Derived from 802.2");
 MODULE_LICENSE("GPL");
Derived from 802.2"); MODULE_LICENSE("GPL"); diff --git a/net/802/stp.c b/net/802/stp.c index d550d9f88f..03c9f75e92 100644 --- a/net/802/stp.c +++ b/net/802/stp.c @@ -98,4 +98,5 @@ void stp_proto_unregister(const struct stp_proto *proto) } EXPORT_SYMBOL_GPL(stp_proto_unregister); +MODULE_DESCRIPTION("SAP demux for IEEE 802.1D Spanning Tree Protocol (STP)"); MODULE_LICENSE("GPL"); diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index e40aa3e364..e45187b882 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -738,5 +738,6 @@ static void __exit vlan_cleanup_module(void) module_init(vlan_proto_init); module_exit(vlan_cleanup_module); +MODULE_DESCRIPTION("802.1Q/802.1ad VLAN Protocol"); MODULE_LICENSE("GPL"); MODULE_VERSION(DRV_VERSION); diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index d0eb03ada7..1a3948b8c4 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -671,10 +671,14 @@ static int p9_fd_request(struct p9_client *client, struct p9_req_t *req) p9_debug(P9_DEBUG_TRANS, "mux %p task %p tcall %p id %d\n", m, current, &req->tc, req->tc.id); - if (m->err < 0) - return m->err; spin_lock(&m->req_lock); + + if (m->err < 0) { + spin_unlock(&m->req_lock); + return m->err; + } + WRITE_ONCE(req->status, REQ_STATUS_UNSENT); list_add_tail(&req->req_list, &m->unsent_req_list); spin_unlock(&m->req_lock); diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c index 1fffe2bed5..dfdbe1ca53 100644 --- a/net/9p/trans_xen.c +++ b/net/9p/trans_xen.c @@ -54,7 +54,6 @@ struct xen_9pfs_front_priv { char *tag; struct p9_client *client; - int num_rings; struct xen_9pfs_dataring *rings; }; @@ -131,7 +130,7 @@ static int p9_xen_request(struct p9_client *client, struct p9_req_t *p9_req) if (list_entry_is_head(priv, &xen_9pfs_devs, list)) return -EINVAL; - num = p9_req->tc.tag % priv->num_rings; + num = p9_req->tc.tag % XEN_9PFS_NUM_RINGS; ring = &priv->rings[num]; again: @@ -279,7 +278,7 @@ static void xen_9pfs_front_free(struct xen_9pfs_front_priv *priv) list_del(&priv->list); write_unlock(&xen_9pfs_lock); - for (i = 0; i < priv->num_rings; i++) { + for (i = 0; i < XEN_9PFS_NUM_RINGS; i++) { struct xen_9pfs_dataring *ring = &priv->rings[i]; cancel_work_sync(&ring->work); @@ -408,15 +407,14 @@ static int xen_9pfs_front_init(struct xenbus_device *dev) if (p9_xen_trans.maxsize > XEN_FLEX_RING_SIZE(max_ring_order)) p9_xen_trans.maxsize = XEN_FLEX_RING_SIZE(max_ring_order) / 2; - priv->num_rings = XEN_9PFS_NUM_RINGS; - priv->rings = kcalloc(priv->num_rings, sizeof(*priv->rings), + priv->rings = kcalloc(XEN_9PFS_NUM_RINGS, sizeof(*priv->rings), GFP_KERNEL); if (!priv->rings) { kfree(priv); return -ENOMEM; } - for (i = 0; i < priv->num_rings; i++) { + for (i = 0; i < XEN_9PFS_NUM_RINGS; i++) { priv->rings[i].priv = priv; ret = xen_9pfs_front_alloc_dataring(dev, &priv->rings[i], max_ring_order); @@ -434,10 +432,11 @@ static int xen_9pfs_front_init(struct xenbus_device *dev) if (ret) goto error_xenbus; ret = xenbus_printf(xbt, dev->nodename, "num-rings", "%u", - priv->num_rings); + XEN_9PFS_NUM_RINGS); if (ret) goto error_xenbus; - for (i = 0; i < priv->num_rings; i++) { + + for (i = 0; i < XEN_9PFS_NUM_RINGS; i++) { char str[16]; BUILD_BUG_ON(XEN_9PFS_NUM_RINGS > 9); diff --git a/net/Kconfig b/net/Kconfig index d532ec33f1..3ec6bc98fa 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -246,7 +246,7 @@ source "net/bridge/Kconfig" source "net/dsa/Kconfig" source "net/8021q/Kconfig" source "net/llc/Kconfig" -source "drivers/net/appletalk/Kconfig" +source "net/appletalk/Kconfig" source "net/x25/Kconfig" source 
"net/lapb/Kconfig" source "net/phonet/Kconfig" @@ -508,4 +508,13 @@ config NETDEV_ADDR_LIST_TEST default KUNIT_ALL_TESTS depends on KUNIT +config NET_TEST + tristate "KUnit tests for networking" if !KUNIT_ALL_TESTS + depends on KUNIT + default KUNIT_ALL_TESTS + help + KUnit tests covering core networking infra, such as sk_buff. + + If unsure, say N. + endif # if NET diff --git a/net/appletalk/Kconfig b/net/appletalk/Kconfig new file mode 100644 index 0000000000..041141abf9 --- /dev/null +++ b/net/appletalk/Kconfig @@ -0,0 +1,30 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Appletalk configuration +# +config ATALK + tristate "Appletalk protocol support" + select LLC + help + AppleTalk is the protocol that Apple computers can use to communicate + on a network. If your Linux box is connected to such a network and you + wish to connect to it, say Y. You will need to use the netatalk package + so that your Linux box can act as a print and file server for Macs as + well as access AppleTalk printers. Check out + <http://www.zettabyte.net/netatalk/> on the WWW for details. + EtherTalk is the name used for AppleTalk over Ethernet and the + cheaper and slower LocalTalk is AppleTalk over a proprietary Apple + network using serial links. EtherTalk and LocalTalk are fully + supported by Linux. + + General information about how to connect Linux, Windows machines and + Macs is on the WWW at <http://www.eats.com/linux_mac_win.html>. The + NET3-4-HOWTO, available from + <http://www.tldp.org/docs.html#howto>, contains valuable + information as well. + + To compile this driver as a module, choose M here: the module will be + called appletalk. You almost certainly want to compile it as a + module so you can restart your AppleTalk stack without rebooting + your machine. I hear that the GNU boycott of Apple is over, so + even politically correct people are allowed to say Y here. diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c index c7236daa24..9fa0b24690 100644 --- a/net/appletalk/aarp.c +++ b/net/appletalk/aarp.c @@ -664,7 +664,7 @@ out_unlock: sendit: if (skb->sk) - skb->priority = skb->sk->sk_priority; + skb->priority = READ_ONCE(skb->sk->sk_priority); if (dev_queue_xmit(skb)) goto drop; sent: diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index b070a89912..a852ec093f 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1284,39 +1284,6 @@ out: return err; } -#if IS_ENABLED(CONFIG_IPDDP) -static __inline__ int is_ip_over_ddp(struct sk_buff *skb) -{ - return skb->data[12] == 22; -} - -static int handle_ip_over_ddp(struct sk_buff *skb) -{ - struct net_device *dev = __dev_get_by_name(&init_net, "ipddp0"); - struct net_device_stats *stats; - - /* This needs to be able to handle ipddp"N" devices */ - if (!dev) { - kfree_skb(skb); - return NET_RX_DROP; - } - - skb->protocol = htons(ETH_P_IP); - skb_pull(skb, 13); - skb->dev = dev; - skb_reset_transport_header(skb); - - stats = netdev_priv(dev); - stats->rx_packets++; - stats->rx_bytes += skb->len + 13; - return netif_rx(skb); /* Send the SKB up to a higher place. */ -} -#else -/* make it easy for gcc to optimize this test out, i.e. 
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index b070a89912..a852ec093f 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1284,39 +1284,6 @@ out:
 	return err;
 }
 
-#if IS_ENABLED(CONFIG_IPDDP)
-static __inline__ int is_ip_over_ddp(struct sk_buff *skb)
-{
-	return skb->data[12] == 22;
-}
-
-static int handle_ip_over_ddp(struct sk_buff *skb)
-{
-	struct net_device *dev = __dev_get_by_name(&init_net, "ipddp0");
-	struct net_device_stats *stats;
-
-	/* This needs to be able to handle ipddp"N" devices */
-	if (!dev) {
-		kfree_skb(skb);
-		return NET_RX_DROP;
-	}
-
-	skb->protocol = htons(ETH_P_IP);
-	skb_pull(skb, 13);
-	skb->dev = dev;
-	skb_reset_transport_header(skb);
-
-	stats = netdev_priv(dev);
-	stats->rx_packets++;
-	stats->rx_bytes += skb->len + 13;
-	return netif_rx(skb);	/* Send the SKB up to a higher place. */
-}
-#else
-/* make it easy for gcc to optimize this test out, i.e. kill the code */
-#define is_ip_over_ddp(skb)		0
-#define handle_ip_over_ddp(skb)		0
-#endif
-
 static int atalk_route_packet(struct sk_buff *skb, struct net_device *dev,
 			      struct ddpehdr *ddp, __u16 len_hops, int origlen)
 {
@@ -1480,9 +1447,6 @@ static int atalk_rcv(struct sk_buff *skb, struct net_device *dev,
 		return atalk_route_packet(skb, dev, ddp, len_hops, origlen);
 	}
 
-	/* if IP over DDP is not selected this code will be optimized out */
-	if (is_ip_over_ddp(skb))
-		return handle_ip_over_ddp(skb);
 	/*
 	 * Which socket - atalk_search_socket() looks for a *full match*
 	 * of the <net, node, port> tuple.
diff --git a/net/atm/atm_sysfs.c b/net/atm/atm_sysfs.c
index 466353b3dd..54e7fb1a4e 100644
--- a/net/atm/atm_sysfs.c
+++ b/net/atm/atm_sysfs.c
@@ -116,8 +116,6 @@ static int atm_uevent(const struct device *cdev, struct kobj_uevent_env *env)
 		return -ENODEV;
 
 	adev = to_atm_dev(cdev);
-	if (!adev)
-		return -ENODEV;
 
 	if (add_uevent_var(env, "NAME=%s%d", adev->type, adev->number))
 		return -ENOMEM;
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 5db805d5f7..558e158c98 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -939,7 +939,7 @@ struct sock *ax25_make_new(struct sock *osk, struct ax25_dev *ax25_dev)
 	sock_init_data(NULL, sk);
 
 	sk->sk_type     = osk->sk_type;
-	sk->sk_priority = osk->sk_priority;
+	sk->sk_priority = READ_ONCE(osk->sk_priority);
 	sk->sk_protocol = osk->sk_protocol;
 	sk->sk_rcvbuf   = osk->sk_rcvbuf;
 	sk->sk_sndbuf   = osk->sk_sndbuf;
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 1fdf4b9053..d01db89fcb 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -760,6 +760,7 @@ static int terminate_big_sync(struct hci_dev *hdev, void *data)
 
 	bt_dev_dbg(hdev, "big 0x%2.2x bis 0x%2.2x", d->big, d->bis);
 
+	hci_disable_per_advertising_sync(hdev, d->bis);
 	hci_remove_ext_adv_instance_sync(hdev, d->bis, NULL);
 
 	/* Only terminate BIG if it has been created */
@@ -815,6 +816,17 @@ static int big_terminate_sync(struct hci_dev *hdev, void *data)
 	return 0;
 }
 
+static void find_bis(struct hci_conn *conn, void *data)
+{
+	struct iso_list_data *d = data;
+
+	/* Ignore if BIG doesn't match */
+	if (d->big != conn->iso_qos.bcast.big)
+		return;
+
+	d->count++;
+}
+
 static int hci_le_big_terminate(struct hci_dev *hdev, u8 big, struct hci_conn *conn)
 {
 	struct iso_list_data *d;
@@ -826,10 +838,27 @@ static int hci_le_big_terminate(struct hci_dev *hdev, u8 big, struct hci_conn *conn)
 	if (!d)
 		return -ENOMEM;
 
+	memset(d, 0, sizeof(*d));
 	d->big = big;
 	d->sync_handle = conn->sync_handle;
-	d->pa_sync_term = test_and_clear_bit(HCI_CONN_PA_SYNC, &conn->flags);
-	d->big_sync_term = test_and_clear_bit(HCI_CONN_BIG_SYNC, &conn->flags);
+
+	if (test_and_clear_bit(HCI_CONN_PA_SYNC, &conn->flags)) {
+		hci_conn_hash_list_flag(hdev, find_bis, ISO_LINK,
+					HCI_CONN_PA_SYNC, d);
+
+		if (!d->count)
+			d->pa_sync_term = true;
+
+		d->count = 0;
+	}
+
+	if (test_and_clear_bit(HCI_CONN_BIG_SYNC, &conn->flags)) {
+		hci_conn_hash_list_flag(hdev, find_bis, ISO_LINK,
+					HCI_CONN_BIG_SYNC, d);
+
+		if (!d->count)
+			d->big_sync_term = true;
+	}
 
 	ret = hci_cmd_sync_queue(hdev, big_terminate_sync, d,
 				 terminate_big_destroy);
@@ -865,12 +894,6 @@ static void bis_cleanup(struct hci_conn *conn)
 
 		hci_le_terminate_big(hdev, conn);
 	} else {
-		bis = hci_conn_hash_lookup_big_any_dst(hdev,
-						       conn->iso_qos.bcast.big);
-
-		if (bis)
-			return;
-
 		hci_le_big_terminate(hdev, conn->iso_qos.bcast.big,
 				     conn);
 	}
@@ -1250,6 +1273,12 @@ void hci_conn_failed(struct hci_conn *conn, u8 status)
 		break;
 	}
 
+	/* In case of BIG/PA sync failed, clear conn flags so that
+	 * the conns will be correctly cleaned up by ISO layer
+	 */
+	test_and_clear_bit(HCI_CONN_BIG_SYNC_FAILED, &conn->flags);
+	test_and_clear_bit(HCI_CONN_PA_SYNC_FAILED, &conn->flags);
+
 	conn->state = BT_CLOSED;
 	hci_connect_cfm(conn, status);
 	hci_conn_del(conn);
@@ -1492,6 +1521,18 @@ static int qos_set_bis(struct hci_dev *hdev, struct bt_iso_qos *qos)
 
 	/* Allocate BIS if not set */
 	if (qos->bcast.bis == BT_ISO_QOS_BIS_UNSET) {
+		if (qos->bcast.big != BT_ISO_QOS_BIG_UNSET) {
+			conn = hci_conn_hash_lookup_big(hdev, qos->bcast.big);
+
+			if (conn) {
+				/* If the BIG handle is already matched to an advertising
+				 * handle, do not allocate a new one.
+				 */
+				qos->bcast.bis = conn->iso_qos.bcast.bis;
+				return 0;
+			}
+		}
+
 		/* Find an unused adv set to advertise BIS, skip instance 0x00
 		 * since it is reserved as general purpose set.
 		 */
@@ -2145,7 +2186,7 @@ int hci_le_big_create_sync(struct hci_dev *hdev, struct hci_conn *hcon,
 	} pdu;
 	int err;
 
-	if (num_bis > sizeof(pdu.bis))
+	if (num_bis < 0x01 || num_bis > sizeof(pdu.bis))
 		return -EINVAL;
 
 	err = qos_set_big(hdev, qos);
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 16e4427732..ef8c3bed73 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -7110,7 +7110,6 @@ static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data,
 {
 	struct hci_evt_le_big_sync_estabilished *ev = data;
 	struct hci_conn *bis;
-	struct hci_conn *pa_sync;
 	int i;
 
 	bt_dev_dbg(hdev, "status 0x%2.2x", ev->status);
@@ -7121,15 +7120,6 @@ static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data,
 
 	hci_dev_lock(hdev);
 
-	if (!ev->status) {
-		pa_sync = hci_conn_hash_lookup_pa_sync_big_handle(hdev, ev->handle);
-		if (pa_sync)
-			/* Also mark the BIG sync established event on the
-			 * associated PA sync hcon
-			 */
-			set_bit(HCI_CONN_BIG_SYNC, &pa_sync->flags);
-	}
-
 	for (i = 0; i < ev->num_bis; i++) {
 		u16 handle = le16_to_cpu(ev->bis[i]);
 		__le32 interval;
diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
index 9e71362c04..97284d9b2a 100644
--- a/net/bluetooth/hci_sync.c
+++ b/net/bluetooth/hci_sync.c
@@ -1312,7 +1312,7 @@ int hci_start_ext_adv_sync(struct hci_dev *hdev, u8 instance)
 	return hci_enable_ext_advertising_sync(hdev, instance);
 }
 
-static int hci_disable_per_advertising_sync(struct hci_dev *hdev, u8 instance)
+int hci_disable_per_advertising_sync(struct hci_dev *hdev, u8 instance)
 {
 	struct hci_cp_le_set_per_adv_enable cp;
 	struct adv_info *adv = NULL;
@@ -3800,12 +3800,14 @@ static int hci_set_event_mask_sync(struct hci_dev *hdev)
 	if (lmp_bredr_capable(hdev)) {
 		events[4] |= 0x01; /* Flow Specification Complete */
 
-		/* Don't set Disconnect Complete when suspended as that
-		 * would wakeup the host when disconnecting due to
-		 * suspend.
+		/* Don't set Disconnect Complete and mode change when
+		 * suspended as that would wakeup the host when disconnecting
+		 * due to suspend.
 		 */
-		if (hdev->suspended)
+		if (hdev->suspended) {
 			events[0] &= 0xef;
+			events[2] &= 0xf7;
+		}
 	} else {
 		/* Use a different default for LE-only devices */
 		memset(events, 0, sizeof(events));
@@ -4264,12 +4266,12 @@ static int hci_le_set_host_feature_sync(struct hci_dev *hdev)
 {
 	struct hci_cp_le_set_host_feature cp;
 
-	if (!iso_capable(hdev))
+	if (!cis_capable(hdev))
 		return 0;
 
 	memset(&cp, 0, sizeof(cp));
 
-	/* Isochronous Channels (Host Support) */
+	/* Connected Isochronous Channels (Host Support) */
 	cp.bit_number = 32;
 	cp.bit_value = 1;
 
@@ -5232,6 +5234,17 @@ static int hci_disconnect_sync(struct hci_dev *hdev, struct hci_conn *conn,
 	if (conn->type == AMP_LINK)
 		return hci_disconnect_phy_link_sync(hdev, conn->handle, reason);
 
+	if (test_bit(HCI_CONN_BIG_CREATED, &conn->flags)) {
+		/* This is a BIS connection, hci_conn_del will
+		 * do the necessary cleanup.
+		 */
+		hci_dev_lock(hdev);
+		hci_conn_failed(conn, reason);
+		hci_dev_unlock(hdev);
+
+		return 0;
+	}
+
 	memset(&cp, 0, sizeof(cp));
 	cp.handle = cpu_to_le16(conn->handle);
 	cp.reason = reason;
@@ -5384,21 +5397,6 @@ int hci_abort_conn_sync(struct hci_dev *hdev, struct hci_conn *conn, u8 reason)
 		err = hci_reject_conn_sync(hdev, conn, reason);
 		break;
 	case BT_OPEN:
-		hci_dev_lock(hdev);
-
-		/* Cleanup bis or pa sync connections */
-		if (test_and_clear_bit(HCI_CONN_BIG_SYNC_FAILED, &conn->flags) ||
-		    test_and_clear_bit(HCI_CONN_PA_SYNC_FAILED, &conn->flags)) {
-			hci_conn_failed(conn, reason);
-		} else if (test_bit(HCI_CONN_PA_SYNC, &conn->flags) ||
-			   test_bit(HCI_CONN_BIG_SYNC, &conn->flags)) {
-			conn->state = BT_CLOSED;
-			hci_disconn_cfm(conn, reason);
-			hci_conn_del(conn);
-		}
-
-		hci_dev_unlock(hdev);
-		return 0;
 	case BT_BOUND:
 		break;
 	default:
diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c
index 2132a16be9..fd81289fd3 100644
--- a/net/bluetooth/iso.c
+++ b/net/bluetooth/iso.c
@@ -14,6 +14,7 @@
 #include <net/bluetooth/bluetooth.h>
 #include <net/bluetooth/hci_core.h>
 #include <net/bluetooth/iso.h>
+#include "eir.h"
 
 static const struct proto_ops iso_sock_ops;
 
@@ -47,11 +48,13 @@ static void iso_sock_kill(struct sock *sk);
 
 #define EIR_SERVICE_DATA_LENGTH	4
 #define BASE_MAX_LENGTH		(HCI_MAX_PER_AD_LENGTH - EIR_SERVICE_DATA_LENGTH)
+#define EIR_BAA_SERVICE_UUID	0x1851
 
 /* iso_pinfo flags values */
 enum {
 	BT_SK_BIG_SYNC,
 	BT_SK_PA_SYNC,
+	BT_SK_PA_SYNC_TERM,
 };
 
 struct iso_pinfo {
@@ -80,6 +83,11 @@ static bool iso_match_sid(struct sock *sk, void *data);
 static bool iso_match_sync_handle(struct sock *sk, void *data);
 static void iso_sock_disconn(struct sock *sk);
 
+typedef bool (*iso_sock_match_t)(struct sock *sk, void *data);
+
+static struct sock *iso_get_sock_listen(bdaddr_t *src, bdaddr_t *dst,
+					iso_sock_match_t match, void *data);
+
 /* ---- ISO timers ---- */
 #define ISO_CONN_TIMEOUT	(HZ * 40)
 #define ISO_DISCONN_TIMEOUT	(HZ * 2)
@@ -188,10 +196,21 @@ static void iso_chan_del(struct sock *sk, int err)
 	sock_set_flag(sk, SOCK_ZAPPED);
 }
 
+static bool iso_match_conn_sync_handle(struct sock *sk, void *data)
+{
+	struct hci_conn *hcon = data;
+
+	if (test_bit(BT_SK_PA_SYNC, &iso_pi(sk)->flags))
+		return false;
+
+	return hcon->sync_handle == iso_pi(sk)->sync_handle;
+}
+
 static void iso_conn_del(struct hci_conn *hcon, int err)
 {
 	struct iso_conn *conn = hcon->iso_data;
 	struct sock *sk;
+	struct sock *parent;
 
 	if (!conn)
 		return;
@@ -207,6 +226,25 @@ static void iso_conn_del(struct hci_conn *hcon, int err)
 
 	if (sk) {
 		lock_sock(sk);
+
+		/* While a PA sync hcon is in the process of closing,
+		 * mark parent socket with a flag, so that any residual
+		 * BIGInfo adv reports that arrive before PA sync is
+		 * terminated are not processed anymore.
+		 */
+		if (test_bit(BT_SK_PA_SYNC, &iso_pi(sk)->flags)) {
+			parent = iso_get_sock_listen(&hcon->src,
+						     &hcon->dst,
+						     iso_match_conn_sync_handle,
+						     hcon);
+
+			if (parent) {
+				set_bit(BT_SK_PA_SYNC_TERM,
+					&iso_pi(parent)->flags);
+				sock_put(parent);
+			}
+		}
+
 		iso_sock_clear_timer(sk);
 		iso_chan_del(sk, err);
 		release_sock(sk);
@@ -543,8 +581,6 @@ static struct sock *__iso_get_sock_listen_by_sid(bdaddr_t *ba, bdaddr_t *bc,
 	return NULL;
 }
 
-typedef bool (*iso_sock_match_t)(struct sock *sk, void *data);
-
 /* Find socket listening:
  * source bdaddr (Unicast)
  * destination bdaddr (Broadcast only)
@@ -790,8 +826,7 @@ static int iso_sock_bind_bc(struct socket *sock, struct sockaddr *addr,
 	BT_DBG("sk %p bc_sid %u bc_num_bis %u", sk, sa->iso_bc->bc_sid,
 	       sa->iso_bc->bc_num_bis);
 
-	if (addr_len > sizeof(*sa) + sizeof(*sa->iso_bc) ||
-	    sa->iso_bc->bc_num_bis < 0x01 || sa->iso_bc->bc_num_bis > 0x1f)
+	if (addr_len > sizeof(*sa) + sizeof(*sa->iso_bc))
 		return -EINVAL;
 
 	bacpy(&iso_pi(sk)->dst, &sa->iso_bc->bc_bdaddr);
@@ -1461,6 +1496,8 @@ static int iso_sock_getsockopt(struct socket *sock, int level, int optname,
 		len = min_t(unsigned int, len, base_len);
 		if (copy_to_user(optval, base, len))
 			err = -EFAULT;
+		if (put_user(len, optlen))
+			err = -EFAULT;
 
 		break;
@@ -1756,9 +1793,20 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags)
 		/* Try to get PA sync listening socket, if it exists */
 		sk = iso_get_sock_listen(&hdev->bdaddr, bdaddr,
 					 iso_match_pa_sync_flag, NULL);
-		if (!sk)
+
+		if (!sk) {
 			sk = iso_get_sock_listen(&hdev->bdaddr, bdaddr,
 						 iso_match_sync_handle, ev2);
+
+			/* If PA Sync is in process of terminating,
+			 * do not handle any more BIGInfo adv reports.
+			 */
+
+			if (sk && test_bit(BT_SK_PA_SYNC_TERM,
+					   &iso_pi(sk)->flags))
+				return lm;
+		}
+
 		if (sk) {
 			int err;
 
@@ -1783,12 +1831,16 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags)
 
 		ev3 = hci_recv_event_data(hdev, HCI_EV_LE_PER_ADV_REPORT);
 		if (ev3) {
+			size_t base_len = ev3->length;
+			u8 *base;
+
 			sk = iso_get_sock_listen(&hdev->bdaddr, bdaddr,
 						 iso_match_sync_handle_pa_report, ev3);
-
-			if (sk) {
-				memcpy(iso_pi(sk)->base, ev3->data, ev3->length);
-				iso_pi(sk)->base_len = ev3->length;
+			base = eir_get_service_data(ev3->data, ev3->length,
+						    EIR_BAA_SERVICE_UUID, &base_len);
+			if (base && sk && base_len <= sizeof(iso_pi(sk)->base)) {
+				memcpy(iso_pi(sk)->base, base, base_len);
+				iso_pi(sk)->base_len = base_len;
 			}
 		} else {
 			sk = iso_get_sock_listen(&hdev->bdaddr, BDADDR_ANY, NULL, NULL);
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index baeebee41c..60298975d5 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -6526,7 +6526,8 @@ static inline void l2cap_sig_channel(struct l2cap_conn *conn,
 		if (len > skb->len || !cmd->ident) {
 			BT_DBG("corrupted command");
 			l2cap_sig_send_rej(conn, cmd->ident);
-			break;
+			skb_pull(skb, len > skb->len ? skb->len : len);
+			continue;
 		}
 
 		err = l2cap_bredr_sig_cmd(conn, cmd, len, skb->data);
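The l2cap_core.c change above switches the signalling-channel parser from abandoning the whole frame on a corrupted command to consuming the bogus length and continuing with the remaining commands. A self-contained toy parser showing the same policy; struct cmd_hdr and parse_frame are illustrative (real code reads lengths with endian-safe, unaligned-safe helpers).

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct cmd_hdr { uint8_t code; uint8_t ident; uint16_t len; };

static void parse_frame(const uint8_t *buf, size_t remaining)
{
	while (remaining >= sizeof(struct cmd_hdr)) {
		const struct cmd_hdr *cmd = (const void *)buf;
		size_t len = cmd->len;

		buf += sizeof(*cmd);
		remaining -= sizeof(*cmd);

		if (len > remaining || !cmd->ident) {
			printf("corrupted command, skipping\n");
			/* clamp the claimed length, then keep parsing */
			len = len > remaining ? remaining : len;
			buf += len;
			remaining -= len;
			continue;
		}

		printf("cmd 0x%02x ident %u len %zu\n",
		       cmd->code, cmd->ident, len);
		buf += len;
		remaining -= len;
	}
}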
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index 3bdfc3f1e7..e50d3d1020 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -1615,7 +1615,7 @@ static struct sk_buff *l2cap_sock_alloc_skb_cb(struct l2cap_chan *chan,
 		return ERR_PTR(-ENOTCONN);
 	}
 
-	skb->priority = sk->sk_priority;
+	skb->priority = READ_ONCE(sk->sk_priority);
 
 	bt_cb(skb)->l2cap.chan = chan;
diff --git a/net/bluetooth/msft.c b/net/bluetooth/msft.c
index abbafa6194..630e302327 100644
--- a/net/bluetooth/msft.c
+++ b/net/bluetooth/msft.c
@@ -150,10 +150,7 @@ static bool read_supported_features(struct hci_dev *hdev,
 	skb = __hci_cmd_sync(hdev, hdev->msft_opcode, sizeof(cp), &cp,
 			     HCI_CMD_TIMEOUT);
-	if (IS_ERR_OR_NULL(skb)) {
-		if (!skb)
-			skb = ERR_PTR(-EIO);
-
+	if (IS_ERR(skb)) {
 		bt_dev_err(hdev, "Failed to read MSFT supported features (%ld)",
 			   PTR_ERR(skb));
 		return false;
@@ -353,7 +350,7 @@ static void msft_remove_addr_filters_sync(struct hci_dev *hdev, u8 handle)
 		skb = __hci_cmd_sync(hdev, hdev->msft_opcode, sizeof(cp), &cp,
 				     HCI_CMD_TIMEOUT);
-		if (IS_ERR_OR_NULL(skb)) {
+		if (IS_ERR(skb)) {
 			kfree(address_filter);
 			continue;
 		}
@@ -442,11 +439,8 @@ static int msft_remove_monitor_sync(struct hci_dev *hdev,
 	skb = __hci_cmd_sync(hdev, hdev->msft_opcode, sizeof(cp), &cp,
 			     HCI_CMD_TIMEOUT);
-	if (IS_ERR_OR_NULL(skb)) {
-		if (!skb)
-			return -EIO;
+	if (IS_ERR(skb))
 		return PTR_ERR(skb);
-	}
 
 	return msft_le_cancel_monitor_advertisement_cb(hdev, hdev->msft_opcode,
 						       monitor, skb);
@@ -559,7 +553,7 @@ static int msft_add_monitor_sync(struct hci_dev *hdev,
 	skb = __hci_cmd_sync(hdev, hdev->msft_opcode, total_size, cp,
 			     HCI_CMD_TIMEOUT);
-	if (IS_ERR_OR_NULL(skb)) {
+	if (IS_ERR(skb)) {
 		err = PTR_ERR(skb);
 		goto out_free;
 	}
@@ -740,10 +734,10 @@ static int msft_cancel_address_filter_sync(struct hci_dev *hdev, void *data)
 	skb = __hci_cmd_sync(hdev, hdev->msft_opcode, sizeof(cp), &cp,
 			     HCI_CMD_TIMEOUT);
-	if (IS_ERR_OR_NULL(skb)) {
+	if (IS_ERR(skb)) {
 		bt_dev_err(hdev, "MSFT: Failed to cancel address (%pMR) filter",
 			   &address_filter->bdaddr);
-		err = -EIO;
+		err = PTR_ERR(skb);
 		goto done;
 	}
 	kfree_skb(skb);
@@ -893,7 +887,7 @@ static int msft_add_address_filter_sync(struct hci_dev *hdev, void *data)
 	skb = __hci_cmd_sync(hdev, hdev->msft_opcode, size, cp,
 			     HCI_CMD_TIMEOUT);
-	if (IS_ERR_OR_NULL(skb)) {
+	if (IS_ERR(skb)) {
 		bt_dev_err(hdev, "Failed to enable address %pMR filter",
 			   &address_filter->bdaddr);
 		skb = NULL;
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index 37f95ea8c7..1e7ea3a4b7 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -22,11 +22,10 @@
 
 #include <linux/debugfs.h>
 #include <linux/scatterlist.h>
-#include <linux/crypto.h>
 #include <crypto/aes.h>
-#include <crypto/algapi.h>
 #include <crypto/hash.h>
 #include <crypto/kpp.h>
+#include <crypto/utils.h>
 
 #include <net/bluetooth/bluetooth.h>
 #include <net/bluetooth/hci_core.h>
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 0841f8d824..711cf5d598 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -503,9 +503,8 @@ out:
  * architecture dependent calling conventions. 7+ can be supported in the
  * future.
  */
-__diag_push();
-__diag_ignore_all("-Wmissing-prototypes",
-		  "Global functions as their definitions will be in vmlinux BTF");
+__bpf_kfunc_start_defs();
+
 __bpf_kfunc int bpf_fentry_test1(int a)
 {
 	return a + 1;
@@ -543,7 +542,7 @@ struct bpf_fentry_test_t {
 
 int noinline bpf_fentry_test7(struct bpf_fentry_test_t *arg)
 {
-	asm volatile ("");
+	asm volatile ("": "+r"(arg));
 	return (long)arg;
 }
 
@@ -605,7 +604,7 @@ __bpf_kfunc void bpf_kfunc_call_memb_release(struct prog_test_member *p)
 {
 }
 
-__diag_pop();
+__bpf_kfunc_end_defs();
 
 BTF_SET8_START(bpf_test_modify_return_ids)
 BTF_ID_FLAGS(func, bpf_modify_return_test)
diff --git a/net/bridge/br.c b/net/bridge/br.c
index a6e94ceb7c..ac19b797db 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -477,3 +477,4 @@ module_exit(br_deinit)
 MODULE_LICENSE("GPL");
 MODULE_VERSION(BR_VERSION);
 MODULE_ALIAS_RTNL_LINK("bridge");
+MODULE_DESCRIPTION("Ethernet bridge driver");
diff --git a/net/bridge/br_cfm_netlink.c b/net/bridge/br_cfm_netlink.c
index 5c4c369f85..2faab44652 100644
--- a/net/bridge/br_cfm_netlink.c
+++ b/net/bridge/br_cfm_netlink.c
@@ -362,7 +362,7 @@ static int br_cc_ccm_tx_parse(struct net_bridge *br, struct nlattr *attr,
 
 	memset(&tx_info, 0, sizeof(tx_info));
 
-	instance = nla_get_u32(tb[IFLA_BRIDGE_CFM_CC_RDI_INSTANCE]);
+	instance = nla_get_u32(tb[IFLA_BRIDGE_CFM_CC_CCM_TX_INSTANCE]);
 	nla_memcpy(&tx_info.dmac.addr, tb[IFLA_BRIDGE_CFM_CC_CCM_TX_DMAC],
 		   sizeof(tx_info.dmac.addr));
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 9a5ea06236..8f40de3af1 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -92,7 +92,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
 			goto out;
 		}
 
-		mdst = br_mdb_get(brmctx, skb, vid);
+		mdst = br_mdb_entry_skb_get(brmctx, skb, vid);
 		if ((mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) &&
 		    br_multicast_querier_exists(brmctx, eth_hdr(skb), mdst))
 			br_multicast_flood(mdst, skb, brmctx, false, true);
@@ -472,6 +472,7 @@ static const struct net_device_ops br_netdev_ops = {
 	.ndo_mdb_add		 = br_mdb_add,
 	.ndo_mdb_del		 = br_mdb_del,
 	.ndo_mdb_dump		 = br_mdb_dump,
+	.ndo_mdb_get		 = br_mdb_get,
 	.ndo_bridge_getlink	 = br_getlink,
 	.ndo_bridge_setlink	 = br_setlink,
 	.ndo_bridge_dellink	 = br_dellink,
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index e69a872bfc..c622de5ecc 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -329,11 +329,18 @@ static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f,
 	hlist_del_init_rcu(&f->fdb_node);
 	rhashtable_remove_fast(&br->fdb_hash_tbl, &f->rhnode,
 			       br_fdb_rht_params);
+	if (test_and_clear_bit(BR_FDB_DYNAMIC_LEARNED, &f->flags))
+		atomic_dec(&br->fdb_n_learned);
 	fdb_notify(br, f, RTM_DELNEIGH, swdev_notify);
 	call_rcu(&f->rcu, fdb_rcu_free);
 }
 
-/* Delete a local entry if no other port had the same address. */
+/* Delete a local entry if no other port had the same address.
+ *
+ * This function should only be called on entries with BR_FDB_LOCAL set,
+ * so even with BR_FDB_ADDED_BY_USER cleared we never need to increase
+ * the accounting for dynamically learned entries again.
+ */
 static void fdb_delete_local(struct net_bridge *br,
 			     const struct net_bridge_port *p,
 			     struct net_bridge_fdb_entry *f)
@@ -388,9 +395,20 @@ static struct net_bridge_fdb_entry *fdb_create(struct net_bridge *br,
 					       __u16 vid,
 					       unsigned long flags)
 {
+	bool learned = !test_bit(BR_FDB_ADDED_BY_USER, &flags) &&
+		       !test_bit(BR_FDB_LOCAL, &flags);
+	u32 max_learned = READ_ONCE(br->fdb_max_learned);
 	struct net_bridge_fdb_entry *fdb;
 	int err;
 
+	if (likely(learned)) {
+		int n_learned = atomic_read(&br->fdb_n_learned);
+
+		if (unlikely(max_learned && n_learned >= max_learned))
+			return NULL;
+		__set_bit(BR_FDB_DYNAMIC_LEARNED, &flags);
+	}
+
 	fdb = kmem_cache_alloc(br_fdb_cache, GFP_ATOMIC);
 	if (!fdb)
 		return NULL;
@@ -407,6 +425,9 @@ static struct net_bridge_fdb_entry *fdb_create(struct net_bridge *br,
 		return NULL;
 	}
 
+	if (likely(learned))
+		atomic_inc(&br->fdb_n_learned);
+
 	hlist_add_head_rcu(&fdb->fdb_node, &br->fdb_list);
 
 	return fdb;
@@ -661,14 +682,30 @@ static int __fdb_flush_validate_ifindex(const struct net_bridge *br,
 	return 0;
 }
 
-int br_fdb_delete_bulk(struct ndmsg *ndm, struct nlattr *tb[],
-		       struct net_device *dev, u16 vid,
+static const struct nla_policy br_fdb_del_bulk_policy[NDA_MAX + 1] = {
+	[NDA_VLAN]		= NLA_POLICY_RANGE(NLA_U16, 1, VLAN_N_VID - 2),
+	[NDA_IFINDEX]		= NLA_POLICY_MIN(NLA_S32, 1),
+	[NDA_NDM_STATE_MASK]	= { .type = NLA_U16 },
+	[NDA_NDM_FLAGS_MASK]	= { .type = NLA_U8 },
+};
+
+int br_fdb_delete_bulk(struct nlmsghdr *nlh, struct net_device *dev,
 		       struct netlink_ext_ack *extack)
 {
-	u8 ndm_flags = ndm->ndm_flags & ~FDB_FLUSH_IGNORED_NDM_FLAGS;
-	struct net_bridge_fdb_flush_desc desc = { .vlan_id = vid };
+	struct net_bridge_fdb_flush_desc desc = {};
+	struct ndmsg *ndm = nlmsg_data(nlh);
 	struct net_bridge_port *p = NULL;
+	struct nlattr *tb[NDA_MAX + 1];
 	struct net_bridge *br;
+	u8 ndm_flags;
+	int err;
+
+	ndm_flags = ndm->ndm_flags & ~FDB_FLUSH_IGNORED_NDM_FLAGS;
+
+	err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX,
+			  br_fdb_del_bulk_policy, extack);
+	if (err)
+		return err;
 
 	if (netif_is_bridge_master(dev)) {
 		br = netdev_priv(dev);
@@ -681,6 +718,9 @@ int br_fdb_delete_bulk(struct nlmsghdr *nlh, struct net_device *dev,
 		br = p->br;
 	}
 
+	if (tb[NDA_VLAN])
+		desc.vlan_id = nla_get_u16(tb[NDA_VLAN]);
+
 	if (ndm_flags & ~FDB_FLUSH_ALLOWED_NDM_FLAGS) {
 		NL_SET_ERR_MSG(extack, "Unsupported fdb flush ndm flag bits set");
 		return -EINVAL;
@@ -703,7 +743,7 @@ int br_fdb_delete_bulk(struct nlmsghdr *nlh, struct net_device *dev,
 		desc.flags_mask |= __ndm_flags_to_fdb_flags(ndm_flags_mask);
 	}
 	if (tb[NDA_IFINDEX]) {
-		int err, ifidx = nla_get_s32(tb[NDA_IFINDEX]);
+		int ifidx = nla_get_s32(tb[NDA_IFINDEX]);
 
 		err = __fdb_flush_validate_ifindex(br, ifidx, extack);
 		if (err)
@@ -893,8 +933,12 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
 			clear_bit(BR_FDB_LOCKED, &fdb->flags);
 		}
 
-		if (unlikely(test_bit(BR_FDB_ADDED_BY_USER, &flags)))
+		if (unlikely(test_bit(BR_FDB_ADDED_BY_USER, &flags))) {
 			set_bit(BR_FDB_ADDED_BY_USER, &fdb->flags);
+			if (test_and_clear_bit(BR_FDB_DYNAMIC_LEARNED,
+					       &fdb->flags))
+				atomic_dec(&br->fdb_n_learned);
+		}
 		if (unlikely(fdb_modified)) {
 			trace_br_fdb_update(br, source, addr, vid, flags);
 			fdb_notify(br, fdb, RTM_NEWNEIGH, true);
@@ -1056,7 +1100,8 @@ static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source,
 		if (!(flags & NLM_F_CREATE))
 			return -ENOENT;
 
-		fdb = fdb_create(br, source, addr, vid, 0);
+		fdb = fdb_create(br, source, addr, vid,
+				 BIT(BR_FDB_ADDED_BY_USER));
 		if (!fdb)
 			return -ENOMEM;
 
@@ -1069,6 +1114,10 @@ static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source,
 			WRITE_ONCE(fdb->dst, source);
 			modified = true;
 		}
+
+		set_bit(BR_FDB_ADDED_BY_USER, &fdb->flags);
+		if (test_and_clear_bit(BR_FDB_DYNAMIC_LEARNED, &fdb->flags))
+			atomic_dec(&br->fdb_n_learned);
 	}
 
 	if (fdb_to_nud(br, fdb) != state) {
@@ -1100,8 +1149,6 @@ static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source,
 	if (fdb_handle_notify(fdb, notify))
 		modified = true;
 
-	set_bit(BR_FDB_ADDED_BY_USER, &fdb->flags);
-
 	fdb->used = jiffies;
 	if (modified) {
 		if (refresh)
@@ -1445,6 +1492,10 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
 		if (!p)
 			set_bit(BR_FDB_LOCAL, &fdb->flags);
 
+		if ((swdev_notify || !p) &&
+		    test_and_clear_bit(BR_FDB_DYNAMIC_LEARNED, &fdb->flags))
+			atomic_dec(&br->fdb_n_learned);
+
 		if (modified)
 			fdb_notify(br, fdb, RTM_NEWNEIGH, swdev_notify);
 	}
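The br_fdb.c hunks above introduce capped FDB learning: dynamically learned entries are counted in an atomic (fdb_n_learned) and learning is refused once the configurable fdb_max_learned is reached, while user-created and local entries bypass the cap. A minimal sketch of the scheme (not the kernel code; the cap is approximate under concurrency, just like the kernel's separate atomic_read/atomic_inc pair):

#include <stdatomic.h>
#include <stdbool.h>

struct fdb_table {
	atomic_int n_learned;
	int max_learned;	/* 0 means unlimited */
};

/* Learning path: returns false when the cap is reached. */
static bool fdb_try_learn(struct fdb_table *t)
{
	int n = atomic_load(&t->n_learned);

	if (t->max_learned && n >= t->max_learned)
		return false;
	atomic_fetch_add(&t->n_learned, 1);
	return true;
}

/* Called when an entry stops counting as "dynamically learned", e.g.
 * because userspace takes ownership of it, or when it is deleted. */
static void fdb_unaccount_learned(struct fdb_table *t)
{
	atomic_fetch_sub(&t->n_learned, 1);
}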
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index c729528b5e..f21097e734 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -175,7 +175,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
 
 	switch (pkt_type) {
 	case BR_PKT_MULTICAST:
-		mdst = br_mdb_get(brmctx, skb, vid);
+		mdst = br_mdb_entry_skb_get(brmctx, skb, vid);
 		if ((mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) &&
 		    br_multicast_querier_exists(brmctx, eth_hdr(skb), mdst)) {
 			if ((mdst && mdst->host_joined) ||
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 7305f5f821..8cc526067b 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -323,9 +323,6 @@ static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
 	struct net_bridge_mdb_entry *mp;
 	struct nlattr *nest, *nest2;
 
-	if (!br_opt_get(br, BROPT_MULTICAST_ENABLED))
-		return 0;
-
 	nest = nla_nest_start_noflag(skb, MDBA_MDB);
 	if (nest == NULL)
 		return -EMSGSIZE;
@@ -453,13 +450,15 @@ cancel:
 	return -EMSGSIZE;
 }
 
-static size_t rtnl_mdb_nlmsg_size(struct net_bridge_port_group *pg)
+static size_t rtnl_mdb_nlmsg_pg_size(const struct net_bridge_port_group *pg)
 {
-	size_t nlmsg_size = NLMSG_ALIGN(sizeof(struct br_port_msg)) +
-			    nla_total_size(sizeof(struct br_mdb_entry)) +
-			    nla_total_size(sizeof(u32));
 	struct net_bridge_group_src *ent;
-	size_t addr_size = 0;
+	size_t nlmsg_size, addr_size = 0;
+
+	/* MDBA_MDB_ENTRY_INFO */
+	nlmsg_size = nla_total_size(sizeof(struct br_mdb_entry)) +
+		     /* MDBA_MDB_EATTR_TIMER */
+		     nla_total_size(sizeof(u32));
 
 	if (!pg)
 		goto out;
@@ -507,6 +506,17 @@ out:
 	return nlmsg_size;
 }
 
+static size_t rtnl_mdb_nlmsg_size(const struct net_bridge_port_group *pg)
+{
+	return NLMSG_ALIGN(sizeof(struct br_port_msg)) +
+	       /* MDBA_MDB */
+	       nla_total_size(0) +
+	       /* MDBA_MDB_ENTRY */
+	       nla_total_size(0) +
+	       /* Port group entry */
+	       rtnl_mdb_nlmsg_pg_size(pg);
+}
+
 void br_mdb_notify(struct net_device *dev,
 		   struct net_bridge_mdb_entry *mp,
 		   struct net_bridge_port_group *pg,
@@ -1401,3 +1411,161 @@ int br_mdb_del(struct net_device *dev, struct nlattr *tb[],
 	br_mdb_config_fini(&cfg);
 	return err;
 }
+
+static const struct nla_policy br_mdbe_attrs_get_pol[MDBE_ATTR_MAX + 1] = {
+	[MDBE_ATTR_SOURCE] = NLA_POLICY_RANGE(NLA_BINARY,
+					      sizeof(struct in_addr),
+					      sizeof(struct in6_addr)),
+};
+
+static int br_mdb_get_parse(struct net_device *dev, struct nlattr *tb[],
+			    struct br_ip *group, struct netlink_ext_ack *extack)
+{
+	struct br_mdb_entry *entry = nla_data(tb[MDBA_GET_ENTRY]);
+	struct nlattr *mdbe_attrs[MDBE_ATTR_MAX + 1];
+	int err;
+
+	if (!tb[MDBA_GET_ENTRY_ATTRS]) {
+		__mdb_entry_to_br_ip(entry, group, NULL);
+		return 0;
+	}
+
+	err = nla_parse_nested(mdbe_attrs, MDBE_ATTR_MAX,
+			       tb[MDBA_GET_ENTRY_ATTRS], br_mdbe_attrs_get_pol,
+			       extack);
+	if (err)
+		return err;
+
+	if (mdbe_attrs[MDBE_ATTR_SOURCE] &&
+	    !is_valid_mdb_source(mdbe_attrs[MDBE_ATTR_SOURCE],
+				 entry->addr.proto, extack))
+		return -EINVAL;
+
+	__mdb_entry_to_br_ip(entry, group, mdbe_attrs);
+
+	return 0;
+}
+
+static struct sk_buff *
+br_mdb_get_reply_alloc(const struct net_bridge_mdb_entry *mp)
+{
+	struct net_bridge_port_group *pg;
+	size_t nlmsg_size;
+
+	nlmsg_size = NLMSG_ALIGN(sizeof(struct br_port_msg)) +
+		     /* MDBA_MDB */
+		     nla_total_size(0) +
+		     /* MDBA_MDB_ENTRY */
+		     nla_total_size(0);
+
+	if (mp->host_joined)
+		nlmsg_size += rtnl_mdb_nlmsg_pg_size(NULL);
+
+	for (pg = mlock_dereference(mp->ports, mp->br); pg;
+	     pg = mlock_dereference(pg->next, mp->br))
+		nlmsg_size += rtnl_mdb_nlmsg_pg_size(pg);
+
+	return nlmsg_new(nlmsg_size, GFP_ATOMIC);
+}
+
+static int br_mdb_get_reply_fill(struct sk_buff *skb,
+				 struct net_bridge_mdb_entry *mp, u32 portid,
+				 u32 seq)
+{
+	struct nlattr *mdb_nest, *mdb_entry_nest;
+	struct net_bridge_port_group *pg;
+	struct br_port_msg *bpm;
+	struct nlmsghdr *nlh;
+	int err;
+
+	nlh = nlmsg_put(skb, portid, seq, RTM_NEWMDB, sizeof(*bpm), 0);
+	if (!nlh)
+		return -EMSGSIZE;
+
+	bpm = nlmsg_data(nlh);
+	memset(bpm, 0, sizeof(*bpm));
+	bpm->family  = AF_BRIDGE;
+	bpm->ifindex = mp->br->dev->ifindex;
+	mdb_nest = nla_nest_start_noflag(skb, MDBA_MDB);
+	if (!mdb_nest) {
+		err = -EMSGSIZE;
+		goto cancel;
+	}
+	mdb_entry_nest = nla_nest_start_noflag(skb, MDBA_MDB_ENTRY);
+	if (!mdb_entry_nest) {
+		err = -EMSGSIZE;
+		goto cancel;
+	}
+
+	if (mp->host_joined) {
+		err = __mdb_fill_info(skb, mp, NULL);
+		if (err)
+			goto cancel;
+	}
+
+	for (pg = mlock_dereference(mp->ports, mp->br); pg;
+	     pg = mlock_dereference(pg->next, mp->br)) {
+		err = __mdb_fill_info(skb, mp, pg);
+		if (err)
+			goto cancel;
+	}
+
+	nla_nest_end(skb, mdb_entry_nest);
+	nla_nest_end(skb, mdb_nest);
+	nlmsg_end(skb, nlh);
+
+	return 0;
+
+cancel:
+	nlmsg_cancel(skb, nlh);
+	return err;
+}
+
+int br_mdb_get(struct net_device *dev, struct nlattr *tb[], u32 portid, u32 seq,
+	       struct netlink_ext_ack *extack)
+{
+	struct net_bridge *br = netdev_priv(dev);
+	struct net_bridge_mdb_entry *mp;
+	struct sk_buff *skb;
+	struct br_ip group;
+	int err;
+
+	err = br_mdb_get_parse(dev, tb, &group, extack);
+	if (err)
+		return err;
+
+	/* Hold the multicast lock to ensure that the MDB entry does not change
+	 * between the time the reply size is determined and when the reply is
+	 * filled in.
+	 */
+	spin_lock_bh(&br->multicast_lock);
+
+	mp = br_mdb_ip_get(br, &group);
+	if (!mp) {
+		NL_SET_ERR_MSG_MOD(extack, "MDB entry not found");
+		err = -ENOENT;
+		goto unlock;
+	}
+
+	skb = br_mdb_get_reply_alloc(mp);
+	if (!skb) {
+		err = -ENOMEM;
+		goto unlock;
+	}
+
+	err = br_mdb_get_reply_fill(skb, mp, portid, seq);
+	if (err) {
+		NL_SET_ERR_MSG_MOD(extack, "Failed to fill MDB get reply");
+		goto free;
+	}
+
+	spin_unlock_bh(&br->multicast_lock);
+
+	return rtnl_unicast(skb, dev_net(dev), portid);
+
+free:
+	kfree_skb(skb);
+unlock:
+	spin_unlock_bh(&br->multicast_lock);
+	return err;
+}
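br_mdb_get() above sizes the reply and fills it under one critical section, which is why the allocation uses GFP_ATOMIC: sleeping with the spinlock held is not allowed, and dropping the lock between sizing and filling would reopen the TOCTOU window. A minimal userspace sketch of the same snapshot-under-one-lock pattern; struct entry and entry_snapshot are illustrative, not kernel APIs.

#include <pthread.h>
#include <stdlib.h>
#include <string.h>

struct entry {
	pthread_mutex_t lock;
	size_t len;
	char data[256];
};

static char *entry_snapshot(struct entry *e, size_t *out_len)
{
	char *buf;

	pthread_mutex_lock(&e->lock);
	buf = malloc(e->len);			/* size estimate ...        */
	if (buf) {
		memcpy(buf, e->data, e->len);	/* ... and fill, same lock  */
		*out_len = e->len;
	}
	pthread_mutex_unlock(&e->lock);
	return buf;
}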
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 96d1fc78dd..2d7b732429 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -145,8 +145,9 @@ static struct net_bridge_mdb_entry *br_mdb_ip6_get(struct net_bridge *br,
 }
 #endif
 
-struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge_mcast *brmctx,
-					struct sk_buff *skb, u16 vid)
+struct net_bridge_mdb_entry *
+br_mdb_entry_skb_get(struct net_bridge_mcast *brmctx, struct sk_buff *skb,
+		     u16 vid)
 {
 	struct net_bridge *br = brmctx->br;
 	struct br_ip ip;
@@ -1761,6 +1762,10 @@ static void br_ip6_multicast_querier_expired(struct timer_list *t)
 }
 #endif
 
+static void br_multicast_query_delay_expired(struct timer_list *t)
+{
+}
+
 static void br_multicast_select_own_querier(struct net_bridge_mcast *brmctx,
 					    struct br_ip *ip,
 					    struct sk_buff *skb)
@@ -3197,7 +3202,7 @@ br_multicast_update_query_timer(struct net_bridge_mcast *brmctx,
 				unsigned long max_delay)
 {
 	if (!timer_pending(&query->timer))
-		query->delay_time = jiffies + max_delay;
+		mod_timer(&query->delay_timer, jiffies + max_delay);
 
 	mod_timer(&query->timer, jiffies + brmctx->multicast_querier_interval);
 }
@@ -4040,13 +4045,11 @@ void br_multicast_ctx_init(struct net_bridge *br,
 	brmctx->multicast_querier_interval = 255 * HZ;
 	brmctx->multicast_membership_interval = 260 * HZ;
 
-	brmctx->ip4_other_query.delay_time = 0;
 	brmctx->ip4_querier.port_ifidx = 0;
 	seqcount_spinlock_init(&brmctx->ip4_querier.seq, &br->multicast_lock);
 	brmctx->multicast_igmp_version = 2;
 #if IS_ENABLED(CONFIG_IPV6)
 	brmctx->multicast_mld_version = 1;
-	brmctx->ip6_other_query.delay_time = 0;
 	brmctx->ip6_querier.port_ifidx = 0;
 	seqcount_spinlock_init(&brmctx->ip6_querier.seq, &br->multicast_lock);
 #endif
@@ -4055,6 +4058,8 @@ void br_multicast_ctx_init(struct net_bridge *br,
 		    br_ip4_multicast_local_router_expired, 0);
 	timer_setup(&brmctx->ip4_other_query.timer,
 		    br_ip4_multicast_querier_expired, 0);
+	timer_setup(&brmctx->ip4_other_query.delay_timer,
+		    br_multicast_query_delay_expired, 0);
 	timer_setup(&brmctx->ip4_own_query.timer,
 		    br_ip4_multicast_query_expired, 0);
 #if IS_ENABLED(CONFIG_IPV6)
 	timer_setup(&brmctx->ip6_mc_router_timer,
 		    br_ip6_multicast_local_router_expired, 0);
 	timer_setup(&brmctx->ip6_other_query.timer,
 		    br_ip6_multicast_querier_expired, 0);
+	timer_setup(&brmctx->ip6_other_query.delay_timer,
+		    br_multicast_query_delay_expired, 0);
 	timer_setup(&brmctx->ip6_own_query.timer,
 		    br_ip6_multicast_query_expired, 0);
 #endif
@@ -4196,10 +4203,12 @@ static void __br_multicast_stop(struct net_bridge_mcast *brmctx)
 {
 	del_timer_sync(&brmctx->ip4_mc_router_timer);
 	del_timer_sync(&brmctx->ip4_other_query.timer);
+	del_timer_sync(&brmctx->ip4_other_query.delay_timer);
 	del_timer_sync(&brmctx->ip4_own_query.timer);
 #if IS_ENABLED(CONFIG_IPV6)
 	del_timer_sync(&brmctx->ip6_mc_router_timer);
 	del_timer_sync(&brmctx->ip6_other_query.timer);
+	del_timer_sync(&brmctx->ip6_other_query.delay_timer);
 	del_timer_sync(&brmctx->ip6_own_query.timer);
 #endif
 }
@@ -4642,13 +4651,15 @@ int br_multicast_set_querier(struct net_bridge_mcast *brmctx, unsigned long val)
 	max_delay = brmctx->multicast_query_response_interval;
 
 	if (!timer_pending(&brmctx->ip4_other_query.timer))
-		brmctx->ip4_other_query.delay_time = jiffies + max_delay;
+		mod_timer(&brmctx->ip4_other_query.delay_timer,
+			  jiffies + max_delay);
 
 	br_multicast_start_querier(brmctx, &brmctx->ip4_own_query);
 
 #if IS_ENABLED(CONFIG_IPV6)
 	if (!timer_pending(&brmctx->ip6_other_query.timer))
-		brmctx->ip6_other_query.delay_time = jiffies + max_delay;
+		mod_timer(&brmctx->ip6_other_query.delay_timer,
+			  jiffies + max_delay);
 
 	br_multicast_start_querier(brmctx, &brmctx->ip6_own_query);
 #endif
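The br_multicast.c change above replaces a saved jiffies deadline (delay_time) with a dummy timer whose callback body is empty: the state "still within the max response delay" is then carried entirely by whether the timer is pending, so readers no longer compare against jiffies (which can wrap and make a stale deadline look live again). A toy model of the shape, not kernel code; names are illustrative.

struct toy_timer {
	unsigned long expires;
	int pending;		/* cleared by an (empty) expiry callback */
};

static void toy_mod_timer(struct toy_timer *t, unsigned long expires)
{
	t->expires = expires;
	t->pending = 1;		/* re-arming also re-marks it pending */
}

static int toy_timer_pending(const struct toy_timer *t)
{
	return t->pending;
}

/* The bridge code arms the delay timer when a foreign query is seen:
 *   toy_mod_timer(&other_query.delay, now + max_delay);
 * and the querier-exists test becomes:
 *   !toy_timer_pending(&other_query.delay) && ...
 */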
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 92dae4c492..ed17208907 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -501,11 +501,11 @@ static unsigned int br_nf_pre_routing(void *priv,
 	struct brnf_net *brnet;
 
 	if (unlikely(!pskb_may_pull(skb, len)))
-		return NF_DROP;
+		return NF_DROP_REASON(skb, SKB_DROP_REASON_PKT_TOO_SMALL, 0);
 
 	p = br_port_get_rcu(state->in);
 	if (p == NULL)
-		return NF_DROP;
+		return NF_DROP_REASON(skb, SKB_DROP_REASON_DEV_READY, 0);
 	br = p->br;
 
 	brnet = net_generic(state->net, brnf_net_id);
@@ -516,7 +516,7 @@ static unsigned int br_nf_pre_routing(void *priv,
 			return NF_ACCEPT;
 		if (!ipv6_mod_enabled()) {
 			pr_warn_once("Module ipv6 is disabled, so call_ip6tables is not supported.");
-			return NF_DROP;
+			return NF_DROP_REASON(skb, SKB_DROP_REASON_IPV6DISABLED, 0);
 		}
 
 		nf_bridge_pull_encap_header_rcsum(skb);
@@ -533,12 +533,12 @@ static unsigned int br_nf_pre_routing(void *priv,
 	nf_bridge_pull_encap_header_rcsum(skb);
 
 	if (br_validate_ipv4(state->net, skb))
-		return NF_DROP;
+		return NF_DROP_REASON(skb, SKB_DROP_REASON_IP_INHDR, 0);
 
 	if (!nf_bridge_alloc(skb))
-		return NF_DROP;
+		return NF_DROP_REASON(skb, SKB_DROP_REASON_NOMEM, 0);
 	if (!setup_pre_routing(skb, state->net))
-		return NF_DROP;
+		return NF_DROP_REASON(skb, SKB_DROP_REASON_DEV_READY, 0);
 
 	nf_bridge = nf_bridge_info_get(skb);
 	nf_bridge->ipv4_daddr = ip_hdr(skb)->daddr;
@@ -589,18 +589,12 @@ static int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
 }
 
-/* This is the 'purely bridged' case.  For IP, we pass the packet to
- * netfilter with indev and outdev set to the bridge device,
- * but we are still able to filter on the 'real' indev/outdev
- * because of the physdev module. For ARP, indev and outdev are the
- * bridge ports. */
-static unsigned int br_nf_forward_ip(void *priv,
-				     struct sk_buff *skb,
-				     const struct nf_hook_state *state)
+static unsigned int br_nf_forward_ip(struct sk_buff *skb,
+				     const struct nf_hook_state *state,
+				     u8 pf)
 {
 	struct nf_bridge_info *nf_bridge;
 	struct net_device *parent;
-	u_int8_t pf;
 
 	nf_bridge = nf_bridge_info_get(skb);
 	if (!nf_bridge)
@@ -609,24 +603,15 @@ static unsigned int br_nf_forward_ip(struct sk_buff *skb,
 	/* Need exclusive nf_bridge_info since we might have multiple
 	 * different physoutdevs. */
 	if (!nf_bridge_unshare(skb))
-		return NF_DROP;
+		return NF_DROP_REASON(skb, SKB_DROP_REASON_NOMEM, 0);
 
 	nf_bridge = nf_bridge_info_get(skb);
 	if (!nf_bridge)
-		return NF_DROP;
+		return NF_DROP_REASON(skb, SKB_DROP_REASON_NOMEM, 0);
 
 	parent = bridge_parent(state->out);
 	if (!parent)
-		return NF_DROP;
-
-	if (IS_IP(skb) || is_vlan_ip(skb, state->net) ||
-	    is_pppoe_ip(skb, state->net))
-		pf = NFPROTO_IPV4;
-	else if (IS_IPV6(skb) || is_vlan_ipv6(skb, state->net) ||
-		 is_pppoe_ipv6(skb, state->net))
-		pf = NFPROTO_IPV6;
-	else
-		return NF_ACCEPT;
+		return NF_DROP_REASON(skb, SKB_DROP_REASON_DEV_READY, 0);
 
 	nf_bridge_pull_encap_header(skb);
 
@@ -637,21 +622,20 @@ static unsigned int br_nf_forward_ip(struct sk_buff *skb,
 
 	if (pf == NFPROTO_IPV4) {
 		if (br_validate_ipv4(state->net, skb))
-			return NF_DROP;
+			return NF_DROP_REASON(skb, SKB_DROP_REASON_IP_INHDR, 0);
 		IPCB(skb)->frag_max_size = nf_bridge->frag_max_size;
-	}
-
-	if (pf == NFPROTO_IPV6) {
+		skb->protocol = htons(ETH_P_IP);
+	} else if (pf == NFPROTO_IPV6) {
 		if (br_validate_ipv6(state->net, skb))
-			return NF_DROP;
+			return NF_DROP_REASON(skb, SKB_DROP_REASON_IP_INHDR, 0);
 		IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size;
+		skb->protocol = htons(ETH_P_IPV6);
+	} else {
+		WARN_ON_ONCE(1);
+		return NF_DROP;
 	}
 
 	nf_bridge->physoutdev = skb->dev;
-	if (pf == NFPROTO_IPV4)
-		skb->protocol = htons(ETH_P_IP);
-	else
-		skb->protocol = htons(ETH_P_IPV6);
 
 	NF_HOOK(pf, NF_INET_FORWARD, state->net, NULL, skb,
 		brnf_get_logical_dev(skb, state->in, state->net),
@@ -660,8 +644,7 @@ static unsigned int br_nf_forward_ip(struct sk_buff *skb,
 	return NF_STOLEN;
 }
-static unsigned int br_nf_forward_arp(void *priv,
-				      struct sk_buff *skb,
+static unsigned int br_nf_forward_arp(struct sk_buff *skb,
 				      const struct nf_hook_state *state)
 {
 	struct net_bridge_port *p;
@@ -678,14 +661,11 @@ static unsigned int br_nf_forward_arp(struct sk_buff *skb,
 	if (!brnet->call_arptables && !br_opt_get(br, BROPT_NF_CALL_ARPTABLES))
 		return NF_ACCEPT;
 
-	if (!IS_ARP(skb)) {
-		if (!is_vlan_arp(skb, state->net))
-			return NF_ACCEPT;
+	if (is_vlan_arp(skb, state->net))
 		nf_bridge_pull_encap_header(skb);
-	}
 
 	if (unlikely(!pskb_may_pull(skb, sizeof(struct arphdr))))
-		return NF_DROP;
+		return NF_DROP_REASON(skb, SKB_DROP_REASON_PKT_TOO_SMALL, 0);
 
 	if (arp_hdr(skb)->ar_pln != 4) {
 		if (is_vlan_arp(skb, state->net))
@@ -699,6 +679,28 @@ static unsigned int br_nf_forward_arp(struct sk_buff *skb,
 	return NF_STOLEN;
 }
 
+/* This is the 'purely bridged' case.  For IP, we pass the packet to
+ * netfilter with indev and outdev set to the bridge device,
+ * but we are still able to filter on the 'real' indev/outdev
+ * because of the physdev module. For ARP, indev and outdev are the
+ * bridge ports.
+ */
+static unsigned int br_nf_forward(void *priv,
+				  struct sk_buff *skb,
+				  const struct nf_hook_state *state)
+{
+	if (IS_IP(skb) || is_vlan_ip(skb, state->net) ||
+	    is_pppoe_ip(skb, state->net))
+		return br_nf_forward_ip(skb, state, NFPROTO_IPV4);
+	if (IS_IPV6(skb) || is_vlan_ipv6(skb, state->net) ||
+	    is_pppoe_ipv6(skb, state->net))
+		return br_nf_forward_ip(skb, state, NFPROTO_IPV6);
+	if (IS_ARP(skb) || is_vlan_arp(skb, state->net))
+		return br_nf_forward_arp(skb, state);
+
+	return NF_ACCEPT;
+}
+
 static int br_nf_push_frag_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	struct brnf_frag_data *data;
@@ -850,7 +852,7 @@ static unsigned int br_nf_post_routing(void *priv,
 		return NF_ACCEPT;
 
 	if (!realoutdev)
-		return NF_DROP;
+		return NF_DROP_REASON(skb, SKB_DROP_REASON_DEV_READY, 0);
 
 	if (IS_IP(skb) || is_vlan_ip(skb, state->net) ||
 	    is_pppoe_ip(skb, state->net))
@@ -963,13 +965,7 @@ static const struct nf_hook_ops br_nf_ops[] = {
 		.priority = NF_BR_PRI_BRNF,
 	},
 	{
-		.hook = br_nf_forward_ip,
-		.pf = NFPROTO_BRIDGE,
-		.hooknum = NF_BR_FORWARD,
-		.priority = NF_BR_PRI_BRNF - 1,
-	},
-	{
-		.hook = br_nf_forward_arp,
+		.hook = br_nf_forward,
 		.pf = NFPROTO_BRIDGE,
 		.hooknum = NF_BR_FORWARD,
 		.priority = NF_BR_PRI_BRNF,
diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c
index ad268bd19d..e0421eaa3a 100644
--- a/net/bridge/br_netfilter_ipv6.c
+++ b/net/bridge/br_netfilter_ipv6.c
@@ -167,13 +167,13 @@ unsigned int br_nf_pre_routing_ipv6(void *priv,
 	struct nf_bridge_info *nf_bridge;
 
 	if (br_validate_ipv6(state->net, skb))
-		return NF_DROP;
+		return NF_DROP_REASON(skb, SKB_DROP_REASON_IP_INHDR, 0);
 
 	nf_bridge = nf_bridge_alloc(skb);
 	if (!nf_bridge)
-		return NF_DROP;
+		return NF_DROP_REASON(skb, SKB_DROP_REASON_NOMEM, 0);
 	if (!setup_pre_routing(skb, state->net))
-		return NF_DROP;
+		return NF_DROP_REASON(skb, SKB_DROP_REASON_DEV_READY, 0);
 
 	nf_bridge = nf_bridge_info_get(skb);
 	nf_bridge->ipv6_daddr = ipv6_hdr(skb)->daddr;
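The NF_DROP -> NF_DROP_REASON() conversions above keep the same verdict but attach a machine-readable drop reason, so tracing tools watching the kfree_skb tracepoint can tell why a frame was discarded. A toy model of the pattern, not the netfilter implementation; names are illustrative.

#include <stdio.h>

enum drop_reason {
	DROP_PKT_TOO_SMALL,
	DROP_IP_INHDR,
	DROP_NOMEM,
	DROP_DEV_READY,
};

static const char * const reason_str[] = {
	[DROP_PKT_TOO_SMALL] = "packet too small",
	[DROP_IP_INHDR]      = "bad IP header",
	[DROP_NOMEM]         = "out of memory",
	[DROP_DEV_READY]     = "device not ready",
};

static int drop_with_reason(enum drop_reason r)
{
	/* stands in for kfree_skb_reason(skb, r) feeding a tracepoint */
	fprintf(stderr, "drop: %s\n", reason_str[r]);
	return 0; /* the NF_DROP verdict itself is unchanged */
}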
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 10f0d33d8c..5ad4abfcb7 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -1229,6 +1229,8 @@ static size_t br_port_get_slave_size(const struct net_device *brdev,
 }
 
 static const struct nla_policy br_policy[IFLA_BR_MAX + 1] = {
+	[IFLA_BR_UNSPEC]	= { .strict_start_type =
+				    IFLA_BR_FDB_N_LEARNED },
 	[IFLA_BR_FORWARD_DELAY]	= { .type = NLA_U32 },
 	[IFLA_BR_HELLO_TIME]	= { .type = NLA_U32 },
 	[IFLA_BR_MAX_AGE]	= { .type = NLA_U32 },
@@ -1265,6 +1267,8 @@ static const struct nla_policy br_policy[IFLA_BR_MAX + 1] = {
 	[IFLA_BR_VLAN_STATS_PER_PORT] = { .type = NLA_U8 },
 	[IFLA_BR_MULTI_BOOLOPT] =
 		NLA_POLICY_EXACT_LEN(sizeof(struct br_boolopt_multi)),
+	[IFLA_BR_FDB_N_LEARNED] = { .type = NLA_REJECT },
+	[IFLA_BR_FDB_MAX_LEARNED] = { .type = NLA_U32 },
 };
 
 static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
@@ -1539,6 +1543,12 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
 			return err;
 	}
 
+	if (data[IFLA_BR_FDB_MAX_LEARNED]) {
+		u32 val = nla_get_u32(data[IFLA_BR_FDB_MAX_LEARNED]);
+
+		WRITE_ONCE(br->fdb_max_learned, val);
+	}
+
 	return 0;
 }
 
@@ -1593,6 +1603,8 @@ static size_t br_get_size(const struct net_device *brdev)
 	       nla_total_size_64bit(sizeof(u64)) + /* IFLA_BR_TOPOLOGY_CHANGE_TIMER */
 	       nla_total_size_64bit(sizeof(u64)) + /* IFLA_BR_GC_TIMER */
 	       nla_total_size(ETH_ALEN) +       /* IFLA_BR_GROUP_ADDR */
+	       nla_total_size(sizeof(u32)) +    /* IFLA_BR_FDB_N_LEARNED */
+	       nla_total_size(sizeof(u32)) +    /* IFLA_BR_FDB_MAX_LEARNED */
 #ifdef CONFIG_BRIDGE_IGMP_SNOOPING
 	       nla_total_size(sizeof(u8)) +     /* IFLA_BR_MCAST_ROUTER */
 	       nla_total_size(sizeof(u8)) +     /* IFLA_BR_MCAST_SNOOPING */
@@ -1668,7 +1680,10 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev)
 	    nla_put_u8(skb, IFLA_BR_TOPOLOGY_CHANGE_DETECTED,
 		       br->topology_change_detected) ||
 	    nla_put(skb, IFLA_BR_GROUP_ADDR, ETH_ALEN, br->group_addr) ||
-	    nla_put(skb, IFLA_BR_MULTI_BOOLOPT, sizeof(bm), &bm))
+	    nla_put(skb, IFLA_BR_MULTI_BOOLOPT, sizeof(bm), &bm) ||
+	    nla_put_u32(skb, IFLA_BR_FDB_N_LEARNED,
+			atomic_read(&br->fdb_n_learned)) ||
+	    nla_put_u32(skb, IFLA_BR_FDB_MAX_LEARNED, br->fdb_max_learned))
 		return -EMSGSIZE;
 
 #ifdef CONFIG_BRIDGE_VLAN_FILTERING
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index a1f4acfa69..f317d8295b 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -78,7 +78,7 @@ struct bridge_mcast_own_query {
 /* other querier */
 struct bridge_mcast_other_query {
 	struct timer_list		timer;
-	unsigned long			delay_time;
+	struct timer_list		delay_timer;
 };
 
 /* selected querier */
@@ -274,6 +274,7 @@ enum {
 	BR_FDB_NOTIFY,
 	BR_FDB_NOTIFY_INACTIVE,
 	BR_FDB_LOCKED,
+	BR_FDB_DYNAMIC_LEARNED,
 };
 
 struct net_bridge_fdb_key {
@@ -555,6 +556,9 @@ struct net_bridge {
 	struct kobject			*ifobj;
 	u32				auto_cnt;
 
+	atomic_t			fdb_n_learned;
+	u32				fdb_max_learned;
+
 #ifdef CONFIG_NET_SWITCHDEV
 	/* Counter used to make sure that hardware domains get unique
 	 * identifiers in case a bridge spans multiple switchdev instances.
@@ -847,8 +851,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
 int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
 		  struct net_device *dev, const unsigned char *addr, u16 vid,
 		  struct netlink_ext_ack *extack);
-int br_fdb_delete_bulk(struct ndmsg *ndm, struct nlattr *tb[],
-		       struct net_device *dev, u16 vid,
+int br_fdb_delete_bulk(struct nlmsghdr *nlh, struct net_device *dev,
 		       struct netlink_ext_ack *extack);
 int br_fdb_add(struct ndmsg *nlh, struct nlattr *tb[], struct net_device *dev,
 	       const unsigned char *addr, u16 vid, u16 nlh_flags,
@@ -952,8 +955,9 @@ int br_multicast_rcv(struct net_bridge_mcast **brmctx,
 		     struct net_bridge_mcast_port **pmctx,
 		     struct net_bridge_vlan *vlan,
 		     struct sk_buff *skb, u16 vid);
-struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge_mcast *brmctx,
-					struct sk_buff *skb, u16 vid);
+struct net_bridge_mdb_entry *
+br_mdb_entry_skb_get(struct net_bridge_mcast *brmctx, struct sk_buff *skb,
+		     u16 vid);
 int br_multicast_add_port(struct net_bridge_port *port);
 void br_multicast_del_port(struct net_bridge_port *port);
 void br_multicast_enable_port(struct net_bridge_port *port);
@@ -1018,6 +1022,8 @@ int br_mdb_del(struct net_device *dev, struct nlattr *tb[],
 	       struct netlink_ext_ack *extack);
 int br_mdb_dump(struct net_device *dev, struct sk_buff *skb,
 		struct netlink_callback *cb);
+int br_mdb_get(struct net_device *dev, struct nlattr *tb[], u32 portid, u32 seq,
+	       struct netlink_ext_ack *extack);
 void br_multicast_host_join(const struct net_bridge_mcast *brmctx,
 			    struct net_bridge_mdb_entry *mp, bool notify);
 void br_multicast_host_leave(struct net_bridge_mdb_entry *mp, bool notify);
@@ -1149,7 +1155,7 @@ __br_multicast_querier_exists(struct net_bridge_mcast *brmctx,
 		own_querier_enabled = false;
 	}
 
-	return time_is_before_jiffies(querier->delay_time) &&
+	return !timer_pending(&querier->delay_timer) &&
 	       (own_querier_enabled || timer_pending(&querier->timer));
 }
 
@@ -1342,8 +1348,9 @@ static inline int br_multicast_rcv(struct net_bridge_mcast **brmctx,
 	return 0;
 }
 
-static inline struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge_mcast *brmctx,
-						      struct sk_buff *skb, u16 vid)
+static inline struct net_bridge_mdb_entry *
+br_mdb_entry_skb_get(struct net_bridge_mcast *brmctx, struct sk_buff *skb,
+		     u16 vid)
 {
 	return NULL;
 }
@@ -1427,6 +1434,13 @@ static inline int br_mdb_dump(struct net_device *dev, struct sk_buff *skb,
 	return 0;
 }
 
+static inline int br_mdb_get(struct net_device *dev, struct nlattr *tb[],
+			     u32 portid, u32 seq,
+			     struct netlink_ext_ack *extack)
+{
+	return -EOPNOTSUPP;
+}
+
 static inline int br_mdb_hash_init(struct net_bridge *br)
 {
 	return 0;
*/ - rcu_read_lock(); + spin_lock_bh(&br->multicast_lock); - hlist_for_each_entry_rcu(mp, &br->mdb_list, mdb_node) { + hlist_for_each_entry(mp, &br->mdb_list, mdb_node) { struct net_bridge_port_group __rcu * const *pp; const struct net_bridge_port_group *p; if (mp->host_joined) { - err = br_switchdev_mdb_queue_one(&mdb_list, + err = br_switchdev_mdb_queue_one(&mdb_list, dev, action, SWITCHDEV_OBJ_ID_HOST_MDB, mp, br_dev); if (err) { - rcu_read_unlock(); + spin_unlock_bh(&br->multicast_lock); goto out_free_mdb; } } - for (pp = &mp->ports; (p = rcu_dereference(*pp)) != NULL; + for (pp = &mp->ports; (p = mlock_dereference(*pp, br)) != NULL; pp = &p->next) { if (p->key.port->dev != dev) continue; - err = br_switchdev_mdb_queue_one(&mdb_list, + err = br_switchdev_mdb_queue_one(&mdb_list, dev, action, SWITCHDEV_OBJ_ID_PORT_MDB, mp, dev); if (err) { - rcu_read_unlock(); + spin_unlock_bh(&br->multicast_lock); goto out_free_mdb; } } } - rcu_read_unlock(); - - if (adding) - action = SWITCHDEV_PORT_OBJ_ADD; - else - action = SWITCHDEV_PORT_OBJ_DEL; + spin_unlock_bh(&br->multicast_lock); list_for_each_entry(obj, &mdb_list, list) { err = br_switchdev_mdb_replay_one(nb, dev, @@ -786,6 +804,16 @@ static void nbp_switchdev_unsync_objs(struct net_bridge_port *p, br_switchdev_mdb_replay(br_dev, dev, ctx, false, blocking_nb, NULL); br_switchdev_vlan_replay(br_dev, ctx, false, blocking_nb, NULL); + + /* Make sure that the device leaving this bridge has seen all + * relevant events before it is disassociated. In the normal + * case, when the device is directly attached to the bridge, + * this is covered by del_nbp(). If the association was indirect + * however, e.g. via a team or bond, and the device is leaving + * that intermediate device, then the bridge port remains in + * place. 
+ */ + switchdev_deferred_process(); } /* Let the bridge know that this port is offloaded, so that it can assign a diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c index 8f19253024..7413602195 100644 --- a/net/bridge/netfilter/ebtable_broute.c +++ b/net/bridge/netfilter/ebtable_broute.c @@ -135,3 +135,4 @@ static void __exit ebtable_broute_fini(void) module_init(ebtable_broute_init); module_exit(ebtable_broute_fini); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Force packets to be routed instead of bridged"); diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c index 278f324e67..dacd81b12e 100644 --- a/net/bridge/netfilter/ebtable_filter.c +++ b/net/bridge/netfilter/ebtable_filter.c @@ -116,3 +116,4 @@ static void __exit ebtable_filter_fini(void) module_init(ebtable_filter_init); module_exit(ebtable_filter_fini); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ebtables legacy filter table"); diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c index 9066f7f376..0f2a8c6118 100644 --- a/net/bridge/netfilter/ebtable_nat.c +++ b/net/bridge/netfilter/ebtable_nat.c @@ -116,3 +116,4 @@ static void __exit ebtable_nat_fini(void) module_init(ebtable_nat_init); module_exit(ebtable_nat_fini); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ebtables legacy stateless nat table"); diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index aa23479b20..99d82676f7 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -2595,3 +2595,4 @@ EXPORT_SYMBOL(ebt_do_table); module_init(ebtables_init); module_exit(ebtables_fini); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ebtables legacy core"); diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c index 0fcf357ea7..abb090f94e 100644 --- a/net/bridge/netfilter/nf_conntrack_bridge.c +++ b/net/bridge/netfilter/nf_conntrack_bridge.c @@ -416,3 +416,4 @@ module_exit(nf_conntrack_l3proto_bridge_fini); MODULE_ALIAS("nf_conntrack-" __stringify(AF_BRIDGE)); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Bridge IPv4 and IPv6 connection tracking"); diff --git a/net/can/j1939/j1939-priv.h b/net/can/j1939/j1939-priv.h index 16af1a7f80..31a93cae51 100644 --- a/net/can/j1939/j1939-priv.h +++ b/net/can/j1939/j1939-priv.h @@ -86,7 +86,7 @@ struct j1939_priv { unsigned int tp_max_packet_size; /* lock for j1939_socks list */ - spinlock_t j1939_socks_lock; + rwlock_t j1939_socks_lock; struct list_head j1939_socks; struct kref rx_kref; @@ -301,6 +301,7 @@ struct j1939_sock { int ifindex; struct j1939_addr addr; + spinlock_t filters_lock; struct j1939_filter *filters; int nfilters; pgn_t pgn_rx_filter; diff --git a/net/can/j1939/main.c b/net/can/j1939/main.c index ecff1c947d..a6fb89fa62 100644 --- a/net/can/j1939/main.c +++ b/net/can/j1939/main.c @@ -274,7 +274,7 @@ struct j1939_priv *j1939_netdev_start(struct net_device *ndev) return ERR_PTR(-ENOMEM); j1939_tp_init(priv); - spin_lock_init(&priv->j1939_socks_lock); + rwlock_init(&priv->j1939_socks_lock); INIT_LIST_HEAD(&priv->j1939_socks); mutex_lock(&j1939_netdev_lock); diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c index b28c976f52..305dd72c84 100644 --- a/net/can/j1939/socket.c +++ b/net/can/j1939/socket.c @@ -80,16 +80,16 @@ static void j1939_jsk_add(struct j1939_priv *priv, struct j1939_sock *jsk) jsk->state |= J1939_SOCK_BOUND; j1939_priv_get(priv); - spin_lock_bh(&priv->j1939_socks_lock); + 
write_lock_bh(&priv->j1939_socks_lock); list_add_tail(&jsk->list, &priv->j1939_socks); - spin_unlock_bh(&priv->j1939_socks_lock); + write_unlock_bh(&priv->j1939_socks_lock); } static void j1939_jsk_del(struct j1939_priv *priv, struct j1939_sock *jsk) { - spin_lock_bh(&priv->j1939_socks_lock); + write_lock_bh(&priv->j1939_socks_lock); list_del_init(&jsk->list); - spin_unlock_bh(&priv->j1939_socks_lock); + write_unlock_bh(&priv->j1939_socks_lock); j1939_priv_put(priv); jsk->state &= ~J1939_SOCK_BOUND; @@ -262,12 +262,17 @@ static bool j1939_sk_match_dst(struct j1939_sock *jsk, static bool j1939_sk_match_filter(struct j1939_sock *jsk, const struct j1939_sk_buff_cb *skcb) { - const struct j1939_filter *f = jsk->filters; - int nfilter = jsk->nfilters; + const struct j1939_filter *f; + int nfilter; + + spin_lock_bh(&jsk->filters_lock); + + f = jsk->filters; + nfilter = jsk->nfilters; if (!nfilter) /* receive all when no filters are assigned */ - return true; + goto filter_match_found; for (; nfilter; ++f, --nfilter) { if ((skcb->addr.pgn & f->pgn_mask) != f->pgn) @@ -276,9 +281,15 @@ static bool j1939_sk_match_filter(struct j1939_sock *jsk, continue; if ((skcb->addr.src_name & f->name_mask) != f->name) continue; - return true; + goto filter_match_found; } + + spin_unlock_bh(&jsk->filters_lock); return false; + +filter_match_found: + spin_unlock_bh(&jsk->filters_lock); + return true; } static bool j1939_sk_recv_match_one(struct j1939_sock *jsk, @@ -329,13 +340,13 @@ bool j1939_sk_recv_match(struct j1939_priv *priv, struct j1939_sk_buff_cb *skcb) struct j1939_sock *jsk; bool match = false; - spin_lock_bh(&priv->j1939_socks_lock); + read_lock_bh(&priv->j1939_socks_lock); list_for_each_entry(jsk, &priv->j1939_socks, list) { match = j1939_sk_recv_match_one(jsk, skcb); if (match) break; } - spin_unlock_bh(&priv->j1939_socks_lock); + read_unlock_bh(&priv->j1939_socks_lock); return match; } @@ -344,11 +355,11 @@ void j1939_sk_recv(struct j1939_priv *priv, struct sk_buff *skb) { struct j1939_sock *jsk; - spin_lock_bh(&priv->j1939_socks_lock); + read_lock_bh(&priv->j1939_socks_lock); list_for_each_entry(jsk, &priv->j1939_socks, list) { j1939_sk_recv_one(jsk, skb); } - spin_unlock_bh(&priv->j1939_socks_lock); + read_unlock_bh(&priv->j1939_socks_lock); } static void j1939_sk_sock_destruct(struct sock *sk) @@ -401,6 +412,7 @@ static int j1939_sk_init(struct sock *sk) atomic_set(&jsk->skb_pending, 0); spin_lock_init(&jsk->sk_session_queue_lock); INIT_LIST_HEAD(&jsk->sk_session_queue); + spin_lock_init(&jsk->filters_lock); /* j1939_sk_sock_destruct() depends on SOCK_RCU_FREE flag */ sock_set_flag(sk, SOCK_RCU_FREE); @@ -703,9 +715,11 @@ static int j1939_sk_setsockopt(struct socket *sock, int level, int optname, } lock_sock(&jsk->sk); + spin_lock_bh(&jsk->filters_lock); ofilters = jsk->filters; jsk->filters = filters; jsk->nfilters = count; + spin_unlock_bh(&jsk->filters_lock); release_sock(&jsk->sk); kfree(ofilters); return 0; @@ -884,7 +898,7 @@ static struct sk_buff *j1939_sk_alloc_skb(struct net_device *ndev, skcb = j1939_skb_to_cb(skb); memset(skcb, 0, sizeof(*skcb)); skcb->addr = jsk->addr; - skcb->priority = j1939_prio(sk->sk_priority); + skcb->priority = j1939_prio(READ_ONCE(sk->sk_priority)); if (msg->msg_name) { struct sockaddr_can *addr = msg->msg_name; @@ -1080,12 +1094,12 @@ void j1939_sk_errqueue(struct j1939_session *session, } /* spread RX notifications to all sockets subscribed to this session */ - spin_lock_bh(&priv->j1939_socks_lock); + read_lock_bh(&priv->j1939_socks_lock); 
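Two distinct locking changes run through the j1939 hunks: j1939_socks_lock becomes an rwlock_t because the receive and error-queue paths only traverse the socket list and may therefore run concurrently as readers, with bind/unbind as the exclusive writers; the new per-socket filters_lock separately closes a race where setsockopt(SO_J1939_FILTER) frees the old filter array while j1939_sk_match_filter() is still walking it. For reference, the guarded array is installed from userspace roughly like this (hedged example, error handling trimmed):

	#include <string.h>
	#include <sys/socket.h>
	#include <linux/can.h>
	#include <linux/can/j1939.h>

	/* Install a single PGN filter on a bound J1939 socket. The
	 * kernel copies the array and publishes it under the new
	 * filters_lock. */
	int set_pgn_filter(int sock, __u32 pgn)
	{
		struct j1939_filter filt;

		memset(&filt, 0, sizeof(filt));
		filt.pgn = pgn;
		filt.pgn_mask = J1939_PGN_MAX;	/* match this PGN exactly */

		return setsockopt(sock, SOL_CAN_J1939, SO_J1939_FILTER,
				  &filt, sizeof(filt));
	}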
list_for_each_entry(jsk, &priv->j1939_socks, list) { if (j1939_sk_recv_match_one(jsk, &session->skcb)) __j1939_sk_errqueue(session, &jsk->sk, type); } - spin_unlock_bh(&priv->j1939_socks_lock); + read_unlock_bh(&priv->j1939_socks_lock); }; void j1939_sk_send_loop_abort(struct sock *sk, int err) @@ -1273,7 +1287,7 @@ void j1939_sk_netdev_event_netdown(struct j1939_priv *priv) struct j1939_sock *jsk; int error_code = ENETDOWN; - spin_lock_bh(&priv->j1939_socks_lock); + read_lock_bh(&priv->j1939_socks_lock); list_for_each_entry(jsk, &priv->j1939_socks, list) { jsk->sk.sk_err = error_code; if (!sock_flag(&jsk->sk, SOCK_DEAD)) @@ -1281,7 +1295,7 @@ void j1939_sk_netdev_event_netdown(struct j1939_priv *priv) j1939_sk_queue_drop_all(priv, jsk, error_code); } - spin_unlock_bh(&priv->j1939_socks_lock); + read_unlock_bh(&priv->j1939_socks_lock); } static int j1939_sk_no_ioctlcmd(struct socket *sock, unsigned int cmd, diff --git a/net/can/raw.c b/net/can/raw.c index d50c3f3d89..e6b822624b 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -493,8 +493,7 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len) out_put_dev: /* remove potential reference from dev_get_by_index() */ - if (dev) - dev_put(dev); + dev_put(dev); out: release_sock(sk); rtnl_unlock(); @@ -881,7 +880,7 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) } skb->dev = dev; - skb->priority = sk->sk_priority; + skb->priority = READ_ONCE(sk->sk_priority); skb->mark = READ_ONCE(sk->sk_mark); skb->tstamp = sockc.transmit_time; diff --git a/net/ceph/messenger_v1.c b/net/ceph/messenger_v1.c index f9a50d7f0d..0cb61c76b9 100644 --- a/net/ceph/messenger_v1.c +++ b/net/ceph/messenger_v1.c @@ -160,8 +160,9 @@ static size_t sizeof_footer(struct ceph_connection *con) static void prepare_message_data(struct ceph_msg *msg, u32 data_len) { /* Initialize data cursor if it's not a sparse read */ - if (!msg->sparse_read) - ceph_msg_data_cursor_init(&msg->cursor, msg, data_len); + u64 len = msg->sparse_read_total ? 
: data_len; + + ceph_msg_data_cursor_init(&msg->cursor, msg, len); } /* @@ -991,7 +992,7 @@ static inline int read_partial_message_section(struct ceph_connection *con, return read_partial_message_chunk(con, section, sec_len, crc); } -static int read_sparse_msg_extent(struct ceph_connection *con, u32 *crc) +static int read_partial_sparse_msg_extent(struct ceph_connection *con, u32 *crc) { struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor; bool do_bounce = ceph_test_opt(from_msgr(con->msgr), RXBOUNCE); @@ -1026,7 +1027,7 @@ static int read_sparse_msg_extent(struct ceph_connection *con, u32 *crc) return 1; } -static int read_sparse_msg_data(struct ceph_connection *con) +static int read_partial_sparse_msg_data(struct ceph_connection *con) { struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor; bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC); @@ -1036,31 +1037,31 @@ static int read_sparse_msg_data(struct ceph_connection *con) if (do_datacrc) crc = con->in_data_crc; - do { + while (cursor->total_resid) { if (con->v1.in_sr_kvec.iov_base) ret = read_partial_message_chunk(con, &con->v1.in_sr_kvec, con->v1.in_sr_len, &crc); else if (cursor->sr_resid > 0) - ret = read_sparse_msg_extent(con, &crc); - - if (ret <= 0) { - if (do_datacrc) - con->in_data_crc = crc; - return ret; - } + ret = read_partial_sparse_msg_extent(con, &crc); + if (ret <= 0) + break; memset(&con->v1.in_sr_kvec, 0, sizeof(con->v1.in_sr_kvec)); ret = con->ops->sparse_read(con, cursor, (char **)&con->v1.in_sr_kvec.iov_base); + if (ret <= 0) { + ret = ret ? ret : 1; /* must return > 0 to indicate success */ + break; + } con->v1.in_sr_len = ret; - } while (ret > 0); + } if (do_datacrc) con->in_data_crc = crc; - return ret < 0 ? ret : 1; /* must return > 0 to indicate success */ + return ret; } static int read_partial_msg_data(struct ceph_connection *con) @@ -1253,8 +1254,8 @@ static int read_partial_message(struct ceph_connection *con) if (!m->num_data_items) return -EIO; - if (m->sparse_read) - ret = read_sparse_msg_data(con); + if (m->sparse_read_total) + ret = read_partial_sparse_msg_data(con); else if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) ret = read_partial_msg_data_bounce(con); else diff --git a/net/ceph/messenger_v2.c b/net/ceph/messenger_v2.c index d09a39ff2c..a0ca5414b3 100644 --- a/net/ceph/messenger_v2.c +++ b/net/ceph/messenger_v2.c @@ -733,8 +733,6 @@ static int setup_crypto(struct ceph_connection *con, return ret; } - WARN_ON((unsigned long)session_key & - crypto_shash_alignmask(con->v2.hmac_tfm)); ret = crypto_shash_setkey(con->v2.hmac_tfm, session_key, session_key_len); if (ret) { @@ -816,8 +814,6 @@ static int hmac_sha256(struct ceph_connection *con, const struct kvec *kvecs, goto out; for (i = 0; i < kvec_cnt; i++) { - WARN_ON((unsigned long)kvecs[i].iov_base & - crypto_shash_alignmask(con->v2.hmac_tfm)); ret = crypto_shash_update(desc, kvecs[i].iov_base, kvecs[i].iov_len); if (ret) @@ -1132,7 +1128,7 @@ static int decrypt_tail(struct ceph_connection *con) struct sg_table enc_sgt = {}; struct sg_table sgt = {}; struct page **pages = NULL; - bool sparse = con->in_msg->sparse_read; + bool sparse = !!con->in_msg->sparse_read_total; int dpos = 0; int tail_len; int ret; @@ -2064,7 +2060,7 @@ static int prepare_read_tail_plain(struct ceph_connection *con) } if (data_len(msg)) { - if (msg->sparse_read) + if (msg->sparse_read_total) con->v2.in_state = IN_S_PREPARE_SPARSE_DATA; else con->v2.in_state = IN_S_PREPARE_READ_DATA; diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index 
faabad6603..f263f7e91a 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -1136,6 +1136,7 @@ static int build_initial_monmap(struct ceph_mon_client *monc) GFP_KERNEL); if (!monc->monmap) return -ENOMEM; + monc->monmap->num_mon = num_mon; for (i = 0; i < num_mon; i++) { struct ceph_entity_inst *inst = &monc->monmap->mon_inst[i]; @@ -1147,7 +1148,6 @@ static int build_initial_monmap(struct ceph_mon_client *monc) inst->name.type = CEPH_ENTITY_TYPE_MON; inst->name.num = cpu_to_le64(i); } - monc->monmap->num_mon = num_mon; return 0; } diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index d3a759e052..3babcd5e65 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -5510,7 +5510,7 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, } m = ceph_msg_get(req->r_reply); - m->sparse_read = (bool)srlen; + m->sparse_read_total = srlen; dout("get_reply tid %lld %p\n", tid, m); @@ -5777,11 +5777,8 @@ static int prep_next_sparse_read(struct ceph_connection *con, } if (o->o_sparse_op_idx < 0) { - u64 srlen = sparse_data_requested(req); - - dout("%s: [%d] starting new sparse read req. srlen=0x%llx\n", - __func__, o->o_osd, srlen); - ceph_msg_data_cursor_init(cursor, con->in_msg, srlen); + dout("%s: [%d] starting new sparse read req\n", + __func__, o->o_osd); } else { u64 end; @@ -5859,8 +5856,8 @@ static int osd_sparse_read(struct ceph_connection *con, struct ceph_osd *o = con->private; struct ceph_sparse_read *sr = &o->o_sparse_read; u32 count = sr->sr_count; - u64 eoff, elen; - int ret; + u64 eoff, elen, len = 0; + int i, ret; switch (sr->sr_state) { case CEPH_SPARSE_READ_HDR: @@ -5912,8 +5909,20 @@ next_op: convert_extent_map(sr); ret = sizeof(sr->sr_datalen); *pbuf = (char *)&sr->sr_datalen; - sr->sr_state = CEPH_SPARSE_READ_DATA; + sr->sr_state = CEPH_SPARSE_READ_DATA_PRE; break; + case CEPH_SPARSE_READ_DATA_PRE: + /* Convert sr_datalen to host-endian */ + sr->sr_datalen = le32_to_cpu((__force __le32)sr->sr_datalen); + for (i = 0; i < count; i++) + len += sr->sr_extent[i].len; + if (sr->sr_datalen != len) { + pr_warn_ratelimited("data len %u != extent len %llu\n", + sr->sr_datalen, len); + return -EREMOTEIO; + } + sr->sr_state = CEPH_SPARSE_READ_DATA; + fallthrough; case CEPH_SPARSE_READ_DATA: if (sr->sr_index >= count) { sr->sr_state = CEPH_SPARSE_READ_HDR; diff --git a/net/core/Makefile b/net/core/Makefile index 731db2eaa6..0cb734cbc2 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -40,3 +40,4 @@ obj-$(CONFIG_NET_SOCK_MSG) += skmsg.o obj-$(CONFIG_BPF_SYSCALL) += sock_map.o obj-$(CONFIG_BPF_SYSCALL) += bpf_sk_storage.o obj-$(CONFIG_OF) += of_net.o +obj-$(CONFIG_NET_TEST) += gso_test.o diff --git a/net/core/datagram.c b/net/core/datagram.c index 176eb58347..a8b625abe2 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -50,7 +50,7 @@ #include <linux/spinlock.h> #include <linux/slab.h> #include <linux/pagemap.h> -#include <linux/uio.h> +#include <linux/iov_iter.h> #include <linux/indirect_call_wrapper.h> #include <net/protocol.h> @@ -61,6 +61,7 @@ #include <net/tcp_states.h> #include <trace/events/skb.h> #include <net/busy_poll.h> +#include <crypto/hash.h> /* * Is a socket 'connection oriented' ? 
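The extra CEPH_SPARSE_READ_DATA_PRE state above exists because sr_datalen is filled by the previous state as an opaque buffer read straight off the socket, so it is still little-endian when parsing resumes; the conversion and the cross-check against the advertised extents therefore happen here, and the state falls through into CEPH_SPARSE_READ_DATA once the lengths agree. The check in isolation (a restated sketch of the hunk above, not new code):

	/* sr_datalen was received raw, so reinterpret it as __le32 and
	 * convert; it must then equal the sum of the extent lengths the
	 * OSD advertised, or the reply is inconsistent. */
	u32 count = sr->sr_count;
	u64 total = 0;
	u32 i;

	sr->sr_datalen = le32_to_cpu((__force __le32)sr->sr_datalen);
	for (i = 0; i < count; i++)
		total += sr->sr_extent[i].len;
	if (sr->sr_datalen != total)
		return -EREMOTEIO;	/* fail the read, don't trust the data */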
@@ -489,6 +490,24 @@ short_copy: return 0; } +static size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp, + struct iov_iter *i) +{ +#ifdef CONFIG_CRYPTO_HASH + struct ahash_request *hash = hashp; + struct scatterlist sg; + size_t copied; + + copied = copy_to_iter(addr, bytes, i); + sg_init_one(&sg, addr, copied); + ahash_request_set_crypt(hash, &sg, NULL, copied); + crypto_ahash_update(hash); + return copied; +#else + return 0; +#endif +} + /** * skb_copy_and_hash_datagram_iter - Copy datagram to an iovec iterator * and update a hash. @@ -716,6 +735,60 @@ int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from) } EXPORT_SYMBOL(zerocopy_sg_from_iter); +static __always_inline +size_t copy_to_user_iter_csum(void __user *iter_to, size_t progress, + size_t len, void *from, void *priv2) +{ + __wsum next, *csum = priv2; + + next = csum_and_copy_to_user(from + progress, iter_to, len); + *csum = csum_block_add(*csum, next, progress); + return next ? 0 : len; +} + +static __always_inline +size_t memcpy_to_iter_csum(void *iter_to, size_t progress, + size_t len, void *from, void *priv2) +{ + __wsum *csum = priv2; + __wsum next = csum_partial_copy_nocheck(from + progress, iter_to, len); + + *csum = csum_block_add(*csum, next, progress); + return 0; +} + +struct csum_state { + __wsum csum; + size_t off; +}; + +static size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *_csstate, + struct iov_iter *i) +{ + struct csum_state *csstate = _csstate; + __wsum sum; + + if (WARN_ON_ONCE(i->data_source)) + return 0; + if (unlikely(iov_iter_is_discard(i))) { + // can't use csum_memcpy() for that one - data is not copied + csstate->csum = csum_block_add(csstate->csum, + csum_partial(addr, bytes, 0), + csstate->off); + csstate->off += bytes; + return bytes; + } + + sum = csum_shift(csstate->csum, csstate->off); + + bytes = iterate_and_advance2(i, bytes, (void *)addr, &sum, + copy_to_user_iter_csum, + memcpy_to_iter_csum); + csstate->csum = csum_shift(sum, csstate->off); + csstate->off += bytes; + return bytes; +} + /** * skb_copy_and_csum_datagram - Copy datagram to an iovec iterator * and update a checksum. diff --git a/net/core/dev.c b/net/core/dev.c index d72a4ff689..add22ca0df 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1057,7 +1057,7 @@ EXPORT_SYMBOL(dev_valid_name); * __dev_alloc_name - allocate a name for a device * @net: network namespace to allocate the device name in * @name: name format string - * @buf: scratch buffer and result name string + * @res: result name string * * Passed a format string - eg "lt%d" it will try and find a suitable * id. It scans list of devices to build up a free map, then chooses @@ -1068,106 +1068,81 @@ EXPORT_SYMBOL(dev_valid_name); * Returns the number of the unit assigned or a negative errno code. */ -static int __dev_alloc_name(struct net *net, const char *name, char *buf) +static int __dev_alloc_name(struct net *net, const char *name, char *res) { int i = 0; const char *p; const int max_netdevices = 8*PAGE_SIZE; unsigned long *inuse; struct net_device *d; + char buf[IFNAMSIZ]; - if (!dev_valid_name(name)) - return -EINVAL; - + /* Verify the string as this thing may have come from the user. + * There must be one "%d" and no other "%" characters. + */ p = strchr(name, '%'); - if (p) { - /* - * Verify the string as this thing may have come from - * the user. There must be either one "%d" and no other "%" - * characters. 
- */ - if (p[1] != 'd' || strchr(p + 2, '%')) - return -EINVAL; - - /* Use one page as a bit array of possible slots */ - inuse = bitmap_zalloc(max_netdevices, GFP_ATOMIC); - if (!inuse) - return -ENOMEM; + if (!p || p[1] != 'd' || strchr(p + 2, '%')) + return -EINVAL; - for_each_netdev(net, d) { - struct netdev_name_node *name_node; + /* Use one page as a bit array of possible slots */ + inuse = bitmap_zalloc(max_netdevices, GFP_ATOMIC); + if (!inuse) + return -ENOMEM; - netdev_for_each_altname(d, name_node) { - if (!sscanf(name_node->name, name, &i)) - continue; - if (i < 0 || i >= max_netdevices) - continue; + for_each_netdev(net, d) { + struct netdev_name_node *name_node; - /* avoid cases where sscanf is not exact inverse of printf */ - snprintf(buf, IFNAMSIZ, name, i); - if (!strncmp(buf, name_node->name, IFNAMSIZ)) - __set_bit(i, inuse); - } - if (!sscanf(d->name, name, &i)) + netdev_for_each_altname(d, name_node) { + if (!sscanf(name_node->name, name, &i)) continue; if (i < 0 || i >= max_netdevices) continue; - /* avoid cases where sscanf is not exact inverse of printf */ + /* avoid cases where sscanf is not exact inverse of printf */ snprintf(buf, IFNAMSIZ, name, i); - if (!strncmp(buf, d->name, IFNAMSIZ)) + if (!strncmp(buf, name_node->name, IFNAMSIZ)) __set_bit(i, inuse); } + if (!sscanf(d->name, name, &i)) + continue; + if (i < 0 || i >= max_netdevices) + continue; - i = find_first_zero_bit(inuse, max_netdevices); - bitmap_free(inuse); + /* avoid cases where sscanf is not exact inverse of printf */ + snprintf(buf, IFNAMSIZ, name, i); + if (!strncmp(buf, d->name, IFNAMSIZ)) + __set_bit(i, inuse); } - snprintf(buf, IFNAMSIZ, name, i); - if (!netdev_name_in_use(net, buf)) - return i; + i = find_first_zero_bit(inuse, max_netdevices); + bitmap_free(inuse); + if (i == max_netdevices) + return -ENFILE; - /* It is possible to run out of possible slots - * when the name is long and there isn't enough space left - * for the digits, or if all bits are used. - */ - return -ENFILE; + /* 'res' and 'name' could overlap, use 'buf' as an intermediate buffer */ + strscpy(buf, name, IFNAMSIZ); + snprintf(res, IFNAMSIZ, buf, i); + return i; } +/* Returns negative errno or allocated unit id (see __dev_alloc_name()) */ static int dev_prep_valid_name(struct net *net, struct net_device *dev, - const char *want_name, char *out_name) + const char *want_name, char *out_name, + int dup_errno) { - int ret; - if (!dev_valid_name(want_name)) return -EINVAL; - if (strchr(want_name, '%')) { - ret = __dev_alloc_name(net, want_name, out_name); - return ret < 0 ? 
ret : 0; - } else if (netdev_name_in_use(net, want_name)) { - return -EEXIST; - } else if (out_name != want_name) { - strscpy(out_name, want_name, IFNAMSIZ); - } + if (strchr(want_name, '%')) + return __dev_alloc_name(net, want_name, out_name); + if (netdev_name_in_use(net, want_name)) + return -dup_errno; + if (out_name != want_name) + strscpy(out_name, want_name, IFNAMSIZ); return 0; } -static int dev_alloc_name_ns(struct net *net, - struct net_device *dev, - const char *name) -{ - char buf[IFNAMSIZ]; - int ret; - - BUG_ON(!net); - ret = __dev_alloc_name(net, name, buf); - if (ret >= 0) - strscpy(dev->name, buf, IFNAMSIZ); - return ret; -} - /** * dev_alloc_name - allocate a name for a device * @dev: device @@ -1184,20 +1159,17 @@ static int dev_alloc_name_ns(struct net *net, int dev_alloc_name(struct net_device *dev, const char *name) { - return dev_alloc_name_ns(dev_net(dev), dev, name); + return dev_prep_valid_name(dev_net(dev), dev, name, dev->name, ENFILE); } EXPORT_SYMBOL(dev_alloc_name); static int dev_get_valid_name(struct net *net, struct net_device *dev, const char *name) { - char buf[IFNAMSIZ]; int ret; - ret = dev_prep_valid_name(net, dev, name, buf); - if (ret >= 0) - strscpy(dev->name, buf, IFNAMSIZ); - return ret; + ret = dev_prep_valid_name(net, dev, name, dev->name, EEXIST); + return ret < 0 ? ret : 0; } /** @@ -3942,7 +3914,8 @@ EXPORT_SYMBOL_GPL(netdev_xmit_skip_txqueue); #endif /* CONFIG_NET_EGRESS */ #ifdef CONFIG_NET_XGRESS -static int tc_run(struct tcx_entry *entry, struct sk_buff *skb) +static int tc_run(struct tcx_entry *entry, struct sk_buff *skb, + enum skb_drop_reason *drop_reason) { int ret = TC_ACT_UNSPEC; #ifdef CONFIG_NET_CLS_ACT @@ -3954,12 +3927,14 @@ static int tc_run(struct tcx_entry *entry, struct sk_buff *skb) tc_skb_cb(skb)->mru = 0; tc_skb_cb(skb)->post_ct = false; + res.drop_reason = *drop_reason; mini_qdisc_bstats_cpu_update(miniq, skb); ret = tcf_classify(skb, miniq->block, miniq->filter_list, &res, false); /* Only tcf related quirks below. 
*/ switch (ret) { case TC_ACT_SHOT: + *drop_reason = res.drop_reason; mini_qdisc_qstats_cpu_drop(miniq); break; case TC_ACT_OK: @@ -4009,6 +3984,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, struct net_device *orig_dev, bool *another) { struct bpf_mprog_entry *entry = rcu_dereference_bh(skb->dev->tcx_ingress); + enum skb_drop_reason drop_reason = SKB_DROP_REASON_TC_INGRESS; int sch_ret; if (!entry) @@ -4026,7 +4002,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, if (sch_ret != TC_ACT_UNSPEC) goto ingress_verdict; } - sch_ret = tc_run(tcx_entry(entry), skb); + sch_ret = tc_run(tcx_entry(entry), skb, &drop_reason); ingress_verdict: switch (sch_ret) { case TC_ACT_REDIRECT: @@ -4043,7 +4019,7 @@ ingress_verdict: *ret = NET_RX_SUCCESS; return NULL; case TC_ACT_SHOT: - kfree_skb_reason(skb, SKB_DROP_REASON_TC_INGRESS); + kfree_skb_reason(skb, drop_reason); *ret = NET_RX_DROP; return NULL; /* used by tc_run */ @@ -4064,6 +4040,7 @@ static __always_inline struct sk_buff * sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev) { struct bpf_mprog_entry *entry = rcu_dereference_bh(dev->tcx_egress); + enum skb_drop_reason drop_reason = SKB_DROP_REASON_TC_EGRESS; int sch_ret; if (!entry) @@ -4077,7 +4054,7 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev) if (sch_ret != TC_ACT_UNSPEC) goto egress_verdict; } - sch_ret = tc_run(tcx_entry(entry), skb); + sch_ret = tc_run(tcx_entry(entry), skb, &drop_reason); egress_verdict: switch (sch_ret) { case TC_ACT_REDIRECT: @@ -4086,7 +4063,7 @@ egress_verdict: *ret = NET_XMIT_SUCCESS; return NULL; case TC_ACT_SHOT: - kfree_skb_reason(skb, SKB_DROP_REASON_TC_EGRESS); + kfree_skb_reason(skb, drop_reason); *ret = NET_XMIT_DROP; return NULL; /* used by tc_run */ @@ -6555,9 +6532,11 @@ static int __napi_poll(struct napi_struct *n, bool *repoll) * accidentally calling ->poll() when NAPI is not scheduled. */ work = 0; - if (test_bit(NAPI_STATE_SCHED, &n->state)) { + if (napi_is_scheduled(n)) { work = n->poll(n, weight); trace_napi_poll(n, work, weight); + + xdp_do_check_flushed(n); } if (unlikely(work > weight)) @@ -9055,6 +9034,28 @@ bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b) } EXPORT_SYMBOL(netdev_port_same_parent_id); +static void netdev_dpll_pin_assign(struct net_device *dev, struct dpll_pin *dpll_pin) +{ +#if IS_ENABLED(CONFIG_DPLL) + rtnl_lock(); + dev->dpll_pin = dpll_pin; + rtnl_unlock(); +#endif +} + +void netdev_dpll_pin_set(struct net_device *dev, struct dpll_pin *dpll_pin) +{ + WARN_ON(!dpll_pin); + netdev_dpll_pin_assign(dev, dpll_pin); +} +EXPORT_SYMBOL(netdev_dpll_pin_set); + +void netdev_dpll_pin_clear(struct net_device *dev) +{ + netdev_dpll_pin_assign(dev, NULL); +} +EXPORT_SYMBOL(netdev_dpll_pin_clear); + /** * dev_change_proto_down - set carrier according to proto_down. 
* @@ -10562,7 +10563,8 @@ void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64, } EXPORT_SYMBOL(netdev_stats_to_stats64); -struct net_device_core_stats __percpu *netdev_core_stats_alloc(struct net_device *dev) +static __cold struct net_device_core_stats __percpu *netdev_core_stats_alloc( + struct net_device *dev) { struct net_device_core_stats __percpu *p; @@ -10575,7 +10577,23 @@ struct net_device_core_stats __percpu *netdev_core_stats_alloc(struct net_device /* This READ_ONCE() pairs with the cmpxchg() above */ return READ_ONCE(dev->core_stats); } -EXPORT_SYMBOL(netdev_core_stats_alloc); + +noinline void netdev_core_stats_inc(struct net_device *dev, u32 offset) +{ + /* This READ_ONCE() pairs with the write in netdev_core_stats_alloc() */ + struct net_device_core_stats __percpu *p = READ_ONCE(dev->core_stats); + unsigned long __percpu *field; + + if (unlikely(!p)) { + p = netdev_core_stats_alloc(dev); + if (!p) + return; + } + + field = (__force unsigned long __percpu *)((__force void *)p + offset); + this_cpu_inc(*field); +} +EXPORT_SYMBOL_GPL(netdev_core_stats_inc); /** * dev_get_stats - get network device statistics @@ -11149,7 +11167,7 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net, /* We get here if we can't use the current device name */ if (!pat) goto out; - err = dev_prep_valid_name(net, dev, pat, new_name); + err = dev_prep_valid_name(net, dev, pat, new_name, EEXIST); if (err < 0) goto out; } diff --git a/net/core/dev.h b/net/core/dev.h index f2037d4021..3f5eb92396 100644 --- a/net/core/dev.h +++ b/net/core/dev.h @@ -142,4 +142,10 @@ static inline void netif_set_gro_ipv4_max_size(struct net_device *dev, } int rps_cpumask_housekeeping(struct cpumask *mask); + +#if defined(CONFIG_DEBUG_NET) && defined(CONFIG_BPF_SYSCALL) +void xdp_do_check_flushed(struct napi_struct *napi); +#else +static inline void xdp_do_check_flushed(struct napi_struct *napi) { } +#endif #endif diff --git a/net/core/dev_addr_lists_test.c b/net/core/dev_addr_lists_test.c index 90e7e3811a..4dbd0dc6ae 100644 --- a/net/core/dev_addr_lists_test.c +++ b/net/core/dev_addr_lists_test.c @@ -233,4 +233,5 @@ static struct kunit_suite dev_addr_test_suite = { }; kunit_test_suite(dev_addr_test_suite); +MODULE_DESCRIPTION("KUnit tests for struct netdev_hw_addr_list"); MODULE_LICENSE("GPL"); diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index b46aedc369..feeddf95f4 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -382,7 +382,7 @@ static int dev_set_hwtstamp(struct net_device *dev, struct ifreq *ifr) if (err) return err; - err = dsa_master_hwtstamp_validate(dev, &kernel_cfg, &extack); + err = dsa_conduit_hwtstamp_validate(dev, &kernel_cfg, &extack); if (err) { if (extack._msg) netdev_err(dev, "%s\n", extack._msg); diff --git a/net/core/dst.c b/net/core/dst.c index 980e2fd2f0..6838d3212c 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -45,7 +45,7 @@ const struct dst_metrics dst_default_metrics = { EXPORT_SYMBOL(dst_default_metrics); void dst_init(struct dst_entry *dst, struct dst_ops *ops, - struct net_device *dev, int initial_ref, int initial_obsolete, + struct net_device *dev, int initial_obsolete, unsigned short flags) { dst->dev = dev; @@ -66,7 +66,7 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops, dst->tclassid = 0; #endif dst->lwtstate = NULL; - rcuref_init(&dst->__rcuref, initial_ref); + rcuref_init(&dst->__rcuref, 1); INIT_LIST_HEAD(&dst->rt_uncached); dst->__use = 0; dst->lastuse = jiffies; @@ -77,7 +77,7 @@ void dst_init(struct dst_entry 
*dst, struct dst_ops *ops, EXPORT_SYMBOL(dst_init); void *dst_alloc(struct dst_ops *ops, struct net_device *dev, - int initial_ref, int initial_obsolete, unsigned short flags) + int initial_obsolete, unsigned short flags) { struct dst_entry *dst; @@ -90,7 +90,7 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev, if (!dst) return NULL; - dst_init(dst, ops, dev, initial_ref, initial_obsolete, flags); + dst_init(dst, ops, dev, initial_obsolete, flags); return dst; } @@ -270,7 +270,7 @@ static void __metadata_dst_init(struct metadata_dst *md_dst, struct dst_entry *dst; dst = &md_dst->dst; - dst_init(dst, &dst_blackhole_ops, NULL, 1, DST_OBSOLETE_NONE, + dst_init(dst, &dst_blackhole_ops, NULL, DST_OBSOLETE_NONE, DST_METADATA | DST_NOCOUNT); memset(dst + 1, 0, sizeof(*md_dst) + optslen - sizeof(*dst)); md_dst->type = type; diff --git a/net/core/filter.c b/net/core/filter.c index 01f2417dee..cee5383831 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -81,9 +81,12 @@ #include <net/xdp.h> #include <net/mptcp.h> #include <net/netfilter/nf_conntrack_bpf.h> +#include <net/netkit.h> #include <linux/un.h> #include <net/xdp_sock_drv.h> +#include "dev.h" + static const struct bpf_func_proto * bpf_sk_base_func_proto(enum bpf_func_id func_id); @@ -2467,6 +2470,16 @@ static const struct bpf_func_proto bpf_clone_redirect_proto = { DEFINE_PER_CPU(struct bpf_redirect_info, bpf_redirect_info); EXPORT_PER_CPU_SYMBOL_GPL(bpf_redirect_info); +static struct net_device *skb_get_peer_dev(struct net_device *dev) +{ + const struct net_device_ops *ops = dev->netdev_ops; + + if (likely(ops->ndo_get_peer_dev)) + return INDIRECT_CALL_1(ops->ndo_get_peer_dev, + netkit_peer_dev, dev); + return NULL; +} + int skb_do_redirect(struct sk_buff *skb) { struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); @@ -2480,12 +2493,9 @@ int skb_do_redirect(struct sk_buff *skb) if (unlikely(!dev)) goto out_drop; if (flags & BPF_F_PEER) { - const struct net_device_ops *ops = dev->netdev_ops; - - if (unlikely(!ops->ndo_get_peer_dev || - !skb_at_tc_ingress(skb))) + if (unlikely(!skb_at_tc_ingress(skb))) goto out_drop; - dev = ops->ndo_get_peer_dev(dev); + dev = skb_get_peer_dev(dev); if (unlikely(!dev || !(dev->flags & IFF_UP) || net_eq(net, dev_net(dev)))) @@ -4260,6 +4270,20 @@ void xdp_do_flush(void) } EXPORT_SYMBOL_GPL(xdp_do_flush); +#if defined(CONFIG_DEBUG_NET) && defined(CONFIG_BPF_SYSCALL) +void xdp_do_check_flushed(struct napi_struct *napi) +{ + bool ret; + + ret = dev_check_flush(); + ret |= cpu_map_check_flush(); + ret |= xsk_map_check_flush(); + + WARN_ONCE(ret, "Missing xdp_do_flush() invocation after NAPI by %ps\n", + napi->poll); +} +#endif + void bpf_clear_redirect_map(struct bpf_map *map) { struct bpf_redirect_info *ri; @@ -5903,6 +5927,9 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, params->rt_metric = res.fi->fib_priority; params->ifindex = dev->ifindex; + if (flags & BPF_FIB_LOOKUP_SRC) + params->ipv4_src = fib_result_prefsrc(net, &res); + /* xdp and cls_bpf programs are run in RCU-bh so * rcu_read_lock_bh is not needed here */ @@ -6045,6 +6072,18 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, params->rt_metric = res.f6i->fib6_metric; params->ifindex = dev->ifindex; + if (flags & BPF_FIB_LOOKUP_SRC) { + if (res.f6i->fib6_prefsrc.plen) { + *src = res.f6i->fib6_prefsrc.addr; + } else { + err = ipv6_bpf_stub->ipv6_dev_get_saddr(net, dev, + &fl6.daddr, 0, + src); + if (err) + return BPF_FIB_LKUP_RET_NO_SRC_ADDR; + } + } + if 
(flags & BPF_FIB_LOOKUP_SKIP_NEIGH) goto set_fwd_params; @@ -6063,7 +6102,8 @@ set_fwd_params: #endif #define BPF_FIB_LOOKUP_MASK (BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT | \ - BPF_FIB_LOOKUP_SKIP_NEIGH | BPF_FIB_LOOKUP_TBID) + BPF_FIB_LOOKUP_SKIP_NEIGH | BPF_FIB_LOOKUP_TBID | \ + BPF_FIB_LOOKUP_SRC) BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx, struct bpf_fib_lookup *, params, int, plen, u32, flags) @@ -7911,14 +7951,19 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_CGROUP_INET6_BIND: case BPF_CGROUP_INET4_CONNECT: case BPF_CGROUP_INET6_CONNECT: + case BPF_CGROUP_UNIX_CONNECT: case BPF_CGROUP_UDP4_RECVMSG: case BPF_CGROUP_UDP6_RECVMSG: + case BPF_CGROUP_UNIX_RECVMSG: case BPF_CGROUP_UDP4_SENDMSG: case BPF_CGROUP_UDP6_SENDMSG: + case BPF_CGROUP_UNIX_SENDMSG: case BPF_CGROUP_INET4_GETPEERNAME: case BPF_CGROUP_INET6_GETPEERNAME: + case BPF_CGROUP_UNIX_GETPEERNAME: case BPF_CGROUP_INET4_GETSOCKNAME: case BPF_CGROUP_INET6_GETSOCKNAME: + case BPF_CGROUP_UNIX_GETSOCKNAME: return &bpf_sock_addr_setsockopt_proto; default: return NULL; @@ -7929,14 +7974,19 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_CGROUP_INET6_BIND: case BPF_CGROUP_INET4_CONNECT: case BPF_CGROUP_INET6_CONNECT: + case BPF_CGROUP_UNIX_CONNECT: case BPF_CGROUP_UDP4_RECVMSG: case BPF_CGROUP_UDP6_RECVMSG: + case BPF_CGROUP_UNIX_RECVMSG: case BPF_CGROUP_UDP4_SENDMSG: case BPF_CGROUP_UDP6_SENDMSG: + case BPF_CGROUP_UNIX_SENDMSG: case BPF_CGROUP_INET4_GETPEERNAME: case BPF_CGROUP_INET6_GETPEERNAME: + case BPF_CGROUP_UNIX_GETPEERNAME: case BPF_CGROUP_INET4_GETSOCKNAME: case BPF_CGROUP_INET6_GETSOCKNAME: + case BPF_CGROUP_UNIX_GETSOCKNAME: return &bpf_sock_addr_getsockopt_proto; default: return NULL; @@ -8984,8 +9034,8 @@ static bool sock_addr_is_valid_access(int off, int size, if (off % size != 0) return false; - /* Disallow access to IPv6 fields from IPv4 contex and vise - * versa. + /* Disallow access to fields not belonging to the attach type's address + * family. */ switch (off) { case bpf_ctx_range(struct bpf_sock_addr, user_ip4): @@ -11777,9 +11827,7 @@ bpf_sk_base_func_proto(enum bpf_func_id func_id) return func; } -__diag_push(); -__diag_ignore_all("-Wmissing-prototypes", - "Global functions as their definitions will be in vmlinux BTF"); +__bpf_kfunc_start_defs(); __bpf_kfunc int bpf_dynptr_from_skb(struct sk_buff *skb, u64 flags, struct bpf_dynptr_kern *ptr__uninit) { @@ -11826,7 +11874,7 @@ __bpf_kfunc int bpf_sock_addr_set_sun_path(struct bpf_sock_addr_kern *sa_kern, return 0; } -__diag_pop(); +__bpf_kfunc_end_defs(); int bpf_dynptr_from_skb_rdonly(struct sk_buff *skb, u64 flags, struct bpf_dynptr_kern *ptr__uninit) @@ -11889,10 +11937,7 @@ static int __init bpf_kfunc_init(void) } late_initcall(bpf_kfunc_init); -/* Disables missing prototype warnings */ -__diag_push(); -__diag_ignore_all("-Wmissing-prototypes", - "Global functions as their definitions will be in vmlinux BTF"); +__bpf_kfunc_start_defs(); /* bpf_sock_destroy: Destroy the given socket with ECONNABORTED error code. 
* @@ -11926,7 +11971,7 @@ __bpf_kfunc int bpf_sock_destroy(struct sock_common *sock) return sk->sk_prot->diag_destroy(sk, ECONNABORTED); } -__diag_pop() +__bpf_kfunc_end_defs(); BTF_SET8_START(bpf_sk_iter_kfunc_ids) BTF_ID_FLAGS(func, bpf_sock_destroy, KF_TRUSTED_ARGS) diff --git a/net/core/gso_test.c b/net/core/gso_test.c new file mode 100644 index 0000000000..4c2e77bd12 --- /dev/null +++ b/net/core/gso_test.c @@ -0,0 +1,274 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include <kunit/test.h> +#include <linux/skbuff.h> + +static const char hdr[] = "abcdefgh"; +#define GSO_TEST_SIZE 1000 + +static void __init_skb(struct sk_buff *skb) +{ + skb_reset_mac_header(skb); + memcpy(skb_mac_header(skb), hdr, sizeof(hdr)); + + /* skb_segment expects skb->data at start of payload */ + skb_pull(skb, sizeof(hdr)); + skb_reset_network_header(skb); + skb_reset_transport_header(skb); + + /* proto is arbitrary, as long as not ETH_P_TEB or vlan */ + skb->protocol = htons(ETH_P_ATALK); + skb_shinfo(skb)->gso_size = GSO_TEST_SIZE; +} + +enum gso_test_nr { + GSO_TEST_LINEAR, + GSO_TEST_NO_GSO, + GSO_TEST_FRAGS, + GSO_TEST_FRAGS_PURE, + GSO_TEST_GSO_PARTIAL, + GSO_TEST_FRAG_LIST, + GSO_TEST_FRAG_LIST_PURE, + GSO_TEST_FRAG_LIST_NON_UNIFORM, + GSO_TEST_GSO_BY_FRAGS, +}; + +struct gso_test_case { + enum gso_test_nr id; + const char *name; + + /* input */ + unsigned int linear_len; + unsigned int nr_frags; + const unsigned int *frags; + unsigned int nr_frag_skbs; + const unsigned int *frag_skbs; + + /* output as expected */ + unsigned int nr_segs; + const unsigned int *segs; +}; + +static struct gso_test_case cases[] = { + { + .id = GSO_TEST_NO_GSO, + .name = "no_gso", + .linear_len = GSO_TEST_SIZE, + .nr_segs = 1, + .segs = (const unsigned int[]) { GSO_TEST_SIZE }, + }, + { + .id = GSO_TEST_LINEAR, + .name = "linear", + .linear_len = GSO_TEST_SIZE + GSO_TEST_SIZE + 1, + .nr_segs = 3, + .segs = (const unsigned int[]) { GSO_TEST_SIZE, GSO_TEST_SIZE, 1 }, + }, + { + .id = GSO_TEST_FRAGS, + .name = "frags", + .linear_len = GSO_TEST_SIZE, + .nr_frags = 2, + .frags = (const unsigned int[]) { GSO_TEST_SIZE, 1 }, + .nr_segs = 3, + .segs = (const unsigned int[]) { GSO_TEST_SIZE, GSO_TEST_SIZE, 1 }, + }, + { + .id = GSO_TEST_FRAGS_PURE, + .name = "frags_pure", + .nr_frags = 3, + .frags = (const unsigned int[]) { GSO_TEST_SIZE, GSO_TEST_SIZE, 2 }, + .nr_segs = 3, + .segs = (const unsigned int[]) { GSO_TEST_SIZE, GSO_TEST_SIZE, 2 }, + }, + { + .id = GSO_TEST_GSO_PARTIAL, + .name = "gso_partial", + .linear_len = GSO_TEST_SIZE, + .nr_frags = 2, + .frags = (const unsigned int[]) { GSO_TEST_SIZE, 3 }, + .nr_segs = 2, + .segs = (const unsigned int[]) { 2 * GSO_TEST_SIZE, 3 }, + }, + { + /* commit 89319d3801d1: frag_list on mss boundaries */ + .id = GSO_TEST_FRAG_LIST, + .name = "frag_list", + .linear_len = GSO_TEST_SIZE, + .nr_frag_skbs = 2, + .frag_skbs = (const unsigned int[]) { GSO_TEST_SIZE, GSO_TEST_SIZE }, + .nr_segs = 3, + .segs = (const unsigned int[]) { GSO_TEST_SIZE, GSO_TEST_SIZE, GSO_TEST_SIZE }, + }, + { + .id = GSO_TEST_FRAG_LIST_PURE, + .name = "frag_list_pure", + .nr_frag_skbs = 2, + .frag_skbs = (const unsigned int[]) { GSO_TEST_SIZE, GSO_TEST_SIZE }, + .nr_segs = 2, + .segs = (const unsigned int[]) { GSO_TEST_SIZE, GSO_TEST_SIZE }, + }, + { + /* commit 43170c4e0ba7: GRO of frag_list trains */ + .id = GSO_TEST_FRAG_LIST_NON_UNIFORM, + .name = "frag_list_non_uniform", + .linear_len = GSO_TEST_SIZE, + .nr_frag_skbs = 4, + .frag_skbs = (const unsigned int[]) { GSO_TEST_SIZE, 1, GSO_TEST_SIZE, 2 }, + .nr_segs = 
4, + .segs = (const unsigned int[]) { GSO_TEST_SIZE, GSO_TEST_SIZE, GSO_TEST_SIZE, 3 }, + }, + { + /* commit 3953c46c3ac7 ("sk_buff: allow segmenting based on frag sizes") and + * commit 90017accff61 ("sctp: Add GSO support") + * + * "there will be a cover skb with protocol headers and + * children ones containing the actual segments" + */ + .id = GSO_TEST_GSO_BY_FRAGS, + .name = "gso_by_frags", + .nr_frag_skbs = 4, + .frag_skbs = (const unsigned int[]) { 100, 200, 300, 400 }, + .nr_segs = 4, + .segs = (const unsigned int[]) { 100, 200, 300, 400 }, + }, +}; + +static void gso_test_case_to_desc(struct gso_test_case *t, char *desc) +{ + sprintf(desc, "%s", t->name); +} + +KUNIT_ARRAY_PARAM(gso_test, cases, gso_test_case_to_desc); + +static void gso_test_func(struct kunit *test) +{ + const int shinfo_size = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + struct sk_buff *skb, *segs, *cur, *next, *last; + const struct gso_test_case *tcase; + netdev_features_t features; + struct page *page; + int i; + + tcase = test->param_value; + + page = alloc_page(GFP_KERNEL); + KUNIT_ASSERT_NOT_NULL(test, page); + skb = build_skb(page_address(page), sizeof(hdr) + tcase->linear_len + shinfo_size); + KUNIT_ASSERT_NOT_NULL(test, skb); + __skb_put(skb, sizeof(hdr) + tcase->linear_len); + + __init_skb(skb); + + if (tcase->nr_frags) { + unsigned int pg_off = 0; + + page = alloc_page(GFP_KERNEL); + KUNIT_ASSERT_NOT_NULL(test, page); + page_ref_add(page, tcase->nr_frags - 1); + + for (i = 0; i < tcase->nr_frags; i++) { + skb_fill_page_desc(skb, i, page, pg_off, tcase->frags[i]); + pg_off += tcase->frags[i]; + } + + KUNIT_ASSERT_LE(test, pg_off, PAGE_SIZE); + + skb->data_len = pg_off; + skb->len += skb->data_len; + skb->truesize += skb->data_len; + } + + if (tcase->frag_skbs) { + unsigned int total_size = 0, total_true_size = 0; + struct sk_buff *frag_skb, *prev = NULL; + + for (i = 0; i < tcase->nr_frag_skbs; i++) { + unsigned int frag_size; + + page = alloc_page(GFP_KERNEL); + KUNIT_ASSERT_NOT_NULL(test, page); + + frag_size = tcase->frag_skbs[i]; + frag_skb = build_skb(page_address(page), + frag_size + shinfo_size); + KUNIT_ASSERT_NOT_NULL(test, frag_skb); + __skb_put(frag_skb, frag_size); + + if (prev) + prev->next = frag_skb; + else + skb_shinfo(skb)->frag_list = frag_skb; + prev = frag_skb; + + total_size += frag_size; + total_true_size += frag_skb->truesize; + } + + skb->len += total_size; + skb->data_len += total_size; + skb->truesize += total_true_size; + + if (tcase->id == GSO_TEST_GSO_BY_FRAGS) + skb_shinfo(skb)->gso_size = GSO_BY_FRAGS; + } + + features = NETIF_F_SG | NETIF_F_HW_CSUM; + if (tcase->id == GSO_TEST_GSO_PARTIAL) + features |= NETIF_F_GSO_PARTIAL; + + /* TODO: this should also work with SG, + * rather than hit BUG_ON(i >= nfrags) + */ + if (tcase->id == GSO_TEST_FRAG_LIST_NON_UNIFORM) + features &= ~NETIF_F_SG; + + segs = skb_segment(skb, features); + if (IS_ERR(segs)) { + KUNIT_FAIL(test, "segs error %lld", PTR_ERR(segs)); + goto free_gso_skb; + } else if (!segs) { + KUNIT_FAIL(test, "no segments"); + goto free_gso_skb; + } + + last = segs->prev; + for (cur = segs, i = 0; cur; cur = next, i++) { + next = cur->next; + + KUNIT_ASSERT_EQ(test, cur->len, sizeof(hdr) + tcase->segs[i]); + + /* segs have skb->data pointing to the mac header */ + KUNIT_ASSERT_PTR_EQ(test, skb_mac_header(cur), cur->data); + KUNIT_ASSERT_PTR_EQ(test, skb_network_header(cur), cur->data + sizeof(hdr)); + + /* header was copied to all segs */ + KUNIT_ASSERT_EQ(test, memcmp(skb_mac_header(cur), hdr, sizeof(hdr)), 0); + 
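gso_test.c relies on KUnit's table-driven machinery: KUNIT_ARRAY_PARAM(gso_test, cases, gso_test_case_to_desc) emits a generator function named gso_test_gen_params, and KUNIT_CASE_PARAM (wired up at the bottom of the file) runs gso_test_func once per table entry, labelling each run with the case's .name. The same shape in a minimal self-contained suite (hedged sketch with deliberately trivial logic):

	#include <kunit/test.h>

	struct square_case { int in, out; };

	static struct square_case square_cases[] = {
		{ 2, 4 },
		{ 3, 9 },
	};

	static void square_case_to_desc(struct square_case *c, char *desc)
	{
		snprintf(desc, KUNIT_PARAM_DESC_SIZE, "in=%d", c->in);
	}
	/* expands to a generator named square_gen_params() */
	KUNIT_ARRAY_PARAM(square, square_cases, square_case_to_desc);

	static void square_test(struct kunit *test)
	{
		const struct square_case *c = test->param_value;

		KUNIT_EXPECT_EQ(test, c->in * c->in, c->out);
	}

	static struct kunit_case square_test_cases[] = {
		KUNIT_CASE_PARAM(square_test, square_gen_params),
		{}
	};

	static struct kunit_suite square_suite = {
		.name = "square_example",
		.test_cases = square_test_cases,
	};
	kunit_test_suite(square_suite);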
+ /* last seg can be found through segs->prev pointer */ + if (!next) + KUNIT_ASSERT_PTR_EQ(test, cur, last); + + consume_skb(cur); + } + + KUNIT_ASSERT_EQ(test, i, tcase->nr_segs); + +free_gso_skb: + consume_skb(skb); +} + +static struct kunit_case gso_test_cases[] = { + KUNIT_CASE_PARAM(gso_test_func, gso_test_gen_params), + {} +}; + +static struct kunit_suite gso_test_suite = { + .name = "net_core_gso", + .test_cases = gso_test_cases, +}; + +kunit_test_suite(gso_test_suite); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("KUnit tests for segmentation offload"); diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c index d6a70aeaa5..d22f091982 100644 --- a/net/core/netclassid_cgroup.c +++ b/net/core/netclassid_cgroup.c @@ -88,6 +88,12 @@ static void update_classid_task(struct task_struct *p, u32 classid) }; unsigned int fd = 0; + /* Only update the leader task, when many threads in this task, + * so it can avoid the useless traversal. + */ + if (p != p->group_leader) + return; + do { task_lock(p); fd = iterate_fd(p->files, fd, update_classid_sock, &ctx); diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index c1aea8b756..fe61f85bcf 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -5,6 +5,7 @@ #include <linux/rtnetlink.h> #include <net/net_namespace.h> #include <net/sock.h> +#include <net/xdp.h> #include "netdev-genl-gen.h" @@ -12,15 +13,24 @@ static int netdev_nl_dev_fill(struct net_device *netdev, struct sk_buff *rsp, const struct genl_info *info) { + u64 xdp_rx_meta = 0; void *hdr; hdr = genlmsg_iput(rsp, info); if (!hdr) return -EMSGSIZE; +#define XDP_METADATA_KFUNC(_, flag, __, xmo) \ + if (netdev->xdp_metadata_ops && netdev->xdp_metadata_ops->xmo) \ + xdp_rx_meta |= flag; +XDP_METADATA_KFUNC_xxx +#undef XDP_METADATA_KFUNC + if (nla_put_u32(rsp, NETDEV_A_DEV_IFINDEX, netdev->ifindex) || nla_put_u64_64bit(rsp, NETDEV_A_DEV_XDP_FEATURES, - netdev->xdp_features, NETDEV_A_DEV_PAD)) { + netdev->xdp_features, NETDEV_A_DEV_PAD) || + nla_put_u64_64bit(rsp, NETDEV_A_DEV_XDP_RX_METADATA_FEATURES, + xdp_rx_meta, NETDEV_A_DEV_PAD)) { genlmsg_cancel(rsp, hdr); return -EINVAL; } diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 31f923e7b5..dec5443372 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -211,10 +211,6 @@ static int page_pool_init(struct page_pool *pool, */ } - if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT && - pool->p.flags & PP_FLAG_PAGE_FRAG) - return -EINVAL; - #ifdef CONFIG_PAGE_POOL_STATS pool->recycle_stats = alloc_percpu(struct page_pool_recycle_stats); if (!pool->recycle_stats) @@ -363,12 +359,20 @@ static bool page_pool_dma_map(struct page_pool *pool, struct page *page) if (dma_mapping_error(pool->p.dev, dma)) return false; - page_pool_set_dma_addr(page, dma); + if (page_pool_set_dma_addr(page, dma)) + goto unmap_failed; if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV) page_pool_dma_sync_for_device(pool, page, pool->p.max_len); return true; + +unmap_failed: + WARN_ON_ONCE("unexpected DMA address, please report to netdev@"); + dma_unmap_page_attrs(pool->p.dev, dma, + PAGE_SIZE << pool->p.order, pool->p.dma_dir, + DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); + return false; } static void page_pool_set_pp_info(struct page_pool *pool, @@ -376,6 +380,14 @@ static void page_pool_set_pp_info(struct page_pool *pool, { page->pp = pool; page->pp_magic |= PP_SIGNATURE; + + /* Ensuring all pages have been split into one fragment initially: + * page_pool_set_pp_info() is only called once for every page when it + * is allocated 
from the page allocator and page_pool_fragment_page() + * is dirtying the same cache line as the page->pp_magic above, so + * the overhead is negligible. + */ + page_pool_fragment_page(page, 1); if (pool->p.init_callback) pool->p.init_callback(page, pool->p.init_arg); } @@ -672,7 +684,7 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data, struct page *page = virt_to_head_page(data[i]); /* It is not the last user for the page frag case */ - if (!page_pool_is_last_frag(pool, page)) + if (!page_pool_is_last_frag(page)) continue; page = __page_pool_put_page(pool, page, -1, false); @@ -748,8 +760,7 @@ struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int max_size = PAGE_SIZE << pool->p.order; struct page *page = pool->frag_page; - if (WARN_ON(!(pool->p.flags & PP_FLAG_PAGE_FRAG) || - size > max_size)) + if (WARN_ON(size > max_size)) return NULL; size = ALIGN(size, dma_get_cache_alignment()); @@ -802,7 +813,7 @@ static void page_pool_empty_ring(struct page_pool *pool) } } -static void page_pool_free(struct page_pool *pool) +static void __page_pool_destroy(struct page_pool *pool) { if (pool->disconnect) pool->disconnect(pool); @@ -853,7 +864,7 @@ static int page_pool_release(struct page_pool *pool) page_pool_scrub(pool); inflight = page_pool_inflight(pool); if (!inflight) - page_pool_free(pool); + __page_pool_destroy(pool); return inflight; } diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 4d1696677c..57cea67b75 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -200,6 +200,7 @@ pf(VID_RND) /* Random VLAN ID */ \ pf(SVID_RND) /* Random SVLAN ID */ \ pf(NODE) /* Node memory alloc*/ \ + pf(SHARED) /* Shared SKB */ \ #define pf(flag) flag##_SHIFT, enum pkt_flags { @@ -1198,7 +1199,8 @@ static ssize_t pktgen_if_write(struct file *file, ((pkt_dev->xmit_mode == M_NETIF_RECEIVE) || !(pkt_dev->odev->priv_flags & IFF_TX_SKB_SHARING))) return -ENOTSUPP; - if (value > 0 && pkt_dev->n_imix_entries > 0) + if (value > 0 && (pkt_dev->n_imix_entries > 0 || + !(pkt_dev->flags & F_SHARED))) return -EINVAL; i += len; @@ -1257,6 +1259,10 @@ static ssize_t pktgen_if_write(struct file *file, ((pkt_dev->xmit_mode == M_START_XMIT) && (!(pkt_dev->odev->priv_flags & IFF_TX_SKB_SHARING))))) return -ENOTSUPP; + + if (value > 1 && !(pkt_dev->flags & F_SHARED)) + return -EINVAL; + pkt_dev->burst = value < 1 ? 1 : value; sprintf(pg_result, "OK: burst=%u", pkt_dev->burst); return count; @@ -1318,9 +1324,10 @@ static ssize_t pktgen_if_write(struct file *file, return count; } if (!strcmp(name, "flag")) { + bool disable = false; __u32 flag; char f[32]; - bool disable = false; + char *end; memset(f, 0, 32); len = strn_len(&user_buffer[i], sizeof(f) - 1); @@ -1332,28 +1339,42 @@ static ssize_t pktgen_if_write(struct file *file, i += len; flag = pktgen_read_flag(f, &disable); - if (flag) { - if (disable) + if (disable) { + /* If "clone_skb", or "burst" parameters are + * configured, it means that the skb still + * needs to be referenced by the pktgen, so + * the skb must be shared. + */ + if (flag == F_SHARED && (pkt_dev->clone_skb || + pkt_dev->burst > 1)) + return -EINVAL; pkt_dev->flags &= ~flag; - else + } else { pkt_dev->flags |= flag; - } else { - sprintf(pg_result, - "Flag -:%s:- unknown\nAvailable flags, (prepend ! 
to un-set flag):\n%s", - f, - "IPSRC_RND, IPDST_RND, UDPSRC_RND, UDPDST_RND, " - "MACSRC_RND, MACDST_RND, TXSIZE_RND, IPV6, " - "MPLS_RND, VID_RND, SVID_RND, FLOW_SEQ, " - "QUEUE_MAP_RND, QUEUE_MAP_CPU, UDPCSUM, " - "NO_TIMESTAMP, " -#ifdef CONFIG_XFRM - "IPSEC, " -#endif - "NODE_ALLOC\n"); + } + + sprintf(pg_result, "OK: flags=0x%x", pkt_dev->flags); return count; } - sprintf(pg_result, "OK: flags=0x%x", pkt_dev->flags); + + /* Unknown flag */ + end = pkt_dev->result + sizeof(pkt_dev->result); + pg_result += sprintf(pg_result, + "Flag -:%s:- unknown\n" + "Available flags, (prepend ! to un-set flag):\n", f); + + for (int n = 0; n < NR_PKT_FLAGS && pg_result < end; n++) { + if (!IS_ENABLED(CONFIG_XFRM) && n == IPSEC_SHIFT) + continue; + pg_result += snprintf(pg_result, end - pg_result, + "%s, ", pkt_flag_names[n]); + } + if (!WARN_ON_ONCE(pg_result >= end)) { + /* Remove the comma and whitespace at the end */ + *(pg_result - 2) = '\0'; + } + return count; } if (!strcmp(name, "dst_min") || !strcmp(name, "dst")) { @@ -3440,12 +3461,24 @@ static void pktgen_wait_for_skb(struct pktgen_dev *pkt_dev) static void pktgen_xmit(struct pktgen_dev *pkt_dev) { - unsigned int burst = READ_ONCE(pkt_dev->burst); + bool skb_shared = !!(READ_ONCE(pkt_dev->flags) & F_SHARED); struct net_device *odev = pkt_dev->odev; struct netdev_queue *txq; + unsigned int burst = 1; struct sk_buff *skb; + int clone_skb = 0; int ret; + /* If 'skb_shared' is false, the read of possible + * new values (if any) for 'burst' and 'clone_skb' will be skipped to + * prevent some concurrent changes from slipping in. And the stabilized + * config will be read in during the next run of pktgen_xmit. + */ + if (skb_shared) { + burst = READ_ONCE(pkt_dev->burst); + clone_skb = READ_ONCE(pkt_dev->clone_skb); + } + /* If device is offline, then don't send */ if (unlikely(!netif_running(odev) || !netif_carrier_ok(odev))) { pktgen_stop_device(pkt_dev); @@ -3462,7 +3495,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) /* If no skb or clone count exhausted then get new one */ if (!pkt_dev->skb || (pkt_dev->last_ok && - ++pkt_dev->clone_count >= pkt_dev->clone_skb)) { + ++pkt_dev->clone_count >= clone_skb)) { /* build a new pkt */ kfree_skb(pkt_dev->skb); @@ -3483,7 +3516,8 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) if (pkt_dev->xmit_mode == M_NETIF_RECEIVE) { skb = pkt_dev->skb; skb->protocol = eth_type_trans(skb, skb->dev); - refcount_add(burst, &skb->users); + if (skb_shared) + refcount_add(burst, &skb->users); local_bh_disable(); do { ret = netif_receive_skb(skb); @@ -3491,6 +3525,10 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) pkt_dev->errors++; pkt_dev->sofar++; pkt_dev->seq_num++; + if (unlikely(!skb_shared)) { + pkt_dev->skb = NULL; + break; + } if (refcount_read(&skb->users) != burst) { /* skb was queued by rps/rfs or taps, * so cannot reuse this skb @@ -3509,9 +3547,14 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) goto out; /* Skips xmit_mode M_START_XMIT */ } else if (pkt_dev->xmit_mode == M_QUEUE_XMIT) { local_bh_disable(); - refcount_inc(&pkt_dev->skb->users); + if (skb_shared) + refcount_inc(&pkt_dev->skb->users); ret = dev_queue_xmit(pkt_dev->skb); + + if (!skb_shared && dev_xmit_complete(ret)) + pkt_dev->skb = NULL; + switch (ret) { case NET_XMIT_SUCCESS: pkt_dev->sofar++; @@ -3549,11 +3592,15 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) pkt_dev->last_ok = 0; goto unlock; } - refcount_add(burst, &pkt_dev->skb->users); + if (skb_shared) + refcount_add(burst, &pkt_dev->skb->users); 
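The new F_SHARED flag is driven through pktgen's usual procfs command interface, and the validation added above ties the knobs together: clone_skb > 0 or burst > 1 require a shared skb, so "flag !SHARED" is accepted only while both stay at their defaults. A hedged userspace sketch of issuing such commands (the device path is an example):

	#include <fcntl.h>
	#include <string.h>
	#include <unistd.h>

	/* Write one pktgen command, e.g. "flag !SHARED", to a device
	 * file such as /proc/net/pktgen/eth0. Returns 0 on success. */
	static int pktgen_cmd(const char *path, const char *cmd)
	{
		int fd = open(path, O_WRONLY);
		ssize_t n;

		if (fd < 0)
			return -1;
		n = write(fd, cmd, strlen(cmd));
		close(fd);
		return n == (ssize_t)strlen(cmd) ? 0 : -1;
	}

	/* usage: pktgen_cmd("/proc/net/pktgen/eth0", "flag !SHARED"); */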
xmit_more: ret = netdev_start_xmit(pkt_dev->skb, odev, txq, --burst > 0); + if (!skb_shared && dev_xmit_complete(ret)) + pkt_dev->skb = NULL; + switch (ret) { case NETDEV_TX_OK: pkt_dev->last_ok = 1; @@ -3575,7 +3622,8 @@ xmit_more: fallthrough; case NETDEV_TX_BUSY: /* Retry it next time */ - refcount_dec(&(pkt_dev->skb->users)); + if (skb_shared) + refcount_dec(&pkt_dev->skb->users); pkt_dev->last_ok = 0; } if (unlikely(burst)) @@ -3588,7 +3636,8 @@ out: /* If pkt_dev->count is zero, then run forever */ if ((pkt_dev->count != 0) && (pkt_dev->sofar >= pkt_dev->count)) { - pktgen_wait_for_skb(pkt_dev); + if (pkt_dev->skb) + pktgen_wait_for_skb(pkt_dev); /* Done with this */ pktgen_stop_device(pkt_dev); @@ -3771,6 +3820,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname) pkt_dev->svlan_id = 0xffff; pkt_dev->burst = 1; pkt_dev->node = NUMA_NO_NODE; + pkt_dev->flags = F_SHARED; /* SKB shared by default */ err = pktgen_setup_dev(t->net, pkt_dev, ifname); if (err) @@ -3982,8 +4032,7 @@ static void __net_exit pg_net_exit(struct net *net) list_for_each_safe(q, n, &list) { t = list_entry(q, struct pktgen_thread, th_list); list_del(&t->th_list); - kthread_stop(t->tsk); - put_task_struct(t->tsk); + kthread_stop_put(t->tsk); kfree(t); } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index fcf331a447..bf4c3f65ad 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -57,6 +57,7 @@ #if IS_ENABLED(CONFIG_IPV6) #include <net/addrconf.h> #endif +#include <linux/dpll.h> #include "dev.h" @@ -1055,6 +1056,15 @@ static size_t rtnl_devlink_port_size(const struct net_device *dev) return size; } +static size_t rtnl_dpll_pin_size(const struct net_device *dev) +{ + size_t size = nla_total_size(0); /* nest IFLA_DPLL_PIN */ + + size += dpll_msg_pin_handle_size(netdev_dpll_pin(dev)); + + return size; +} + static noinline size_t if_nlmsg_size(const struct net_device *dev, u32 ext_filter_mask) { @@ -1111,6 +1121,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + rtnl_prop_list_size(dev) + nla_total_size(MAX_ADDR_LEN) /* IFLA_PERM_ADDRESS */ + rtnl_devlink_port_size(dev) + + rtnl_dpll_pin_size(dev) + 0; } @@ -1774,6 +1785,28 @@ nest_cancel: return ret; } +static int rtnl_fill_dpll_pin(struct sk_buff *skb, + const struct net_device *dev) +{ + struct nlattr *dpll_pin_nest; + int ret; + + dpll_pin_nest = nla_nest_start(skb, IFLA_DPLL_PIN); + if (!dpll_pin_nest) + return -EMSGSIZE; + + ret = dpll_msg_add_pin_handle(skb, netdev_dpll_pin(dev)); + if (ret < 0) + goto nest_cancel; + + nla_nest_end(skb, dpll_pin_nest); + return 0; + +nest_cancel: + nla_nest_cancel(skb, dpll_pin_nest); + return ret; +} + static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, struct net *src_net, int type, u32 pid, u32 seq, u32 change, @@ -1916,6 +1949,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, if (rtnl_fill_devlink_port(skb, dev)) goto nla_put_failure; + if (rtnl_fill_dpll_pin(skb, dev)) + goto nla_put_failure; + nlmsg_end(skb, nlh); return 0; @@ -4331,13 +4367,6 @@ int ndo_dflt_fdb_del(struct ndmsg *ndm, } EXPORT_SYMBOL(ndo_dflt_fdb_del); -static const struct nla_policy fdb_del_bulk_policy[NDA_MAX + 1] = { - [NDA_VLAN] = { .type = NLA_U16 }, - [NDA_IFINDEX] = NLA_POLICY_MIN(NLA_S32, 1), - [NDA_NDM_STATE_MASK] = { .type = NLA_U16 }, - [NDA_NDM_FLAGS_MASK] = { .type = NLA_U8 }, -}; - static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { @@ -4358,8 +4387,10 @@ static int rtnl_fdb_del(struct sk_buff *skb, 
struct nlmsghdr *nlh, err = nlmsg_parse_deprecated(nlh, sizeof(*ndm), tb, NDA_MAX, NULL, extack); } else { - err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, - fdb_del_bulk_policy, extack); + /* For bulk delete, the drivers will parse the message with + * policy. + */ + err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL, extack); } if (err < 0) return err; @@ -4382,6 +4413,10 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, return -EINVAL; } addr = nla_data(tb[NDA_LLADDR]); + + err = fdb_vid_parse(tb[NDA_VLAN], &vid, extack); + if (err) + return err; } if (dev->type != ARPHRD_ETHER) { @@ -4389,10 +4424,6 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, return -EINVAL; } - err = fdb_vid_parse(tb[NDA_VLAN], &vid, extack); - if (err) - return err; - err = -EOPNOTSUPP; /* Support fdb on master device the net/bridge default case */ @@ -4406,8 +4437,7 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, err = ops->ndo_fdb_del(ndm, tb, dev, addr, vid, extack); } else { if (ops->ndo_fdb_del_bulk) - err = ops->ndo_fdb_del_bulk(ndm, tb, dev, vid, - extack); + err = ops->ndo_fdb_del_bulk(nlh, dev, extack); } if (err) @@ -4428,8 +4458,7 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, /* in case err was cleared by NTF_MASTER call */ err = -EOPNOTSUPP; if (ops->ndo_fdb_del_bulk) - err = ops->ndo_fdb_del_bulk(ndm, tb, dev, vid, - extack); + err = ops->ndo_fdb_del_bulk(nlh, dev, extack); } if (!err) { @@ -6190,6 +6219,93 @@ out: return skb->len; } +static int rtnl_validate_mdb_entry_get(const struct nlattr *attr, + struct netlink_ext_ack *extack) +{ + struct br_mdb_entry *entry = nla_data(attr); + + if (nla_len(attr) != sizeof(struct br_mdb_entry)) { + NL_SET_ERR_MSG_ATTR(extack, attr, "Invalid attribute length"); + return -EINVAL; + } + + if (entry->ifindex) { + NL_SET_ERR_MSG(extack, "Entry ifindex cannot be specified"); + return -EINVAL; + } + + if (entry->state) { + NL_SET_ERR_MSG(extack, "Entry state cannot be specified"); + return -EINVAL; + } + + if (entry->flags) { + NL_SET_ERR_MSG(extack, "Entry flags cannot be specified"); + return -EINVAL; + } + + if (entry->vid >= VLAN_VID_MASK) { + NL_SET_ERR_MSG(extack, "Invalid entry VLAN id"); + return -EINVAL; + } + + if (entry->addr.proto != htons(ETH_P_IP) && + entry->addr.proto != htons(ETH_P_IPV6) && + entry->addr.proto != 0) { + NL_SET_ERR_MSG(extack, "Unknown entry protocol"); + return -EINVAL; + } + + return 0; +} + +static const struct nla_policy mdba_get_policy[MDBA_GET_ENTRY_MAX + 1] = { + [MDBA_GET_ENTRY] = NLA_POLICY_VALIDATE_FN(NLA_BINARY, + rtnl_validate_mdb_entry_get, + sizeof(struct br_mdb_entry)), + [MDBA_GET_ENTRY_ATTRS] = { .type = NLA_NESTED }, +}; + +static int rtnl_mdb_get(struct sk_buff *in_skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[MDBA_GET_ENTRY_MAX + 1]; + struct net *net = sock_net(in_skb->sk); + struct br_port_msg *bpm; + struct net_device *dev; + int err; + + err = nlmsg_parse(nlh, sizeof(struct br_port_msg), tb, + MDBA_GET_ENTRY_MAX, mdba_get_policy, extack); + if (err) + return err; + + bpm = nlmsg_data(nlh); + if (!bpm->ifindex) { + NL_SET_ERR_MSG(extack, "Invalid ifindex"); + return -EINVAL; + } + + dev = __dev_get_by_index(net, bpm->ifindex); + if (!dev) { + NL_SET_ERR_MSG(extack, "Device doesn't exist"); + return -ENODEV; + } + + if (NL_REQ_ATTR_CHECK(extack, NULL, tb, MDBA_GET_ENTRY)) { + NL_SET_ERR_MSG(extack, "Missing MDBA_GET_ENTRY attribute"); + return -EINVAL; + } + + if 
(!dev->netdev_ops->ndo_mdb_get) { + NL_SET_ERR_MSG(extack, "Device does not support MDB operations"); + return -EOPNOTSUPP; + } + + return dev->netdev_ops->ndo_mdb_get(dev, tb, NETLINK_CB(in_skb).portid, + nlh->nlmsg_seq, extack); +} + static int rtnl_validate_mdb_entry(const struct nlattr *attr, struct netlink_ext_ack *extack) { @@ -6566,7 +6682,7 @@ void __init rtnetlink_init(void) 0); rtnl_register(PF_UNSPEC, RTM_SETSTATS, rtnl_stats_set, NULL, 0); - rtnl_register(PF_BRIDGE, RTM_GETMDB, NULL, rtnl_mdb_dump, 0); + rtnl_register(PF_BRIDGE, RTM_GETMDB, rtnl_mdb_get, rtnl_mdb_dump, 0); rtnl_register(PF_BRIDGE, RTM_NEWMDB, rtnl_mdb_add, NULL, 0); rtnl_register(PF_BRIDGE, RTM_DELMDB, rtnl_mdb_del, NULL, 0); } diff --git a/net/core/selftests.c b/net/core/selftests.c index acb1ee97bb..8f801e6e3b 100644 --- a/net/core/selftests.c +++ b/net/core/selftests.c @@ -397,16 +397,14 @@ EXPORT_SYMBOL_GPL(net_selftest_get_count); void net_selftest_get_strings(u8 *data) { - u8 *p = data; int i; - for (i = 0; i < net_selftest_get_count(); i++) { - snprintf(p, ETH_GSTRING_LEN, "%2d. %s", i + 1, - net_selftests[i].name); - p += ETH_GSTRING_LEN; - } + for (i = 0; i < net_selftest_get_count(); i++) + ethtool_sprintf(&data, "%2d. %s", i + 1, + net_selftests[i].name); } EXPORT_SYMBOL_GPL(net_selftest_get_strings); +MODULE_DESCRIPTION("Common library for generic PHY ethtool selftests"); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Oleksij Rempel <o.rempel@pengutronix.de>"); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 011d690291..94cc40a6f7 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -62,6 +62,7 @@ #include <linux/if_vlan.h> #include <linux/mpls.h> #include <linux/kcov.h> +#include <linux/iov_iter.h> #include <net/protocol.h> #include <net/dst.h> @@ -847,6 +848,8 @@ EXPORT_SYMBOL(__napi_alloc_skb); void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off, int size, unsigned int truesize) { + DEBUG_NET_WARN_ON_ONCE(size > truesize); + skb_fill_page_desc(skb, i, page, off, size); skb->len += size; skb->data_len += size; @@ -859,6 +862,8 @@ void skb_coalesce_rx_frag(struct sk_buff *skb, int i, int size, { skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + DEBUG_NET_WARN_ON_ONCE(size > truesize); + skb_frag_size_add(frag, size); skb->len += size; skb->data_len += size; @@ -3718,10 +3723,19 @@ EXPORT_SYMBOL(skb_dequeue_tail); void skb_queue_purge_reason(struct sk_buff_head *list, enum skb_drop_reason reason) { - struct sk_buff *skb; + struct sk_buff_head tmp; + unsigned long flags; - while ((skb = skb_dequeue(list)) != NULL) - kfree_skb_reason(skb, reason); + if (skb_queue_empty_lockless(list)) + return; + + __skb_queue_head_init(&tmp); + + spin_lock_irqsave(&list->lock, flags); + skb_queue_splice_init(list, &tmp); + spin_unlock_irqrestore(&list->lock, flags); + + __skb_queue_purge_reason(&tmp, reason); } EXPORT_SYMBOL(skb_queue_purge_reason); @@ -4510,7 +4524,7 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, * now. * Cap len to not accidentally hit GSO_BY_FRAGS. 
*/ - partial_segs = min(len, GSO_BY_FRAGS - 1U) / mss; + partial_segs = min(len, GSO_BY_FRAGS - 1) / mss; if (partial_segs > 1) mss *= partial_segs; else @@ -5153,6 +5167,9 @@ struct sk_buff *sock_dequeue_err_skb(struct sock *sk) bool icmp_next = false; unsigned long flags; + if (skb_queue_empty_lockless(q)) + return NULL; + spin_lock_irqsave(&q->lock, flags); skb = __skb_dequeue(q); if (skb && (skb_next = skb_peek(q))) { @@ -5752,7 +5769,7 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, /* In general, avoid mixing page_pool and non-page_pool allocated * pages within the same SKB. Additionally avoid dealing with clones * with page_pool pages, in case the SKB is using page_pool fragment - * references (PP_FLAG_PAGE_FRAG). Since we only take full page + * references (page_pool_alloc_frag()). Since we only take full page * references for cloned SKBs at the moment that would result in * inconsistent reference counts. * In theory we could take full references if @from is cloned and @@ -6935,3 +6952,42 @@ out: return spliced ?: ret; } EXPORT_SYMBOL(skb_splice_from_iter); + +static __always_inline +size_t memcpy_from_iter_csum(void *iter_from, size_t progress, + size_t len, void *to, void *priv2) +{ + __wsum *csum = priv2; + __wsum next = csum_partial_copy_nocheck(iter_from, to + progress, len); + + *csum = csum_block_add(*csum, next, progress); + return 0; +} + +static __always_inline +size_t copy_from_user_iter_csum(void __user *iter_from, size_t progress, + size_t len, void *to, void *priv2) +{ + __wsum next, *csum = priv2; + + next = csum_and_copy_from_user(iter_from, to + progress, len); + *csum = csum_block_add(*csum, next, progress); + return next ? 0 : len; +} + +bool csum_and_copy_from_iter_full(void *addr, size_t bytes, + __wsum *csum, struct iov_iter *i) +{ + size_t copied; + + if (WARN_ON_ONCE(!i->data_source)) + return false; + copied = iterate_and_advance2(i, bytes, addr, csum, + copy_from_user_iter_csum, + memcpy_from_iter_csum); + if (likely(copied == bytes)) + return true; + iov_iter_revert(i, copied); + return false; +} +EXPORT_SYMBOL(csum_and_copy_from_iter_full); diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 93ecfceac1..4d75ef9d24 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -1226,8 +1226,11 @@ static void sk_psock_verdict_data_ready(struct sock *sk) rcu_read_lock(); psock = sk_psock(sk); - if (psock) - psock->saved_data_ready(sk); + if (psock) { + read_lock_bh(&sk->sk_callback_lock); + sk_psock_data_ready(sk, psock); + read_unlock_bh(&sk->sk_callback_lock); + } rcu_read_unlock(); } } diff --git a/net/core/sock.c b/net/core/sock.c index 383e30fe79..20160865ed 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -760,7 +760,7 @@ out: return ret; } -bool sk_mc_loop(struct sock *sk) +bool sk_mc_loop(const struct sock *sk) { if (dev_recursion_level()) return false; @@ -772,7 +772,7 @@ bool sk_mc_loop(struct sock *sk) return inet_test_bit(MC_LOOP, sk); #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: - return inet6_sk(sk)->mc_loop; + return inet6_test_bit(MC6_LOOP, sk); #endif } WARN_ON_ONCE(1); @@ -807,9 +807,7 @@ EXPORT_SYMBOL(sock_no_linger); void sock_set_priority(struct sock *sk, u32 priority) { - lock_sock(sk); WRITE_ONCE(sk->sk_priority, priority); - release_sock(sk); } EXPORT_SYMBOL(sock_set_priority); @@ -1119,6 +1117,94 @@ int sk_setsockopt(struct sock *sk, int level, int optname, valbool = val ? 1 : 0; + /* handle options which do not require locking the socket. 
*/ + switch (optname) { + case SO_PRIORITY: + if ((val >= 0 && val <= 6) || + sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) || + sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) { + sock_set_priority(sk, val); + return 0; + } + return -EPERM; + case SO_PASSSEC: + assign_bit(SOCK_PASSSEC, &sock->flags, valbool); + return 0; + case SO_PASSCRED: + assign_bit(SOCK_PASSCRED, &sock->flags, valbool); + return 0; + case SO_PASSPIDFD: + assign_bit(SOCK_PASSPIDFD, &sock->flags, valbool); + return 0; + case SO_TYPE: + case SO_PROTOCOL: + case SO_DOMAIN: + case SO_ERROR: + return -ENOPROTOOPT; +#ifdef CONFIG_NET_RX_BUSY_POLL + case SO_BUSY_POLL: + if (val < 0) + return -EINVAL; + WRITE_ONCE(sk->sk_ll_usec, val); + return 0; + case SO_PREFER_BUSY_POLL: + if (valbool && !sockopt_capable(CAP_NET_ADMIN)) + return -EPERM; + WRITE_ONCE(sk->sk_prefer_busy_poll, valbool); + return 0; + case SO_BUSY_POLL_BUDGET: + if (val > READ_ONCE(sk->sk_busy_poll_budget) && + !sockopt_capable(CAP_NET_ADMIN)) + return -EPERM; + if (val < 0 || val > U16_MAX) + return -EINVAL; + WRITE_ONCE(sk->sk_busy_poll_budget, val); + return 0; +#endif + case SO_MAX_PACING_RATE: + { + unsigned long ulval = (val == ~0U) ? ~0UL : (unsigned int)val; + unsigned long pacing_rate; + + if (sizeof(ulval) != sizeof(val) && + optlen >= sizeof(ulval) && + copy_from_sockptr(&ulval, optval, sizeof(ulval))) { + return -EFAULT; + } + if (ulval != ~0UL) + cmpxchg(&sk->sk_pacing_status, + SK_PACING_NONE, + SK_PACING_NEEDED); + /* Pairs with READ_ONCE() from sk_getsockopt() */ + WRITE_ONCE(sk->sk_max_pacing_rate, ulval); + pacing_rate = READ_ONCE(sk->sk_pacing_rate); + if (ulval < pacing_rate) + WRITE_ONCE(sk->sk_pacing_rate, ulval); + return 0; + } + case SO_TXREHASH: + if (val < -1 || val > 1) + return -EINVAL; + if ((u8)val == SOCK_TXREHASH_DEFAULT) + val = READ_ONCE(sock_net(sk)->core.sysctl_txrehash); + /* Paired with READ_ONCE() in tcp_rtx_synack() + * and sk_getsockopt(). 
+ */ + WRITE_ONCE(sk->sk_txrehash, (u8)val); + return 0; + case SO_PEEK_OFF: + { + int (*set_peek_off)(struct sock *sk, int val); + + set_peek_off = READ_ONCE(sock->ops)->set_peek_off; + if (set_peek_off) + ret = set_peek_off(sk, val); + else + ret = -EOPNOTSUPP; + return ret; + } + } + sockopt_lock_sock(sk); switch (optname) { @@ -1134,12 +1220,6 @@ int sk_setsockopt(struct sock *sk, int level, int optname, case SO_REUSEPORT: sk->sk_reuseport = valbool; break; - case SO_TYPE: - case SO_PROTOCOL: - case SO_DOMAIN: - case SO_ERROR: - ret = -ENOPROTOOPT; - break; case SO_DONTROUTE: sock_valbool_flag(sk, SOCK_LOCALROUTE, valbool); sk_dst_reset(sk); @@ -1214,15 +1294,6 @@ set_sndbuf: sk->sk_no_check_tx = valbool; break; - case SO_PRIORITY: - if ((val >= 0 && val <= 6) || - sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) || - sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) - WRITE_ONCE(sk->sk_priority, val); - else - ret = -EPERM; - break; - case SO_LINGER: if (optlen < sizeof(ling)) { ret = -EINVAL; /* 1003.1g */ @@ -1248,14 +1319,6 @@ set_sndbuf: case SO_BSDCOMPAT: break; - case SO_PASSCRED: - assign_bit(SOCK_PASSCRED, &sock->flags, valbool); - break; - - case SO_PASSPIDFD: - assign_bit(SOCK_PASSPIDFD, &sock->flags, valbool); - break; - case SO_TIMESTAMP_OLD: case SO_TIMESTAMP_NEW: case SO_TIMESTAMPNS_OLD: @@ -1361,9 +1424,6 @@ set_sndbuf: sock_valbool_flag(sk, SOCK_FILTER_LOCKED, valbool); break; - case SO_PASSSEC: - assign_bit(SOCK_PASSSEC, &sock->flags, valbool); - break; case SO_MARK: if (!sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) && !sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) { @@ -1385,18 +1445,6 @@ set_sndbuf: sock_valbool_flag(sk, SOCK_WIFI_STATUS, valbool); break; - case SO_PEEK_OFF: - { - int (*set_peek_off)(struct sock *sk, int val); - - set_peek_off = READ_ONCE(sock->ops)->set_peek_off; - if (set_peek_off) - ret = set_peek_off(sk, val); - else - ret = -EOPNOTSUPP; - break; - } - case SO_NOFCS: sock_valbool_flag(sk, SOCK_NOFCS, valbool); break; @@ -1405,50 +1453,7 @@ set_sndbuf: sock_valbool_flag(sk, SOCK_SELECT_ERR_QUEUE, valbool); break; -#ifdef CONFIG_NET_RX_BUSY_POLL - case SO_BUSY_POLL: - if (val < 0) - ret = -EINVAL; - else - WRITE_ONCE(sk->sk_ll_usec, val); - break; - case SO_PREFER_BUSY_POLL: - if (valbool && !sockopt_capable(CAP_NET_ADMIN)) - ret = -EPERM; - else - WRITE_ONCE(sk->sk_prefer_busy_poll, valbool); - break; - case SO_BUSY_POLL_BUDGET: - if (val > READ_ONCE(sk->sk_busy_poll_budget) && !sockopt_capable(CAP_NET_ADMIN)) { - ret = -EPERM; - } else { - if (val < 0 || val > U16_MAX) - ret = -EINVAL; - else - WRITE_ONCE(sk->sk_busy_poll_budget, val); - } - break; -#endif - case SO_MAX_PACING_RATE: - { - unsigned long ulval = (val == ~0U) ? ~0UL : (unsigned int)val; - - if (sizeof(ulval) != sizeof(val) && - optlen >= sizeof(ulval) && - copy_from_sockptr(&ulval, optval, sizeof(ulval))) { - ret = -EFAULT; - break; - } - if (ulval != ~0UL) - cmpxchg(&sk->sk_pacing_status, - SK_PACING_NONE, - SK_PACING_NEEDED); - /* Pairs with READ_ONCE() from sk_getsockopt() */ - WRITE_ONCE(sk->sk_max_pacing_rate, ulval); - sk->sk_pacing_rate = min(sk->sk_pacing_rate, ulval); - break; - } case SO_INCOMING_CPU: reuseport_update_incoming_cpu(sk, val); break; @@ -1533,19 +1538,6 @@ set_sndbuf: break; } - case SO_TXREHASH: - if (val < -1 || val > 1) { - ret = -EINVAL; - break; - } - if ((u8)val == SOCK_TXREHASH_DEFAULT) - val = READ_ONCE(sock_net(sk)->core.sysctl_txrehash); - /* Paired with READ_ONCE() in tcp_rtx_synack() - * and sk_getsockopt(). 
- */ - WRITE_ONCE(sk->sk_txrehash, (u8)val); - break; - default: ret = -ENOPROTOOPT; break; @@ -2018,14 +2010,6 @@ lenout: return 0; } -int sock_getsockopt(struct socket *sock, int level, int optname, - char __user *optval, int __user *optlen) -{ - return sk_getsockopt(sock->sk, level, optname, - USER_SOCKPTR(optval), - USER_SOCKPTR(optlen)); -} - /* * Initialize an sk_lock. * @@ -3010,6 +2994,11 @@ void __sk_flush_backlog(struct sock *sk) { spin_lock_bh(&sk->sk_lock.slock); __release_sock(sk); + + if (sk->sk_prot->release_cb) + INDIRECT_CALL_INET_1(sk->sk_prot->release_cb, + tcp_release_cb, sk); + spin_unlock_bh(&sk->sk_lock.slock); } EXPORT_SYMBOL_GPL(__sk_flush_backlog); @@ -3046,21 +3035,29 @@ EXPORT_SYMBOL(sk_wait_data); * @amt: pages to allocate * @kind: allocation type * - * Similar to __sk_mem_schedule(), but does not update sk_forward_alloc + * Similar to __sk_mem_schedule(), but does not update sk_forward_alloc. + * + * Unlike the globally shared limits among the sockets under the same + * protocol, consuming the budget of a memcg won't have a direct effect + * on the other ones. So be optimistic about the memcg's tolerance, and + * leave it to the callers to decide whether or not to raise 'allocated' + * through sk_under_memory_pressure() or its variants. */ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind) { - bool memcg_charge = mem_cgroup_sockets_enabled && sk->sk_memcg; + struct mem_cgroup *memcg = mem_cgroup_sockets_enabled ? sk->sk_memcg : NULL; struct proto *prot = sk->sk_prot; - bool charged = true; + bool charged = false; long allocated; sk_memory_allocated_add(sk, amt); allocated = sk_memory_allocated(sk); + + if (memcg) { + if (!mem_cgroup_charge_skmem(memcg, amt, gfp_memcg_charge())) + goto suppress_allocation; + charged = true; + } /* Under limit. */ if (allocated <= sk_prot_mem_limits(sk, 0)) { @@ -3076,7 +3073,14 @@ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind) if (allocated > sk_prot_mem_limits(sk, 2)) goto suppress_allocation; - /* guarantee minimum buffer size under pressure */ + /* Guarantee minimum buffer size under pressure (either global + * or memcg) to make sure features described in RFC 7323 (TCP + * Extensions for High Performance) work properly. + * + * This rule does NOT hold once usage exceeds the global or the + * memcg's hard limit, or else a DoS attack could be mounted by + * spawning lots of sockets whose usage stays under the minimum + * buffer size. + */ if (kind == SK_MEM_RECV) { if (atomic_read(&sk->sk_rmem_alloc) < sk_get_rmem0(sk, prot)) return 1; @@ -3095,8 +3099,17 @@ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind) if (sk_has_memory_pressure(sk)) { u64 alloc; - if (!sk_under_memory_pressure(sk)) + /* The following 'average' heuristic is within the + * scope of global accounting, so it only makes + * sense for global memory pressure. + */ + if (!sk_under_global_memory_pressure(sk)) return 1; + + /* Try to be fair among all the sockets under global + * pressure by allowing the ones whose usage is below + * average to grow. 
+ */ alloc = sk_sockets_allocated_read_positive(sk); if (sk_prot_mem_limits(sk, 2) > alloc * sk_mem_pages(sk->sk_wmem_queued + @@ -3115,8 +3128,8 @@ suppress_allocation: */ if (sk->sk_wmem_queued + size >= sk->sk_sndbuf) { /* Force charge with __GFP_NOFAIL */ - if (memcg_charge && !charged) { - mem_cgroup_charge_skmem(sk->sk_memcg, amt, + if (memcg && !charged) { + mem_cgroup_charge_skmem(memcg, amt, gfp_memcg_charge() | __GFP_NOFAIL); } return 1; @@ -3128,8 +3141,8 @@ suppress_allocation: sk_memory_allocated_sub(sk, amt); - if (memcg_charge && charged) - mem_cgroup_uncharge_skmem(sk->sk_memcg, amt); + if (charged) + mem_cgroup_uncharge_skmem(memcg, amt); return 0; } @@ -3528,11 +3541,9 @@ void release_sock(struct sock *sk) if (sk->sk_backlog.tail) __release_sock(sk); - /* Warning : release_cb() might need to release sk ownership, - * ie call sock_release_ownership(sk) before us. - */ if (sk->sk_prot->release_cb) - sk->sk_prot->release_cb(sk); + INDIRECT_CALL_INET_1(sk->sk_prot->release_cb, + tcp_release_cb, sk); sock_release_ownership(sk); if (waitqueue_active(&sk->sk_lock.wq)) diff --git a/net/core/xdp.c b/net/core/xdp.c index a70670fe9a..b6f1d6dab3 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -696,9 +696,7 @@ struct xdp_frame *xdpf_clone(struct xdp_frame *xdpf) return nxdpf; } -__diag_push(); -__diag_ignore_all("-Wmissing-prototypes", - "Global functions as their definitions will be in vmlinux BTF"); +__bpf_kfunc_start_defs(); /** * bpf_xdp_metadata_rx_timestamp - Read XDP frame RX timestamp. @@ -738,10 +736,10 @@ __bpf_kfunc int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, u32 *hash, return -EOPNOTSUPP; } -__diag_pop(); +__bpf_kfunc_end_defs(); BTF_SET8_START(xdp_metadata_kfunc_ids) -#define XDP_METADATA_KFUNC(_, name) BTF_ID_FLAGS(func, name, KF_TRUSTED_ARGS) +#define XDP_METADATA_KFUNC(_, __, name, ___) BTF_ID_FLAGS(func, name, KF_TRUSTED_ARGS) XDP_METADATA_KFUNC_xxx #undef XDP_METADATA_KFUNC BTF_SET8_END(xdp_metadata_kfunc_ids) @@ -752,7 +750,7 @@ static const struct btf_kfunc_id_set xdp_metadata_kfunc_set = { }; BTF_ID_LIST(xdp_metadata_kfunc_ids_unsorted) -#define XDP_METADATA_KFUNC(name, str) BTF_ID(func, str) +#define XDP_METADATA_KFUNC(name, _, str, __) BTF_ID(func, str) XDP_METADATA_KFUNC_xxx #undef XDP_METADATA_KFUNC diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 524b7e581a..44b033fe1e 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -511,7 +511,7 @@ static int dccp_v4_send_response(const struct sock *sk, struct request_sock *req err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr, ireq->ir_rmt_addr, rcu_dereference(ireq->ireq_opt), - inet_sk(sk)->tos); + READ_ONCE(inet_sk(sk)->tos)); rcu_read_unlock(); err = net_xmit_eval(err); } diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 6f5a556f4f..4550b68066 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -180,7 +180,7 @@ static int dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, goto out; } - if (!sock_owned_by_user(sk) && np->recverr) { + if (!sock_owned_by_user(sk) && inet6_test_bit(RECVERR6, sk)) { sk->sk_err = err; sk_error_report(sk); } else { @@ -239,7 +239,7 @@ static int dccp_v6_send_response(const struct sock *sk, struct request_sock *req if (!opt) opt = rcu_dereference(np->opt); err = ip6_xmit(sk, skb, &fl6, READ_ONCE(sk->sk_mark), opt, - np->tclass, sk->sk_priority); + np->tclass, READ_ONCE(sk->sk_priority)); rcu_read_unlock(); err = net_xmit_eval(err); } @@ -671,10 +671,10 @@ ipv6_pktoptions: if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo) np->mcast_oif = 
inet6_iif(opt_skb); if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) - np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit; + WRITE_ONCE(np->mcast_hops, ipv6_hdr(opt_skb)->hop_limit); if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass) np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb)); - if (np->repflow) + if (inet6_test_bit(REPFLOW, sk)) np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb)); if (ipv6_opt_accepted(sk, opt_skb, &DCCP_SKB_CB(opt_skb)->header.h6)) { @@ -839,7 +839,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, memset(&fl6, 0, sizeof(fl6)); - if (np->sndflow) { + if (inet6_test_bit(SNDFLOW, sk)) { fl6.flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK; IP6_ECN_flow_init(fl6.flowlabel); if (fl6.flowlabel & IPV6_FLOWLABEL_MASK) { diff --git a/net/dccp/timer.c b/net/dccp/timer.c index b3255e87cc..a4cfb47b60 100644 --- a/net/dccp/timer.c +++ b/net/dccp/timer.c @@ -196,8 +196,8 @@ static void dccp_delack_timer(struct timer_list *t) if (inet_csk_ack_scheduled(sk)) { if (!inet_csk_in_pingpong_mode(sk)) { /* Delayed ACK missed: inflate ATO. */ - icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1, - icsk->icsk_rto); + icsk->icsk_ack.ato = min_t(u32, icsk->icsk_ack.ato << 1, + icsk->icsk_rto); } else { /* Delayed ACK missed: leave pingpong mode and * deflate ATO. diff --git a/net/devlink/core.c b/net/devlink/core.c index 6cec4afb01..bc3d265fe2 100644 --- a/net/devlink/core.c +++ b/net/devlink/core.c @@ -16,6 +16,222 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_trap_report); DEFINE_XARRAY_FLAGS(devlinks, XA_FLAGS_ALLOC); +static struct devlink *devlinks_xa_get(unsigned long index) +{ + struct devlink *devlink; + + rcu_read_lock(); + devlink = xa_find(&devlinks, &index, index, DEVLINK_REGISTERED); + if (!devlink || !devlink_try_get(devlink)) + devlink = NULL; + rcu_read_unlock(); + return devlink; +} + +/* devlink_rels xarray contains 1:1 relationships between + * devlink object and related nested devlink instance. + * The xarray index is used to get the nested object from + * the nested-in object code. 
+ */ +static DEFINE_XARRAY_FLAGS(devlink_rels, XA_FLAGS_ALLOC1); + +#define DEVLINK_REL_IN_USE XA_MARK_0 + +struct devlink_rel { + u32 index; + refcount_t refcount; + u32 devlink_index; + struct { + u32 devlink_index; + u32 obj_index; + devlink_rel_notify_cb_t *notify_cb; + devlink_rel_cleanup_cb_t *cleanup_cb; + struct delayed_work notify_work; + } nested_in; +}; + +static void devlink_rel_free(struct devlink_rel *rel) +{ + xa_erase(&devlink_rels, rel->index); + kfree(rel); +} + +static void __devlink_rel_get(struct devlink_rel *rel) +{ + refcount_inc(&rel->refcount); +} + +static void __devlink_rel_put(struct devlink_rel *rel) +{ + if (refcount_dec_and_test(&rel->refcount)) + devlink_rel_free(rel); +} + +static void devlink_rel_nested_in_notify_work(struct work_struct *work) +{ + struct devlink_rel *rel = container_of(work, struct devlink_rel, + nested_in.notify_work.work); + struct devlink *devlink; + + devlink = devlinks_xa_get(rel->nested_in.devlink_index); + if (!devlink) + goto rel_put; + if (!devl_trylock(devlink)) { + devlink_put(devlink); + goto reschedule_work; + } + if (!devl_is_registered(devlink)) { + devl_unlock(devlink); + devlink_put(devlink); + goto rel_put; + } + if (!xa_get_mark(&devlink_rels, rel->index, DEVLINK_REL_IN_USE)) + rel->nested_in.cleanup_cb(devlink, rel->nested_in.obj_index, rel->index); + rel->nested_in.notify_cb(devlink, rel->nested_in.obj_index); + devl_unlock(devlink); + devlink_put(devlink); + +rel_put: + __devlink_rel_put(rel); + return; + +reschedule_work: + schedule_delayed_work(&rel->nested_in.notify_work, 1); +} + +static void devlink_rel_nested_in_notify_work_schedule(struct devlink_rel *rel) +{ + __devlink_rel_get(rel); + schedule_delayed_work(&rel->nested_in.notify_work, 0); +} + +static struct devlink_rel *devlink_rel_alloc(void) +{ + struct devlink_rel *rel; + static u32 next; + int err; + + rel = kzalloc(sizeof(*rel), GFP_KERNEL); + if (!rel) + return ERR_PTR(-ENOMEM); + + err = xa_alloc_cyclic(&devlink_rels, &rel->index, rel, + xa_limit_32b, &next, GFP_KERNEL); + if (err) { + kfree(rel); + return ERR_PTR(err); + } + + refcount_set(&rel->refcount, 1); + INIT_DELAYED_WORK(&rel->nested_in.notify_work, + &devlink_rel_nested_in_notify_work); + return rel; +} + +static void devlink_rel_put(struct devlink *devlink) +{ + struct devlink_rel *rel = devlink->rel; + + if (!rel) + return; + xa_clear_mark(&devlink_rels, rel->index, DEVLINK_REL_IN_USE); + devlink_rel_nested_in_notify_work_schedule(rel); + __devlink_rel_put(rel); + devlink->rel = NULL; +} + +void devlink_rel_nested_in_clear(u32 rel_index) +{ + xa_clear_mark(&devlink_rels, rel_index, DEVLINK_REL_IN_USE); +} + +int devlink_rel_nested_in_add(u32 *rel_index, u32 devlink_index, + u32 obj_index, devlink_rel_notify_cb_t *notify_cb, + devlink_rel_cleanup_cb_t *cleanup_cb, + struct devlink *devlink) +{ + struct devlink_rel *rel = devlink_rel_alloc(); + + ASSERT_DEVLINK_NOT_REGISTERED(devlink); + + if (IS_ERR(rel)) + return PTR_ERR(rel); + + rel->devlink_index = devlink->index; + rel->nested_in.devlink_index = devlink_index; + rel->nested_in.obj_index = obj_index; + rel->nested_in.notify_cb = notify_cb; + rel->nested_in.cleanup_cb = cleanup_cb; + *rel_index = rel->index; + xa_set_mark(&devlink_rels, rel->index, DEVLINK_REL_IN_USE); + devlink->rel = rel; + return 0; +} + +/** + * devlink_rel_nested_in_notify - Notify the object this devlink + * instance is nested in. + * @devlink: devlink + * + * This is called upon network namespace change of devlink instance. 
+ * In case this devlink instance is nested in another devlink object, + * a notification of a change of this object should be sent + * over netlink. The parent devlink instance lock needs to be + * taken during the notification preparation. + * However, since the devlink lock of nested instance is held here, + * we would end with wrong devlink instance lock ordering and + * deadlock. Therefore the work is utilized to avoid that. + */ +void devlink_rel_nested_in_notify(struct devlink *devlink) +{ + struct devlink_rel *rel = devlink->rel; + + if (!rel) + return; + devlink_rel_nested_in_notify_work_schedule(rel); +} + +static struct devlink_rel *devlink_rel_find(unsigned long rel_index) +{ + return xa_find(&devlink_rels, &rel_index, rel_index, + DEVLINK_REL_IN_USE); +} + +static struct devlink *devlink_rel_devlink_get(u32 rel_index) +{ + struct devlink_rel *rel; + u32 devlink_index; + + if (!rel_index) + return NULL; + xa_lock(&devlink_rels); + rel = devlink_rel_find(rel_index); + if (rel) + devlink_index = rel->devlink_index; + xa_unlock(&devlink_rels); + if (!rel) + return NULL; + return devlinks_xa_get(devlink_index); +} + +int devlink_rel_devlink_handle_put(struct sk_buff *msg, struct devlink *devlink, + u32 rel_index, int attrtype, + bool *msg_updated) +{ + struct net *net = devlink_net(devlink); + struct devlink *rel_devlink; + int err; + + rel_devlink = devlink_rel_devlink_get(rel_index); + if (!rel_devlink) + return 0; + err = devlink_nl_put_nested_handle(msg, net, rel_devlink, attrtype); + devlink_put(rel_devlink); + if (!err && msg_updated) + *msg_updated = true; + return err; +} + void *devlink_priv(struct devlink *devlink) { return &devlink->priv; @@ -97,6 +313,7 @@ static void devlink_release(struct work_struct *work) mutex_destroy(&devlink->lock); lockdep_unregister_key(&devlink->lock_key); + put_device(devlink->dev); kfree(devlink); } @@ -142,6 +359,7 @@ int devl_register(struct devlink *devlink) xa_set_mark(&devlinks, devlink->index, DEVLINK_REGISTERED); devlink_notify_register(devlink); + devlink_rel_nested_in_notify(devlink); return 0; } @@ -166,6 +384,7 @@ void devl_unregister(struct devlink *devlink) devlink_notify_unregister(devlink); xa_clear_mark(&devlinks, devlink->index, DEVLINK_REGISTERED); + devlink_rel_put(devlink); } EXPORT_SYMBOL_GPL(devl_unregister); @@ -210,11 +429,12 @@ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops, if (ret < 0) goto err_xa_alloc; - devlink->dev = dev; + devlink->dev = get_device(dev); devlink->ops = ops; xa_init_flags(&devlink->ports, XA_FLAGS_ALLOC); xa_init_flags(&devlink->params, XA_FLAGS_ALLOC); xa_init_flags(&devlink->snapshot_ids, XA_FLAGS_ALLOC); + xa_init_flags(&devlink->nested_rels, XA_FLAGS_ALLOC); write_pnet(&devlink->_net, net); INIT_LIST_HEAD(&devlink->rate_list); INIT_LIST_HEAD(&devlink->linecard_list); @@ -261,6 +481,7 @@ void devlink_free(struct devlink *devlink) WARN_ON(!list_empty(&devlink->linecard_list)); WARN_ON(!xa_empty(&devlink->ports)); + xa_destroy(&devlink->nested_rels); xa_destroy(&devlink->snapshot_ids); xa_destroy(&devlink->params); xa_destroy(&devlink->ports); @@ -308,14 +529,20 @@ static int __init devlink_init(void) { int err; - err = genl_register_family(&devlink_nl_family); - if (err) - goto out; err = register_pernet_subsys(&devlink_pernet_ops); if (err) goto out; + err = genl_register_family(&devlink_nl_family); + if (err) + goto out_unreg_pernet_subsys; err = register_netdevice_notifier(&devlink_port_netdevice_nb); + if (!err) + return 0; + + genl_unregister_family(&devlink_nl_family); 
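/* A condensed summary (illustration, not upstream text) of the init
 * ordering this hunk establishes: devlink_init() now registers in
 * dependency order and unwinds in strict reverse order on failure:
 *
 *	register_pernet_subsys()        undone last, via the label below
 *	genl_register_family()          undone by the call just above
 *	register_netdevice_notifier()   final step; a failure here
 *	                                starts the unwind
 */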
+out_unreg_pernet_subsys: + unregister_pernet_subsys(&devlink_pernet_ops); out: WARN_ON(err); return err; diff --git a/net/devlink/dev.c b/net/devlink/dev.c index bba4ace7d2..4fc7adb326 100644 --- a/net/devlink/dev.c +++ b/net/devlink/dev.c @@ -138,6 +138,23 @@ nla_put_failure: return -EMSGSIZE; } +static int devlink_nl_nested_fill(struct sk_buff *msg, struct devlink *devlink) +{ + unsigned long rel_index; + void *unused; + int err; + + xa_for_each(&devlink->nested_rels, rel_index, unused) { + err = devlink_rel_devlink_handle_put(msg, devlink, + rel_index, + DEVLINK_ATTR_NESTED_DEVLINK, + NULL); + if (err) + return err; + } + return 0; +} + static int devlink_nl_fill(struct sk_buff *msg, struct devlink *devlink, enum devlink_command cmd, u32 portid, u32 seq, int flags) @@ -164,6 +181,10 @@ static int devlink_nl_fill(struct sk_buff *msg, struct devlink *devlink, goto dev_stats_nest_cancel; nla_nest_end(msg, dev_stats); + + if (devlink_nl_nested_fill(msg, devlink)) + goto nla_put_failure; + genlmsg_end(msg, hdr); return 0; @@ -230,6 +251,34 @@ int devlink_nl_get_dumpit(struct sk_buff *msg, struct netlink_callback *cb) return devlink_nl_dumpit(msg, cb, devlink_nl_get_dump_one); } +static void devlink_rel_notify_cb(struct devlink *devlink, u32 obj_index) +{ + devlink_notify(devlink, DEVLINK_CMD_NEW); +} + +static void devlink_rel_cleanup_cb(struct devlink *devlink, u32 obj_index, + u32 rel_index) +{ + xa_erase(&devlink->nested_rels, rel_index); +} + +int devl_nested_devlink_set(struct devlink *devlink, + struct devlink *nested_devlink) +{ + u32 rel_index; + int err; + + err = devlink_rel_nested_in_add(&rel_index, devlink->index, 0, + devlink_rel_notify_cb, + devlink_rel_cleanup_cb, + nested_devlink); + if (err) + return err; + return xa_insert(&devlink->nested_rels, rel_index, + xa_mk_value(0), GFP_KERNEL); +} +EXPORT_SYMBOL_GPL(devl_nested_devlink_set); + void devlink_notify_register(struct devlink *devlink) { devlink_notify(devlink, DEVLINK_CMD_NEW); @@ -372,6 +421,7 @@ static void devlink_reload_netns_change(struct devlink *devlink, devlink_notify_unregister(devlink); write_pnet(&devlink->_net, dest_net); devlink_notify_register(devlink); + devlink_rel_nested_in_notify(devlink); } int devlink_reload(struct devlink *devlink, struct net *dest_net, @@ -442,7 +492,7 @@ free_msg: return -EMSGSIZE; } -int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info) +int devlink_nl_reload_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; enum devlink_reload_action action; @@ -608,7 +658,7 @@ nla_put_failure: return err; } -int devlink_nl_cmd_eswitch_get_doit(struct sk_buff *skb, struct genl_info *info) +int devlink_nl_eswitch_get_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; struct sk_buff *msg; @@ -629,7 +679,7 @@ int devlink_nl_cmd_eswitch_get_doit(struct sk_buff *skb, struct genl_info *info) return genlmsg_reply(msg, info); } -int devlink_nl_cmd_eswitch_set_doit(struct sk_buff *skb, struct genl_info *info) +int devlink_nl_eswitch_set_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; const struct devlink_ops *ops = devlink->ops; @@ -1058,7 +1108,7 @@ static int devlink_flash_component_get(struct devlink *devlink, return 0; } -int devlink_nl_cmd_flash_update(struct sk_buff *skb, struct genl_info *info) +int devlink_nl_flash_update_doit(struct sk_buff *skb, struct genl_info *info) { struct nlattr *nla_overwrite_mask, *nla_file_name; struct 
devlink_flash_update_params params = {}; @@ -1301,7 +1351,7 @@ static const struct nla_policy devlink_selftest_nl_policy[DEVLINK_ATTR_SELFTEST_ [DEVLINK_ATTR_SELFTEST_ID_FLASH] = { .type = NLA_FLAG }, }; -int devlink_nl_cmd_selftests_run(struct sk_buff *skb, struct genl_info *info) +int devlink_nl_selftests_run_doit(struct sk_buff *skb, struct genl_info *info) { struct nlattr *tb[DEVLINK_ATTR_SELFTEST_ID_MAX + 1]; struct devlink *devlink = info->user_ptr[0]; diff --git a/net/devlink/devl_internal.h b/net/devlink/devl_internal.h index f6b5fea2e1..183dbe3807 100644 --- a/net/devlink/devl_internal.h +++ b/net/devlink/devl_internal.h @@ -17,6 +17,8 @@ #include "netlink_gen.h" +struct devlink_rel; + #define DEVLINK_REGISTERED XA_MARK_1 #define DEVLINK_RELOAD_STATS_ARRAY_SIZE \ @@ -55,6 +57,8 @@ struct devlink { u8 reload_failed:1; refcount_t refcount; struct rcu_work rwork; + struct devlink_rel *rel; + struct xarray nested_rels; char priv[] __aligned(NETDEV_ALIGN); }; @@ -92,6 +96,20 @@ static inline bool devl_is_registered(struct devlink *devlink) return xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED); } +typedef void devlink_rel_notify_cb_t(struct devlink *devlink, u32 obj_index); +typedef void devlink_rel_cleanup_cb_t(struct devlink *devlink, u32 obj_index, + u32 rel_index); + +void devlink_rel_nested_in_clear(u32 rel_index); +int devlink_rel_nested_in_add(u32 *rel_index, u32 devlink_index, + u32 obj_index, devlink_rel_notify_cb_t *notify_cb, + devlink_rel_cleanup_cb_t *cleanup_cb, + struct devlink *devlink); +void devlink_rel_nested_in_notify(struct devlink *devlink); +int devlink_rel_devlink_handle_put(struct sk_buff *msg, struct devlink *devlink, + u32 rel_index, int attrtype, + bool *msg_updated); + /* Netlink */ #define DEVLINK_NL_FLAG_NEED_PORT BIT(0) #define DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT BIT(1) @@ -145,6 +163,8 @@ devlink_nl_put_handle(struct sk_buff *msg, struct devlink *devlink) return 0; } +int devlink_nl_put_nested_handle(struct sk_buff *msg, struct net *net, + struct devlink *devlink, int attrtype); int devlink_nl_msg_reply_and_new(struct sk_buff **msg, struct genl_info *info); /* Notify */ @@ -206,80 +226,4 @@ int devlink_rate_nodes_check(struct devlink *devlink, u16 mode, struct netlink_ext_ack *extack); /* Linecards */ -struct devlink_linecard { - struct list_head list; - struct devlink *devlink; - unsigned int index; - const struct devlink_linecard_ops *ops; - void *priv; - enum devlink_linecard_state state; - struct mutex state_lock; /* Protects state */ - const char *type; - struct devlink_linecard_type *types; - unsigned int types_count; - struct devlink *nested_devlink; -}; - -/* Devlink nl cmds */ -int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info); -int devlink_nl_cmd_eswitch_get_doit(struct sk_buff *skb, struct genl_info *info); -int devlink_nl_cmd_eswitch_set_doit(struct sk_buff *skb, struct genl_info *info); -int devlink_nl_cmd_flash_update(struct sk_buff *skb, struct genl_info *info); -int devlink_nl_cmd_selftests_run(struct sk_buff *skb, struct genl_info *info); -int devlink_nl_cmd_port_set_doit(struct sk_buff *skb, struct genl_info *info); -int devlink_nl_cmd_port_split_doit(struct sk_buff *skb, struct genl_info *info); -int devlink_nl_cmd_port_unsplit_doit(struct sk_buff *skb, - struct genl_info *info); -int devlink_nl_cmd_port_new_doit(struct sk_buff *skb, struct genl_info *info); -int devlink_nl_cmd_port_del_doit(struct sk_buff *skb, struct genl_info *info); -int devlink_nl_cmd_sb_pool_set_doit(struct sk_buff *skb, struct 
genl_info *info); -int devlink_nl_cmd_sb_port_pool_set_doit(struct sk_buff *skb, - struct genl_info *info); -int devlink_nl_cmd_sb_tc_pool_bind_set_doit(struct sk_buff *skb, - struct genl_info *info); -int devlink_nl_cmd_sb_occ_snapshot_doit(struct sk_buff *skb, - struct genl_info *info); -int devlink_nl_cmd_sb_occ_max_clear_doit(struct sk_buff *skb, - struct genl_info *info); -int devlink_nl_cmd_dpipe_table_get(struct sk_buff *skb, struct genl_info *info); -int devlink_nl_cmd_dpipe_entries_get(struct sk_buff *skb, - struct genl_info *info); -int devlink_nl_cmd_dpipe_headers_get(struct sk_buff *skb, - struct genl_info *info); -int devlink_nl_cmd_dpipe_table_counters_set(struct sk_buff *skb, - struct genl_info *info); -int devlink_nl_cmd_resource_set(struct sk_buff *skb, struct genl_info *info); -int devlink_nl_cmd_resource_dump(struct sk_buff *skb, struct genl_info *info); -int devlink_nl_cmd_param_set_doit(struct sk_buff *skb, struct genl_info *info); -int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg, - struct netlink_callback *cb); -int devlink_nl_cmd_port_param_get_doit(struct sk_buff *skb, - struct genl_info *info); -int devlink_nl_cmd_port_param_set_doit(struct sk_buff *skb, - struct genl_info *info); -int devlink_nl_cmd_region_new(struct sk_buff *skb, struct genl_info *info); -int devlink_nl_cmd_region_del(struct sk_buff *skb, struct genl_info *info); -int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb, - struct netlink_callback *cb); -int devlink_nl_cmd_health_reporter_set_doit(struct sk_buff *skb, - struct genl_info *info); -int devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb, - struct genl_info *info); -int devlink_nl_cmd_health_reporter_diagnose_doit(struct sk_buff *skb, - struct genl_info *info); -int devlink_nl_cmd_health_reporter_dump_get_dumpit(struct sk_buff *skb, - struct netlink_callback *cb); -int devlink_nl_cmd_health_reporter_dump_clear_doit(struct sk_buff *skb, - struct genl_info *info); -int devlink_nl_cmd_health_reporter_test_doit(struct sk_buff *skb, - struct genl_info *info); -int devlink_nl_cmd_trap_set_doit(struct sk_buff *skb, struct genl_info *info); -int devlink_nl_cmd_trap_group_set_doit(struct sk_buff *skb, - struct genl_info *info); -int devlink_nl_cmd_trap_policer_set_doit(struct sk_buff *skb, - struct genl_info *info); -int devlink_nl_cmd_rate_set_doit(struct sk_buff *skb, struct genl_info *info); -int devlink_nl_cmd_rate_new_doit(struct sk_buff *skb, struct genl_info *info); -int devlink_nl_cmd_rate_del_doit(struct sk_buff *skb, struct genl_info *info); -int devlink_nl_cmd_linecard_set_doit(struct sk_buff *skb, - struct genl_info *info); +unsigned int devlink_linecard_index(struct devlink_linecard *linecard); diff --git a/net/devlink/dpipe.c b/net/devlink/dpipe.c index 431227c412..a72a9292ef 100644 --- a/net/devlink/dpipe.c +++ b/net/devlink/dpipe.c @@ -289,7 +289,7 @@ err_table_put: return err; } -int devlink_nl_cmd_dpipe_table_get(struct sk_buff *skb, struct genl_info *info) +int devlink_nl_dpipe_table_get_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; const char *table_name = NULL; @@ -562,8 +562,8 @@ send_done: return genlmsg_reply(dump_ctx.skb, info); } -int devlink_nl_cmd_dpipe_entries_get(struct sk_buff *skb, - struct genl_info *info) +int devlink_nl_dpipe_entries_get_doit(struct sk_buff *skb, + struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; struct devlink_dpipe_table *table; @@ -712,8 +712,8 @@ err_table_put: return err; } -int 
devlink_nl_cmd_dpipe_headers_get(struct sk_buff *skb, - struct genl_info *info) +int devlink_nl_dpipe_headers_get_doit(struct sk_buff *skb, + struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; @@ -746,8 +746,8 @@ static int devlink_dpipe_table_counters_set(struct devlink *devlink, return 0; } -int devlink_nl_cmd_dpipe_table_counters_set(struct sk_buff *skb, - struct genl_info *info) +int devlink_nl_dpipe_table_counters_set_doit(struct sk_buff *skb, + struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; const char *table_name; diff --git a/net/devlink/health.c b/net/devlink/health.c index 51e6e81e31..695df61f8a 100644 --- a/net/devlink/health.c +++ b/net/devlink/health.c @@ -19,6 +19,7 @@ struct devlink_fmsg_item { struct devlink_fmsg { struct list_head item_list; + int err; /* first error encountered on some devlink_fmsg_XXX() call */ bool putting_binary; /* This flag forces enclosing of binary data * in an array brackets. It forces using * of designated API: @@ -451,8 +452,8 @@ int devlink_nl_health_reporter_get_dumpit(struct sk_buff *skb, devlink_nl_health_reporter_get_dump_one); } -int devlink_nl_cmd_health_reporter_set_doit(struct sk_buff *skb, - struct genl_info *info) +int devlink_nl_health_reporter_set_doit(struct sk_buff *skb, + struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; struct devlink_health_reporter *reporter; @@ -562,21 +563,18 @@ static int devlink_health_do_dump(struct devlink_health_reporter *reporter, return 0; reporter->dump_fmsg = devlink_fmsg_alloc(); - if (!reporter->dump_fmsg) { - err = -ENOMEM; - return err; - } + if (!reporter->dump_fmsg) + return -ENOMEM; - err = devlink_fmsg_obj_nest_start(reporter->dump_fmsg); - if (err) - goto dump_err; + devlink_fmsg_obj_nest_start(reporter->dump_fmsg); err = reporter->ops->dump(reporter, reporter->dump_fmsg, priv_ctx, extack); if (err) goto dump_err; - err = devlink_fmsg_obj_nest_end(reporter->dump_fmsg); + devlink_fmsg_obj_nest_end(reporter->dump_fmsg); + err = reporter->dump_fmsg->err; if (err) goto dump_err; @@ -657,8 +655,8 @@ devlink_health_reporter_state_update(struct devlink_health_reporter *reporter, } EXPORT_SYMBOL_GPL(devlink_health_reporter_state_update); -int devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb, - struct genl_info *info) +int devlink_nl_health_reporter_recover_doit(struct sk_buff *skb, + struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; struct devlink_health_reporter *reporter; @@ -670,373 +668,258 @@ int devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb, return devlink_health_reporter_recover(reporter, NULL, info->extack); } -static int devlink_fmsg_nest_common(struct devlink_fmsg *fmsg, - int attrtype) +static void devlink_fmsg_err_if_binary(struct devlink_fmsg *fmsg) +{ + if (!fmsg->err && fmsg->putting_binary) + fmsg->err = -EINVAL; +} + +static void devlink_fmsg_nest_common(struct devlink_fmsg *fmsg, int attrtype) { struct devlink_fmsg_item *item; + if (fmsg->err) + return; + item = kzalloc(sizeof(*item), GFP_KERNEL); - if (!item) - return -ENOMEM; + if (!item) { + fmsg->err = -ENOMEM; + return; + } item->attrtype = attrtype; list_add_tail(&item->list, &fmsg->item_list); - - return 0; } -int devlink_fmsg_obj_nest_start(struct devlink_fmsg *fmsg) +void devlink_fmsg_obj_nest_start(struct devlink_fmsg *fmsg) { - if (fmsg->putting_binary) - return -EINVAL; - - return devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_OBJ_NEST_START); + devlink_fmsg_err_if_binary(fmsg); + 
devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_OBJ_NEST_START); } EXPORT_SYMBOL_GPL(devlink_fmsg_obj_nest_start); -static int devlink_fmsg_nest_end(struct devlink_fmsg *fmsg) +static void devlink_fmsg_nest_end(struct devlink_fmsg *fmsg) { - if (fmsg->putting_binary) - return -EINVAL; - - return devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_NEST_END); + devlink_fmsg_err_if_binary(fmsg); + devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_NEST_END); } -int devlink_fmsg_obj_nest_end(struct devlink_fmsg *fmsg) +void devlink_fmsg_obj_nest_end(struct devlink_fmsg *fmsg) { - if (fmsg->putting_binary) - return -EINVAL; - - return devlink_fmsg_nest_end(fmsg); + devlink_fmsg_nest_end(fmsg); } EXPORT_SYMBOL_GPL(devlink_fmsg_obj_nest_end); #define DEVLINK_FMSG_MAX_SIZE (GENLMSG_DEFAULT_SIZE - GENL_HDRLEN - NLA_HDRLEN) -static int devlink_fmsg_put_name(struct devlink_fmsg *fmsg, const char *name) +static void devlink_fmsg_put_name(struct devlink_fmsg *fmsg, const char *name) { struct devlink_fmsg_item *item; - if (fmsg->putting_binary) - return -EINVAL; + devlink_fmsg_err_if_binary(fmsg); + if (fmsg->err) + return; - if (strlen(name) + 1 > DEVLINK_FMSG_MAX_SIZE) - return -EMSGSIZE; + if (strlen(name) + 1 > DEVLINK_FMSG_MAX_SIZE) { + fmsg->err = -EMSGSIZE; + return; + } item = kzalloc(sizeof(*item) + strlen(name) + 1, GFP_KERNEL); - if (!item) - return -ENOMEM; + if (!item) { + fmsg->err = -ENOMEM; + return; + } item->nla_type = NLA_NUL_STRING; item->len = strlen(name) + 1; item->attrtype = DEVLINK_ATTR_FMSG_OBJ_NAME; memcpy(&item->value, name, item->len); list_add_tail(&item->list, &fmsg->item_list); - - return 0; } -int devlink_fmsg_pair_nest_start(struct devlink_fmsg *fmsg, const char *name) +void devlink_fmsg_pair_nest_start(struct devlink_fmsg *fmsg, const char *name) { - int err; - - if (fmsg->putting_binary) - return -EINVAL; - - err = devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_PAIR_NEST_START); - if (err) - return err; - - err = devlink_fmsg_put_name(fmsg, name); - if (err) - return err; - - return 0; + devlink_fmsg_err_if_binary(fmsg); + devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_PAIR_NEST_START); + devlink_fmsg_put_name(fmsg, name); } EXPORT_SYMBOL_GPL(devlink_fmsg_pair_nest_start); -int devlink_fmsg_pair_nest_end(struct devlink_fmsg *fmsg) +void devlink_fmsg_pair_nest_end(struct devlink_fmsg *fmsg) { - if (fmsg->putting_binary) - return -EINVAL; - - return devlink_fmsg_nest_end(fmsg); + devlink_fmsg_nest_end(fmsg); } EXPORT_SYMBOL_GPL(devlink_fmsg_pair_nest_end); -int devlink_fmsg_arr_pair_nest_start(struct devlink_fmsg *fmsg, - const char *name) +void devlink_fmsg_arr_pair_nest_start(struct devlink_fmsg *fmsg, + const char *name) { - int err; - - if (fmsg->putting_binary) - return -EINVAL; - - err = devlink_fmsg_pair_nest_start(fmsg, name); - if (err) - return err; - - err = devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_ARR_NEST_START); - if (err) - return err; - - return 0; + devlink_fmsg_pair_nest_start(fmsg, name); + devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_ARR_NEST_START); } EXPORT_SYMBOL_GPL(devlink_fmsg_arr_pair_nest_start); -int devlink_fmsg_arr_pair_nest_end(struct devlink_fmsg *fmsg) +void devlink_fmsg_arr_pair_nest_end(struct devlink_fmsg *fmsg) { - int err; - - if (fmsg->putting_binary) - return -EINVAL; - - err = devlink_fmsg_nest_end(fmsg); - if (err) - return err; - - err = devlink_fmsg_nest_end(fmsg); - if (err) - return err; - - return 0; + devlink_fmsg_nest_end(fmsg); + devlink_fmsg_nest_end(fmsg); } 
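/* The conversions in this file all follow the same "sticky first error"
 * pattern: each formerly int-returning devlink_fmsg_*() helper becomes
 * void, records the first failure in fmsg->err, degrades to a no-op
 * afterwards, and the error surfaces exactly once at send time. A
 * minimal sketch of the pattern (names hypothetical, not the real API):
 *
 *	void fmsg_put(struct fmsg *f, const void *v, u16 len)
 *	{
 *		if (f->err)
 *			return;			// earlier failure: no-op
 *		if (store_item(f, v, len) < 0)
 *			f->err = -ENOMEM;	// keep only the first error
 *	}
 *
 *	int fmsg_send(struct fmsg *f)
 *	{
 *		if (f->err)
 *			return f->err;		// reported once, at the end
 *		// ... build and send the netlink message ...
 *		return 0;
 *	}
 *
 * Callers can then chain any number of puts without checking a return
 * value after each one, which is what devlink_health_do_dump() and the
 * diagnose handler above now rely on.
 */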
EXPORT_SYMBOL_GPL(devlink_fmsg_arr_pair_nest_end); -int devlink_fmsg_binary_pair_nest_start(struct devlink_fmsg *fmsg, - const char *name) +void devlink_fmsg_binary_pair_nest_start(struct devlink_fmsg *fmsg, + const char *name) { - int err; - - err = devlink_fmsg_arr_pair_nest_start(fmsg, name); - if (err) - return err; - + devlink_fmsg_arr_pair_nest_start(fmsg, name); fmsg->putting_binary = true; - return err; } EXPORT_SYMBOL_GPL(devlink_fmsg_binary_pair_nest_start); -int devlink_fmsg_binary_pair_nest_end(struct devlink_fmsg *fmsg) +void devlink_fmsg_binary_pair_nest_end(struct devlink_fmsg *fmsg) { + if (fmsg->err) + return; + if (!fmsg->putting_binary) - return -EINVAL; + fmsg->err = -EINVAL; fmsg->putting_binary = false; - return devlink_fmsg_arr_pair_nest_end(fmsg); + devlink_fmsg_arr_pair_nest_end(fmsg); } EXPORT_SYMBOL_GPL(devlink_fmsg_binary_pair_nest_end); -static int devlink_fmsg_put_value(struct devlink_fmsg *fmsg, - const void *value, u16 value_len, - u8 value_nla_type) +static void devlink_fmsg_put_value(struct devlink_fmsg *fmsg, + const void *value, u16 value_len, + u8 value_nla_type) { struct devlink_fmsg_item *item; - if (value_len > DEVLINK_FMSG_MAX_SIZE) - return -EMSGSIZE; + if (fmsg->err) + return; + + if (value_len > DEVLINK_FMSG_MAX_SIZE) { + fmsg->err = -EMSGSIZE; + return; + } item = kzalloc(sizeof(*item) + value_len, GFP_KERNEL); - if (!item) - return -ENOMEM; + if (!item) { + fmsg->err = -ENOMEM; + return; + } item->nla_type = value_nla_type; item->len = value_len; item->attrtype = DEVLINK_ATTR_FMSG_OBJ_VALUE_DATA; memcpy(&item->value, value, item->len); list_add_tail(&item->list, &fmsg->item_list); - - return 0; } -static int devlink_fmsg_bool_put(struct devlink_fmsg *fmsg, bool value) +static void devlink_fmsg_bool_put(struct devlink_fmsg *fmsg, bool value) { - if (fmsg->putting_binary) - return -EINVAL; - - return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_FLAG); + devlink_fmsg_err_if_binary(fmsg); + devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_FLAG); } -static int devlink_fmsg_u8_put(struct devlink_fmsg *fmsg, u8 value) +static void devlink_fmsg_u8_put(struct devlink_fmsg *fmsg, u8 value) { - if (fmsg->putting_binary) - return -EINVAL; - - return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_U8); + devlink_fmsg_err_if_binary(fmsg); + devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_U8); } -int devlink_fmsg_u32_put(struct devlink_fmsg *fmsg, u32 value) +void devlink_fmsg_u32_put(struct devlink_fmsg *fmsg, u32 value) { - if (fmsg->putting_binary) - return -EINVAL; - - return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_U32); + devlink_fmsg_err_if_binary(fmsg); + devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_U32); } EXPORT_SYMBOL_GPL(devlink_fmsg_u32_put); -static int devlink_fmsg_u64_put(struct devlink_fmsg *fmsg, u64 value) +static void devlink_fmsg_u64_put(struct devlink_fmsg *fmsg, u64 value) { - if (fmsg->putting_binary) - return -EINVAL; - - return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_U64); + devlink_fmsg_err_if_binary(fmsg); + devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_U64); } -int devlink_fmsg_string_put(struct devlink_fmsg *fmsg, const char *value) +void devlink_fmsg_string_put(struct devlink_fmsg *fmsg, const char *value) { - if (fmsg->putting_binary) - return -EINVAL; - - return devlink_fmsg_put_value(fmsg, value, strlen(value) + 1, - NLA_NUL_STRING); + devlink_fmsg_err_if_binary(fmsg); + devlink_fmsg_put_value(fmsg, value, strlen(value) + 1, NLA_NUL_STRING); 
} EXPORT_SYMBOL_GPL(devlink_fmsg_string_put); -int devlink_fmsg_binary_put(struct devlink_fmsg *fmsg, const void *value, - u16 value_len) +void devlink_fmsg_binary_put(struct devlink_fmsg *fmsg, const void *value, + u16 value_len) { - if (!fmsg->putting_binary) - return -EINVAL; + if (!fmsg->err && !fmsg->putting_binary) + fmsg->err = -EINVAL; - return devlink_fmsg_put_value(fmsg, value, value_len, NLA_BINARY); + devlink_fmsg_put_value(fmsg, value, value_len, NLA_BINARY); } EXPORT_SYMBOL_GPL(devlink_fmsg_binary_put); -int devlink_fmsg_bool_pair_put(struct devlink_fmsg *fmsg, const char *name, - bool value) +void devlink_fmsg_bool_pair_put(struct devlink_fmsg *fmsg, const char *name, + bool value) { - int err; - - err = devlink_fmsg_pair_nest_start(fmsg, name); - if (err) - return err; - - err = devlink_fmsg_bool_put(fmsg, value); - if (err) - return err; - - err = devlink_fmsg_pair_nest_end(fmsg); - if (err) - return err; - - return 0; + devlink_fmsg_pair_nest_start(fmsg, name); + devlink_fmsg_bool_put(fmsg, value); + devlink_fmsg_pair_nest_end(fmsg); } EXPORT_SYMBOL_GPL(devlink_fmsg_bool_pair_put); -int devlink_fmsg_u8_pair_put(struct devlink_fmsg *fmsg, const char *name, - u8 value) +void devlink_fmsg_u8_pair_put(struct devlink_fmsg *fmsg, const char *name, + u8 value) { - int err; - - err = devlink_fmsg_pair_nest_start(fmsg, name); - if (err) - return err; - - err = devlink_fmsg_u8_put(fmsg, value); - if (err) - return err; - - err = devlink_fmsg_pair_nest_end(fmsg); - if (err) - return err; - - return 0; + devlink_fmsg_pair_nest_start(fmsg, name); + devlink_fmsg_u8_put(fmsg, value); + devlink_fmsg_pair_nest_end(fmsg); } EXPORT_SYMBOL_GPL(devlink_fmsg_u8_pair_put); -int devlink_fmsg_u32_pair_put(struct devlink_fmsg *fmsg, const char *name, - u32 value) +void devlink_fmsg_u32_pair_put(struct devlink_fmsg *fmsg, const char *name, + u32 value) { - int err; - - err = devlink_fmsg_pair_nest_start(fmsg, name); - if (err) - return err; - - err = devlink_fmsg_u32_put(fmsg, value); - if (err) - return err; - - err = devlink_fmsg_pair_nest_end(fmsg); - if (err) - return err; - - return 0; + devlink_fmsg_pair_nest_start(fmsg, name); + devlink_fmsg_u32_put(fmsg, value); + devlink_fmsg_pair_nest_end(fmsg); } EXPORT_SYMBOL_GPL(devlink_fmsg_u32_pair_put); -int devlink_fmsg_u64_pair_put(struct devlink_fmsg *fmsg, const char *name, - u64 value) +void devlink_fmsg_u64_pair_put(struct devlink_fmsg *fmsg, const char *name, + u64 value) { - int err; - - err = devlink_fmsg_pair_nest_start(fmsg, name); - if (err) - return err; - - err = devlink_fmsg_u64_put(fmsg, value); - if (err) - return err; - - err = devlink_fmsg_pair_nest_end(fmsg); - if (err) - return err; - - return 0; + devlink_fmsg_pair_nest_start(fmsg, name); + devlink_fmsg_u64_put(fmsg, value); + devlink_fmsg_pair_nest_end(fmsg); } EXPORT_SYMBOL_GPL(devlink_fmsg_u64_pair_put); -int devlink_fmsg_string_pair_put(struct devlink_fmsg *fmsg, const char *name, - const char *value) +void devlink_fmsg_string_pair_put(struct devlink_fmsg *fmsg, const char *name, + const char *value) { - int err; - - err = devlink_fmsg_pair_nest_start(fmsg, name); - if (err) - return err; - - err = devlink_fmsg_string_put(fmsg, value); - if (err) - return err; - - err = devlink_fmsg_pair_nest_end(fmsg); - if (err) - return err; - - return 0; + devlink_fmsg_pair_nest_start(fmsg, name); + devlink_fmsg_string_put(fmsg, value); + devlink_fmsg_pair_nest_end(fmsg); } EXPORT_SYMBOL_GPL(devlink_fmsg_string_pair_put); -int devlink_fmsg_binary_pair_put(struct devlink_fmsg *fmsg, 
const char *name, - const void *value, u32 value_len) +void devlink_fmsg_binary_pair_put(struct devlink_fmsg *fmsg, const char *name, + const void *value, u32 value_len) { u32 data_size; - int end_err; u32 offset; - int err; - err = devlink_fmsg_binary_pair_nest_start(fmsg, name); - if (err) - return err; + devlink_fmsg_binary_pair_nest_start(fmsg, name); for (offset = 0; offset < value_len; offset += data_size) { data_size = value_len - offset; if (data_size > DEVLINK_FMSG_MAX_SIZE) data_size = DEVLINK_FMSG_MAX_SIZE; - err = devlink_fmsg_binary_put(fmsg, value + offset, data_size); - if (err) - break; - /* Exit from loop with a break (instead of - * return) to make sure putting_binary is turned off in - * devlink_fmsg_binary_pair_nest_end - */ - } - end_err = devlink_fmsg_binary_pair_nest_end(fmsg); - if (end_err) - err = end_err; + devlink_fmsg_binary_put(fmsg, value + offset, data_size); + } - return err; + devlink_fmsg_binary_pair_nest_end(fmsg); + fmsg->putting_binary = false; } EXPORT_SYMBOL_GPL(devlink_fmsg_binary_pair_put); @@ -1146,6 +1029,9 @@ static int devlink_fmsg_snd(struct devlink_fmsg *fmsg, void *hdr; int err; + if (fmsg->err) + return fmsg->err; + while (!last) { int tmp_index = index; @@ -1199,6 +1085,9 @@ static int devlink_fmsg_dumpit(struct devlink_fmsg *fmsg, struct sk_buff *skb, void *hdr; int err; + if (fmsg->err) + return fmsg->err; + hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, &devlink_nl_family, NLM_F_ACK | NLM_F_MULTI, cmd); if (!hdr) { @@ -1219,8 +1108,8 @@ nla_put_failure: return err; } -int devlink_nl_cmd_health_reporter_diagnose_doit(struct sk_buff *skb, - struct genl_info *info) +int devlink_nl_health_reporter_diagnose_doit(struct sk_buff *skb, + struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; struct devlink_health_reporter *reporter; @@ -1238,17 +1127,13 @@ int devlink_nl_cmd_health_reporter_diagnose_doit(struct sk_buff *skb, if (!fmsg) return -ENOMEM; - err = devlink_fmsg_obj_nest_start(fmsg); - if (err) - goto out; + devlink_fmsg_obj_nest_start(fmsg); err = reporter->ops->diagnose(reporter, fmsg, info->extack); if (err) goto out; - err = devlink_fmsg_obj_nest_end(fmsg); - if (err) - goto out; + devlink_fmsg_obj_nest_end(fmsg); err = devlink_fmsg_snd(fmsg, info, DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE, 0); @@ -1278,8 +1163,8 @@ devlink_health_reporter_get_from_cb_lock(struct netlink_callback *cb) return reporter; } -int devlink_nl_cmd_health_reporter_dump_get_dumpit(struct sk_buff *skb, - struct netlink_callback *cb) +int devlink_nl_health_reporter_dump_get_dumpit(struct sk_buff *skb, + struct netlink_callback *cb) { struct devlink_nl_dump_state *state = devlink_dump_state(cb); struct devlink_health_reporter *reporter; @@ -1317,8 +1202,8 @@ unlock: return err; } -int devlink_nl_cmd_health_reporter_dump_clear_doit(struct sk_buff *skb, - struct genl_info *info) +int devlink_nl_health_reporter_dump_clear_doit(struct sk_buff *skb, + struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; struct devlink_health_reporter *reporter; @@ -1334,8 +1219,8 @@ int devlink_nl_cmd_health_reporter_dump_clear_doit(struct sk_buff *skb, return 0; } -int devlink_nl_cmd_health_reporter_test_doit(struct sk_buff *skb, - struct genl_info *info) +int devlink_nl_health_reporter_test_doit(struct sk_buff *skb, + struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; struct devlink_health_reporter *reporter; diff --git a/net/devlink/linecard.c b/net/devlink/linecard.c index 85c32c314b..2f1c317b64 100644 --- 
a/net/devlink/linecard.c +++ b/net/devlink/linecard.c @@ -6,6 +6,25 @@ #include "devl_internal.h" +struct devlink_linecard { + struct list_head list; + struct devlink *devlink; + unsigned int index; + const struct devlink_linecard_ops *ops; + void *priv; + enum devlink_linecard_state state; + struct mutex state_lock; /* Protects state */ + const char *type; + struct devlink_linecard_type *types; + unsigned int types_count; + u32 rel_index; +}; + +unsigned int devlink_linecard_index(struct devlink_linecard *linecard) +{ + return linecard->index; +} + static struct devlink_linecard * devlink_linecard_get_by_index(struct devlink *devlink, unsigned int linecard_index) @@ -46,24 +65,6 @@ devlink_linecard_get_from_info(struct devlink *devlink, struct genl_info *info) return devlink_linecard_get_from_attrs(devlink, info->attrs); } -static int devlink_nl_put_nested_handle(struct sk_buff *msg, struct devlink *devlink) -{ - struct nlattr *nested_attr; - - nested_attr = nla_nest_start(msg, DEVLINK_ATTR_NESTED_DEVLINK); - if (!nested_attr) - return -EMSGSIZE; - if (devlink_nl_put_handle(msg, devlink)) - goto nla_put_failure; - - nla_nest_end(msg, nested_attr); - return 0; - -nla_put_failure: - nla_nest_cancel(msg, nested_attr); - return -EMSGSIZE; -} - struct devlink_linecard_type { const char *type; const void *priv; @@ -111,8 +112,10 @@ static int devlink_nl_linecard_fill(struct sk_buff *msg, nla_nest_end(msg, attr); } - if (linecard->nested_devlink && - devlink_nl_put_nested_handle(msg, linecard->nested_devlink)) + if (devlink_rel_devlink_handle_put(msg, devlink, + linecard->rel_index, + DEVLINK_ATTR_NESTED_DEVLINK, + NULL)) goto nla_put_failure; genlmsg_end(msg, hdr); @@ -366,8 +369,7 @@ out: return err; } -int devlink_nl_cmd_linecard_set_doit(struct sk_buff *skb, - struct genl_info *info) +int devlink_nl_linecard_set_doit(struct sk_buff *skb, struct genl_info *info) { struct netlink_ext_ack *extack = info->extack; struct devlink *devlink = info->user_ptr[0]; @@ -521,7 +523,6 @@ EXPORT_SYMBOL_GPL(devlink_linecard_provision_set); void devlink_linecard_provision_clear(struct devlink_linecard *linecard) { mutex_lock(&linecard->state_lock); - WARN_ON(linecard->nested_devlink); linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONED; linecard->type = NULL; devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW); @@ -540,7 +541,6 @@ EXPORT_SYMBOL_GPL(devlink_linecard_provision_clear); void devlink_linecard_provision_fail(struct devlink_linecard *linecard) { mutex_lock(&linecard->state_lock); - WARN_ON(linecard->nested_devlink); linecard->state = DEVLINK_LINECARD_STATE_PROVISIONING_FAILED; devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW); mutex_unlock(&linecard->state_lock); @@ -588,6 +588,27 @@ void devlink_linecard_deactivate(struct devlink_linecard *linecard) } EXPORT_SYMBOL_GPL(devlink_linecard_deactivate); +static void devlink_linecard_rel_notify_cb(struct devlink *devlink, + u32 linecard_index) +{ + struct devlink_linecard *linecard; + + linecard = devlink_linecard_get_by_index(devlink, linecard_index); + if (!linecard) + return; + devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW); +} + +static void devlink_linecard_rel_cleanup_cb(struct devlink *devlink, + u32 linecard_index, u32 rel_index) +{ + struct devlink_linecard *linecard; + + linecard = devlink_linecard_get_by_index(devlink, linecard_index); + if (linecard && linecard->rel_index == rel_index) + linecard->rel_index = 0; +} + /** * devlink_linecard_nested_dl_set - Attach/detach nested devlink * instance to linecard. 
@@ -595,12 +616,14 @@ EXPORT_SYMBOL_GPL(devlink_linecard_deactivate); * @linecard: devlink linecard * @nested_devlink: devlink instance to attach or NULL to detach */ -void devlink_linecard_nested_dl_set(struct devlink_linecard *linecard, - struct devlink *nested_devlink) +int devlink_linecard_nested_dl_set(struct devlink_linecard *linecard, + struct devlink *nested_devlink) { - mutex_lock(&linecard->state_lock); - linecard->nested_devlink = nested_devlink; - devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW); - mutex_unlock(&linecard->state_lock); + return devlink_rel_nested_in_add(&linecard->rel_index, + linecard->devlink->index, + linecard->index, + devlink_linecard_rel_notify_cb, + devlink_linecard_rel_cleanup_cb, + nested_devlink); } EXPORT_SYMBOL_GPL(devlink_linecard_nested_dl_set); diff --git a/net/devlink/netlink.c b/net/devlink/netlink.c index fc3e7c029a..d0b90ebc8b 100644 --- a/net/devlink/netlink.c +++ b/net/devlink/netlink.c @@ -13,74 +13,37 @@ static const struct genl_multicast_group devlink_nl_mcgrps[] = { [DEVLINK_MCGRP_CONFIG] = { .name = DEVLINK_GENL_MCGRP_CONFIG_NAME }, }; -static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { - [DEVLINK_ATTR_UNSPEC] = { .strict_start_type = - DEVLINK_ATTR_TRAP_POLICER_ID }, - [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING }, - [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING }, - [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32 }, - [DEVLINK_ATTR_PORT_TYPE] = NLA_POLICY_RANGE(NLA_U16, DEVLINK_PORT_TYPE_AUTO, - DEVLINK_PORT_TYPE_IB), - [DEVLINK_ATTR_PORT_SPLIT_COUNT] = { .type = NLA_U32 }, - [DEVLINK_ATTR_SB_INDEX] = { .type = NLA_U32 }, - [DEVLINK_ATTR_SB_POOL_INDEX] = { .type = NLA_U16 }, - [DEVLINK_ATTR_SB_POOL_TYPE] = { .type = NLA_U8 }, - [DEVLINK_ATTR_SB_POOL_SIZE] = { .type = NLA_U32 }, - [DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE] = { .type = NLA_U8 }, - [DEVLINK_ATTR_SB_THRESHOLD] = { .type = NLA_U32 }, - [DEVLINK_ATTR_SB_TC_INDEX] = { .type = NLA_U16 }, - [DEVLINK_ATTR_ESWITCH_MODE] = NLA_POLICY_RANGE(NLA_U16, DEVLINK_ESWITCH_MODE_LEGACY, - DEVLINK_ESWITCH_MODE_SWITCHDEV), - [DEVLINK_ATTR_ESWITCH_INLINE_MODE] = { .type = NLA_U8 }, - [DEVLINK_ATTR_ESWITCH_ENCAP_MODE] = { .type = NLA_U8 }, - [DEVLINK_ATTR_DPIPE_TABLE_NAME] = { .type = NLA_NUL_STRING }, - [DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED] = { .type = NLA_U8 }, - [DEVLINK_ATTR_RESOURCE_ID] = { .type = NLA_U64}, - [DEVLINK_ATTR_RESOURCE_SIZE] = { .type = NLA_U64}, - [DEVLINK_ATTR_PARAM_NAME] = { .type = NLA_NUL_STRING }, - [DEVLINK_ATTR_PARAM_TYPE] = { .type = NLA_U8 }, - [DEVLINK_ATTR_PARAM_VALUE_CMODE] = { .type = NLA_U8 }, - [DEVLINK_ATTR_REGION_NAME] = { .type = NLA_NUL_STRING }, - [DEVLINK_ATTR_REGION_SNAPSHOT_ID] = { .type = NLA_U32 }, - [DEVLINK_ATTR_REGION_CHUNK_ADDR] = { .type = NLA_U64 }, - [DEVLINK_ATTR_REGION_CHUNK_LEN] = { .type = NLA_U64 }, - [DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .type = NLA_NUL_STRING }, - [DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD] = { .type = NLA_U64 }, - [DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER] = { .type = NLA_U8 }, - [DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME] = { .type = NLA_NUL_STRING }, - [DEVLINK_ATTR_FLASH_UPDATE_COMPONENT] = { .type = NLA_NUL_STRING }, - [DEVLINK_ATTR_FLASH_UPDATE_OVERWRITE_MASK] = - NLA_POLICY_BITFIELD32(DEVLINK_SUPPORTED_FLASH_OVERWRITE_SECTIONS), - [DEVLINK_ATTR_TRAP_NAME] = { .type = NLA_NUL_STRING }, - [DEVLINK_ATTR_TRAP_ACTION] = { .type = NLA_U8 }, - [DEVLINK_ATTR_TRAP_GROUP_NAME] = { .type = NLA_NUL_STRING }, - [DEVLINK_ATTR_NETNS_PID] = { .type = NLA_U32 }, - 
[DEVLINK_ATTR_NETNS_FD] = { .type = NLA_U32 }, - [DEVLINK_ATTR_NETNS_ID] = { .type = NLA_U32 }, - [DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP] = { .type = NLA_U8 }, - [DEVLINK_ATTR_TRAP_POLICER_ID] = { .type = NLA_U32 }, - [DEVLINK_ATTR_TRAP_POLICER_RATE] = { .type = NLA_U64 }, - [DEVLINK_ATTR_TRAP_POLICER_BURST] = { .type = NLA_U64 }, - [DEVLINK_ATTR_PORT_FUNCTION] = { .type = NLA_NESTED }, - [DEVLINK_ATTR_RELOAD_ACTION] = NLA_POLICY_RANGE(NLA_U8, DEVLINK_RELOAD_ACTION_DRIVER_REINIT, - DEVLINK_RELOAD_ACTION_MAX), - [DEVLINK_ATTR_RELOAD_LIMITS] = NLA_POLICY_BITFIELD32(DEVLINK_RELOAD_LIMITS_VALID_MASK), - [DEVLINK_ATTR_PORT_FLAVOUR] = { .type = NLA_U16 }, - [DEVLINK_ATTR_PORT_PCI_PF_NUMBER] = { .type = NLA_U16 }, - [DEVLINK_ATTR_PORT_PCI_SF_NUMBER] = { .type = NLA_U32 }, - [DEVLINK_ATTR_PORT_CONTROLLER_NUMBER] = { .type = NLA_U32 }, - [DEVLINK_ATTR_RATE_TYPE] = { .type = NLA_U16 }, - [DEVLINK_ATTR_RATE_TX_SHARE] = { .type = NLA_U64 }, - [DEVLINK_ATTR_RATE_TX_MAX] = { .type = NLA_U64 }, - [DEVLINK_ATTR_RATE_NODE_NAME] = { .type = NLA_NUL_STRING }, - [DEVLINK_ATTR_RATE_PARENT_NODE_NAME] = { .type = NLA_NUL_STRING }, - [DEVLINK_ATTR_LINECARD_INDEX] = { .type = NLA_U32 }, - [DEVLINK_ATTR_LINECARD_TYPE] = { .type = NLA_NUL_STRING }, - [DEVLINK_ATTR_SELFTESTS] = { .type = NLA_NESTED }, - [DEVLINK_ATTR_RATE_TX_PRIORITY] = { .type = NLA_U32 }, - [DEVLINK_ATTR_RATE_TX_WEIGHT] = { .type = NLA_U32 }, - [DEVLINK_ATTR_REGION_DIRECT] = { .type = NLA_FLAG }, -}; +int devlink_nl_put_nested_handle(struct sk_buff *msg, struct net *net, + struct devlink *devlink, int attrtype) +{ + struct nlattr *nested_attr; + struct net *devl_net; + + nested_attr = nla_nest_start(msg, attrtype); + if (!nested_attr) + return -EMSGSIZE; + if (devlink_nl_put_handle(msg, devlink)) + goto nla_put_failure; + + rcu_read_lock(); + devl_net = read_pnet_rcu(&devlink->_net); + if (!net_eq(net, devl_net)) { + int id = peernet2id_alloc(net, devl_net, GFP_ATOMIC); + + rcu_read_unlock(); + if (nla_put_s32(msg, DEVLINK_ATTR_NETNS_ID, id)) + return -EMSGSIZE; + } else { + rcu_read_unlock(); + } + + nla_nest_end(msg, nested_attr); + return 0; + +nla_put_failure: + nla_nest_cancel(msg, nested_attr); + return -EMSGSIZE; +} int devlink_nl_msg_reply_and_new(struct sk_buff **msg, struct genl_info *info) { @@ -159,7 +122,7 @@ unlock: int devlink_nl_pre_doit(const struct genl_split_ops *ops, struct sk_buff *skb, struct genl_info *info) { - return __devlink_nl_pre_doit(skb, info, ops->internal_flags); + return __devlink_nl_pre_doit(skb, info, 0); } int devlink_nl_pre_doit_port(const struct genl_split_ops *ops, @@ -255,269 +218,12 @@ int devlink_nl_dumpit(struct sk_buff *msg, struct netlink_callback *cb, return devlink_nl_inst_iter_dumpit(msg, cb, flags, dump_one); } -static const struct genl_small_ops devlink_nl_small_ops[40] = { - { - .cmd = DEVLINK_CMD_PORT_SET, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_cmd_port_set_doit, - .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_PORT, - }, - { - .cmd = DEVLINK_CMD_RATE_SET, - .doit = devlink_nl_cmd_rate_set_doit, - .flags = GENL_ADMIN_PERM, - }, - { - .cmd = DEVLINK_CMD_RATE_NEW, - .doit = devlink_nl_cmd_rate_new_doit, - .flags = GENL_ADMIN_PERM, - }, - { - .cmd = DEVLINK_CMD_RATE_DEL, - .doit = devlink_nl_cmd_rate_del_doit, - .flags = GENL_ADMIN_PERM, - }, - { - .cmd = DEVLINK_CMD_PORT_SPLIT, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_cmd_port_split_doit, - .flags = GENL_ADMIN_PERM, - .internal_flags 
= DEVLINK_NL_FLAG_NEED_PORT, - }, - { - .cmd = DEVLINK_CMD_PORT_UNSPLIT, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_cmd_port_unsplit_doit, - .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_PORT, - }, - { - .cmd = DEVLINK_CMD_PORT_NEW, - .doit = devlink_nl_cmd_port_new_doit, - .flags = GENL_ADMIN_PERM, - }, - { - .cmd = DEVLINK_CMD_PORT_DEL, - .doit = devlink_nl_cmd_port_del_doit, - .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_PORT, - }, - - { - .cmd = DEVLINK_CMD_LINECARD_SET, - .doit = devlink_nl_cmd_linecard_set_doit, - .flags = GENL_ADMIN_PERM, - }, - { - .cmd = DEVLINK_CMD_SB_POOL_SET, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_cmd_sb_pool_set_doit, - .flags = GENL_ADMIN_PERM, - }, - { - .cmd = DEVLINK_CMD_SB_PORT_POOL_SET, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_cmd_sb_port_pool_set_doit, - .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_PORT, - }, - { - .cmd = DEVLINK_CMD_SB_TC_POOL_BIND_SET, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_cmd_sb_tc_pool_bind_set_doit, - .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_PORT, - }, - { - .cmd = DEVLINK_CMD_SB_OCC_SNAPSHOT, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_cmd_sb_occ_snapshot_doit, - .flags = GENL_ADMIN_PERM, - }, - { - .cmd = DEVLINK_CMD_SB_OCC_MAX_CLEAR, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_cmd_sb_occ_max_clear_doit, - .flags = GENL_ADMIN_PERM, - }, - { - .cmd = DEVLINK_CMD_ESWITCH_GET, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_cmd_eswitch_get_doit, - .flags = GENL_ADMIN_PERM, - }, - { - .cmd = DEVLINK_CMD_ESWITCH_SET, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_cmd_eswitch_set_doit, - .flags = GENL_ADMIN_PERM, - }, - { - .cmd = DEVLINK_CMD_DPIPE_TABLE_GET, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_cmd_dpipe_table_get, - /* can be retrieved by unprivileged users */ - }, - { - .cmd = DEVLINK_CMD_DPIPE_ENTRIES_GET, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_cmd_dpipe_entries_get, - /* can be retrieved by unprivileged users */ - }, - { - .cmd = DEVLINK_CMD_DPIPE_HEADERS_GET, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_cmd_dpipe_headers_get, - /* can be retrieved by unprivileged users */ - }, - { - .cmd = DEVLINK_CMD_DPIPE_TABLE_COUNTERS_SET, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_cmd_dpipe_table_counters_set, - .flags = GENL_ADMIN_PERM, - }, - { - .cmd = DEVLINK_CMD_RESOURCE_SET, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_cmd_resource_set, - .flags = GENL_ADMIN_PERM, - }, - { - .cmd = DEVLINK_CMD_RESOURCE_DUMP, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_cmd_resource_dump, - /* can be retrieved by unprivileged users */ - }, - { - .cmd = DEVLINK_CMD_RELOAD, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_cmd_reload, - .flags = GENL_ADMIN_PERM, - }, - { - .cmd = DEVLINK_CMD_PARAM_SET, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = 
devlink_nl_cmd_param_set_doit, - .flags = GENL_ADMIN_PERM, - }, - { - .cmd = DEVLINK_CMD_PORT_PARAM_GET, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_cmd_port_param_get_doit, - .dumpit = devlink_nl_cmd_port_param_get_dumpit, - .internal_flags = DEVLINK_NL_FLAG_NEED_PORT, - /* can be retrieved by unprivileged users */ - }, - { - .cmd = DEVLINK_CMD_PORT_PARAM_SET, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_cmd_port_param_set_doit, - .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_PORT, - }, - { - .cmd = DEVLINK_CMD_REGION_NEW, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_cmd_region_new, - .flags = GENL_ADMIN_PERM, - }, - { - .cmd = DEVLINK_CMD_REGION_DEL, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_cmd_region_del, - .flags = GENL_ADMIN_PERM, - }, - { - .cmd = DEVLINK_CMD_REGION_READ, - .validate = GENL_DONT_VALIDATE_STRICT | - GENL_DONT_VALIDATE_DUMP_STRICT, - .dumpit = devlink_nl_cmd_region_read_dumpit, - .flags = GENL_ADMIN_PERM, - }, - { - .cmd = DEVLINK_CMD_HEALTH_REPORTER_SET, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_cmd_health_reporter_set_doit, - .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT, - }, - { - .cmd = DEVLINK_CMD_HEALTH_REPORTER_RECOVER, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_cmd_health_reporter_recover_doit, - .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT, - }, - { - .cmd = DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_cmd_health_reporter_diagnose_doit, - .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT, - }, - { - .cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET, - .validate = GENL_DONT_VALIDATE_STRICT | - GENL_DONT_VALIDATE_DUMP_STRICT, - .dumpit = devlink_nl_cmd_health_reporter_dump_get_dumpit, - .flags = GENL_ADMIN_PERM, - }, - { - .cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_cmd_health_reporter_dump_clear_doit, - .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT, - }, - { - .cmd = DEVLINK_CMD_HEALTH_REPORTER_TEST, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_cmd_health_reporter_test_doit, - .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT, - }, - { - .cmd = DEVLINK_CMD_FLASH_UPDATE, - .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .doit = devlink_nl_cmd_flash_update, - .flags = GENL_ADMIN_PERM, - }, - { - .cmd = DEVLINK_CMD_TRAP_SET, - .doit = devlink_nl_cmd_trap_set_doit, - .flags = GENL_ADMIN_PERM, - }, - { - .cmd = DEVLINK_CMD_TRAP_GROUP_SET, - .doit = devlink_nl_cmd_trap_group_set_doit, - .flags = GENL_ADMIN_PERM, - }, - { - .cmd = DEVLINK_CMD_TRAP_POLICER_SET, - .doit = devlink_nl_cmd_trap_policer_set_doit, - .flags = GENL_ADMIN_PERM, - }, - { - .cmd = DEVLINK_CMD_SELFTESTS_RUN, - .doit = devlink_nl_cmd_selftests_run, - .flags = GENL_ADMIN_PERM, - }, - /* -- No new ops here! Use split ops going forward! 
-- */ -}; - struct genl_family devlink_nl_family __ro_after_init = { .name = DEVLINK_GENL_NAME, .version = DEVLINK_GENL_VERSION, - .maxattr = DEVLINK_ATTR_MAX, - .policy = devlink_nl_policy, .netnsok = true, .parallel_ops = true, - .pre_doit = devlink_nl_pre_doit, - .post_doit = devlink_nl_post_doit, .module = THIS_MODULE, - .small_ops = devlink_nl_small_ops, - .n_small_ops = ARRAY_SIZE(devlink_nl_small_ops), .split_ops = devlink_nl_ops, .n_split_ops = ARRAY_SIZE(devlink_nl_ops), .resv_start_op = DEVLINK_CMD_SELFTESTS_RUN + 1, diff --git a/net/devlink/netlink_gen.c b/net/devlink/netlink_gen.c index 467b7a431d..788dfdc498 100644 --- a/net/devlink/netlink_gen.c +++ b/net/devlink/netlink_gen.c @@ -10,6 +10,18 @@ #include <uapi/linux/devlink.h> +/* Common nested types */ +const struct nla_policy devlink_dl_port_function_nl_policy[DEVLINK_PORT_FN_ATTR_CAPS + 1] = { + [DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR] = { .type = NLA_BINARY, }, + [DEVLINK_PORT_FN_ATTR_STATE] = NLA_POLICY_MAX(NLA_U8, 1), + [DEVLINK_PORT_FN_ATTR_OPSTATE] = NLA_POLICY_MAX(NLA_U8, 1), + [DEVLINK_PORT_FN_ATTR_CAPS] = NLA_POLICY_BITFIELD32(15), +}; + +const struct nla_policy devlink_dl_selftest_id_nl_policy[DEVLINK_ATTR_SELFTEST_ID_FLASH + 1] = { + [DEVLINK_ATTR_SELFTEST_ID_FLASH] = { .type = NLA_FLAG, }, +}; + /* DEVLINK_CMD_GET - do */ static const struct nla_policy devlink_get_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, @@ -29,6 +41,48 @@ static const struct nla_policy devlink_port_get_dump_nl_policy[DEVLINK_ATTR_DEV_ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, }; +/* DEVLINK_CMD_PORT_SET - do */ +static const struct nla_policy devlink_port_set_nl_policy[DEVLINK_ATTR_PORT_FUNCTION + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, + [DEVLINK_ATTR_PORT_TYPE] = NLA_POLICY_MAX(NLA_U16, 3), + [DEVLINK_ATTR_PORT_FUNCTION] = NLA_POLICY_NESTED(devlink_dl_port_function_nl_policy), +}; + +/* DEVLINK_CMD_PORT_NEW - do */ +static const struct nla_policy devlink_port_new_nl_policy[DEVLINK_ATTR_PORT_PCI_SF_NUMBER + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, + [DEVLINK_ATTR_PORT_FLAVOUR] = NLA_POLICY_MAX(NLA_U16, 7), + [DEVLINK_ATTR_PORT_PCI_PF_NUMBER] = { .type = NLA_U16, }, + [DEVLINK_ATTR_PORT_PCI_SF_NUMBER] = { .type = NLA_U32, }, + [DEVLINK_ATTR_PORT_CONTROLLER_NUMBER] = { .type = NLA_U32, }, +}; + +/* DEVLINK_CMD_PORT_DEL - do */ +static const struct nla_policy devlink_port_del_nl_policy[DEVLINK_ATTR_PORT_INDEX + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, +}; + +/* DEVLINK_CMD_PORT_SPLIT - do */ +static const struct nla_policy devlink_port_split_nl_policy[DEVLINK_ATTR_PORT_SPLIT_COUNT + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, + [DEVLINK_ATTR_PORT_SPLIT_COUNT] = { .type = NLA_U32, }, +}; + +/* DEVLINK_CMD_PORT_UNSPLIT - do */ +static const struct nla_policy devlink_port_unsplit_nl_policy[DEVLINK_ATTR_PORT_INDEX + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_PORT_INDEX] = { .type 
= NLA_U32, }, +}; + /* DEVLINK_CMD_SB_GET - do */ static const struct nla_policy devlink_sb_get_do_nl_policy[DEVLINK_ATTR_SB_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, @@ -56,6 +110,16 @@ static const struct nla_policy devlink_sb_pool_get_dump_nl_policy[DEVLINK_ATTR_D [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, }; +/* DEVLINK_CMD_SB_POOL_SET - do */ +static const struct nla_policy devlink_sb_pool_set_nl_policy[DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_SB_INDEX] = { .type = NLA_U32, }, + [DEVLINK_ATTR_SB_POOL_INDEX] = { .type = NLA_U16, }, + [DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE] = NLA_POLICY_MAX(NLA_U8, 1), + [DEVLINK_ATTR_SB_POOL_SIZE] = { .type = NLA_U32, }, +}; + /* DEVLINK_CMD_SB_PORT_POOL_GET - do */ static const struct nla_policy devlink_sb_port_pool_get_do_nl_policy[DEVLINK_ATTR_SB_POOL_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, @@ -71,6 +135,16 @@ static const struct nla_policy devlink_sb_port_pool_get_dump_nl_policy[DEVLINK_A [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, }; +/* DEVLINK_CMD_SB_PORT_POOL_SET - do */ +static const struct nla_policy devlink_sb_port_pool_set_nl_policy[DEVLINK_ATTR_SB_THRESHOLD + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, + [DEVLINK_ATTR_SB_INDEX] = { .type = NLA_U32, }, + [DEVLINK_ATTR_SB_POOL_INDEX] = { .type = NLA_U16, }, + [DEVLINK_ATTR_SB_THRESHOLD] = { .type = NLA_U32, }, +}; + /* DEVLINK_CMD_SB_TC_POOL_BIND_GET - do */ static const struct nla_policy devlink_sb_tc_pool_bind_get_do_nl_policy[DEVLINK_ATTR_SB_TC_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, @@ -87,6 +161,100 @@ static const struct nla_policy devlink_sb_tc_pool_bind_get_dump_nl_policy[DEVLIN [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, }; +/* DEVLINK_CMD_SB_TC_POOL_BIND_SET - do */ +static const struct nla_policy devlink_sb_tc_pool_bind_set_nl_policy[DEVLINK_ATTR_SB_TC_INDEX + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, + [DEVLINK_ATTR_SB_INDEX] = { .type = NLA_U32, }, + [DEVLINK_ATTR_SB_POOL_INDEX] = { .type = NLA_U16, }, + [DEVLINK_ATTR_SB_POOL_TYPE] = NLA_POLICY_MAX(NLA_U8, 1), + [DEVLINK_ATTR_SB_TC_INDEX] = { .type = NLA_U16, }, + [DEVLINK_ATTR_SB_THRESHOLD] = { .type = NLA_U32, }, +}; + +/* DEVLINK_CMD_SB_OCC_SNAPSHOT - do */ +static const struct nla_policy devlink_sb_occ_snapshot_nl_policy[DEVLINK_ATTR_SB_INDEX + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_SB_INDEX] = { .type = NLA_U32, }, +}; + +/* DEVLINK_CMD_SB_OCC_MAX_CLEAR - do */ +static const struct nla_policy devlink_sb_occ_max_clear_nl_policy[DEVLINK_ATTR_SB_INDEX + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_SB_INDEX] = { .type = NLA_U32, }, +}; + +/* DEVLINK_CMD_ESWITCH_GET - do */ +static const struct nla_policy devlink_eswitch_get_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, +}; + +/* DEVLINK_CMD_ESWITCH_SET - do */ +static const struct 
nla_policy devlink_eswitch_set_nl_policy[DEVLINK_ATTR_ESWITCH_ENCAP_MODE + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_ESWITCH_MODE] = NLA_POLICY_MAX(NLA_U16, 1), + [DEVLINK_ATTR_ESWITCH_INLINE_MODE] = NLA_POLICY_MAX(NLA_U16, 3), + [DEVLINK_ATTR_ESWITCH_ENCAP_MODE] = NLA_POLICY_MAX(NLA_U8, 1), +}; + +/* DEVLINK_CMD_DPIPE_TABLE_GET - do */ +static const struct nla_policy devlink_dpipe_table_get_nl_policy[DEVLINK_ATTR_DPIPE_TABLE_NAME + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DPIPE_TABLE_NAME] = { .type = NLA_NUL_STRING, }, +}; + +/* DEVLINK_CMD_DPIPE_ENTRIES_GET - do */ +static const struct nla_policy devlink_dpipe_entries_get_nl_policy[DEVLINK_ATTR_DPIPE_TABLE_NAME + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DPIPE_TABLE_NAME] = { .type = NLA_NUL_STRING, }, +}; + +/* DEVLINK_CMD_DPIPE_HEADERS_GET - do */ +static const struct nla_policy devlink_dpipe_headers_get_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, +}; + +/* DEVLINK_CMD_DPIPE_TABLE_COUNTERS_SET - do */ +static const struct nla_policy devlink_dpipe_table_counters_set_nl_policy[DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DPIPE_TABLE_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED] = { .type = NLA_U8, }, +}; + +/* DEVLINK_CMD_RESOURCE_SET - do */ +static const struct nla_policy devlink_resource_set_nl_policy[DEVLINK_ATTR_RESOURCE_SIZE + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_RESOURCE_ID] = { .type = NLA_U64, }, + [DEVLINK_ATTR_RESOURCE_SIZE] = { .type = NLA_U64, }, +}; + +/* DEVLINK_CMD_RESOURCE_DUMP - do */ +static const struct nla_policy devlink_resource_dump_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, +}; + +/* DEVLINK_CMD_RELOAD - do */ +static const struct nla_policy devlink_reload_nl_policy[DEVLINK_ATTR_RELOAD_LIMITS + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_RELOAD_ACTION] = NLA_POLICY_RANGE(NLA_U8, 1, 2), + [DEVLINK_ATTR_RELOAD_LIMITS] = NLA_POLICY_BITFIELD32(6), + [DEVLINK_ATTR_NETNS_PID] = { .type = NLA_U32, }, + [DEVLINK_ATTR_NETNS_FD] = { .type = NLA_U32, }, + [DEVLINK_ATTR_NETNS_ID] = { .type = NLA_U32, }, +}; + /* DEVLINK_CMD_PARAM_GET - do */ static const struct nla_policy devlink_param_get_do_nl_policy[DEVLINK_ATTR_PARAM_NAME + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, @@ -100,6 +268,15 @@ static const struct nla_policy devlink_param_get_dump_nl_policy[DEVLINK_ATTR_DEV [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, }; +/* DEVLINK_CMD_PARAM_SET - do */ +static const struct nla_policy devlink_param_set_nl_policy[DEVLINK_ATTR_PARAM_VALUE_CMODE + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_PARAM_NAME] = { .type = NLA_NUL_STRING, }, + 
[DEVLINK_ATTR_PARAM_TYPE] = { .type = NLA_U8, }, + [DEVLINK_ATTR_PARAM_VALUE_CMODE] = NLA_POLICY_MAX(NLA_U8, 2), +}; + /* DEVLINK_CMD_REGION_GET - do */ static const struct nla_policy devlink_region_get_do_nl_policy[DEVLINK_ATTR_REGION_NAME + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, @@ -114,6 +291,50 @@ static const struct nla_policy devlink_region_get_dump_nl_policy[DEVLINK_ATTR_DE [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, }; +/* DEVLINK_CMD_REGION_NEW - do */ +static const struct nla_policy devlink_region_new_nl_policy[DEVLINK_ATTR_REGION_SNAPSHOT_ID + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, + [DEVLINK_ATTR_REGION_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_REGION_SNAPSHOT_ID] = { .type = NLA_U32, }, +}; + +/* DEVLINK_CMD_REGION_DEL - do */ +static const struct nla_policy devlink_region_del_nl_policy[DEVLINK_ATTR_REGION_SNAPSHOT_ID + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, + [DEVLINK_ATTR_REGION_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_REGION_SNAPSHOT_ID] = { .type = NLA_U32, }, +}; + +/* DEVLINK_CMD_REGION_READ - dump */ +static const struct nla_policy devlink_region_read_nl_policy[DEVLINK_ATTR_REGION_DIRECT + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, + [DEVLINK_ATTR_REGION_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_REGION_SNAPSHOT_ID] = { .type = NLA_U32, }, + [DEVLINK_ATTR_REGION_DIRECT] = { .type = NLA_FLAG, }, + [DEVLINK_ATTR_REGION_CHUNK_ADDR] = { .type = NLA_U64, }, + [DEVLINK_ATTR_REGION_CHUNK_LEN] = { .type = NLA_U64, }, +}; + +/* DEVLINK_CMD_PORT_PARAM_GET - do */ +static const struct nla_policy devlink_port_param_get_nl_policy[DEVLINK_ATTR_PORT_INDEX + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, +}; + +/* DEVLINK_CMD_PORT_PARAM_SET - do */ +static const struct nla_policy devlink_port_param_set_nl_policy[DEVLINK_ATTR_PORT_INDEX + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, +}; + /* DEVLINK_CMD_INFO_GET - do */ static const struct nla_policy devlink_info_get_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, @@ -135,6 +356,58 @@ static const struct nla_policy devlink_health_reporter_get_dump_nl_policy[DEVLIN [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, }; +/* DEVLINK_CMD_HEALTH_REPORTER_SET - do */ +static const struct nla_policy devlink_health_reporter_set_nl_policy[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, + [DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD] = { .type = NLA_U64, }, + [DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER] = { .type = NLA_U8, }, + [DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP] = { .type = NLA_U8, }, +}; + +/* DEVLINK_CMD_HEALTH_REPORTER_RECOVER - do */ +static const struct 
nla_policy devlink_health_reporter_recover_nl_policy[DEVLINK_ATTR_HEALTH_REPORTER_NAME + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, + [DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .type = NLA_NUL_STRING, }, +}; + +/* DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE - do */ +static const struct nla_policy devlink_health_reporter_diagnose_nl_policy[DEVLINK_ATTR_HEALTH_REPORTER_NAME + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, + [DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .type = NLA_NUL_STRING, }, +}; + +/* DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET - dump */ +static const struct nla_policy devlink_health_reporter_dump_get_nl_policy[DEVLINK_ATTR_HEALTH_REPORTER_NAME + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, + [DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .type = NLA_NUL_STRING, }, +}; + +/* DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR - do */ +static const struct nla_policy devlink_health_reporter_dump_clear_nl_policy[DEVLINK_ATTR_HEALTH_REPORTER_NAME + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, + [DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .type = NLA_NUL_STRING, }, +}; + +/* DEVLINK_CMD_FLASH_UPDATE - do */ +static const struct nla_policy devlink_flash_update_nl_policy[DEVLINK_ATTR_FLASH_UPDATE_OVERWRITE_MASK + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_FLASH_UPDATE_COMPONENT] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_FLASH_UPDATE_OVERWRITE_MASK] = NLA_POLICY_BITFIELD32(3), +}; + /* DEVLINK_CMD_TRAP_GET - do */ static const struct nla_policy devlink_trap_get_do_nl_policy[DEVLINK_ATTR_TRAP_NAME + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, @@ -148,6 +421,14 @@ static const struct nla_policy devlink_trap_get_dump_nl_policy[DEVLINK_ATTR_DEV_ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, }; +/* DEVLINK_CMD_TRAP_SET - do */ +static const struct nla_policy devlink_trap_set_nl_policy[DEVLINK_ATTR_TRAP_ACTION + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_TRAP_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_TRAP_ACTION] = NLA_POLICY_MAX(NLA_U8, 2), +}; + /* DEVLINK_CMD_TRAP_GROUP_GET - do */ static const struct nla_policy devlink_trap_group_get_do_nl_policy[DEVLINK_ATTR_TRAP_GROUP_NAME + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, @@ -161,6 +442,15 @@ static const struct nla_policy devlink_trap_group_get_dump_nl_policy[DEVLINK_ATT [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, }; +/* DEVLINK_CMD_TRAP_GROUP_SET - do */ +static const struct nla_policy devlink_trap_group_set_nl_policy[DEVLINK_ATTR_TRAP_POLICER_ID + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_TRAP_GROUP_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_TRAP_ACTION] = NLA_POLICY_MAX(NLA_U8, 2), + [DEVLINK_ATTR_TRAP_POLICER_ID] = { .type = NLA_U32, }, +}; + 
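The generated tables above all follow one pattern: each command gets a policy array sized by the highest attribute it parses, and the matching op entry further down sets .maxattr to that same attribute, so strict validation rejects anything the command does not accept. A minimal sketch of that pairing, consolidating the LINECARD_SET policy and op from this patch into one place (the example_ names are illustrative, not part of the patch):

#include <net/genetlink.h>
#include <uapi/linux/devlink.h>

/* Policy sized by the command's highest attribute, not DEVLINK_ATTR_MAX. */
static const struct nla_policy example_linecard_set_nl_policy[DEVLINK_ATTR_LINECARD_TYPE + 1] = {
	[DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
	[DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
	[DEVLINK_ATTR_LINECARD_INDEX] = { .type = NLA_U32, },
	[DEVLINK_ATTR_LINECARD_TYPE] = { .type = NLA_NUL_STRING, },
};

/* The split op carries its own .policy/.maxattr instead of relying on
 * the removed family-wide devlink_nl_policy/DEVLINK_ATTR_MAX pair.
 */
static const struct genl_split_ops example_linecard_set_op = {
	.cmd		= DEVLINK_CMD_LINECARD_SET,
	.validate	= GENL_DONT_VALIDATE_STRICT,
	.pre_doit	= devlink_nl_pre_doit,
	.doit		= devlink_nl_linecard_set_doit,
	.post_doit	= devlink_nl_post_doit,
	.policy		= example_linecard_set_nl_policy,
	.maxattr	= DEVLINK_ATTR_LINECARD_TYPE,
	.flags		= GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
};
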
/* DEVLINK_CMD_TRAP_POLICER_GET - do */ static const struct nla_policy devlink_trap_policer_get_do_nl_policy[DEVLINK_ATTR_TRAP_POLICER_ID + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, @@ -174,6 +464,23 @@ static const struct nla_policy devlink_trap_policer_get_dump_nl_policy[DEVLINK_A [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, }; +/* DEVLINK_CMD_TRAP_POLICER_SET - do */ +static const struct nla_policy devlink_trap_policer_set_nl_policy[DEVLINK_ATTR_TRAP_POLICER_BURST + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_TRAP_POLICER_ID] = { .type = NLA_U32, }, + [DEVLINK_ATTR_TRAP_POLICER_RATE] = { .type = NLA_U64, }, + [DEVLINK_ATTR_TRAP_POLICER_BURST] = { .type = NLA_U64, }, +}; + +/* DEVLINK_CMD_HEALTH_REPORTER_TEST - do */ +static const struct nla_policy devlink_health_reporter_test_nl_policy[DEVLINK_ATTR_HEALTH_REPORTER_NAME + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, + [DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .type = NLA_NUL_STRING, }, +}; + /* DEVLINK_CMD_RATE_GET - do */ static const struct nla_policy devlink_rate_get_do_nl_policy[DEVLINK_ATTR_RATE_NODE_NAME + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, @@ -188,6 +495,37 @@ static const struct nla_policy devlink_rate_get_dump_nl_policy[DEVLINK_ATTR_DEV_ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, }; +/* DEVLINK_CMD_RATE_SET - do */ +static const struct nla_policy devlink_rate_set_nl_policy[DEVLINK_ATTR_RATE_TX_WEIGHT + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_RATE_NODE_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_RATE_TX_SHARE] = { .type = NLA_U64, }, + [DEVLINK_ATTR_RATE_TX_MAX] = { .type = NLA_U64, }, + [DEVLINK_ATTR_RATE_TX_PRIORITY] = { .type = NLA_U32, }, + [DEVLINK_ATTR_RATE_TX_WEIGHT] = { .type = NLA_U32, }, + [DEVLINK_ATTR_RATE_PARENT_NODE_NAME] = { .type = NLA_NUL_STRING, }, +}; + +/* DEVLINK_CMD_RATE_NEW - do */ +static const struct nla_policy devlink_rate_new_nl_policy[DEVLINK_ATTR_RATE_TX_WEIGHT + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_RATE_NODE_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_RATE_TX_SHARE] = { .type = NLA_U64, }, + [DEVLINK_ATTR_RATE_TX_MAX] = { .type = NLA_U64, }, + [DEVLINK_ATTR_RATE_TX_PRIORITY] = { .type = NLA_U32, }, + [DEVLINK_ATTR_RATE_TX_WEIGHT] = { .type = NLA_U32, }, + [DEVLINK_ATTR_RATE_PARENT_NODE_NAME] = { .type = NLA_NUL_STRING, }, +}; + +/* DEVLINK_CMD_RATE_DEL - do */ +static const struct nla_policy devlink_rate_del_nl_policy[DEVLINK_ATTR_RATE_NODE_NAME + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_RATE_NODE_NAME] = { .type = NLA_NUL_STRING, }, +}; + /* DEVLINK_CMD_LINECARD_GET - do */ static const struct nla_policy devlink_linecard_get_do_nl_policy[DEVLINK_ATTR_LINECARD_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, @@ -201,14 +539,29 @@ static const struct nla_policy devlink_linecard_get_dump_nl_policy[DEVLINK_ATTR_ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, }; +/* DEVLINK_CMD_LINECARD_SET - do */ +static const struct nla_policy devlink_linecard_set_nl_policy[DEVLINK_ATTR_LINECARD_TYPE + 
1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_LINECARD_INDEX] = { .type = NLA_U32, }, + [DEVLINK_ATTR_LINECARD_TYPE] = { .type = NLA_NUL_STRING, }, +}; + /* DEVLINK_CMD_SELFTESTS_GET - do */ static const struct nla_policy devlink_selftests_get_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, }; +/* DEVLINK_CMD_SELFTESTS_RUN - do */ +static const struct nla_policy devlink_selftests_run_nl_policy[DEVLINK_ATTR_SELFTESTS + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_SELFTESTS] = NLA_POLICY_NESTED(devlink_dl_selftest_id_nl_policy), +}; + /* Ops table for devlink */ -const struct genl_split_ops devlink_nl_ops[32] = { +const struct genl_split_ops devlink_nl_ops[73] = { { .cmd = DEVLINK_CMD_GET, .validate = GENL_DONT_VALIDATE_STRICT, @@ -243,6 +596,56 @@ const struct genl_split_ops devlink_nl_ops[32] = { .flags = GENL_CMD_CAP_DUMP, }, { + .cmd = DEVLINK_CMD_PORT_SET, + .validate = GENL_DONT_VALIDATE_STRICT, + .pre_doit = devlink_nl_pre_doit_port, + .doit = devlink_nl_port_set_doit, + .post_doit = devlink_nl_post_doit, + .policy = devlink_port_set_nl_policy, + .maxattr = DEVLINK_ATTR_PORT_FUNCTION, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { + .cmd = DEVLINK_CMD_PORT_NEW, + .validate = GENL_DONT_VALIDATE_STRICT, + .pre_doit = devlink_nl_pre_doit, + .doit = devlink_nl_port_new_doit, + .post_doit = devlink_nl_post_doit, + .policy = devlink_port_new_nl_policy, + .maxattr = DEVLINK_ATTR_PORT_PCI_SF_NUMBER, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { + .cmd = DEVLINK_CMD_PORT_DEL, + .validate = GENL_DONT_VALIDATE_STRICT, + .pre_doit = devlink_nl_pre_doit_port, + .doit = devlink_nl_port_del_doit, + .post_doit = devlink_nl_post_doit, + .policy = devlink_port_del_nl_policy, + .maxattr = DEVLINK_ATTR_PORT_INDEX, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { + .cmd = DEVLINK_CMD_PORT_SPLIT, + .validate = GENL_DONT_VALIDATE_STRICT, + .pre_doit = devlink_nl_pre_doit_port, + .doit = devlink_nl_port_split_doit, + .post_doit = devlink_nl_post_doit, + .policy = devlink_port_split_nl_policy, + .maxattr = DEVLINK_ATTR_PORT_SPLIT_COUNT, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { + .cmd = DEVLINK_CMD_PORT_UNSPLIT, + .validate = GENL_DONT_VALIDATE_STRICT, + .pre_doit = devlink_nl_pre_doit_port, + .doit = devlink_nl_port_unsplit_doit, + .post_doit = devlink_nl_post_doit, + .policy = devlink_port_unsplit_nl_policy, + .maxattr = DEVLINK_ATTR_PORT_INDEX, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { .cmd = DEVLINK_CMD_SB_GET, .validate = GENL_DONT_VALIDATE_STRICT, .pre_doit = devlink_nl_pre_doit, @@ -277,6 +680,16 @@ const struct genl_split_ops devlink_nl_ops[32] = { .flags = GENL_CMD_CAP_DUMP, }, { + .cmd = DEVLINK_CMD_SB_POOL_SET, + .validate = GENL_DONT_VALIDATE_STRICT, + .pre_doit = devlink_nl_pre_doit, + .doit = devlink_nl_sb_pool_set_doit, + .post_doit = devlink_nl_post_doit, + .policy = devlink_sb_pool_set_nl_policy, + .maxattr = DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { .cmd = DEVLINK_CMD_SB_PORT_POOL_GET, .validate = GENL_DONT_VALIDATE_STRICT, .pre_doit = devlink_nl_pre_doit_port, @@ -294,6 +707,16 @@ const struct genl_split_ops devlink_nl_ops[32] = { .flags = GENL_CMD_CAP_DUMP, }, { + .cmd = DEVLINK_CMD_SB_PORT_POOL_SET, + .validate 
= GENL_DONT_VALIDATE_STRICT, + .pre_doit = devlink_nl_pre_doit_port, + .doit = devlink_nl_sb_port_pool_set_doit, + .post_doit = devlink_nl_post_doit, + .policy = devlink_sb_port_pool_set_nl_policy, + .maxattr = DEVLINK_ATTR_SB_THRESHOLD, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { .cmd = DEVLINK_CMD_SB_TC_POOL_BIND_GET, .validate = GENL_DONT_VALIDATE_STRICT, .pre_doit = devlink_nl_pre_doit_port, @@ -311,6 +734,126 @@ const struct genl_split_ops devlink_nl_ops[32] = { .flags = GENL_CMD_CAP_DUMP, }, { + .cmd = DEVLINK_CMD_SB_TC_POOL_BIND_SET, + .validate = GENL_DONT_VALIDATE_STRICT, + .pre_doit = devlink_nl_pre_doit_port, + .doit = devlink_nl_sb_tc_pool_bind_set_doit, + .post_doit = devlink_nl_post_doit, + .policy = devlink_sb_tc_pool_bind_set_nl_policy, + .maxattr = DEVLINK_ATTR_SB_TC_INDEX, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { + .cmd = DEVLINK_CMD_SB_OCC_SNAPSHOT, + .validate = GENL_DONT_VALIDATE_STRICT, + .pre_doit = devlink_nl_pre_doit, + .doit = devlink_nl_sb_occ_snapshot_doit, + .post_doit = devlink_nl_post_doit, + .policy = devlink_sb_occ_snapshot_nl_policy, + .maxattr = DEVLINK_ATTR_SB_INDEX, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { + .cmd = DEVLINK_CMD_SB_OCC_MAX_CLEAR, + .validate = GENL_DONT_VALIDATE_STRICT, + .pre_doit = devlink_nl_pre_doit, + .doit = devlink_nl_sb_occ_max_clear_doit, + .post_doit = devlink_nl_post_doit, + .policy = devlink_sb_occ_max_clear_nl_policy, + .maxattr = DEVLINK_ATTR_SB_INDEX, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { + .cmd = DEVLINK_CMD_ESWITCH_GET, + .validate = GENL_DONT_VALIDATE_STRICT, + .pre_doit = devlink_nl_pre_doit, + .doit = devlink_nl_eswitch_get_doit, + .post_doit = devlink_nl_post_doit, + .policy = devlink_eswitch_get_nl_policy, + .maxattr = DEVLINK_ATTR_DEV_NAME, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { + .cmd = DEVLINK_CMD_ESWITCH_SET, + .validate = GENL_DONT_VALIDATE_STRICT, + .pre_doit = devlink_nl_pre_doit, + .doit = devlink_nl_eswitch_set_doit, + .post_doit = devlink_nl_post_doit, + .policy = devlink_eswitch_set_nl_policy, + .maxattr = DEVLINK_ATTR_ESWITCH_ENCAP_MODE, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { + .cmd = DEVLINK_CMD_DPIPE_TABLE_GET, + .validate = GENL_DONT_VALIDATE_STRICT, + .pre_doit = devlink_nl_pre_doit, + .doit = devlink_nl_dpipe_table_get_doit, + .post_doit = devlink_nl_post_doit, + .policy = devlink_dpipe_table_get_nl_policy, + .maxattr = DEVLINK_ATTR_DPIPE_TABLE_NAME, + .flags = GENL_CMD_CAP_DO, + }, + { + .cmd = DEVLINK_CMD_DPIPE_ENTRIES_GET, + .validate = GENL_DONT_VALIDATE_STRICT, + .pre_doit = devlink_nl_pre_doit, + .doit = devlink_nl_dpipe_entries_get_doit, + .post_doit = devlink_nl_post_doit, + .policy = devlink_dpipe_entries_get_nl_policy, + .maxattr = DEVLINK_ATTR_DPIPE_TABLE_NAME, + .flags = GENL_CMD_CAP_DO, + }, + { + .cmd = DEVLINK_CMD_DPIPE_HEADERS_GET, + .validate = GENL_DONT_VALIDATE_STRICT, + .pre_doit = devlink_nl_pre_doit, + .doit = devlink_nl_dpipe_headers_get_doit, + .post_doit = devlink_nl_post_doit, + .policy = devlink_dpipe_headers_get_nl_policy, + .maxattr = DEVLINK_ATTR_DEV_NAME, + .flags = GENL_CMD_CAP_DO, + }, + { + .cmd = DEVLINK_CMD_DPIPE_TABLE_COUNTERS_SET, + .validate = GENL_DONT_VALIDATE_STRICT, + .pre_doit = devlink_nl_pre_doit, + .doit = devlink_nl_dpipe_table_counters_set_doit, + .post_doit = devlink_nl_post_doit, + .policy = devlink_dpipe_table_counters_set_nl_policy, + .maxattr = DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { + .cmd = 
DEVLINK_CMD_RESOURCE_SET, + .validate = GENL_DONT_VALIDATE_STRICT, + .pre_doit = devlink_nl_pre_doit, + .doit = devlink_nl_resource_set_doit, + .post_doit = devlink_nl_post_doit, + .policy = devlink_resource_set_nl_policy, + .maxattr = DEVLINK_ATTR_RESOURCE_SIZE, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { + .cmd = DEVLINK_CMD_RESOURCE_DUMP, + .validate = GENL_DONT_VALIDATE_STRICT, + .pre_doit = devlink_nl_pre_doit, + .doit = devlink_nl_resource_dump_doit, + .post_doit = devlink_nl_post_doit, + .policy = devlink_resource_dump_nl_policy, + .maxattr = DEVLINK_ATTR_DEV_NAME, + .flags = GENL_CMD_CAP_DO, + }, + { + .cmd = DEVLINK_CMD_RELOAD, + .validate = GENL_DONT_VALIDATE_STRICT, + .pre_doit = devlink_nl_pre_doit, + .doit = devlink_nl_reload_doit, + .post_doit = devlink_nl_post_doit, + .policy = devlink_reload_nl_policy, + .maxattr = DEVLINK_ATTR_RELOAD_LIMITS, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { .cmd = DEVLINK_CMD_PARAM_GET, .validate = GENL_DONT_VALIDATE_STRICT, .pre_doit = devlink_nl_pre_doit, @@ -328,6 +871,16 @@ const struct genl_split_ops devlink_nl_ops[32] = { .flags = GENL_CMD_CAP_DUMP, }, { + .cmd = DEVLINK_CMD_PARAM_SET, + .validate = GENL_DONT_VALIDATE_STRICT, + .pre_doit = devlink_nl_pre_doit, + .doit = devlink_nl_param_set_doit, + .post_doit = devlink_nl_post_doit, + .policy = devlink_param_set_nl_policy, + .maxattr = DEVLINK_ATTR_PARAM_VALUE_CMODE, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { .cmd = DEVLINK_CMD_REGION_GET, .validate = GENL_DONT_VALIDATE_STRICT, .pre_doit = devlink_nl_pre_doit_port_optional, @@ -345,6 +898,60 @@ const struct genl_split_ops devlink_nl_ops[32] = { .flags = GENL_CMD_CAP_DUMP, }, { + .cmd = DEVLINK_CMD_REGION_NEW, + .validate = GENL_DONT_VALIDATE_STRICT, + .pre_doit = devlink_nl_pre_doit_port_optional, + .doit = devlink_nl_region_new_doit, + .post_doit = devlink_nl_post_doit, + .policy = devlink_region_new_nl_policy, + .maxattr = DEVLINK_ATTR_REGION_SNAPSHOT_ID, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { + .cmd = DEVLINK_CMD_REGION_DEL, + .validate = GENL_DONT_VALIDATE_STRICT, + .pre_doit = devlink_nl_pre_doit_port_optional, + .doit = devlink_nl_region_del_doit, + .post_doit = devlink_nl_post_doit, + .policy = devlink_region_del_nl_policy, + .maxattr = DEVLINK_ATTR_REGION_SNAPSHOT_ID, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { + .cmd = DEVLINK_CMD_REGION_READ, + .validate = GENL_DONT_VALIDATE_DUMP_STRICT, + .dumpit = devlink_nl_region_read_dumpit, + .policy = devlink_region_read_nl_policy, + .maxattr = DEVLINK_ATTR_REGION_DIRECT, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DUMP, + }, + { + .cmd = DEVLINK_CMD_PORT_PARAM_GET, + .validate = GENL_DONT_VALIDATE_STRICT, + .pre_doit = devlink_nl_pre_doit_port, + .doit = devlink_nl_port_param_get_doit, + .post_doit = devlink_nl_post_doit, + .policy = devlink_port_param_get_nl_policy, + .maxattr = DEVLINK_ATTR_PORT_INDEX, + .flags = GENL_CMD_CAP_DO, + }, + { + .cmd = DEVLINK_CMD_PORT_PARAM_GET, + .validate = GENL_DONT_VALIDATE_DUMP_STRICT, + .dumpit = devlink_nl_port_param_get_dumpit, + .flags = GENL_CMD_CAP_DUMP, + }, + { + .cmd = DEVLINK_CMD_PORT_PARAM_SET, + .validate = GENL_DONT_VALIDATE_STRICT, + .pre_doit = devlink_nl_pre_doit_port, + .doit = devlink_nl_port_param_set_doit, + .post_doit = devlink_nl_post_doit, + .policy = devlink_port_param_set_nl_policy, + .maxattr = DEVLINK_ATTR_PORT_INDEX, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { .cmd = DEVLINK_CMD_INFO_GET, .validate = GENL_DONT_VALIDATE_STRICT, .pre_doit = 
devlink_nl_pre_doit, @@ -378,6 +985,64 @@ const struct genl_split_ops devlink_nl_ops[32] = { .flags = GENL_CMD_CAP_DUMP, }, { + .cmd = DEVLINK_CMD_HEALTH_REPORTER_SET, + .validate = GENL_DONT_VALIDATE_STRICT, + .pre_doit = devlink_nl_pre_doit_port_optional, + .doit = devlink_nl_health_reporter_set_doit, + .post_doit = devlink_nl_post_doit, + .policy = devlink_health_reporter_set_nl_policy, + .maxattr = DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { + .cmd = DEVLINK_CMD_HEALTH_REPORTER_RECOVER, + .validate = GENL_DONT_VALIDATE_STRICT, + .pre_doit = devlink_nl_pre_doit_port_optional, + .doit = devlink_nl_health_reporter_recover_doit, + .post_doit = devlink_nl_post_doit, + .policy = devlink_health_reporter_recover_nl_policy, + .maxattr = DEVLINK_ATTR_HEALTH_REPORTER_NAME, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { + .cmd = DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE, + .validate = GENL_DONT_VALIDATE_STRICT, + .pre_doit = devlink_nl_pre_doit_port_optional, + .doit = devlink_nl_health_reporter_diagnose_doit, + .post_doit = devlink_nl_post_doit, + .policy = devlink_health_reporter_diagnose_nl_policy, + .maxattr = DEVLINK_ATTR_HEALTH_REPORTER_NAME, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { + .cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET, + .validate = GENL_DONT_VALIDATE_DUMP_STRICT, + .dumpit = devlink_nl_health_reporter_dump_get_dumpit, + .policy = devlink_health_reporter_dump_get_nl_policy, + .maxattr = DEVLINK_ATTR_HEALTH_REPORTER_NAME, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DUMP, + }, + { + .cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR, + .validate = GENL_DONT_VALIDATE_STRICT, + .pre_doit = devlink_nl_pre_doit_port_optional, + .doit = devlink_nl_health_reporter_dump_clear_doit, + .post_doit = devlink_nl_post_doit, + .policy = devlink_health_reporter_dump_clear_nl_policy, + .maxattr = DEVLINK_ATTR_HEALTH_REPORTER_NAME, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { + .cmd = DEVLINK_CMD_FLASH_UPDATE, + .validate = GENL_DONT_VALIDATE_STRICT, + .pre_doit = devlink_nl_pre_doit, + .doit = devlink_nl_flash_update_doit, + .post_doit = devlink_nl_post_doit, + .policy = devlink_flash_update_nl_policy, + .maxattr = DEVLINK_ATTR_FLASH_UPDATE_OVERWRITE_MASK, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { .cmd = DEVLINK_CMD_TRAP_GET, .validate = GENL_DONT_VALIDATE_STRICT, .pre_doit = devlink_nl_pre_doit, @@ -395,6 +1060,16 @@ const struct genl_split_ops devlink_nl_ops[32] = { .flags = GENL_CMD_CAP_DUMP, }, { + .cmd = DEVLINK_CMD_TRAP_SET, + .validate = GENL_DONT_VALIDATE_STRICT, + .pre_doit = devlink_nl_pre_doit, + .doit = devlink_nl_trap_set_doit, + .post_doit = devlink_nl_post_doit, + .policy = devlink_trap_set_nl_policy, + .maxattr = DEVLINK_ATTR_TRAP_ACTION, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { .cmd = DEVLINK_CMD_TRAP_GROUP_GET, .validate = GENL_DONT_VALIDATE_STRICT, .pre_doit = devlink_nl_pre_doit, @@ -412,6 +1087,16 @@ const struct genl_split_ops devlink_nl_ops[32] = { .flags = GENL_CMD_CAP_DUMP, }, { + .cmd = DEVLINK_CMD_TRAP_GROUP_SET, + .validate = GENL_DONT_VALIDATE_STRICT, + .pre_doit = devlink_nl_pre_doit, + .doit = devlink_nl_trap_group_set_doit, + .post_doit = devlink_nl_post_doit, + .policy = devlink_trap_group_set_nl_policy, + .maxattr = DEVLINK_ATTR_TRAP_POLICER_ID, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { .cmd = DEVLINK_CMD_TRAP_POLICER_GET, .validate = GENL_DONT_VALIDATE_STRICT, .pre_doit = devlink_nl_pre_doit, @@ -429,6 +1114,26 @@ const struct genl_split_ops 
devlink_nl_ops[32] = {
 .flags = GENL_CMD_CAP_DUMP,
 },
 {
+ .cmd = DEVLINK_CMD_TRAP_POLICER_SET,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit,
+ .doit = devlink_nl_trap_policer_set_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_trap_policer_set_nl_policy,
+ .maxattr = DEVLINK_ATTR_TRAP_POLICER_BURST,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_HEALTH_REPORTER_TEST,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit_port_optional,
+ .doit = devlink_nl_health_reporter_test_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_health_reporter_test_nl_policy,
+ .maxattr = DEVLINK_ATTR_HEALTH_REPORTER_NAME,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
+ {
 .cmd = DEVLINK_CMD_RATE_GET,
 .validate = GENL_DONT_VALIDATE_STRICT,
 .pre_doit = devlink_nl_pre_doit,
@@ -446,6 +1151,36 @@ const struct genl_split_ops devlink_nl_ops[32] = {
 .flags = GENL_CMD_CAP_DUMP,
 },
 {
+ .cmd = DEVLINK_CMD_RATE_SET,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit,
+ .doit = devlink_nl_rate_set_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_rate_set_nl_policy,
+ .maxattr = DEVLINK_ATTR_RATE_TX_WEIGHT,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_RATE_NEW,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit,
+ .doit = devlink_nl_rate_new_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_rate_new_nl_policy,
+ .maxattr = DEVLINK_ATTR_RATE_TX_WEIGHT,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = DEVLINK_CMD_RATE_DEL,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit,
+ .doit = devlink_nl_rate_del_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_rate_del_nl_policy,
+ .maxattr = DEVLINK_ATTR_RATE_NODE_NAME,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
+ {
 .cmd = DEVLINK_CMD_LINECARD_GET,
 .validate = GENL_DONT_VALIDATE_STRICT,
 .pre_doit = devlink_nl_pre_doit,
@@ -463,6 +1198,16 @@ const struct genl_split_ops devlink_nl_ops[32] = {
 .flags = GENL_CMD_CAP_DUMP,
 },
 {
+ .cmd = DEVLINK_CMD_LINECARD_SET,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit,
+ .doit = devlink_nl_linecard_set_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_linecard_set_nl_policy,
+ .maxattr = DEVLINK_ATTR_LINECARD_TYPE,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
+ {
 .cmd = DEVLINK_CMD_SELFTESTS_GET,
 .validate = GENL_DONT_VALIDATE_STRICT,
 .pre_doit = devlink_nl_pre_doit,
@@ -478,4 +1223,14 @@ const struct genl_split_ops devlink_nl_ops[32] = {
 .dumpit = devlink_nl_selftests_get_dumpit,
 .flags = GENL_CMD_CAP_DUMP,
 },
+ {
+ .cmd = DEVLINK_CMD_SELFTESTS_RUN,
+ .validate = GENL_DONT_VALIDATE_STRICT,
+ .pre_doit = devlink_nl_pre_doit,
+ .doit = devlink_nl_selftests_run_doit,
+ .post_doit = devlink_nl_post_doit,
+ .policy = devlink_selftests_run_nl_policy,
+ .maxattr = DEVLINK_ATTR_SELFTESTS,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
 };
diff --git a/net/devlink/netlink_gen.h b/net/devlink/netlink_gen.h
index f8bbc93e39..0e9e89c31c 100644
--- a/net/devlink/netlink_gen.h
+++ b/net/devlink/netlink_gen.h
@@ -11,8 +11,12 @@
 #include <uapi/linux/devlink.h>
+/* Common nested types */
+extern const struct nla_policy devlink_dl_port_function_nl_policy[DEVLINK_PORT_FN_ATTR_CAPS + 1];
+extern const struct nla_policy devlink_dl_selftest_id_nl_policy[DEVLINK_ATTR_SELFTEST_ID_FLASH + 1];
+
 /* Ops table for devlink */
-extern const struct genl_split_ops devlink_nl_ops[32];
+extern const struct genl_split_ops devlink_nl_ops[73];
 int devlink_nl_pre_doit(const struct genl_split_ops *ops, struct sk_buff *skb,
 struct genl_info *info);
@@ -30,25 +34,61 @@ int devlink_nl_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
 int devlink_nl_port_get_doit(struct sk_buff *skb, struct genl_info *info);
 int devlink_nl_port_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
+int devlink_nl_port_set_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_port_new_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_port_del_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_port_split_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_port_unsplit_doit(struct sk_buff *skb, struct genl_info *info);
 int devlink_nl_sb_get_doit(struct sk_buff *skb, struct genl_info *info);
 int devlink_nl_sb_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
 int devlink_nl_sb_pool_get_doit(struct sk_buff *skb, struct genl_info *info);
 int devlink_nl_sb_pool_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
+int devlink_nl_sb_pool_set_doit(struct sk_buff *skb, struct genl_info *info);
 int devlink_nl_sb_port_pool_get_doit(struct sk_buff *skb, struct genl_info *info);
 int devlink_nl_sb_port_pool_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
+int devlink_nl_sb_port_pool_set_doit(struct sk_buff *skb,
+ struct genl_info *info);
 int devlink_nl_sb_tc_pool_bind_get_doit(struct sk_buff *skb, struct genl_info *info);
 int devlink_nl_sb_tc_pool_bind_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
+int devlink_nl_sb_tc_pool_bind_set_doit(struct sk_buff *skb,
+ struct genl_info *info);
+int devlink_nl_sb_occ_snapshot_doit(struct sk_buff *skb,
+ struct genl_info *info);
+int devlink_nl_sb_occ_max_clear_doit(struct sk_buff *skb,
+ struct genl_info *info);
+int devlink_nl_eswitch_get_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_eswitch_set_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_dpipe_table_get_doit(struct sk_buff *skb,
+ struct genl_info *info);
+int devlink_nl_dpipe_entries_get_doit(struct sk_buff *skb,
+ struct genl_info *info);
+int devlink_nl_dpipe_headers_get_doit(struct sk_buff *skb,
+ struct genl_info *info);
+int devlink_nl_dpipe_table_counters_set_doit(struct sk_buff *skb,
+ struct genl_info *info);
+int devlink_nl_resource_set_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_resource_dump_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_reload_doit(struct sk_buff *skb, struct genl_info *info);
 int devlink_nl_param_get_doit(struct sk_buff *skb, struct genl_info *info);
 int devlink_nl_param_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
+int devlink_nl_param_set_doit(struct sk_buff *skb, struct genl_info *info);
 int devlink_nl_region_get_doit(struct sk_buff *skb, struct genl_info *info);
 int devlink_nl_region_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
+int devlink_nl_region_new_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_region_del_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_region_read_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb);
+int devlink_nl_port_param_get_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_port_param_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb);
+int devlink_nl_port_param_set_doit(struct sk_buff *skb,
+ struct genl_info *info);
 int devlink_nl_info_get_doit(struct sk_buff *skb, struct genl_info *info);
 int devlink_nl_info_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
@@ -56,24 +96,46 @@ int devlink_nl_health_reporter_get_doit(struct sk_buff *skb,
 struct genl_info *info);
 int devlink_nl_health_reporter_get_dumpit(struct sk_buff *skb,
 struct netlink_callback *cb);
+int devlink_nl_health_reporter_set_doit(struct sk_buff *skb,
+ struct genl_info *info);
+int devlink_nl_health_reporter_recover_doit(struct sk_buff *skb,
+ struct genl_info *info);
+int devlink_nl_health_reporter_diagnose_doit(struct sk_buff *skb,
+ struct genl_info *info);
+int devlink_nl_health_reporter_dump_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb);
+int devlink_nl_health_reporter_dump_clear_doit(struct sk_buff *skb,
+ struct genl_info *info);
+int devlink_nl_flash_update_doit(struct sk_buff *skb, struct genl_info *info);
 int devlink_nl_trap_get_doit(struct sk_buff *skb, struct genl_info *info);
 int devlink_nl_trap_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
+int devlink_nl_trap_set_doit(struct sk_buff *skb, struct genl_info *info);
 int devlink_nl_trap_group_get_doit(struct sk_buff *skb, struct genl_info *info);
 int devlink_nl_trap_group_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
+int devlink_nl_trap_group_set_doit(struct sk_buff *skb, struct genl_info *info);
 int devlink_nl_trap_policer_get_doit(struct sk_buff *skb, struct genl_info *info);
 int devlink_nl_trap_policer_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
+int devlink_nl_trap_policer_set_doit(struct sk_buff *skb,
+ struct genl_info *info);
+int devlink_nl_health_reporter_test_doit(struct sk_buff *skb,
+ struct genl_info *info);
 int devlink_nl_rate_get_doit(struct sk_buff *skb, struct genl_info *info);
 int devlink_nl_rate_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
+int devlink_nl_rate_set_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_rate_new_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_rate_del_doit(struct sk_buff *skb, struct genl_info *info);
 int devlink_nl_linecard_get_doit(struct sk_buff *skb, struct genl_info *info);
 int devlink_nl_linecard_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
+int devlink_nl_linecard_set_doit(struct sk_buff *skb, struct genl_info *info);
 int devlink_nl_selftests_get_doit(struct sk_buff *skb, struct genl_info *info);
 int devlink_nl_selftests_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
+int devlink_nl_selftests_run_doit(struct sk_buff *skb, struct genl_info *info);
 #endif /* _LINUX_DEVLINK_GEN_H */
diff --git a/net/devlink/param.c b/net/devlink/param.c
index 31275f9d4c..d74df09311 100644
--- a/net/devlink/param.c
+++ b/net/devlink/param.c
@@ -581,7 +581,7 @@ static int __devlink_nl_cmd_param_set_doit(struct devlink *devlink,
 return 0;
 }
-int devlink_nl_cmd_param_set_doit(struct sk_buff *skb, struct genl_info *info)
+int devlink_nl_param_set_doit(struct sk_buff *skb, struct genl_info *info)
 {
 struct devlink *devlink = info->user_ptr[0];
@@ -589,22 +589,22 @@ int devlink_nl_cmd_param_set_doit(struct sk_buff *skb, struct genl_info *info)
 info, DEVLINK_CMD_PARAM_NEW);
 }
-int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg,
- struct netlink_callback *cb)
+int devlink_nl_port_param_get_dumpit(struct sk_buff *msg,
+ struct netlink_callback *cb)
 {
 NL_SET_ERR_MSG(cb->extack, "Port params are not supported");
 return msg->len;
 }
-int devlink_nl_cmd_port_param_get_doit(struct sk_buff *skb,
- struct genl_info *info)
+int devlink_nl_port_param_get_doit(struct sk_buff *skb,
+ struct genl_info *info)
 {
 NL_SET_ERR_MSG(info->extack, "Port params are not supported");
 return -EINVAL;
 }
-int devlink_nl_cmd_port_param_set_doit(struct sk_buff *skb,
- struct genl_info *info)
+int devlink_nl_port_param_set_doit(struct sk_buff *skb,
+ struct genl_info *info)
 {
 NL_SET_ERR_MSG(info->extack, "Port params are not supported");
 return -EINVAL;
diff --git a/net/devlink/port.c b/net/devlink/port.c
index 4763b42885..d39ee6053c 100644
--- a/net/devlink/port.c
+++ b/net/devlink/port.c
@@ -428,6 +428,13 @@ devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *por
 if (err)
 goto out;
 err = devlink_port_fn_state_fill(port, msg, extack, &msg_updated);
+ if (err)
+ goto out;
+ err = devlink_rel_devlink_handle_put(msg, port->devlink,
+ port->rel_index,
+ DEVLINK_PORT_FN_ATTR_DEVLINK,
+ &msg_updated);
+
 out:
 if (err || !msg_updated)
 nla_nest_cancel(msg, function_attr);
@@ -483,7 +490,7 @@ static int devlink_nl_port_fill(struct sk_buff *msg,
 goto nla_put_failure;
 if (devlink_port->linecard &&
 nla_put_u32(msg, DEVLINK_ATTR_LINECARD_INDEX,
- devlink_port->linecard->index))
+ devlink_linecard_index(devlink_port->linecard)))
 goto nla_put_failure;
 genlmsg_end(msg, hdr);
@@ -574,7 +581,7 @@ devlink_nl_port_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
 xa_for_each_start(&devlink->ports, port_index, devlink_port, state->idx) {
 err = devlink_nl_port_fill(msg, devlink_port,
- DEVLINK_CMD_NEW,
+ DEVLINK_CMD_PORT_NEW,
 NETLINK_CB(cb->skb).portid,
 cb->nlh->nlmsg_seq, flags,
 cb->extack);
@@ -665,7 +672,7 @@ static int devlink_port_function_validate(struct devlink_port *devlink_port,
 return -EOPNOTSUPP;
 }
 if (tb[DEVLINK_PORT_FN_ATTR_STATE] && !ops->port_fn_state_set) {
- NL_SET_ERR_MSG_ATTR(extack, tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR],
+ NL_SET_ERR_MSG_ATTR(extack, tb[DEVLINK_PORT_FN_ATTR_STATE],
 "Function does not support state setting");
 return -EOPNOTSUPP;
 }
@@ -765,7 +772,7 @@ static int devlink_port_function_set(struct devlink_port *port,
 return err;
 }
-int devlink_nl_cmd_port_set_doit(struct sk_buff *skb, struct genl_info *info)
+int devlink_nl_port_set_doit(struct sk_buff *skb, struct genl_info *info)
 {
 struct devlink_port *devlink_port = info->user_ptr[1];
 int err;
@@ -791,7 +798,7 @@ int devlink_nl_cmd_port_set_doit(struct sk_buff *skb, struct genl_info *info)
 return 0;
 }
-int devlink_nl_cmd_port_split_doit(struct sk_buff *skb, struct genl_info *info)
+int devlink_nl_port_split_doit(struct sk_buff *skb, struct genl_info *info)
 {
 struct devlink_port *devlink_port = info->user_ptr[1];
 struct devlink *devlink = info->user_ptr[0];
@@ -822,8 +829,7 @@ int devlink_nl_cmd_port_split_doit(struct sk_buff *skb, struct genl_info *info)
 info->extack);
 }
-int devlink_nl_cmd_port_unsplit_doit(struct sk_buff *skb,
- struct genl_info *info)
+int devlink_nl_port_unsplit_doit(struct sk_buff *skb, struct genl_info *info)
 {
 struct devlink_port *devlink_port = info->user_ptr[1];
 struct devlink *devlink = info->user_ptr[0];
@@ -833,7 +839,7 @@ int devlink_nl_cmd_port_unsplit_doit(struct sk_buff *skb,
 return devlink_port->ops->port_unsplit(devlink, devlink_port, info->extack);
 }
-int devlink_nl_cmd_port_new_doit(struct sk_buff *skb, struct genl_info *info)
+int devlink_nl_port_new_doit(struct sk_buff *skb, struct genl_info *info)
 {
 struct netlink_ext_ack *extack = info->extack;
 struct devlink_port_new_attrs new_attrs = {};
@@ -897,7 +903,7 @@
err_out_port_del: return err; } -int devlink_nl_cmd_port_del_doit(struct sk_buff *skb, struct genl_info *info) +int devlink_nl_port_del_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink_port *devlink_port = info->user_ptr[1]; struct netlink_ext_ack *extack = info->extack; @@ -1392,6 +1398,50 @@ void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port, u32 contro } EXPORT_SYMBOL_GPL(devlink_port_attrs_pci_sf_set); +static void devlink_port_rel_notify_cb(struct devlink *devlink, u32 port_index) +{ + struct devlink_port *devlink_port; + + devlink_port = devlink_port_get_by_index(devlink, port_index); + if (!devlink_port) + return; + devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW); +} + +static void devlink_port_rel_cleanup_cb(struct devlink *devlink, u32 port_index, + u32 rel_index) +{ + struct devlink_port *devlink_port; + + devlink_port = devlink_port_get_by_index(devlink, port_index); + if (devlink_port && devlink_port->rel_index == rel_index) + devlink_port->rel_index = 0; +} + +/** + * devl_port_fn_devlink_set - Attach peer devlink + * instance to port function. + * @devlink_port: devlink port + * @fn_devlink: devlink instance to attach + */ +int devl_port_fn_devlink_set(struct devlink_port *devlink_port, + struct devlink *fn_devlink) +{ + ASSERT_DEVLINK_PORT_REGISTERED(devlink_port); + + if (WARN_ON(devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_PCI_SF || + devlink_port->attrs.pci_sf.external)) + return -EINVAL; + + return devlink_rel_nested_in_add(&devlink_port->rel_index, + devlink_port->devlink->index, + devlink_port->index, + devlink_port_rel_notify_cb, + devlink_port_rel_cleanup_cb, + fn_devlink); +} +EXPORT_SYMBOL_GPL(devl_port_fn_devlink_set); + /** * devlink_port_linecard_set - Link port with a linecard * @@ -1420,7 +1470,7 @@ static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port, case DEVLINK_PORT_FLAVOUR_PHYSICAL: if (devlink_port->linecard) n = snprintf(name, len, "l%u", - devlink_port->linecard->index); + devlink_linecard_index(devlink_port->linecard)); if (n < len) n += snprintf(name + n, len - n, "p%u", attrs->phys.port_number); diff --git a/net/devlink/rate.c b/net/devlink/rate.c index dff1593b84..94b289b93f 100644 --- a/net/devlink/rate.c +++ b/net/devlink/rate.c @@ -458,7 +458,7 @@ static bool devlink_rate_set_ops_supported(const struct devlink_ops *ops, return true; } -int devlink_nl_cmd_rate_set_doit(struct sk_buff *skb, struct genl_info *info) +int devlink_nl_rate_set_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; struct devlink_rate *devlink_rate; @@ -480,7 +480,7 @@ int devlink_nl_cmd_rate_set_doit(struct sk_buff *skb, struct genl_info *info) return err; } -int devlink_nl_cmd_rate_new_doit(struct sk_buff *skb, struct genl_info *info) +int devlink_nl_rate_new_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; struct devlink_rate *rate_node; @@ -536,7 +536,7 @@ err_strdup: return err; } -int devlink_nl_cmd_rate_del_doit(struct sk_buff *skb, struct genl_info *info) +int devlink_nl_rate_del_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; struct devlink_rate *rate_node; diff --git a/net/devlink/region.c b/net/devlink/region.c index d197cdb662..0aab7b82d6 100644 --- a/net/devlink/region.c +++ b/net/devlink/region.c @@ -588,7 +588,7 @@ int devlink_nl_region_get_dumpit(struct sk_buff *skb, return devlink_nl_dumpit(skb, cb, devlink_nl_region_get_dump_one); } -int 
devlink_nl_cmd_region_del(struct sk_buff *skb, struct genl_info *info) +int devlink_nl_region_del_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; struct devlink_snapshot *snapshot; @@ -633,7 +633,7 @@ int devlink_nl_cmd_region_del(struct sk_buff *skb, struct genl_info *info) return 0; } -int devlink_nl_cmd_region_new(struct sk_buff *skb, struct genl_info *info) +int devlink_nl_region_new_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; struct devlink_snapshot *snapshot; @@ -863,8 +863,8 @@ devlink_region_direct_fill(void *cb_priv, u8 *chunk, u32 chunk_size, curr_offset, chunk_size, chunk); } -int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb, - struct netlink_callback *cb) +int devlink_nl_region_read_dumpit(struct sk_buff *skb, + struct netlink_callback *cb) { const struct genl_dumpit_info *info = genl_dumpit_info(cb); struct devlink_nl_dump_state *state = devlink_dump_state(cb); diff --git a/net/devlink/resource.c b/net/devlink/resource.c index c8b615e4c3..594c8aeb3b 100644 --- a/net/devlink/resource.c +++ b/net/devlink/resource.c @@ -105,7 +105,7 @@ devlink_resource_validate_size(struct devlink_resource *resource, u64 size, return err; } -int devlink_nl_cmd_resource_set(struct sk_buff *skb, struct genl_info *info) +int devlink_nl_resource_set_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; struct devlink_resource *resource; @@ -285,7 +285,7 @@ err_resource_put: return err; } -int devlink_nl_cmd_resource_dump(struct sk_buff *skb, struct genl_info *info) +int devlink_nl_resource_dump_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; diff --git a/net/devlink/sb.c b/net/devlink/sb.c index bd677fff5e..0a76bb3250 100644 --- a/net/devlink/sb.c +++ b/net/devlink/sb.c @@ -413,7 +413,7 @@ static int devlink_sb_pool_set(struct devlink *devlink, unsigned int sb_index, return -EOPNOTSUPP; } -int devlink_nl_cmd_sb_pool_set_doit(struct sk_buff *skb, struct genl_info *info) +int devlink_nl_sb_pool_set_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; enum devlink_sb_threshold_type threshold_type; @@ -621,8 +621,8 @@ static int devlink_sb_port_pool_set(struct devlink_port *devlink_port, return -EOPNOTSUPP; } -int devlink_nl_cmd_sb_port_pool_set_doit(struct sk_buff *skb, - struct genl_info *info) +int devlink_nl_sb_port_pool_set_doit(struct sk_buff *skb, + struct genl_info *info) { struct devlink_port *devlink_port = info->user_ptr[1]; struct devlink *devlink = info->user_ptr[0]; @@ -861,8 +861,8 @@ static int devlink_sb_tc_pool_bind_set(struct devlink_port *devlink_port, return -EOPNOTSUPP; } -int devlink_nl_cmd_sb_tc_pool_bind_set_doit(struct sk_buff *skb, - struct genl_info *info) +int devlink_nl_sb_tc_pool_bind_set_doit(struct sk_buff *skb, + struct genl_info *info) { struct devlink_port *devlink_port = info->user_ptr[1]; struct devlink *devlink = info->user_ptr[0]; @@ -900,8 +900,7 @@ int devlink_nl_cmd_sb_tc_pool_bind_set_doit(struct sk_buff *skb, pool_index, threshold, info->extack); } -int devlink_nl_cmd_sb_occ_snapshot_doit(struct sk_buff *skb, - struct genl_info *info) +int devlink_nl_sb_occ_snapshot_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; const struct devlink_ops *ops = devlink->ops; @@ -916,8 +915,8 @@ int devlink_nl_cmd_sb_occ_snapshot_doit(struct sk_buff *skb, return -EOPNOTSUPP; } -int 
devlink_nl_cmd_sb_occ_max_clear_doit(struct sk_buff *skb, - struct genl_info *info) +int devlink_nl_sb_occ_max_clear_doit(struct sk_buff *skb, + struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; const struct devlink_ops *ops = devlink->ops; diff --git a/net/devlink/trap.c b/net/devlink/trap.c index c26bf9b29b..c26313e7ca 100644 --- a/net/devlink/trap.c +++ b/net/devlink/trap.c @@ -414,7 +414,7 @@ static int devlink_trap_action_set(struct devlink *devlink, info->extack); } -int devlink_nl_cmd_trap_set_doit(struct sk_buff *skb, struct genl_info *info) +int devlink_nl_trap_set_doit(struct sk_buff *skb, struct genl_info *info) { struct netlink_ext_ack *extack = info->extack; struct devlink *devlink = info->user_ptr[0]; @@ -684,8 +684,7 @@ static int devlink_trap_group_set(struct devlink *devlink, return 0; } -int devlink_nl_cmd_trap_group_set_doit(struct sk_buff *skb, - struct genl_info *info) +int devlink_nl_trap_group_set_doit(struct sk_buff *skb, struct genl_info *info) { struct netlink_ext_ack *extack = info->extack; struct devlink *devlink = info->user_ptr[0]; @@ -926,8 +925,8 @@ devlink_trap_policer_set(struct devlink *devlink, return 0; } -int devlink_nl_cmd_trap_policer_set_doit(struct sk_buff *skb, - struct genl_info *info) +int devlink_nl_trap_policer_set_doit(struct sk_buff *skb, + struct genl_info *info) { struct devlink_trap_policer_item *policer_item; struct netlink_ext_ack *extack = info->extack; diff --git a/net/dsa/Makefile b/net/dsa/Makefile index 12e305824a..8a1894a425 100644 --- a/net/dsa/Makefile +++ b/net/dsa/Makefile @@ -8,16 +8,16 @@ endif # the core obj-$(CONFIG_NET_DSA) += dsa_core.o dsa_core-y += \ + conduit.o \ devlink.o \ dsa.o \ - master.o \ netlink.o \ port.o \ - slave.o \ switch.o \ tag.o \ tag_8021q.o \ - trace.o + trace.o \ + user.o # tagging formats obj-$(CONFIG_NET_DSA_TAG_AR9331) += tag_ar9331.o diff --git a/net/dsa/master.c b/net/dsa/conduit.c index 6be89ab0cc..3dfdb3cb47 100644 --- a/net/dsa/master.c +++ b/net/dsa/conduit.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-or-later /* - * Handling of a master device, switching frames via its switch fabric CPU port + * Handling of a conduit device, switching frames via its switch fabric CPU port * * Copyright (c) 2017 Savoir-faire Linux Inc. 
* Vivien Didelot <vivien.didelot@savoirfairelinux.com> @@ -11,12 +11,12 @@ #include <linux/netlink.h> #include <net/dsa.h> +#include "conduit.h" #include "dsa.h" -#include "master.h" #include "port.h" #include "tag.h" -static int dsa_master_get_regs_len(struct net_device *dev) +static int dsa_conduit_get_regs_len(struct net_device *dev) { struct dsa_port *cpu_dp = dev->dsa_ptr; const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops; @@ -45,8 +45,8 @@ static int dsa_master_get_regs_len(struct net_device *dev) return ret; } -static void dsa_master_get_regs(struct net_device *dev, - struct ethtool_regs *regs, void *data) +static void dsa_conduit_get_regs(struct net_device *dev, + struct ethtool_regs *regs, void *data) { struct dsa_port *cpu_dp = dev->dsa_ptr; const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops; @@ -80,9 +80,9 @@ static void dsa_master_get_regs(struct net_device *dev, } } -static void dsa_master_get_ethtool_stats(struct net_device *dev, - struct ethtool_stats *stats, - uint64_t *data) +static void dsa_conduit_get_ethtool_stats(struct net_device *dev, + struct ethtool_stats *stats, + uint64_t *data) { struct dsa_port *cpu_dp = dev->dsa_ptr; const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops; @@ -99,9 +99,9 @@ static void dsa_master_get_ethtool_stats(struct net_device *dev, ds->ops->get_ethtool_stats(ds, port, data + count); } -static void dsa_master_get_ethtool_phy_stats(struct net_device *dev, - struct ethtool_stats *stats, - uint64_t *data) +static void dsa_conduit_get_ethtool_phy_stats(struct net_device *dev, + struct ethtool_stats *stats, + uint64_t *data) { struct dsa_port *cpu_dp = dev->dsa_ptr; const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops; @@ -125,7 +125,7 @@ static void dsa_master_get_ethtool_phy_stats(struct net_device *dev, ds->ops->get_ethtool_phy_stats(ds, port, data + count); } -static int dsa_master_get_sset_count(struct net_device *dev, int sset) +static int dsa_conduit_get_sset_count(struct net_device *dev, int sset) { struct dsa_port *cpu_dp = dev->dsa_ptr; const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops; @@ -147,8 +147,8 @@ static int dsa_master_get_sset_count(struct net_device *dev, int sset) return count; } -static void dsa_master_get_strings(struct net_device *dev, uint32_t stringset, - uint8_t *data) +static void dsa_conduit_get_strings(struct net_device *dev, uint32_t stringset, + uint8_t *data) { struct dsa_port *cpu_dp = dev->dsa_ptr; const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops; @@ -195,12 +195,12 @@ static void dsa_master_get_strings(struct net_device *dev, uint32_t stringset, } } -/* Deny PTP operations on master if there is at least one switch in the tree +/* Deny PTP operations on conduit if there is at least one switch in the tree * that is PTP capable. 
*/ -int __dsa_master_hwtstamp_validate(struct net_device *dev, - const struct kernel_hwtstamp_config *config, - struct netlink_ext_ack *extack) +int __dsa_conduit_hwtstamp_validate(struct net_device *dev, + const struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) { struct dsa_port *cpu_dp = dev->dsa_ptr; struct dsa_switch *ds = cpu_dp->ds; @@ -212,7 +212,7 @@ int __dsa_master_hwtstamp_validate(struct net_device *dev, list_for_each_entry(dp, &dst->ports, list) { if (dsa_port_supports_hwtstamp(dp)) { NL_SET_ERR_MSG(extack, - "HW timestamping not allowed on DSA master when switch supports the operation"); + "HW timestamping not allowed on DSA conduit when switch supports the operation"); return -EBUSY; } } @@ -220,7 +220,7 @@ int __dsa_master_hwtstamp_validate(struct net_device *dev, return 0; } -static int dsa_master_ethtool_setup(struct net_device *dev) +static int dsa_conduit_ethtool_setup(struct net_device *dev) { struct dsa_port *cpu_dp = dev->dsa_ptr; struct dsa_switch *ds = cpu_dp->ds; @@ -237,19 +237,19 @@ static int dsa_master_ethtool_setup(struct net_device *dev) if (cpu_dp->orig_ethtool_ops) memcpy(ops, cpu_dp->orig_ethtool_ops, sizeof(*ops)); - ops->get_regs_len = dsa_master_get_regs_len; - ops->get_regs = dsa_master_get_regs; - ops->get_sset_count = dsa_master_get_sset_count; - ops->get_ethtool_stats = dsa_master_get_ethtool_stats; - ops->get_strings = dsa_master_get_strings; - ops->get_ethtool_phy_stats = dsa_master_get_ethtool_phy_stats; + ops->get_regs_len = dsa_conduit_get_regs_len; + ops->get_regs = dsa_conduit_get_regs; + ops->get_sset_count = dsa_conduit_get_sset_count; + ops->get_ethtool_stats = dsa_conduit_get_ethtool_stats; + ops->get_strings = dsa_conduit_get_strings; + ops->get_ethtool_phy_stats = dsa_conduit_get_ethtool_phy_stats; dev->ethtool_ops = ops; return 0; } -static void dsa_master_ethtool_teardown(struct net_device *dev) +static void dsa_conduit_ethtool_teardown(struct net_device *dev) { struct dsa_port *cpu_dp = dev->dsa_ptr; @@ -260,16 +260,16 @@ static void dsa_master_ethtool_teardown(struct net_device *dev) cpu_dp->orig_ethtool_ops = NULL; } -/* Keep the master always promiscuous if the tagging protocol requires that +/* Keep the conduit always promiscuous if the tagging protocol requires that * (garbles MAC DA) or if it doesn't support unicast filtering, case in which * it would revert to promiscuous mode as soon as we call dev_uc_add() on it * anyway. 
*/ -static void dsa_master_set_promiscuity(struct net_device *dev, int inc) +static void dsa_conduit_set_promiscuity(struct net_device *dev, int inc) { const struct dsa_device_ops *ops = dev->dsa_ptr->tag_ops; - if ((dev->priv_flags & IFF_UNICAST_FLT) && !ops->promisc_on_master) + if ((dev->priv_flags & IFF_UNICAST_FLT) && !ops->promisc_on_conduit) return; ASSERT_RTNL(); @@ -336,17 +336,17 @@ out: } static DEVICE_ATTR_RW(tagging); -static struct attribute *dsa_slave_attrs[] = { +static struct attribute *dsa_user_attrs[] = { &dev_attr_tagging.attr, NULL }; static const struct attribute_group dsa_group = { .name = "dsa", - .attrs = dsa_slave_attrs, + .attrs = dsa_user_attrs, }; -static void dsa_master_reset_mtu(struct net_device *dev) +static void dsa_conduit_reset_mtu(struct net_device *dev) { int err; @@ -356,7 +356,7 @@ static void dsa_master_reset_mtu(struct net_device *dev) "Unable to reset MTU to exclude DSA overheads\n"); } -int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp) +int dsa_conduit_setup(struct net_device *dev, struct dsa_port *cpu_dp) { const struct dsa_device_ops *tag_ops = cpu_dp->tag_ops; struct dsa_switch *ds = cpu_dp->ds; @@ -365,7 +365,7 @@ int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp) mtu = ETH_DATA_LEN + dsa_tag_protocol_overhead(tag_ops); - /* The DSA master must use SET_NETDEV_DEV for this to work. */ + /* The DSA conduit must use SET_NETDEV_DEV for this to work. */ if (!netif_is_lag_master(dev)) { consumer_link = device_link_add(ds->dev, dev->dev.parent, DL_FLAG_AUTOREMOVE_CONSUMER); @@ -376,7 +376,7 @@ int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp) } /* The switch driver may not implement ->port_change_mtu(), case in - * which dsa_slave_change_mtu() will not update the master MTU either, + * which dsa_user_change_mtu() will not update the conduit MTU either, * so we need to do that here. 
*/ ret = dev_set_mtu(dev, mtu); @@ -392,9 +392,9 @@ int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp) dev->dsa_ptr = cpu_dp; - dsa_master_set_promiscuity(dev, 1); + dsa_conduit_set_promiscuity(dev, 1); - ret = dsa_master_ethtool_setup(dev); + ret = dsa_conduit_ethtool_setup(dev); if (ret) goto out_err_reset_promisc; @@ -405,18 +405,18 @@ int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp) return ret; out_err_ethtool_teardown: - dsa_master_ethtool_teardown(dev); + dsa_conduit_ethtool_teardown(dev); out_err_reset_promisc: - dsa_master_set_promiscuity(dev, -1); + dsa_conduit_set_promiscuity(dev, -1); return ret; } -void dsa_master_teardown(struct net_device *dev) +void dsa_conduit_teardown(struct net_device *dev) { sysfs_remove_group(&dev->dev.kobj, &dsa_group); - dsa_master_ethtool_teardown(dev); - dsa_master_reset_mtu(dev); - dsa_master_set_promiscuity(dev, -1); + dsa_conduit_ethtool_teardown(dev); + dsa_conduit_reset_mtu(dev); + dsa_conduit_set_promiscuity(dev, -1); dev->dsa_ptr = NULL; @@ -427,40 +427,40 @@ void dsa_master_teardown(struct net_device *dev) wmb(); } -int dsa_master_lag_setup(struct net_device *lag_dev, struct dsa_port *cpu_dp, - struct netdev_lag_upper_info *uinfo, - struct netlink_ext_ack *extack) +int dsa_conduit_lag_setup(struct net_device *lag_dev, struct dsa_port *cpu_dp, + struct netdev_lag_upper_info *uinfo, + struct netlink_ext_ack *extack) { - bool master_setup = false; + bool conduit_setup = false; int err; if (!netdev_uses_dsa(lag_dev)) { - err = dsa_master_setup(lag_dev, cpu_dp); + err = dsa_conduit_setup(lag_dev, cpu_dp); if (err) return err; - master_setup = true; + conduit_setup = true; } err = dsa_port_lag_join(cpu_dp, lag_dev, uinfo, extack); if (err) { NL_SET_ERR_MSG_WEAK_MOD(extack, "CPU port failed to join LAG"); - goto out_master_teardown; + goto out_conduit_teardown; } return 0; -out_master_teardown: - if (master_setup) - dsa_master_teardown(lag_dev); +out_conduit_teardown: + if (conduit_setup) + dsa_conduit_teardown(lag_dev); return err; } -/* Tear down a master if there isn't any other user port on it, +/* Tear down a conduit if there isn't any other user port on it, * optionally also destroying LAG information. 
*/ -void dsa_master_lag_teardown(struct net_device *lag_dev, - struct dsa_port *cpu_dp) +void dsa_conduit_lag_teardown(struct net_device *lag_dev, + struct dsa_port *cpu_dp) { struct net_device *upper; struct list_head *iter; @@ -468,8 +468,8 @@ void dsa_master_lag_teardown(struct net_device *lag_dev, dsa_port_lag_leave(cpu_dp, lag_dev); netdev_for_each_upper_dev_rcu(lag_dev, upper, iter) - if (dsa_slave_dev_check(upper)) + if (dsa_user_dev_check(upper)) return; - dsa_master_teardown(lag_dev); + dsa_conduit_teardown(lag_dev); } diff --git a/net/dsa/conduit.h b/net/dsa/conduit.h new file mode 100644 index 0000000000..31f8834f54 --- /dev/null +++ b/net/dsa/conduit.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#ifndef __DSA_CONDUIT_H +#define __DSA_CONDUIT_H + +struct dsa_port; +struct net_device; +struct netdev_lag_upper_info; +struct netlink_ext_ack; + +int dsa_conduit_setup(struct net_device *dev, struct dsa_port *cpu_dp); +void dsa_conduit_teardown(struct net_device *dev); +int dsa_conduit_lag_setup(struct net_device *lag_dev, struct dsa_port *cpu_dp, + struct netdev_lag_upper_info *uinfo, + struct netlink_ext_ack *extack); +void dsa_conduit_lag_teardown(struct net_device *lag_dev, + struct dsa_port *cpu_dp); +int __dsa_conduit_hwtstamp_validate(struct net_device *dev, + const struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack); + +#endif diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index ccbdb98109..ac7be864e8 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -20,14 +20,14 @@ #include <net/dsa_stubs.h> #include <net/sch_generic.h> +#include "conduit.h" #include "devlink.h" #include "dsa.h" -#include "master.h" #include "netlink.h" #include "port.h" -#include "slave.h" #include "switch.h" #include "tag.h" +#include "user.h" #define DSA_MAX_NUM_OFFLOADING_BRIDGES BITS_PER_LONG @@ -365,18 +365,18 @@ static struct dsa_port *dsa_tree_find_first_cpu(struct dsa_switch_tree *dst) return NULL; } -struct net_device *dsa_tree_find_first_master(struct dsa_switch_tree *dst) +struct net_device *dsa_tree_find_first_conduit(struct dsa_switch_tree *dst) { struct device_node *ethernet; - struct net_device *master; + struct net_device *conduit; struct dsa_port *cpu_dp; cpu_dp = dsa_tree_find_first_cpu(dst); ethernet = of_parse_phandle(cpu_dp->dn, "ethernet", 0); - master = of_find_net_device_by_node(ethernet); + conduit = of_find_net_device_by_node(ethernet); of_node_put(ethernet); - return master; + return conduit; } /* Assign the default CPU port (the first one in the tree) to all ports of the @@ -517,7 +517,7 @@ static int dsa_port_setup(struct dsa_port *dp) break; case DSA_PORT_TYPE_USER: of_get_mac_address(dp->dn, dp->mac); - err = dsa_slave_create(dp); + err = dsa_user_create(dp); break; } @@ -554,9 +554,9 @@ static void dsa_port_teardown(struct dsa_port *dp) dsa_shared_port_link_unregister_of(dp); break; case DSA_PORT_TYPE_USER: - if (dp->slave) { - dsa_slave_destroy(dp->slave); - dp->slave = NULL; + if (dp->user) { + dsa_user_destroy(dp->user); + dp->user = NULL; } break; } @@ -632,9 +632,9 @@ static int dsa_switch_setup(struct dsa_switch *ds) if (ds->setup) return 0; - /* Initialize ds->phys_mii_mask before registering the slave MDIO bus + /* Initialize ds->phys_mii_mask before registering the user MDIO bus * driver and before ops->setup() has run, since the switch drivers and - * the slave MDIO bus driver rely on these values for probing PHY + * the user MDIO bus driver rely on these values for probing PHY * devices or not */ ds->phys_mii_mask |= 
dsa_user_ports(ds); @@ -657,21 +657,21 @@ static int dsa_switch_setup(struct dsa_switch *ds) if (err) goto teardown; - if (!ds->slave_mii_bus && ds->ops->phy_read) { - ds->slave_mii_bus = mdiobus_alloc(); - if (!ds->slave_mii_bus) { + if (!ds->user_mii_bus && ds->ops->phy_read) { + ds->user_mii_bus = mdiobus_alloc(); + if (!ds->user_mii_bus) { err = -ENOMEM; goto teardown; } - dsa_slave_mii_bus_init(ds); + dsa_user_mii_bus_init(ds); dn = of_get_child_by_name(ds->dev->of_node, "mdio"); - err = of_mdiobus_register(ds->slave_mii_bus, dn); + err = of_mdiobus_register(ds->user_mii_bus, dn); of_node_put(dn); if (err < 0) - goto free_slave_mii_bus; + goto free_user_mii_bus; } dsa_switch_devlink_register(ds); @@ -679,9 +679,9 @@ static int dsa_switch_setup(struct dsa_switch *ds) ds->setup = true; return 0; -free_slave_mii_bus: - if (ds->slave_mii_bus && ds->ops->phy_read) - mdiobus_free(ds->slave_mii_bus); +free_user_mii_bus: + if (ds->user_mii_bus && ds->ops->phy_read) + mdiobus_free(ds->user_mii_bus); teardown: if (ds->ops->teardown) ds->ops->teardown(ds); @@ -699,10 +699,10 @@ static void dsa_switch_teardown(struct dsa_switch *ds) dsa_switch_devlink_unregister(ds); - if (ds->slave_mii_bus && ds->ops->phy_read) { - mdiobus_unregister(ds->slave_mii_bus); - mdiobus_free(ds->slave_mii_bus); - ds->slave_mii_bus = NULL; + if (ds->user_mii_bus && ds->ops->phy_read) { + mdiobus_unregister(ds->user_mii_bus); + mdiobus_free(ds->user_mii_bus); + ds->user_mii_bus = NULL; } dsa_switch_teardown_tag_protocol(ds); @@ -793,7 +793,7 @@ static int dsa_tree_setup_switches(struct dsa_switch_tree *dst) return err; } -static int dsa_tree_setup_master(struct dsa_switch_tree *dst) +static int dsa_tree_setup_conduit(struct dsa_switch_tree *dst) { struct dsa_port *cpu_dp; int err = 0; @@ -801,18 +801,18 @@ static int dsa_tree_setup_master(struct dsa_switch_tree *dst) rtnl_lock(); dsa_tree_for_each_cpu_port(cpu_dp, dst) { - struct net_device *master = cpu_dp->master; - bool admin_up = (master->flags & IFF_UP) && - !qdisc_tx_is_noop(master); + struct net_device *conduit = cpu_dp->conduit; + bool admin_up = (conduit->flags & IFF_UP) && + !qdisc_tx_is_noop(conduit); - err = dsa_master_setup(master, cpu_dp); + err = dsa_conduit_setup(conduit, cpu_dp); if (err) break; - /* Replay master state event */ - dsa_tree_master_admin_state_change(dst, master, admin_up); - dsa_tree_master_oper_state_change(dst, master, - netif_oper_up(master)); + /* Replay conduit state event */ + dsa_tree_conduit_admin_state_change(dst, conduit, admin_up); + dsa_tree_conduit_oper_state_change(dst, conduit, + netif_oper_up(conduit)); } rtnl_unlock(); @@ -820,22 +820,22 @@ static int dsa_tree_setup_master(struct dsa_switch_tree *dst) return err; } -static void dsa_tree_teardown_master(struct dsa_switch_tree *dst) +static void dsa_tree_teardown_conduit(struct dsa_switch_tree *dst) { struct dsa_port *cpu_dp; rtnl_lock(); dsa_tree_for_each_cpu_port(cpu_dp, dst) { - struct net_device *master = cpu_dp->master; + struct net_device *conduit = cpu_dp->conduit; /* Synthesizing an "admin down" state is sufficient for - * the switches to get a notification if the master is + * the switches to get a notification if the conduit is * currently up and running. 
*/ - dsa_tree_master_admin_state_change(dst, master, false); + dsa_tree_conduit_admin_state_change(dst, conduit, false); - dsa_master_teardown(master); + dsa_conduit_teardown(conduit); } rtnl_unlock(); @@ -894,13 +894,13 @@ static int dsa_tree_setup(struct dsa_switch_tree *dst) if (err) goto teardown_switches; - err = dsa_tree_setup_master(dst); + err = dsa_tree_setup_conduit(dst); if (err) goto teardown_ports; err = dsa_tree_setup_lags(dst); if (err) - goto teardown_master; + goto teardown_conduit; dst->setup = true; @@ -908,8 +908,8 @@ static int dsa_tree_setup(struct dsa_switch_tree *dst) return 0; -teardown_master: - dsa_tree_teardown_master(dst); +teardown_conduit: + dsa_tree_teardown_conduit(dst); teardown_ports: dsa_tree_teardown_ports(dst); teardown_switches: @@ -929,7 +929,7 @@ static void dsa_tree_teardown(struct dsa_switch_tree *dst) dsa_tree_teardown_lags(dst); - dsa_tree_teardown_master(dst); + dsa_tree_teardown_conduit(dst); dsa_tree_teardown_ports(dst); @@ -978,7 +978,7 @@ out_disconnect: return err; } -/* Since the dsa/tagging sysfs device attribute is per master, the assumption +/* Since the dsa/tagging sysfs device attribute is per conduit, the assumption * is that all DSA switches within a tree share the same tagger, otherwise * they would have formed disjoint trees (different "dsa,member" values). */ @@ -999,10 +999,10 @@ int dsa_tree_change_tag_proto(struct dsa_switch_tree *dst, * restriction, there needs to be another mutex which serializes this. */ dsa_tree_for_each_user_port(dp, dst) { - if (dsa_port_to_master(dp)->flags & IFF_UP) + if (dsa_port_to_conduit(dp)->flags & IFF_UP) goto out_unlock; - if (dp->slave->flags & IFF_UP) + if (dp->user->flags & IFF_UP) goto out_unlock; } @@ -1028,62 +1028,62 @@ out_unlock: return err; } -static void dsa_tree_master_state_change(struct dsa_switch_tree *dst, - struct net_device *master) +static void dsa_tree_conduit_state_change(struct dsa_switch_tree *dst, + struct net_device *conduit) { - struct dsa_notifier_master_state_info info; - struct dsa_port *cpu_dp = master->dsa_ptr; + struct dsa_notifier_conduit_state_info info; + struct dsa_port *cpu_dp = conduit->dsa_ptr; - info.master = master; - info.operational = dsa_port_master_is_operational(cpu_dp); + info.conduit = conduit; + info.operational = dsa_port_conduit_is_operational(cpu_dp); - dsa_tree_notify(dst, DSA_NOTIFIER_MASTER_STATE_CHANGE, &info); + dsa_tree_notify(dst, DSA_NOTIFIER_CONDUIT_STATE_CHANGE, &info); } -void dsa_tree_master_admin_state_change(struct dsa_switch_tree *dst, - struct net_device *master, - bool up) +void dsa_tree_conduit_admin_state_change(struct dsa_switch_tree *dst, + struct net_device *conduit, + bool up) { - struct dsa_port *cpu_dp = master->dsa_ptr; + struct dsa_port *cpu_dp = conduit->dsa_ptr; bool notify = false; - /* Don't keep track of admin state on LAG DSA masters, - * but rather just of physical DSA masters + /* Don't keep track of admin state on LAG DSA conduits, + * but rather just of physical DSA conduits */ - if (netif_is_lag_master(master)) + if (netif_is_lag_master(conduit)) return; - if ((dsa_port_master_is_operational(cpu_dp)) != - (up && cpu_dp->master_oper_up)) + if ((dsa_port_conduit_is_operational(cpu_dp)) != + (up && cpu_dp->conduit_oper_up)) notify = true; - cpu_dp->master_admin_up = up; + cpu_dp->conduit_admin_up = up; if (notify) - dsa_tree_master_state_change(dst, master); + dsa_tree_conduit_state_change(dst, conduit); } -void dsa_tree_master_oper_state_change(struct dsa_switch_tree *dst, - struct net_device *master, - 
bool up) +void dsa_tree_conduit_oper_state_change(struct dsa_switch_tree *dst, + struct net_device *conduit, + bool up) { - struct dsa_port *cpu_dp = master->dsa_ptr; + struct dsa_port *cpu_dp = conduit->dsa_ptr; bool notify = false; - /* Don't keep track of oper state on LAG DSA masters, - * but rather just of physical DSA masters + /* Don't keep track of oper state on LAG DSA conduits, + * but rather just of physical DSA conduits */ - if (netif_is_lag_master(master)) + if (netif_is_lag_master(conduit)) return; - if ((dsa_port_master_is_operational(cpu_dp)) != - (cpu_dp->master_admin_up && up)) + if ((dsa_port_conduit_is_operational(cpu_dp)) != + (cpu_dp->conduit_admin_up && up)) notify = true; - cpu_dp->master_oper_up = up; + cpu_dp->conduit_oper_up = up; if (notify) - dsa_tree_master_state_change(dst, master); + dsa_tree_conduit_state_change(dst, conduit); } static struct dsa_port *dsa_port_touch(struct dsa_switch *ds, int index) @@ -1129,7 +1129,7 @@ static int dsa_port_parse_dsa(struct dsa_port *dp) } static enum dsa_tag_protocol dsa_get_tag_protocol(struct dsa_port *dp, - struct net_device *master) + struct net_device *conduit) { enum dsa_tag_protocol tag_protocol = DSA_TAG_PROTO_NONE; struct dsa_switch *mds, *ds = dp->ds; @@ -1140,21 +1140,21 @@ static enum dsa_tag_protocol dsa_get_tag_protocol(struct dsa_port *dp, * happens the switch driver may want to know if its tagging protocol * is going to work in such a configuration. */ - if (dsa_slave_dev_check(master)) { - mdp = dsa_slave_to_port(master); + if (dsa_user_dev_check(conduit)) { + mdp = dsa_user_to_port(conduit); mds = mdp->ds; mdp_upstream = dsa_upstream_port(mds, mdp->index); tag_protocol = mds->ops->get_tag_protocol(mds, mdp_upstream, DSA_TAG_PROTO_NONE); } - /* If the master device is not itself a DSA slave in a disjoint DSA + /* If the conduit device is not itself a DSA user in a disjoint DSA * tree, then return immediately. */ return ds->ops->get_tag_protocol(ds, dp->index, tag_protocol); } -static int dsa_port_parse_cpu(struct dsa_port *dp, struct net_device *master, +static int dsa_port_parse_cpu(struct dsa_port *dp, struct net_device *conduit, const char *user_protocol) { const struct dsa_device_ops *tag_ops = NULL; @@ -1163,7 +1163,7 @@ static int dsa_port_parse_cpu(struct dsa_port *dp, struct net_device *master, enum dsa_tag_protocol default_proto; /* Find out which protocol the switch would prefer. 
*/ - default_proto = dsa_get_tag_protocol(dp, master); + default_proto = dsa_get_tag_protocol(dp, conduit); if (dst->default_proto) { if (dst->default_proto != default_proto) { dev_err(ds->dev, @@ -1218,7 +1218,7 @@ static int dsa_port_parse_cpu(struct dsa_port *dp, struct net_device *master, dst->tag_ops = tag_ops; } - dp->master = master; + dp->conduit = conduit; dp->type = DSA_PORT_TYPE_CPU; dsa_port_set_tag_protocol(dp, dst->tag_ops); dp->dst = dst; @@ -1248,16 +1248,16 @@ static int dsa_port_parse_of(struct dsa_port *dp, struct device_node *dn) dp->dn = dn; if (ethernet) { - struct net_device *master; + struct net_device *conduit; const char *user_protocol; - master = of_find_net_device_by_node(ethernet); + conduit = of_find_net_device_by_node(ethernet); of_node_put(ethernet); - if (!master) + if (!conduit) return -EPROBE_DEFER; user_protocol = of_get_property(dn, "dsa-tag-protocol", NULL); - return dsa_port_parse_cpu(dp, master, user_protocol); + return dsa_port_parse_cpu(dp, conduit, user_protocol); } if (link) @@ -1412,15 +1412,15 @@ static int dsa_port_parse(struct dsa_port *dp, const char *name, struct device *dev) { if (!strcmp(name, "cpu")) { - struct net_device *master; + struct net_device *conduit; - master = dsa_dev_to_net_device(dev); - if (!master) + conduit = dsa_dev_to_net_device(dev); + if (!conduit) return -EPROBE_DEFER; - dev_put(master); + dev_put(conduit); - return dsa_port_parse_cpu(dp, master, NULL); + return dsa_port_parse_cpu(dp, conduit, NULL); } if (!strcmp(name, "dsa")) @@ -1566,14 +1566,14 @@ void dsa_unregister_switch(struct dsa_switch *ds) } EXPORT_SYMBOL_GPL(dsa_unregister_switch); -/* If the DSA master chooses to unregister its net_device on .shutdown, DSA is +/* If the DSA conduit chooses to unregister its net_device on .shutdown, DSA is * blocking that operation from completion, due to the dev_hold taken inside - * netdev_upper_dev_link. Unlink the DSA slave interfaces from being uppers of - * the DSA master, so that the system can reboot successfully. + * netdev_upper_dev_link. Unlink the DSA user interfaces from being uppers of + * the DSA conduit, so that the system can reboot successfully. */ void dsa_switch_shutdown(struct dsa_switch *ds) { - struct net_device *master, *slave_dev; + struct net_device *conduit, *user_dev; struct dsa_port *dp; mutex_lock(&dsa2_mutex); @@ -1584,17 +1584,17 @@ void dsa_switch_shutdown(struct dsa_switch *ds) rtnl_lock(); dsa_switch_for_each_user_port(dp, ds) { - master = dsa_port_to_master(dp); - slave_dev = dp->slave; + conduit = dsa_port_to_conduit(dp); + user_dev = dp->user; - netdev_upper_dev_unlink(master, slave_dev); + netdev_upper_dev_unlink(conduit, user_dev); } - /* Disconnect from further netdevice notifiers on the master, + /* Disconnect from further netdevice notifiers on the conduit, * since netdev_uses_dsa() will now return false. 
*/ dsa_switch_for_each_cpu_port(dp, ds) - dp->master->dsa_ptr = NULL; + dp->conduit->dsa_ptr = NULL; rtnl_unlock(); out: @@ -1605,7 +1605,7 @@ EXPORT_SYMBOL_GPL(dsa_switch_shutdown); #ifdef CONFIG_PM_SLEEP static bool dsa_port_is_initialized(const struct dsa_port *dp) { - return dp->type == DSA_PORT_TYPE_USER && dp->slave; + return dp->type == DSA_PORT_TYPE_USER && dp->user; } int dsa_switch_suspend(struct dsa_switch *ds) @@ -1613,12 +1613,12 @@ int dsa_switch_suspend(struct dsa_switch *ds) struct dsa_port *dp; int ret = 0; - /* Suspend slave network devices */ + /* Suspend user network devices */ dsa_switch_for_each_port(dp, ds) { if (!dsa_port_is_initialized(dp)) continue; - ret = dsa_slave_suspend(dp->slave); + ret = dsa_user_suspend(dp->user); if (ret) return ret; } @@ -1641,12 +1641,12 @@ int dsa_switch_resume(struct dsa_switch *ds) if (ret) return ret; - /* Resume slave network devices */ + /* Resume user network devices */ dsa_switch_for_each_port(dp, ds) { if (!dsa_port_is_initialized(dp)) continue; - ret = dsa_slave_resume(dp->slave); + ret = dsa_user_resume(dp->user); if (ret) return ret; } @@ -1658,10 +1658,10 @@ EXPORT_SYMBOL_GPL(dsa_switch_resume); struct dsa_port *dsa_port_from_netdev(struct net_device *netdev) { - if (!netdev || !dsa_slave_dev_check(netdev)) + if (!netdev || !dsa_user_dev_check(netdev)) return ERR_PTR(-ENODEV); - return dsa_slave_to_port(netdev); + return dsa_user_to_port(netdev); } EXPORT_SYMBOL_GPL(dsa_port_from_netdev); @@ -1726,7 +1726,7 @@ bool dsa_mdb_present_in_other_db(struct dsa_switch *ds, int port, EXPORT_SYMBOL_GPL(dsa_mdb_present_in_other_db); static const struct dsa_stubs __dsa_stubs = { - .master_hwtstamp_validate = __dsa_master_hwtstamp_validate, + .conduit_hwtstamp_validate = __dsa_conduit_hwtstamp_validate, }; static void dsa_register_stubs(void) @@ -1748,7 +1748,7 @@ static int __init dsa_init_module(void) if (!dsa_owq) return -ENOMEM; - rc = dsa_slave_register_notifier(); + rc = dsa_user_register_notifier(); if (rc) goto register_notifier_fail; @@ -1763,7 +1763,7 @@ static int __init dsa_init_module(void) return 0; netlink_register_fail: - dsa_slave_unregister_notifier(); + dsa_user_unregister_notifier(); dev_remove_pack(&dsa_pack_type); register_notifier_fail: destroy_workqueue(dsa_owq); @@ -1778,7 +1778,7 @@ static void __exit dsa_cleanup_module(void) rtnl_link_unregister(&dsa_link_ops); - dsa_slave_unregister_notifier(); + dsa_user_unregister_notifier(); dev_remove_pack(&dsa_pack_type); destroy_workqueue(dsa_owq); } diff --git a/net/dsa/dsa.h b/net/dsa/dsa.h index b7e17ae109..3cc7823e9e 100644 --- a/net/dsa/dsa.h +++ b/net/dsa/dsa.h @@ -21,16 +21,16 @@ void dsa_lag_map(struct dsa_switch_tree *dst, struct dsa_lag *lag); void dsa_lag_unmap(struct dsa_switch_tree *dst, struct dsa_lag *lag); struct dsa_lag *dsa_tree_lag_find(struct dsa_switch_tree *dst, const struct net_device *lag_dev); -struct net_device *dsa_tree_find_first_master(struct dsa_switch_tree *dst); +struct net_device *dsa_tree_find_first_conduit(struct dsa_switch_tree *dst); int dsa_tree_change_tag_proto(struct dsa_switch_tree *dst, const struct dsa_device_ops *tag_ops, const struct dsa_device_ops *old_tag_ops); -void dsa_tree_master_admin_state_change(struct dsa_switch_tree *dst, - struct net_device *master, +void dsa_tree_conduit_admin_state_change(struct dsa_switch_tree *dst, + struct net_device *conduit, + bool up); +void dsa_tree_conduit_oper_state_change(struct dsa_switch_tree *dst, + struct net_device *conduit, bool up); -void dsa_tree_master_oper_state_change(struct 
dsa_switch_tree *dst, - struct net_device *master, - bool up); unsigned int dsa_bridge_num_get(const struct net_device *bridge_dev, int max); void dsa_bridge_num_put(const struct net_device *bridge_dev, unsigned int bridge_num); diff --git a/net/dsa/master.h b/net/dsa/master.h deleted file mode 100644 index 76e39d3ec9..0000000000 --- a/net/dsa/master.h +++ /dev/null @@ -1,22 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ - -#ifndef __DSA_MASTER_H -#define __DSA_MASTER_H - -struct dsa_port; -struct net_device; -struct netdev_lag_upper_info; -struct netlink_ext_ack; - -int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp); -void dsa_master_teardown(struct net_device *dev); -int dsa_master_lag_setup(struct net_device *lag_dev, struct dsa_port *cpu_dp, - struct netdev_lag_upper_info *uinfo, - struct netlink_ext_ack *extack); -void dsa_master_lag_teardown(struct net_device *lag_dev, - struct dsa_port *cpu_dp); -int __dsa_master_hwtstamp_validate(struct net_device *dev, - const struct kernel_hwtstamp_config *config, - struct netlink_ext_ack *extack); - -#endif diff --git a/net/dsa/netlink.c b/net/dsa/netlink.c index bd4bbaf851..1332e56349 100644 --- a/net/dsa/netlink.c +++ b/net/dsa/netlink.c @@ -5,10 +5,10 @@ #include <net/rtnetlink.h> #include "netlink.h" -#include "slave.h" +#include "user.h" static const struct nla_policy dsa_policy[IFLA_DSA_MAX + 1] = { - [IFLA_DSA_MASTER] = { .type = NLA_U32 }, + [IFLA_DSA_CONDUIT] = { .type = NLA_U32 }, }; static int dsa_changelink(struct net_device *dev, struct nlattr *tb[], @@ -20,15 +20,15 @@ static int dsa_changelink(struct net_device *dev, struct nlattr *tb[], if (!data) return 0; - if (data[IFLA_DSA_MASTER]) { - u32 ifindex = nla_get_u32(data[IFLA_DSA_MASTER]); - struct net_device *master; + if (data[IFLA_DSA_CONDUIT]) { + u32 ifindex = nla_get_u32(data[IFLA_DSA_CONDUIT]); + struct net_device *conduit; - master = __dev_get_by_index(dev_net(dev), ifindex); - if (!master) + conduit = __dev_get_by_index(dev_net(dev), ifindex); + if (!conduit) return -EINVAL; - err = dsa_slave_change_master(dev, master, extack); + err = dsa_user_change_conduit(dev, conduit, extack); if (err) return err; } @@ -38,15 +38,15 @@ static int dsa_changelink(struct net_device *dev, struct nlattr *tb[], static size_t dsa_get_size(const struct net_device *dev) { - return nla_total_size(sizeof(u32)) + /* IFLA_DSA_MASTER */ + return nla_total_size(sizeof(u32)) + /* IFLA_DSA_CONDUIT */ 0; } static int dsa_fill_info(struct sk_buff *skb, const struct net_device *dev) { - struct net_device *master = dsa_slave_to_master(dev); + struct net_device *conduit = dsa_user_to_conduit(dev); - if (nla_put_u32(skb, IFLA_DSA_MASTER, master->ifindex)) + if (nla_put_u32(skb, IFLA_DSA_CONDUIT, conduit->ifindex)) return -EMSGSIZE; return 0; diff --git a/net/dsa/port.c b/net/dsa/port.c index 37ab238e83..c42dac8767 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -14,9 +14,9 @@ #include "dsa.h" #include "port.h" -#include "slave.h" #include "switch.h" #include "tag_8021q.h" +#include "user.h" /** * dsa_port_notify - Notify the switching fabric of changes to a port @@ -289,7 +289,7 @@ static void dsa_port_reset_vlan_filtering(struct dsa_port *dp, } /* If the bridge was vlan_filtering, the bridge core doesn't trigger an - * event for changing vlan_filtering setting upon slave ports leaving + * event for changing vlan_filtering setting upon user ports leaving * it. 
That is a good thing, because that lets us handle it and also * handle the case where the switch's vlan_filtering setting is global * (not per port). When that happens, the correct moment to trigger the @@ -489,7 +489,7 @@ int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br, .dp = dp, .extack = extack, }; - struct net_device *dev = dp->slave; + struct net_device *dev = dp->user; struct net_device *brport_dev; int err; @@ -514,8 +514,8 @@ int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br, dp->bridge->tx_fwd_offload = info.tx_fwd_offload; err = switchdev_bridge_port_offload(brport_dev, dev, dp, - &dsa_slave_switchdev_notifier, - &dsa_slave_switchdev_blocking_notifier, + &dsa_user_switchdev_notifier, + &dsa_user_switchdev_blocking_notifier, dp->bridge->tx_fwd_offload, extack); if (err) goto out_rollback_unbridge; @@ -528,8 +528,8 @@ int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br, out_rollback_unoffload: switchdev_bridge_port_unoffload(brport_dev, dp, - &dsa_slave_switchdev_notifier, - &dsa_slave_switchdev_blocking_notifier); + &dsa_user_switchdev_notifier, + &dsa_user_switchdev_blocking_notifier); dsa_flush_workqueue(); out_rollback_unbridge: dsa_broadcast(DSA_NOTIFIER_BRIDGE_LEAVE, &info); @@ -547,8 +547,8 @@ void dsa_port_pre_bridge_leave(struct dsa_port *dp, struct net_device *br) return; switchdev_bridge_port_unoffload(brport_dev, dp, - &dsa_slave_switchdev_notifier, - &dsa_slave_switchdev_blocking_notifier); + &dsa_user_switchdev_notifier, + &dsa_user_switchdev_blocking_notifier); dsa_flush_workqueue(); } @@ -741,10 +741,10 @@ static bool dsa_port_can_apply_vlan_filtering(struct dsa_port *dp, */ if (vlan_filtering && dsa_port_is_user(dp)) { struct net_device *br = dsa_port_bridge_dev_get(dp); - struct net_device *upper_dev, *slave = dp->slave; + struct net_device *upper_dev, *user = dp->user; struct list_head *iter; - netdev_for_each_upper_dev_rcu(slave, upper_dev, iter) { + netdev_for_each_upper_dev_rcu(user, upper_dev, iter) { struct bridge_vlan_info br_info; u16 vid; @@ -803,9 +803,9 @@ int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering, if (!ds->ops->port_vlan_filtering) return -EOPNOTSUPP; - /* We are called from dsa_slave_switchdev_blocking_event(), + /* We are called from dsa_user_switchdev_blocking_event(), * which is not under rcu_read_lock(), unlike - * dsa_slave_switchdev_event(). + * dsa_user_switchdev_event(). */ rcu_read_lock(); apply = dsa_port_can_apply_vlan_filtering(dp, vlan_filtering, extack); @@ -827,24 +827,24 @@ int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering, ds->vlan_filtering = vlan_filtering; dsa_switch_for_each_user_port(other_dp, ds) { - struct net_device *slave = other_dp->slave; + struct net_device *user = other_dp->user; /* We might be called in the unbind path, so not - * all slave devices might still be registered. + * all user devices might still be registered. 
*/ - if (!slave) + if (!user) continue; - err = dsa_slave_manage_vlan_filtering(slave, - vlan_filtering); + err = dsa_user_manage_vlan_filtering(user, + vlan_filtering); if (err) goto restore; } } else { dp->vlan_filtering = vlan_filtering; - err = dsa_slave_manage_vlan_filtering(dp->slave, - vlan_filtering); + err = dsa_user_manage_vlan_filtering(dp->user, + vlan_filtering); if (err) goto restore; } @@ -863,7 +863,7 @@ restore: } /* This enforces legacy behavior for switch drivers which assume they can't - * receive VLAN configuration when enslaved to a bridge with vlan_filtering=0 + * receive VLAN configuration when joining a bridge with vlan_filtering=0 */ bool dsa_port_skip_vlan_configuration(struct dsa_port *dp) { @@ -1047,7 +1047,7 @@ int dsa_port_standalone_host_fdb_add(struct dsa_port *dp, int dsa_port_bridge_host_fdb_add(struct dsa_port *dp, const unsigned char *addr, u16 vid) { - struct net_device *master = dsa_port_to_master(dp); + struct net_device *conduit = dsa_port_to_conduit(dp); struct dsa_db db = { .type = DSA_DB_BRIDGE, .bridge = *dp->bridge, @@ -1057,12 +1057,12 @@ int dsa_port_bridge_host_fdb_add(struct dsa_port *dp, if (!dp->ds->fdb_isolation) db.bridge.num = 0; - /* Avoid a call to __dev_set_promiscuity() on the master, which + /* Avoid a call to __dev_set_promiscuity() on the conduit, which * requires rtnl_lock(), since we can't guarantee that is held here, * and we can't take it either. */ - if (master->priv_flags & IFF_UNICAST_FLT) { - err = dev_uc_add(master, addr); + if (conduit->priv_flags & IFF_UNICAST_FLT) { + err = dev_uc_add(conduit, addr); if (err) return err; } @@ -1098,7 +1098,7 @@ int dsa_port_standalone_host_fdb_del(struct dsa_port *dp, int dsa_port_bridge_host_fdb_del(struct dsa_port *dp, const unsigned char *addr, u16 vid) { - struct net_device *master = dsa_port_to_master(dp); + struct net_device *conduit = dsa_port_to_conduit(dp); struct dsa_db db = { .type = DSA_DB_BRIDGE, .bridge = *dp->bridge, @@ -1108,8 +1108,8 @@ int dsa_port_bridge_host_fdb_del(struct dsa_port *dp, if (!dp->ds->fdb_isolation) db.bridge.num = 0; - if (master->priv_flags & IFF_UNICAST_FLT) { - err = dev_uc_del(master, addr); + if (conduit->priv_flags & IFF_UNICAST_FLT) { + err = dev_uc_del(conduit, addr); if (err) return err; } @@ -1229,7 +1229,7 @@ int dsa_port_standalone_host_mdb_add(const struct dsa_port *dp, int dsa_port_bridge_host_mdb_add(const struct dsa_port *dp, const struct switchdev_obj_port_mdb *mdb) { - struct net_device *master = dsa_port_to_master(dp); + struct net_device *conduit = dsa_port_to_conduit(dp); struct dsa_db db = { .type = DSA_DB_BRIDGE, .bridge = *dp->bridge, @@ -1239,7 +1239,7 @@ int dsa_port_bridge_host_mdb_add(const struct dsa_port *dp, if (!dp->ds->fdb_isolation) db.bridge.num = 0; - err = dev_mc_add(master, mdb->addr); + err = dev_mc_add(conduit, mdb->addr); if (err) return err; @@ -1273,7 +1273,7 @@ int dsa_port_standalone_host_mdb_del(const struct dsa_port *dp, int dsa_port_bridge_host_mdb_del(const struct dsa_port *dp, const struct switchdev_obj_port_mdb *mdb) { - struct net_device *master = dsa_port_to_master(dp); + struct net_device *conduit = dsa_port_to_conduit(dp); struct dsa_db db = { .type = DSA_DB_BRIDGE, .bridge = *dp->bridge, @@ -1283,7 +1283,7 @@ int dsa_port_bridge_host_mdb_del(const struct dsa_port *dp, if (!dp->ds->fdb_isolation) db.bridge.num = 0; - err = dev_mc_del(master, mdb->addr); + err = dev_mc_del(conduit, mdb->addr); if (err) return err; @@ -1318,7 +1318,7 @@ int dsa_port_host_vlan_add(struct dsa_port *dp, const struct 
switchdev_obj_port_vlan *vlan, struct netlink_ext_ack *extack) { - struct net_device *master = dsa_port_to_master(dp); + struct net_device *conduit = dsa_port_to_conduit(dp); struct dsa_notifier_vlan_info info = { .dp = dp, .vlan = vlan, @@ -1330,7 +1330,7 @@ int dsa_port_host_vlan_add(struct dsa_port *dp, if (err && err != -EOPNOTSUPP) return err; - vlan_vid_add(master, htons(ETH_P_8021Q), vlan->vid); + vlan_vid_add(conduit, htons(ETH_P_8021Q), vlan->vid); return err; } @@ -1338,7 +1338,7 @@ int dsa_port_host_vlan_add(struct dsa_port *dp, int dsa_port_host_vlan_del(struct dsa_port *dp, const struct switchdev_obj_port_vlan *vlan) { - struct net_device *master = dsa_port_to_master(dp); + struct net_device *conduit = dsa_port_to_conduit(dp); struct dsa_notifier_vlan_info info = { .dp = dp, .vlan = vlan, @@ -1349,7 +1349,7 @@ int dsa_port_host_vlan_del(struct dsa_port *dp, if (err && err != -EOPNOTSUPP) return err; - vlan_vid_del(master, htons(ETH_P_8021Q), vlan->vid); + vlan_vid_del(conduit, htons(ETH_P_8021Q), vlan->vid); return err; } @@ -1398,24 +1398,24 @@ int dsa_port_mrp_del_ring_role(const struct dsa_port *dp, return ds->ops->port_mrp_del_ring_role(ds, dp->index, mrp); } -static int dsa_port_assign_master(struct dsa_port *dp, - struct net_device *master, - struct netlink_ext_ack *extack, - bool fail_on_err) +static int dsa_port_assign_conduit(struct dsa_port *dp, + struct net_device *conduit, + struct netlink_ext_ack *extack, + bool fail_on_err) { struct dsa_switch *ds = dp->ds; int port = dp->index, err; - err = ds->ops->port_change_master(ds, port, master, extack); + err = ds->ops->port_change_conduit(ds, port, conduit, extack); if (err && !fail_on_err) - dev_err(ds->dev, "port %d failed to assign master %s: %pe\n", - port, master->name, ERR_PTR(err)); + dev_err(ds->dev, "port %d failed to assign conduit %s: %pe\n", + port, conduit->name, ERR_PTR(err)); if (err && fail_on_err) return err; - dp->cpu_dp = master->dsa_ptr; - dp->cpu_port_in_lag = netif_is_lag_master(master); + dp->cpu_dp = conduit->dsa_ptr; + dp->cpu_port_in_lag = netif_is_lag_master(conduit); return 0; } @@ -1428,12 +1428,12 @@ static int dsa_port_assign_master(struct dsa_port *dp, * the old CPU port before changing it, and restore it on errors during the * bringup of the new one. */ -int dsa_port_change_master(struct dsa_port *dp, struct net_device *master, - struct netlink_ext_ack *extack) +int dsa_port_change_conduit(struct dsa_port *dp, struct net_device *conduit, + struct netlink_ext_ack *extack) { struct net_device *bridge_dev = dsa_port_bridge_dev_get(dp); - struct net_device *old_master = dsa_port_to_master(dp); - struct net_device *dev = dp->slave; + struct net_device *old_conduit = dsa_port_to_conduit(dp); + struct net_device *dev = dp->user; struct dsa_switch *ds = dp->ds; bool vlan_filtering; int err, tmp; @@ -1454,7 +1454,7 @@ int dsa_port_change_master(struct dsa_port *dp, struct net_device *master, */ vlan_filtering = dsa_port_is_vlan_filtering(dp); if (vlan_filtering) { - err = dsa_slave_manage_vlan_filtering(dev, false); + err = dsa_user_manage_vlan_filtering(dev, false); if (err) { NL_SET_ERR_MSG_MOD(extack, "Failed to remove standalone VLANs"); @@ -1465,16 +1465,16 @@ int dsa_port_change_master(struct dsa_port *dp, struct net_device *master, /* Standalone addresses, and addresses of upper interfaces like * VLAN, LAG, HSR need to be migrated. 
*/ - dsa_slave_unsync_ha(dev); + dsa_user_unsync_ha(dev); - err = dsa_port_assign_master(dp, master, extack, true); + err = dsa_port_assign_conduit(dp, conduit, extack, true); if (err) goto rewind_old_addrs; - dsa_slave_sync_ha(dev); + dsa_user_sync_ha(dev); if (vlan_filtering) { - err = dsa_slave_manage_vlan_filtering(dev, true); + err = dsa_user_manage_vlan_filtering(dev, true); if (err) { NL_SET_ERR_MSG_MOD(extack, "Failed to restore standalone VLANs"); @@ -1495,19 +1495,19 @@ int dsa_port_change_master(struct dsa_port *dp, struct net_device *master, rewind_new_vlan: if (vlan_filtering) - dsa_slave_manage_vlan_filtering(dev, false); + dsa_user_manage_vlan_filtering(dev, false); rewind_new_addrs: - dsa_slave_unsync_ha(dev); + dsa_user_unsync_ha(dev); - dsa_port_assign_master(dp, old_master, NULL, false); + dsa_port_assign_conduit(dp, old_conduit, NULL, false); /* Restore the objects on the old CPU port */ rewind_old_addrs: - dsa_slave_sync_ha(dev); + dsa_user_sync_ha(dev); if (vlan_filtering) { - tmp = dsa_slave_manage_vlan_filtering(dev, true); + tmp = dsa_user_manage_vlan_filtering(dev, true); if (tmp) { dev_err(ds->dev, "port %d failed to restore standalone VLANs: %pe\n", @@ -1554,20 +1554,6 @@ static struct phy_device *dsa_port_get_phy_device(struct dsa_port *dp) return phydev; } -static void dsa_port_phylink_validate(struct phylink_config *config, - unsigned long *supported, - struct phylink_link_state *state) -{ - /* Skip call for drivers which don't yet set mac_capabilities, - * since validating in that case would mean their PHY will advertise - * nothing. In turn, skipping validation makes them advertise - * everything that the PHY supports, so those drivers should be - * converted ASAP. - */ - if (config->mac_capabilities) - phylink_generic_validate(config, supported, state); -} - static struct phylink_pcs * dsa_port_phylink_mac_select_pcs(struct phylink_config *config, phy_interface_t interface) @@ -1634,7 +1620,7 @@ static void dsa_port_phylink_mac_link_down(struct phylink_config *config, struct dsa_switch *ds = dp->ds; if (dsa_port_is_user(dp)) - phydev = dp->slave->phydev; + phydev = dp->user->phydev; if (!ds->ops->phylink_mac_link_down) { if (ds->ops->adjust_link && phydev) @@ -1666,7 +1652,6 @@ static void dsa_port_phylink_mac_link_up(struct phylink_config *config, } static const struct phylink_mac_ops dsa_port_phylink_mac_ops = { - .validate = dsa_port_phylink_validate, .mac_select_pcs = dsa_port_phylink_mac_select_pcs, .mac_prepare = dsa_port_phylink_mac_prepare, .mac_config = dsa_port_phylink_mac_config, @@ -1823,7 +1808,7 @@ err_phy_connect: * their type. * * User ports with no phy-handle or fixed-link are expected to connect to an - * internal PHY located on the ds->slave_mii_bus at an MDIO address equal to + * internal PHY located on the ds->user_mii_bus at an MDIO address equal to * the port number. This description is still actively supported. * * Shared (CPU and DSA) ports with no phy-handle or fixed-link are expected to @@ -1844,7 +1829,7 @@ err_phy_connect: * a fixed-link, a phy-handle, or a managed = "in-band-status" property. * It becomes the responsibility of the driver to ensure that these ports * operate at the maximum speed (whatever this means) and will interoperate - * with the DSA master or other cascade port, since phylink methods will not be + * with the DSA conduit or other cascade port, since phylink methods will not be * invoked for them. 
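With the .validate shim above gone, phylink always runs its generic validation, so a driver must describe its MACs through the phylink_get_caps() switch operation instead. A sketch for a hypothetical gigabit switch; the exact capability set and interface modes are illustrative:

static void foo_phylink_get_caps(struct dsa_switch *ds, int port,
				 struct phylink_config *config)
{
	config->mac_capabilities = MAC_SYM_PAUSE | MAC_ASYM_PAUSE |
				   MAC_10 | MAC_100 | MAC_1000FD;

	/* Interfaces this port can drive */
	__set_bit(PHY_INTERFACE_MODE_RGMII, config->supported_interfaces);
	__set_bit(PHY_INTERFACE_MODE_INTERNAL, config->supported_interfaces);
}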
* * If you are considering expanding this table for newly introduced switches, @@ -2024,7 +2009,8 @@ void dsa_shared_port_link_unregister_of(struct dsa_port *dp) dsa_shared_port_setup_phy_of(dp, false); } -int dsa_port_hsr_join(struct dsa_port *dp, struct net_device *hsr) +int dsa_port_hsr_join(struct dsa_port *dp, struct net_device *hsr, + struct netlink_ext_ack *extack) { struct dsa_switch *ds = dp->ds; int err; @@ -2034,7 +2020,7 @@ int dsa_port_hsr_join(struct dsa_port *dp, struct net_device *hsr) dp->hsr_dev = hsr; - err = ds->ops->port_hsr_join(ds, dp->index, hsr); + err = ds->ops->port_hsr_join(ds, dp->index, hsr, extack); if (err) dp->hsr_dev = NULL; diff --git a/net/dsa/port.h b/net/dsa/port.h index dc812512fd..6bc3291573 100644 --- a/net/dsa/port.h +++ b/net/dsa/port.h @@ -103,12 +103,13 @@ int dsa_port_phylink_create(struct dsa_port *dp); void dsa_port_phylink_destroy(struct dsa_port *dp); int dsa_shared_port_link_register_of(struct dsa_port *dp); void dsa_shared_port_link_unregister_of(struct dsa_port *dp); -int dsa_port_hsr_join(struct dsa_port *dp, struct net_device *hsr); +int dsa_port_hsr_join(struct dsa_port *dp, struct net_device *hsr, + struct netlink_ext_ack *extack); void dsa_port_hsr_leave(struct dsa_port *dp, struct net_device *hsr); int dsa_port_tag_8021q_vlan_add(struct dsa_port *dp, u16 vid, bool broadcast); void dsa_port_tag_8021q_vlan_del(struct dsa_port *dp, u16 vid, bool broadcast); void dsa_port_set_host_flood(struct dsa_port *dp, bool uc, bool mc); -int dsa_port_change_master(struct dsa_port *dp, struct net_device *master, - struct netlink_ext_ack *extack); +int dsa_port_change_conduit(struct dsa_port *dp, struct net_device *conduit, + struct netlink_ext_ack *extack); #endif diff --git a/net/dsa/slave.h b/net/dsa/slave.h deleted file mode 100644 index d0abe609e0..0000000000 --- a/net/dsa/slave.h +++ /dev/null @@ -1,69 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ - -#ifndef __DSA_SLAVE_H -#define __DSA_SLAVE_H - -#include <linux/if_bridge.h> -#include <linux/if_vlan.h> -#include <linux/list.h> -#include <linux/netpoll.h> -#include <linux/types.h> -#include <net/dsa.h> -#include <net/gro_cells.h> - -struct net_device; -struct netlink_ext_ack; - -extern struct notifier_block dsa_slave_switchdev_notifier; -extern struct notifier_block dsa_slave_switchdev_blocking_notifier; - -struct dsa_slave_priv { - /* Copy of CPU port xmit for faster access in slave transmit hot path */ - struct sk_buff * (*xmit)(struct sk_buff *skb, - struct net_device *dev); - - struct gro_cells gcells; - - /* DSA port data, such as switch, port index, etc. 
*/ - struct dsa_port *dp; - -#ifdef CONFIG_NET_POLL_CONTROLLER - struct netpoll *netpoll; -#endif - - /* TC context */ - struct list_head mall_tc_list; -}; - -void dsa_slave_mii_bus_init(struct dsa_switch *ds); -int dsa_slave_create(struct dsa_port *dp); -void dsa_slave_destroy(struct net_device *slave_dev); -int dsa_slave_suspend(struct net_device *slave_dev); -int dsa_slave_resume(struct net_device *slave_dev); -int dsa_slave_register_notifier(void); -void dsa_slave_unregister_notifier(void); -void dsa_slave_sync_ha(struct net_device *dev); -void dsa_slave_unsync_ha(struct net_device *dev); -void dsa_slave_setup_tagger(struct net_device *slave); -int dsa_slave_change_mtu(struct net_device *dev, int new_mtu); -int dsa_slave_change_master(struct net_device *dev, struct net_device *master, - struct netlink_ext_ack *extack); -int dsa_slave_manage_vlan_filtering(struct net_device *dev, - bool vlan_filtering); - -static inline struct dsa_port *dsa_slave_to_port(const struct net_device *dev) -{ - struct dsa_slave_priv *p = netdev_priv(dev); - - return p->dp; -} - -static inline struct net_device * -dsa_slave_to_master(const struct net_device *dev) -{ - struct dsa_port *dp = dsa_slave_to_port(dev); - - return dsa_port_to_master(dp); -} - -#endif diff --git a/net/dsa/switch.c b/net/dsa/switch.c index 1a42f93173..3d2feeea89 100644 --- a/net/dsa/switch.c +++ b/net/dsa/switch.c @@ -15,10 +15,10 @@ #include "dsa.h" #include "netlink.h" #include "port.h" -#include "slave.h" #include "switch.h" #include "tag_8021q.h" #include "trace.h" +#include "user.h" static unsigned int dsa_switch_fastest_ageing_time(struct dsa_switch *ds, unsigned int ageing_time) @@ -894,12 +894,12 @@ static int dsa_switch_change_tag_proto(struct dsa_switch *ds, * bits that depend on the tagger, such as the MTU. 
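The accessors deleted with slave.h reappear in user.h under the new naming; a minimal sketch of the renamed pair, assuming struct dsa_user_priv keeps the layout shown above:

static inline struct dsa_port *dsa_user_to_port(const struct net_device *dev)
{
	struct dsa_user_priv *p = netdev_priv(dev);

	return p->dp;
}

static inline struct net_device *
dsa_user_to_conduit(const struct net_device *dev)
{
	return dsa_port_to_conduit(dsa_user_to_port(dev));
}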
*/ dsa_switch_for_each_user_port(dp, ds) { - struct net_device *slave = dp->slave; + struct net_device *user = dp->user; - dsa_slave_setup_tagger(slave); + dsa_user_setup_tagger(user); /* rtnl_mutex is held in dsa_tree_change_tag_proto */ - dsa_slave_change_mtu(slave, slave->mtu); + dsa_user_change_mtu(user, user->mtu); } return 0; @@ -960,13 +960,13 @@ dsa_switch_disconnect_tag_proto(struct dsa_switch *ds, } static int -dsa_switch_master_state_change(struct dsa_switch *ds, - struct dsa_notifier_master_state_info *info) +dsa_switch_conduit_state_change(struct dsa_switch *ds, + struct dsa_notifier_conduit_state_info *info) { - if (!ds->ops->master_state_change) + if (!ds->ops->conduit_state_change) return 0; - ds->ops->master_state_change(ds, info->master, info->operational); + ds->ops->conduit_state_change(ds, info->conduit, info->operational); return 0; } @@ -1056,8 +1056,8 @@ static int dsa_switch_event(struct notifier_block *nb, case DSA_NOTIFIER_TAG_8021Q_VLAN_DEL: err = dsa_switch_tag_8021q_vlan_del(ds, info); break; - case DSA_NOTIFIER_MASTER_STATE_CHANGE: - err = dsa_switch_master_state_change(ds, info); + case DSA_NOTIFIER_CONDUIT_STATE_CHANGE: + err = dsa_switch_conduit_state_change(ds, info); break; default: err = -EOPNOTSUPP; diff --git a/net/dsa/switch.h b/net/dsa/switch.h index ea034677da..be0a2749cd 100644 --- a/net/dsa/switch.h +++ b/net/dsa/switch.h @@ -34,7 +34,7 @@ enum { DSA_NOTIFIER_TAG_PROTO_DISCONNECT, DSA_NOTIFIER_TAG_8021Q_VLAN_ADD, DSA_NOTIFIER_TAG_8021Q_VLAN_DEL, - DSA_NOTIFIER_MASTER_STATE_CHANGE, + DSA_NOTIFIER_CONDUIT_STATE_CHANGE, }; /* DSA_NOTIFIER_AGEING_TIME */ @@ -105,9 +105,9 @@ struct dsa_notifier_tag_8021q_vlan_info { u16 vid; }; -/* DSA_NOTIFIER_MASTER_STATE_CHANGE */ -struct dsa_notifier_master_state_info { - const struct net_device *master; +/* DSA_NOTIFIER_CONDUIT_STATE_CHANGE */ +struct dsa_notifier_conduit_state_info { + const struct net_device *conduit; bool operational; }; diff --git a/net/dsa/tag.c b/net/dsa/tag.c index 5105a5ff58..6e402d49af 100644 --- a/net/dsa/tag.c +++ b/net/dsa/tag.c @@ -13,8 +13,8 @@ #include <net/dsa.h> #include <net/dst_metadata.h> -#include "slave.h" #include "tag.h" +#include "user.h" static LIST_HEAD(dsa_tag_drivers_list); static DEFINE_MUTEX(dsa_tag_drivers_lock); @@ -27,7 +27,7 @@ static DEFINE_MUTEX(dsa_tag_drivers_lock); * switch, the DSA driver owning the interface to which the packet is * delivered is never notified unless we do so here. 
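The renamed DSA_NOTIFIER_CONDUIT_STATE_CHANGE event reaches drivers through ds->ops->conduit_state_change(). A sketch of a driver hook, assuming a hypothetical device that must stop injecting management frames while the conduit is down; the priv fields and helper are illustrative:

static void foo_conduit_state_change(struct dsa_switch *ds,
				     const struct net_device *conduit,
				     bool operational)
{
	struct foo_priv *priv = ds->priv;

	priv->conduit_operational = operational;
	if (!operational)
		foo_flush_inject_queues(priv);
}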
*/ -static bool dsa_skb_defer_rx_timestamp(struct dsa_slave_priv *p, +static bool dsa_skb_defer_rx_timestamp(struct dsa_user_priv *p, struct sk_buff *skb) { struct dsa_switch *ds = p->dp->ds; @@ -57,7 +57,7 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev, struct metadata_dst *md_dst = skb_metadata_dst(skb); struct dsa_port *cpu_dp = dev->dsa_ptr; struct sk_buff *nskb = NULL; - struct dsa_slave_priv *p; + struct dsa_user_priv *p; if (unlikely(!cpu_dp)) { kfree_skb(skb); @@ -75,7 +75,7 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev, if (!skb_has_extensions(skb)) skb->slow_gro = 0; - skb->dev = dsa_master_find_slave(dev, 0, port); + skb->dev = dsa_conduit_find_user(dev, 0, port); if (likely(skb->dev)) { dsa_default_offload_fwd_mark(skb); nskb = skb; @@ -94,7 +94,7 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev, skb->pkt_type = PACKET_HOST; skb->protocol = eth_type_trans(skb, skb->dev); - if (unlikely(!dsa_slave_dev_check(skb->dev))) { + if (unlikely(!dsa_user_dev_check(skb->dev))) { /* Packet is to be injected directly on an upper * device, e.g. a team/bond, so skip all DSA-port * specific actions. diff --git a/net/dsa/tag.h b/net/dsa/tag.h index 32d12f4a9d..f6b9c73718 100644 --- a/net/dsa/tag.h +++ b/net/dsa/tag.h @@ -9,7 +9,7 @@ #include <net/dsa.h> #include "port.h" -#include "slave.h" +#include "user.h" struct dsa_tag_driver { const struct dsa_device_ops *ops; @@ -29,7 +29,7 @@ static inline int dsa_tag_protocol_overhead(const struct dsa_device_ops *ops) return ops->needed_headroom + ops->needed_tailroom; } -static inline struct net_device *dsa_master_find_slave(struct net_device *dev, +static inline struct net_device *dsa_conduit_find_user(struct net_device *dev, int device, int port) { struct dsa_port *cpu_dp = dev->dsa_ptr; @@ -39,7 +39,7 @@ static inline struct net_device *dsa_master_find_slave(struct net_device *dev, list_for_each_entry(dp, &dst->ports, list) if (dp->ds->index == device && dp->index == port && dp->type == DSA_PORT_TYPE_USER) - return dp->slave; + return dp->user; return NULL; } @@ -49,7 +49,7 @@ static inline struct net_device *dsa_master_find_slave(struct net_device *dev, */ static inline struct sk_buff *dsa_untag_bridge_pvid(struct sk_buff *skb) { - struct dsa_port *dp = dsa_slave_to_port(skb->dev); + struct dsa_port *dp = dsa_user_to_port(skb->dev); struct net_device *br = dsa_port_bridge_dev_get(dp); struct net_device *dev = skb->dev; struct net_device *upper_dev; @@ -107,12 +107,12 @@ static inline struct sk_buff *dsa_untag_bridge_pvid(struct sk_buff *skb) * to support termination through the bridge. 
*/ static inline struct net_device * -dsa_find_designated_bridge_port_by_vid(struct net_device *master, u16 vid) +dsa_find_designated_bridge_port_by_vid(struct net_device *conduit, u16 vid) { - struct dsa_port *cpu_dp = master->dsa_ptr; + struct dsa_port *cpu_dp = conduit->dsa_ptr; struct dsa_switch_tree *dst = cpu_dp->dst; struct bridge_vlan_info vinfo; - struct net_device *slave; + struct net_device *user; struct dsa_port *dp; int err; @@ -134,13 +134,13 @@ dsa_find_designated_bridge_port_by_vid(struct net_device *master, u16 vid) if (dp->cpu_dp != cpu_dp) continue; - slave = dp->slave; + user = dp->user; - err = br_vlan_get_info_rcu(slave, vid, &vinfo); + err = br_vlan_get_info_rcu(user, vid, &vinfo); if (err) continue; - return slave; + return user; } return NULL; @@ -155,7 +155,7 @@ dsa_find_designated_bridge_port_by_vid(struct net_device *master, u16 vid) */ static inline void dsa_default_offload_fwd_mark(struct sk_buff *skb) { - struct dsa_port *dp = dsa_slave_to_port(skb->dev); + struct dsa_port *dp = dsa_user_to_port(skb->dev); skb->offload_fwd_mark = !!(dp->bridge); } @@ -215,9 +215,9 @@ static inline void dsa_alloc_etype_header(struct sk_buff *skb, int len) memmove(skb->data, skb->data + len, 2 * ETH_ALEN); } -/* On RX, eth_type_trans() on the DSA master pulls ETH_HLEN bytes starting from +/* On RX, eth_type_trans() on the DSA conduit pulls ETH_HLEN bytes starting from * skb_mac_header(skb), which leaves skb->data pointing at the first byte after - * what the DSA master perceives as the EtherType (the beginning of the L3 + * what the DSA conduit perceives as the EtherType (the beginning of the L3 * protocol). Since DSA EtherType header taggers treat the EtherType as part of * the DSA tag itself, and the EtherType is 2 bytes in length, the DSA header * is located 2 bytes behind skb->data. Note that EtherType in this context diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c index cbdfc392f7..71b26ae6db 100644 --- a/net/dsa/tag_8021q.c +++ b/net/dsa/tag_8021q.c @@ -73,7 +73,7 @@ struct dsa_tag_8021q_vlan { struct dsa_8021q_context { struct dsa_switch *ds; struct list_head vlans; - /* EtherType of RX VID, used for filtering on master interface */ + /* EtherType of RX VID, used for filtering on conduit interface */ __be16 proto; }; @@ -338,7 +338,7 @@ static int dsa_tag_8021q_port_setup(struct dsa_switch *ds, int port) struct dsa_8021q_context *ctx = ds->tag_8021q_ctx; struct dsa_port *dp = dsa_to_port(ds, port); u16 vid = dsa_tag_8021q_standalone_vid(dp); - struct net_device *master; + struct net_device *conduit; int err; /* The CPU port is implicitly configured by @@ -347,7 +347,7 @@ static int dsa_tag_8021q_port_setup(struct dsa_switch *ds, int port) if (!dsa_port_is_user(dp)) return 0; - master = dsa_port_to_master(dp); + conduit = dsa_port_to_conduit(dp); err = dsa_port_tag_8021q_vlan_add(dp, vid, false); if (err) { @@ -357,8 +357,8 @@ static int dsa_tag_8021q_port_setup(struct dsa_switch *ds, int port) return err; } - /* Add the VLAN to the master's RX filter. */ - vlan_vid_add(master, ctx->proto, vid); + /* Add the VLAN to the conduit's RX filter. 
*/ + vlan_vid_add(conduit, ctx->proto, vid); return err; } @@ -368,7 +368,7 @@ static void dsa_tag_8021q_port_teardown(struct dsa_switch *ds, int port) struct dsa_8021q_context *ctx = ds->tag_8021q_ctx; struct dsa_port *dp = dsa_to_port(ds, port); u16 vid = dsa_tag_8021q_standalone_vid(dp); - struct net_device *master; + struct net_device *conduit; /* The CPU port is implicitly configured by * configuring the front-panel ports @@ -376,11 +376,11 @@ static void dsa_tag_8021q_port_teardown(struct dsa_switch *ds, int port) if (!dsa_port_is_user(dp)) return; - master = dsa_port_to_master(dp); + conduit = dsa_port_to_conduit(dp); dsa_port_tag_8021q_vlan_del(dp, vid, false); - vlan_vid_del(master, ctx->proto, vid); + vlan_vid_del(conduit, ctx->proto, vid); } static int dsa_tag_8021q_setup(struct dsa_switch *ds) @@ -468,10 +468,10 @@ struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev, } EXPORT_SYMBOL_GPL(dsa_8021q_xmit); -struct net_device *dsa_tag_8021q_find_port_by_vbid(struct net_device *master, +struct net_device *dsa_tag_8021q_find_port_by_vbid(struct net_device *conduit, int vbid) { - struct dsa_port *cpu_dp = master->dsa_ptr; + struct dsa_port *cpu_dp = conduit->dsa_ptr; struct dsa_switch_tree *dst = cpu_dp->dst; struct dsa_port *dp; @@ -490,7 +490,7 @@ struct net_device *dsa_tag_8021q_find_port_by_vbid(struct net_device *master, continue; if (dsa_port_bridge_num_get(dp) == vbid) - return dp->slave; + return dp->user; } return NULL; diff --git a/net/dsa/tag_8021q.h b/net/dsa/tag_8021q.h index b75cbaa028..41f7167ac5 100644 --- a/net/dsa/tag_8021q.h +++ b/net/dsa/tag_8021q.h @@ -16,7 +16,7 @@ struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev, void dsa_8021q_rcv(struct sk_buff *skb, int *source_port, int *switch_id, int *vbid); -struct net_device *dsa_tag_8021q_find_port_by_vbid(struct net_device *master, +struct net_device *dsa_tag_8021q_find_port_by_vbid(struct net_device *conduit, int vbid); int dsa_switch_tag_8021q_vlan_add(struct dsa_switch *ds, diff --git a/net/dsa/tag_ar9331.c b/net/dsa/tag_ar9331.c index 7f3b7d730b..92ce67b93a 100644 --- a/net/dsa/tag_ar9331.c +++ b/net/dsa/tag_ar9331.c @@ -29,7 +29,7 @@ static struct sk_buff *ar9331_tag_xmit(struct sk_buff *skb, struct net_device *dev) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); __le16 *phdr; u16 hdr; @@ -74,7 +74,7 @@ static struct sk_buff *ar9331_tag_rcv(struct sk_buff *skb, /* Get source port information */ port = FIELD_GET(AR9331_HDR_PORT_NUM_MASK, hdr); - skb->dev = dsa_master_find_slave(ndev, 0, port); + skb->dev = dsa_conduit_find_user(ndev, 0, port); if (!skb->dev) return NULL; diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c index cacdafb412..83d283a5d2 100644 --- a/net/dsa/tag_brcm.c +++ b/net/dsa/tag_brcm.c @@ -85,7 +85,7 @@ static struct sk_buff *brcm_tag_xmit_ll(struct sk_buff *skb, struct net_device *dev, unsigned int offset) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); u16 queue = skb_get_queue_mapping(skb); u8 *brcm_tag; @@ -96,7 +96,7 @@ static struct sk_buff *brcm_tag_xmit_ll(struct sk_buff *skb, * (including FCS and tag) because the length verification is done after * the Broadcom tag is stripped off the ingress packet. 
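The tag_8021q setup/teardown pair above must stay symmetric: every standalone VID programmed towards the CPU port is mirrored into the conduit's VLAN RX filter, and dropped again on teardown. A condensed sketch of that invariant, where proto corresponds to the ctx->proto EtherType stored above:

static int example_conduit_vid_sync(struct dsa_port *dp, __be16 proto,
				    u16 vid, bool add)
{
	struct net_device *conduit = dsa_port_to_conduit(dp);

	if (add)
		return vlan_vid_add(conduit, proto, vid);

	vlan_vid_del(conduit, proto, vid);
	return 0;
}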
* - * Let dsa_slave_xmit() free the SKB + * Let dsa_user_xmit() free the SKB */ if (__skb_put_padto(skb, ETH_ZLEN + BRCM_TAG_LEN, false)) return NULL; @@ -119,7 +119,7 @@ static struct sk_buff *brcm_tag_xmit_ll(struct sk_buff *skb, brcm_tag[2] = BRCM_IG_DSTMAP2_MASK; brcm_tag[3] = (1 << dp->index) & BRCM_IG_DSTMAP1_MASK; - /* Now tell the master network device about the desired output queue + /* Now tell the conduit network device about the desired output queue * as well */ skb_set_queue_mapping(skb, BRCM_TAG_SET_PORT_QUEUE(dp->index, queue)); @@ -164,7 +164,7 @@ static struct sk_buff *brcm_tag_rcv_ll(struct sk_buff *skb, /* Locate which port this is coming from */ source_port = brcm_tag[3] & BRCM_EG_PID_MASK; - skb->dev = dsa_master_find_slave(dev, 0, source_port); + skb->dev = dsa_conduit_find_user(dev, 0, source_port); if (!skb->dev) return NULL; @@ -216,7 +216,7 @@ MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_BRCM, BRCM_NAME); static struct sk_buff *brcm_leg_tag_xmit(struct sk_buff *skb, struct net_device *dev) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); u8 *brcm_tag; /* The Ethernet switch we are interfaced with needs packets to be at @@ -226,7 +226,7 @@ static struct sk_buff *brcm_leg_tag_xmit(struct sk_buff *skb, * (including FCS and tag) because the length verification is done after * the Broadcom tag is stripped off the ingress packet. * - * Let dsa_slave_xmit() free the SKB + * Let dsa_user_xmit() free the SKB */ if (__skb_put_padto(skb, ETH_ZLEN + BRCM_LEG_TAG_LEN, false)) return NULL; @@ -264,7 +264,7 @@ static struct sk_buff *brcm_leg_tag_rcv(struct sk_buff *skb, source_port = brcm_tag[5] & BRCM_LEG_PORT_ID; - skb->dev = dsa_master_find_slave(dev, 0, source_port); + skb->dev = dsa_conduit_find_user(dev, 0, source_port); if (!skb->dev) return NULL; diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c index 1fd7fa26db..8ed52dd663 100644 --- a/net/dsa/tag_dsa.c +++ b/net/dsa/tag_dsa.c @@ -129,7 +129,7 @@ enum dsa_code { static struct sk_buff *dsa_xmit_ll(struct sk_buff *skb, struct net_device *dev, u8 extra) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); struct net_device *br_dev; u8 tag_dev, tag_port; enum dsa_cmd cmd; @@ -267,14 +267,14 @@ static struct sk_buff *dsa_rcv_ll(struct sk_buff *skb, struct net_device *dev, lag = dsa_lag_by_id(cpu_dp->dst, source_port + 1); skb->dev = lag ? lag->dev : NULL; } else { - skb->dev = dsa_master_find_slave(dev, source_device, + skb->dev = dsa_conduit_find_user(dev, source_device, source_port); } if (!skb->dev) return NULL; - /* When using LAG offload, skb->dev is not a DSA slave interface, + /* When using LAG offload, skb->dev is not a DSA user interface, * so we cannot call dsa_default_offload_fwd_mark and we need to * special-case it. 
*/ diff --git a/net/dsa/tag_gswip.c b/net/dsa/tag_gswip.c index e279cd9057..3539141b53 100644 --- a/net/dsa/tag_gswip.c +++ b/net/dsa/tag_gswip.c @@ -61,7 +61,7 @@ static struct sk_buff *gswip_tag_xmit(struct sk_buff *skb, struct net_device *dev) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); u8 *gswip_tag; skb_push(skb, GSWIP_TX_HEADER_LEN); @@ -89,7 +89,7 @@ static struct sk_buff *gswip_tag_rcv(struct sk_buff *skb, /* Get source port information */ port = (gswip_tag[7] & GSWIP_RX_SPPID_MASK) >> GSWIP_RX_SPPID_SHIFT; - skb->dev = dsa_master_find_slave(dev, 0, port); + skb->dev = dsa_conduit_find_user(dev, 0, port); if (!skb->dev) return NULL; diff --git a/net/dsa/tag_hellcreek.c b/net/dsa/tag_hellcreek.c index 03a1fb9c87..6e233cd0aa 100644 --- a/net/dsa/tag_hellcreek.c +++ b/net/dsa/tag_hellcreek.c @@ -20,7 +20,7 @@ static struct sk_buff *hellcreek_xmit(struct sk_buff *skb, struct net_device *dev) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); u8 *tag; /* Calculate checksums (if required) before adding the trailer tag to @@ -45,7 +45,7 @@ static struct sk_buff *hellcreek_rcv(struct sk_buff *skb, u8 *tag = skb_tail_pointer(skb) - HELLCREEK_TAG_LEN; unsigned int port = tag[0] & 0x03; - skb->dev = dsa_master_find_slave(dev, 0, port); + skb->dev = dsa_conduit_find_user(dev, 0, port); if (!skb->dev) { netdev_warn_once(dev, "Failed to get source port: %d\n", port); return NULL; diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c index ea100bd259..9be341fa88 100644 --- a/net/dsa/tag_ksz.c +++ b/net/dsa/tag_ksz.c @@ -87,7 +87,7 @@ static struct sk_buff *ksz_common_rcv(struct sk_buff *skb, struct net_device *dev, unsigned int port, unsigned int len) { - skb->dev = dsa_master_find_slave(dev, 0, port); + skb->dev = dsa_conduit_find_user(dev, 0, port); if (!skb->dev) return NULL; @@ -119,7 +119,7 @@ static struct sk_buff *ksz_common_rcv(struct sk_buff *skb, static struct sk_buff *ksz8795_xmit(struct sk_buff *skb, struct net_device *dev) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); struct ethhdr *hdr; u8 *tag; @@ -256,7 +256,7 @@ static struct sk_buff *ksz_defer_xmit(struct dsa_port *dp, struct sk_buff *skb) return NULL; kthread_init_work(&xmit_work->work, xmit_work_fn); - /* Increase refcount so the kfree_skb in dsa_slave_xmit + /* Increase refcount so the kfree_skb in dsa_user_xmit * won't really free the packet. 
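The refcounting comment above describes a pattern shared by the ksz, sja1105 and ocelot-8021q taggers. A minimal sketch with the worker handoff reduced to its essentials; the worker and work item are assumed to be set up elsewhere by the tagger:

static struct sk_buff *example_defer_xmit(struct sk_buff *skb,
					  struct kthread_worker *worker,
					  struct kthread_work *work)
{
	/* Balance the kfree_skb() that dsa_user_xmit() issues when the
	 * tagger returns NULL; the worker now owns the reference.
	 */
	skb_get(skb);
	kthread_queue_work(worker, work);

	return NULL;	/* consumed: transmitted later from the worker */
}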
*/ xmit_work->dp = dp; @@ -272,7 +272,7 @@ static struct sk_buff *ksz9477_xmit(struct sk_buff *skb, { u16 queue_mapping = skb_get_queue_mapping(skb); u8 prio = netdev_txq_to_tc(dev, queue_mapping); - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); struct ethhdr *hdr; __be16 *tag; u16 val; @@ -293,6 +293,14 @@ static struct sk_buff *ksz9477_xmit(struct sk_buff *skb, if (is_link_local_ether_addr(hdr->h_dest)) val |= KSZ9477_TAIL_TAG_OVERRIDE; + if (dev->features & NETIF_F_HW_HSR_DUP) { + struct net_device *hsr_dev = dp->hsr_dev; + struct dsa_port *other_dp; + + dsa_hsr_foreach_port(other_dp, dp->ds, hsr_dev) + val |= BIT(other_dp->index); + } + *tag = cpu_to_be16(val); return ksz_defer_xmit(dp, skb); @@ -336,7 +344,7 @@ static struct sk_buff *ksz9893_xmit(struct sk_buff *skb, { u16 queue_mapping = skb_get_queue_mapping(skb); u8 prio = netdev_txq_to_tc(dev, queue_mapping); - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); struct ethhdr *hdr; u8 *tag; @@ -402,7 +410,7 @@ static struct sk_buff *lan937x_xmit(struct sk_buff *skb, { u16 queue_mapping = skb_get_queue_mapping(skb); u8 prio = netdev_txq_to_tc(dev, queue_mapping); - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); const struct ethhdr *hdr = eth_hdr(skb); __be16 *tag; u16 val; diff --git a/net/dsa/tag_lan9303.c b/net/dsa/tag_lan9303.c index c25f553670..1ed8ee2485 100644 --- a/net/dsa/tag_lan9303.c +++ b/net/dsa/tag_lan9303.c @@ -56,7 +56,7 @@ static int lan9303_xmit_use_arl(struct dsa_port *dp, u8 *dest_addr) static struct sk_buff *lan9303_xmit(struct sk_buff *skb, struct net_device *dev) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); __be16 *lan9303_tag; u16 tag; @@ -99,7 +99,7 @@ static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev) source_port = lan9303_tag1 & 0x3; - skb->dev = dsa_master_find_slave(dev, 0, source_port); + skb->dev = dsa_conduit_find_user(dev, 0, source_port); if (!skb->dev) { dev_warn_ratelimited(&dev->dev, "Dropping packet due to invalid source port\n"); return NULL; diff --git a/net/dsa/tag_mtk.c b/net/dsa/tag_mtk.c index 40af804527..2483785f6a 100644 --- a/net/dsa/tag_mtk.c +++ b/net/dsa/tag_mtk.c @@ -23,7 +23,7 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb, struct net_device *dev) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); u8 xmit_tpid; u8 *mtk_tag; @@ -85,7 +85,7 @@ static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev) /* Get source port information */ port = (hdr & MTK_HDR_RECV_SOURCE_PORT_MASK); - skb->dev = dsa_master_find_slave(dev, 0, port); + skb->dev = dsa_conduit_find_user(dev, 0, port); if (!skb->dev) return NULL; diff --git a/net/dsa/tag_none.c b/net/dsa/tag_none.c index d2fd179c42..9a473624db 100644 --- a/net/dsa/tag_none.c +++ b/net/dsa/tag_none.c @@ -12,8 +12,8 @@ #define NONE_NAME "none" -static struct sk_buff *dsa_slave_notag_xmit(struct sk_buff *skb, - struct net_device *dev) +static struct sk_buff *dsa_user_notag_xmit(struct sk_buff *skb, + struct net_device *dev) { /* Just return the original SKB */ return skb; @@ -22,7 +22,7 @@ static struct sk_buff *dsa_slave_notag_xmit(struct sk_buff *skb, static const struct dsa_device_ops none_ops = { .name = NONE_NAME, .proto = DSA_TAG_PROTO_NONE, - .xmit = dsa_slave_notag_xmit, + .xmit = dsa_user_notag_xmit, }; module_dsa_tag_driver(none_ops); diff --git 
a/net/dsa/tag_ocelot.c b/net/dsa/tag_ocelot.c index 20bf7074d5..ef2f8fffb2 100644 --- a/net/dsa/tag_ocelot.c +++ b/net/dsa/tag_ocelot.c @@ -45,7 +45,7 @@ static void ocelot_xmit_get_vlan_info(struct sk_buff *skb, struct dsa_port *dp, static void ocelot_xmit_common(struct sk_buff *skb, struct net_device *netdev, __be32 ifh_prefix, void **ifh) { - struct dsa_port *dp = dsa_slave_to_port(netdev); + struct dsa_port *dp = dsa_user_to_port(netdev); struct dsa_switch *ds = dp->ds; u64 vlan_tci, tag_type; void *injection; @@ -79,7 +79,7 @@ static void ocelot_xmit_common(struct sk_buff *skb, struct net_device *netdev, static struct sk_buff *ocelot_xmit(struct sk_buff *skb, struct net_device *netdev) { - struct dsa_port *dp = dsa_slave_to_port(netdev); + struct dsa_port *dp = dsa_user_to_port(netdev); void *injection; ocelot_xmit_common(skb, netdev, cpu_to_be32(0x8880000a), &injection); @@ -91,7 +91,7 @@ static struct sk_buff *ocelot_xmit(struct sk_buff *skb, static struct sk_buff *seville_xmit(struct sk_buff *skb, struct net_device *netdev) { - struct dsa_port *dp = dsa_slave_to_port(netdev); + struct dsa_port *dp = dsa_user_to_port(netdev); void *injection; ocelot_xmit_common(skb, netdev, cpu_to_be32(0x88800005), &injection); @@ -111,12 +111,12 @@ static struct sk_buff *ocelot_rcv(struct sk_buff *skb, u16 vlan_tpid; u64 rew_val; - /* Revert skb->data by the amount consumed by the DSA master, + /* Revert skb->data by the amount consumed by the DSA conduit, * so it points to the beginning of the frame. */ skb_push(skb, ETH_HLEN); /* We don't care about the short prefix, it is just for easy entrance - * into the DSA master's RX filter. Discard it now by moving it into + * into the DSA conduit's RX filter. Discard it now by moving it into * the headroom. */ skb_pull(skb, OCELOT_SHORT_PREFIX_LEN); @@ -141,12 +141,12 @@ static struct sk_buff *ocelot_rcv(struct sk_buff *skb, ocelot_xfh_get_vlan_tci(extraction, &vlan_tci); ocelot_xfh_get_rew_val(extraction, &rew_val); - skb->dev = dsa_master_find_slave(netdev, 0, src_port); + skb->dev = dsa_conduit_find_user(netdev, 0, src_port); if (!skb->dev) /* The switch will reflect back some frames sent through - * sockets opened on the bare DSA master. These will come back + * sockets opened on the bare DSA conduit. These will come back * with src_port equal to the index of the CPU port, for which - * there is no slave registered. So don't print any error + * there is no user registered. So don't print any error * message here (ignore and drop those frames). */ return NULL; @@ -170,7 +170,7 @@ static struct sk_buff *ocelot_rcv(struct sk_buff *skb, * equal to the pvid of the ingress port and should not be used for * processing. */ - dp = dsa_slave_to_port(skb->dev); + dp = dsa_user_to_port(skb->dev); vlan_tpid = tag_type ? 
ETH_P_8021AD : ETH_P_8021Q; if (dsa_port_is_vlan_filtering(dp) && @@ -192,7 +192,7 @@ static const struct dsa_device_ops ocelot_netdev_ops = { .xmit = ocelot_xmit, .rcv = ocelot_rcv, .needed_headroom = OCELOT_TOTAL_TAG_LEN, - .promisc_on_master = true, + .promisc_on_conduit = true, }; DSA_TAG_DRIVER(ocelot_netdev_ops); @@ -204,7 +204,7 @@ static const struct dsa_device_ops seville_netdev_ops = { .xmit = seville_xmit, .rcv = ocelot_rcv, .needed_headroom = OCELOT_TOTAL_TAG_LEN, - .promisc_on_master = true, + .promisc_on_conduit = true, }; DSA_TAG_DRIVER(seville_netdev_ops); diff --git a/net/dsa/tag_ocelot_8021q.c b/net/dsa/tag_ocelot_8021q.c index 1f0b8c20eb..2100393208 100644 --- a/net/dsa/tag_ocelot_8021q.c +++ b/net/dsa/tag_ocelot_8021q.c @@ -37,8 +37,8 @@ static struct sk_buff *ocelot_defer_xmit(struct dsa_port *dp, return NULL; /* PTP over IP packets need UDP checksumming. We may have inherited - * NETIF_F_HW_CSUM from the DSA master, but these packets are not sent - * through the DSA master, so calculate the checksum here. + * NETIF_F_HW_CSUM from the DSA conduit, but these packets are not sent + * through the DSA conduit, so calculate the checksum here. */ if (skb->ip_summed == CHECKSUM_PARTIAL && skb_checksum_help(skb)) return NULL; @@ -49,7 +49,7 @@ static struct sk_buff *ocelot_defer_xmit(struct dsa_port *dp, /* Calls felix_port_deferred_xmit in felix.c */ kthread_init_work(&xmit_work->work, xmit_work_fn); - /* Increase refcount so the kfree_skb in dsa_slave_xmit + /* Increase refcount so the kfree_skb in dsa_user_xmit * won't really free the packet. */ xmit_work->dp = dp; @@ -63,7 +63,7 @@ static struct sk_buff *ocelot_defer_xmit(struct dsa_port *dp, static struct sk_buff *ocelot_xmit(struct sk_buff *skb, struct net_device *netdev) { - struct dsa_port *dp = dsa_slave_to_port(netdev); + struct dsa_port *dp = dsa_user_to_port(netdev); u16 queue_mapping = skb_get_queue_mapping(skb); u8 pcp = netdev_txq_to_tc(netdev, queue_mapping); u16 tx_vid = dsa_tag_8021q_standalone_vid(dp); @@ -83,7 +83,7 @@ static struct sk_buff *ocelot_rcv(struct sk_buff *skb, dsa_8021q_rcv(skb, &src_port, &switch_id, NULL); - skb->dev = dsa_master_find_slave(netdev, switch_id, src_port); + skb->dev = dsa_conduit_find_user(netdev, switch_id, src_port); if (!skb->dev) return NULL; @@ -130,7 +130,7 @@ static const struct dsa_device_ops ocelot_8021q_netdev_ops = { .connect = ocelot_connect, .disconnect = ocelot_disconnect, .needed_headroom = VLAN_HLEN, - .promisc_on_master = true, + .promisc_on_conduit = true, }; MODULE_LICENSE("GPL v2"); diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c index e5ff7c34e5..6514aa7993 100644 --- a/net/dsa/tag_qca.c +++ b/net/dsa/tag_qca.c @@ -14,7 +14,7 @@ static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); __be16 *phdr; u16 hdr; @@ -78,7 +78,7 @@ static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev) /* Get source port information */ port = FIELD_GET(QCA_HDR_RECV_SOURCE_PORT, hdr); - skb->dev = dsa_master_find_slave(dev, 0, port); + skb->dev = dsa_conduit_find_user(dev, 0, port); if (!skb->dev) return NULL; @@ -116,7 +116,7 @@ static const struct dsa_device_ops qca_netdev_ops = { .xmit = qca_tag_xmit, .rcv = qca_tag_rcv, .needed_headroom = QCA_HDR_LEN, - .promisc_on_master = true, + .promisc_on_conduit = true, }; MODULE_LICENSE("GPL"); diff --git a/net/dsa/tag_rtl4_a.c b/net/dsa/tag_rtl4_a.c index c327314b95..4da5bad1a7 100644 --- 
a/net/dsa/tag_rtl4_a.c +++ b/net/dsa/tag_rtl4_a.c @@ -36,7 +36,7 @@ static struct sk_buff *rtl4a_tag_xmit(struct sk_buff *skb, struct net_device *dev) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); __be16 *p; u8 *tag; u16 out; @@ -97,9 +97,9 @@ static struct sk_buff *rtl4a_tag_rcv(struct sk_buff *skb, } port = protport & 0xff; - skb->dev = dsa_master_find_slave(dev, 0, port); + skb->dev = dsa_conduit_find_user(dev, 0, port); if (!skb->dev) { - netdev_dbg(dev, "could not find slave for port %d\n", port); + netdev_dbg(dev, "could not find user for port %d\n", port); return NULL; } diff --git a/net/dsa/tag_rtl8_4.c b/net/dsa/tag_rtl8_4.c index 4f67834fd1..07e857deba 100644 --- a/net/dsa/tag_rtl8_4.c +++ b/net/dsa/tag_rtl8_4.c @@ -103,7 +103,7 @@ static void rtl8_4_write_tag(struct sk_buff *skb, struct net_device *dev, void *tag) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); __be16 tag16[RTL8_4_TAG_LEN / 2]; /* Set Realtek EtherType */ @@ -180,10 +180,10 @@ static int rtl8_4_read_tag(struct sk_buff *skb, struct net_device *dev, /* Parse TX (switch->CPU) */ port = FIELD_GET(RTL8_4_TX, ntohs(tag16[3])); - skb->dev = dsa_master_find_slave(dev, 0, port); + skb->dev = dsa_conduit_find_user(dev, 0, port); if (!skb->dev) { dev_warn_ratelimited(&dev->dev, - "could not find slave for port %d\n", + "could not find user for port %d\n", port); return -ENOENT; } diff --git a/net/dsa/tag_rzn1_a5psw.c b/net/dsa/tag_rzn1_a5psw.c index 437a6820ac..2ce866b456 100644 --- a/net/dsa/tag_rzn1_a5psw.c +++ b/net/dsa/tag_rzn1_a5psw.c @@ -39,7 +39,7 @@ struct a5psw_tag { static struct sk_buff *a5psw_tag_xmit(struct sk_buff *skb, struct net_device *dev) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); struct a5psw_tag *ptag; u32 data2_val; @@ -90,7 +90,7 @@ static struct sk_buff *a5psw_tag_rcv(struct sk_buff *skb, port = FIELD_GET(A5PSW_CTRL_DATA_PORT, ntohs(tag->ctrl_data)); - skb->dev = dsa_master_find_slave(dev, 0, port); + skb->dev = dsa_conduit_find_user(dev, 0, port); if (!skb->dev) return NULL; diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c index ade3eeb2f3..1fffe8c2b5 100644 --- a/net/dsa/tag_sja1105.c +++ b/net/dsa/tag_sja1105.c @@ -157,7 +157,7 @@ static struct sk_buff *sja1105_defer_xmit(struct dsa_port *dp, return NULL; kthread_init_work(&xmit_work->work, xmit_work_fn); - /* Increase refcount so the kfree_skb in dsa_slave_xmit + /* Increase refcount so the kfree_skb in dsa_user_xmit * won't really free the packet. */ xmit_work->dp = dp; @@ -210,7 +210,7 @@ static u16 sja1105_xmit_tpid(struct dsa_port *dp) static struct sk_buff *sja1105_imprecise_xmit(struct sk_buff *skb, struct net_device *netdev) { - struct dsa_port *dp = dsa_slave_to_port(netdev); + struct dsa_port *dp = dsa_user_to_port(netdev); unsigned int bridge_num = dsa_port_bridge_num_get(dp); struct net_device *br = dsa_port_bridge_dev_get(dp); u16 tx_vid; @@ -235,7 +235,7 @@ static struct sk_buff *sja1105_imprecise_xmit(struct sk_buff *skb, /* Transform untagged control packets into pvid-tagged control packets so that * all packets sent by this tagger are VLAN-tagged and we can configure the - * switch to drop untagged packets coming from the DSA master. + * switch to drop untagged packets coming from the DSA conduit. 
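A sketch of the pvid-tagging idea just described, assuming the port is under a VLAN-aware bridge; the real tagger also handles the standalone and VLAN-unaware cases using tag_8021q VIDs, so this is an illustration, not the function's actual body:

static struct sk_buff *example_pvid_tag(struct dsa_port *dp,
					struct sk_buff *skb, u8 pcp)
{
	struct net_device *br = dsa_port_bridge_dev_get(dp);
	u16 pvid;

	if (!br || br_vlan_get_pvid(br, &pvid))
		return skb;	/* no pvid known: leave untagged */

	/* vlan_insert_tag() frees the skb and returns NULL on error */
	return vlan_insert_tag(skb, htons(sja1105_xmit_tpid(dp)),
			       pcp << VLAN_PRIO_SHIFT | pvid);
}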
*/ static struct sk_buff *sja1105_pvid_tag_control_pkt(struct dsa_port *dp, struct sk_buff *skb, u8 pcp) @@ -266,7 +266,7 @@ static struct sk_buff *sja1105_pvid_tag_control_pkt(struct dsa_port *dp, static struct sk_buff *sja1105_xmit(struct sk_buff *skb, struct net_device *netdev) { - struct dsa_port *dp = dsa_slave_to_port(netdev); + struct dsa_port *dp = dsa_user_to_port(netdev); u16 queue_mapping = skb_get_queue_mapping(skb); u8 pcp = netdev_txq_to_tc(netdev, queue_mapping); u16 tx_vid = dsa_tag_8021q_standalone_vid(dp); @@ -294,7 +294,7 @@ static struct sk_buff *sja1110_xmit(struct sk_buff *skb, struct net_device *netdev) { struct sk_buff *clone = SJA1105_SKB_CB(skb)->clone; - struct dsa_port *dp = dsa_slave_to_port(netdev); + struct dsa_port *dp = dsa_user_to_port(netdev); u16 queue_mapping = skb_get_queue_mapping(skb); u8 pcp = netdev_txq_to_tc(netdev, queue_mapping); u16 tx_vid = dsa_tag_8021q_standalone_vid(dp); @@ -383,7 +383,7 @@ static struct sk_buff * Buffer it until we get its meta frame. */ if (is_link_local) { - struct dsa_port *dp = dsa_slave_to_port(skb->dev); + struct dsa_port *dp = dsa_user_to_port(skb->dev); struct sja1105_tagger_private *priv; struct dsa_switch *ds = dp->ds; @@ -396,7 +396,7 @@ static struct sk_buff if (priv->stampable_skb) { dev_err_ratelimited(ds->dev, "Expected meta frame, is %12llx " - "in the DSA master multicast filter?\n", + "in the DSA conduit multicast filter?\n", SJA1105_META_DMAC); kfree_skb(priv->stampable_skb); } @@ -417,7 +417,7 @@ static struct sk_buff * frame, which serves no further purpose). */ } else if (is_meta) { - struct dsa_port *dp = dsa_slave_to_port(skb->dev); + struct dsa_port *dp = dsa_user_to_port(skb->dev); struct sja1105_tagger_private *priv; struct dsa_switch *ds = dp->ds; struct sk_buff *stampable_skb; @@ -550,7 +550,7 @@ static struct sk_buff *sja1105_rcv(struct sk_buff *skb, } if (source_port != -1 && switch_id != -1) - skb->dev = dsa_master_find_slave(netdev, switch_id, source_port); + skb->dev = dsa_conduit_find_user(netdev, switch_id, source_port); else if (vbid >= 1) skb->dev = dsa_tag_8021q_find_port_by_vbid(netdev, vbid); else @@ -573,16 +573,16 @@ static struct sk_buff *sja1110_rcv_meta(struct sk_buff *skb, u16 rx_header) int switch_id = SJA1110_RX_HEADER_SWITCH_ID(rx_header); int n_ts = SJA1110_RX_HEADER_N_TS(rx_header); struct sja1105_tagger_data *tagger_data; - struct net_device *master = skb->dev; + struct net_device *conduit = skb->dev; struct dsa_port *cpu_dp; struct dsa_switch *ds; int i; - cpu_dp = master->dsa_ptr; + cpu_dp = conduit->dsa_ptr; ds = dsa_switch_find(cpu_dp->dst->index, switch_id); if (!ds) { net_err_ratelimited("%s: cannot find switch id %d\n", - master->name, switch_id); + conduit->name, switch_id); return NULL; } @@ -649,7 +649,7 @@ static struct sk_buff *sja1110_rcv_inband_control_extension(struct sk_buff *skb, /* skb->len counts from skb->data, while start_of_padding * counts from the destination MAC address. Right now skb->data - * is still as set by the DSA master, so to trim away the + * is still as set by the DSA conduit, so to trim away the * padding and trailer we need to account for the fact that * skb->data points to skb_mac_header(skb) + ETH_HLEN. 
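Concretely, the offset bookkeeping above boils down to a single trim: start_of_padding counts from the destination MAC while skb->len counts from skb->data = skb_mac_header(skb) + ETH_HLEN, so keeping only the real frame means (illustrative helper):

static inline void example_trim_padding(struct sk_buff *skb,
					int start_of_padding)
{
	if (pskb_trim_rcsum(skb, start_of_padding - ETH_HLEN))
		net_warn_ratelimited("failed to trim padding\n");
}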
*/ @@ -698,7 +698,7 @@ static struct sk_buff *sja1110_rcv(struct sk_buff *skb, else if (source_port == -1 || switch_id == -1) skb->dev = dsa_find_designated_bridge_port_by_vid(netdev, vid); else - skb->dev = dsa_master_find_slave(netdev, switch_id, source_port); + skb->dev = dsa_conduit_find_user(netdev, switch_id, source_port); if (!skb->dev) { netdev_warn(netdev, "Couldn't decode source port\n"); return NULL; @@ -778,7 +778,7 @@ static const struct dsa_device_ops sja1105_netdev_ops = { .disconnect = sja1105_disconnect, .needed_headroom = VLAN_HLEN, .flow_dissect = sja1105_flow_dissect, - .promisc_on_master = true, + .promisc_on_conduit = true, }; DSA_TAG_DRIVER(sja1105_netdev_ops); diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c index 7361b91063..1ebb25a8b1 100644 --- a/net/dsa/tag_trailer.c +++ b/net/dsa/tag_trailer.c @@ -14,7 +14,7 @@ static struct sk_buff *trailer_xmit(struct sk_buff *skb, struct net_device *dev) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); u8 *trailer; trailer = skb_put(skb, 4); @@ -41,7 +41,7 @@ static struct sk_buff *trailer_rcv(struct sk_buff *skb, struct net_device *dev) source_port = trailer[1] & 7; - skb->dev = dsa_master_find_slave(dev, 0, source_port); + skb->dev = dsa_conduit_find_user(dev, 0, source_port); if (!skb->dev) return NULL; diff --git a/net/dsa/tag_xrs700x.c b/net/dsa/tag_xrs700x.c index af19969f9b..c9c163598e 100644 --- a/net/dsa/tag_xrs700x.c +++ b/net/dsa/tag_xrs700x.c @@ -13,7 +13,7 @@ static struct sk_buff *xrs700x_xmit(struct sk_buff *skb, struct net_device *dev) { - struct dsa_port *partner, *dp = dsa_slave_to_port(dev); + struct dsa_port *partner, *dp = dsa_user_to_port(dev); u8 *trailer; trailer = skb_put(skb, 1); @@ -39,7 +39,7 @@ static struct sk_buff *xrs700x_rcv(struct sk_buff *skb, struct net_device *dev) if (source_port < 0) return NULL; - skb->dev = dsa_master_find_slave(dev, 0, source_port); + skb->dev = dsa_conduit_find_user(dev, 0, source_port); if (!skb->dev) return NULL; diff --git a/net/dsa/slave.c b/net/dsa/user.c index 48db91b333..a82c7f5a1a 100644 --- a/net/dsa/slave.c +++ b/net/dsa/user.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-or-later /* - * net/dsa/slave.c - Slave device handling + * net/dsa/user.c - user device handling * Copyright (c) 2008-2009 Marvell Semiconductor */ @@ -23,13 +23,13 @@ #include <linux/netpoll.h> #include <linux/string.h> +#include "conduit.h" #include "dsa.h" -#include "port.h" -#include "master.h" #include "netlink.h" -#include "slave.h" +#include "port.h" #include "switch.h" #include "tag.h" +#include "user.h" struct dsa_switchdev_event_work { struct net_device *dev; @@ -79,13 +79,13 @@ static bool dsa_switch_supports_mc_filtering(struct dsa_switch *ds) !ds->needs_standalone_vlan_filtering; } -static void dsa_slave_standalone_event_work(struct work_struct *work) +static void dsa_user_standalone_event_work(struct work_struct *work) { struct dsa_standalone_event_work *standalone_work = container_of(work, struct dsa_standalone_event_work, work); const unsigned char *addr = standalone_work->addr; struct net_device *dev = standalone_work->dev; - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); struct switchdev_obj_port_mdb mdb; struct dsa_switch *ds = dp->ds; u16 vid = standalone_work->vid; @@ -140,10 +140,10 @@ static void dsa_slave_standalone_event_work(struct work_struct *work) kfree(standalone_work); } -static int dsa_slave_schedule_standalone_work(struct net_device *dev, - enum 
dsa_standalone_event event, - const unsigned char *addr, - u16 vid) +static int dsa_user_schedule_standalone_work(struct net_device *dev, + enum dsa_standalone_event event, + const unsigned char *addr, + u16 vid) { struct dsa_standalone_event_work *standalone_work; @@ -151,7 +151,7 @@ static int dsa_slave_schedule_standalone_work(struct net_device *dev, if (!standalone_work) return -ENOMEM; - INIT_WORK(&standalone_work->work, dsa_slave_standalone_event_work); + INIT_WORK(&standalone_work->work, dsa_user_standalone_event_work); standalone_work->event = event; standalone_work->dev = dev; @@ -163,18 +163,18 @@ static int dsa_slave_schedule_standalone_work(struct net_device *dev, return 0; } -static int dsa_slave_host_vlan_rx_filtering(void *arg, int vid) +static int dsa_user_host_vlan_rx_filtering(void *arg, int vid) { struct dsa_host_vlan_rx_filtering_ctx *ctx = arg; - return dsa_slave_schedule_standalone_work(ctx->dev, ctx->event, + return dsa_user_schedule_standalone_work(ctx->dev, ctx->event, ctx->addr, vid); } -static int dsa_slave_vlan_for_each(struct net_device *dev, - int (*cb)(void *arg, int vid), void *arg) +static int dsa_user_vlan_for_each(struct net_device *dev, + int (*cb)(void *arg, int vid), void *arg) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_vlan *v; int err; @@ -193,99 +193,99 @@ static int dsa_slave_vlan_for_each(struct net_device *dev, return 0; } -static int dsa_slave_sync_uc(struct net_device *dev, - const unsigned char *addr) +static int dsa_user_sync_uc(struct net_device *dev, + const unsigned char *addr) { - struct net_device *master = dsa_slave_to_master(dev); - struct dsa_port *dp = dsa_slave_to_port(dev); + struct net_device *conduit = dsa_user_to_conduit(dev); + struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_host_vlan_rx_filtering_ctx ctx = { .dev = dev, .addr = addr, .event = DSA_UC_ADD, }; - dev_uc_add(master, addr); + dev_uc_add(conduit, addr); if (!dsa_switch_supports_uc_filtering(dp->ds)) return 0; - return dsa_slave_vlan_for_each(dev, dsa_slave_host_vlan_rx_filtering, + return dsa_user_vlan_for_each(dev, dsa_user_host_vlan_rx_filtering, &ctx); } -static int dsa_slave_unsync_uc(struct net_device *dev, - const unsigned char *addr) +static int dsa_user_unsync_uc(struct net_device *dev, + const unsigned char *addr) { - struct net_device *master = dsa_slave_to_master(dev); - struct dsa_port *dp = dsa_slave_to_port(dev); + struct net_device *conduit = dsa_user_to_conduit(dev); + struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_host_vlan_rx_filtering_ctx ctx = { .dev = dev, .addr = addr, .event = DSA_UC_DEL, }; - dev_uc_del(master, addr); + dev_uc_del(conduit, addr); if (!dsa_switch_supports_uc_filtering(dp->ds)) return 0; - return dsa_slave_vlan_for_each(dev, dsa_slave_host_vlan_rx_filtering, + return dsa_user_vlan_for_each(dev, dsa_user_host_vlan_rx_filtering, &ctx); } -static int dsa_slave_sync_mc(struct net_device *dev, - const unsigned char *addr) +static int dsa_user_sync_mc(struct net_device *dev, + const unsigned char *addr) { - struct net_device *master = dsa_slave_to_master(dev); - struct dsa_port *dp = dsa_slave_to_port(dev); + struct net_device *conduit = dsa_user_to_conduit(dev); + struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_host_vlan_rx_filtering_ctx ctx = { .dev = dev, .addr = addr, .event = DSA_MC_ADD, }; - dev_mc_add(master, addr); + dev_mc_add(conduit, addr); if (!dsa_switch_supports_mc_filtering(dp->ds)) return 0; - return dsa_slave_vlan_for_each(dev, 
dsa_slave_host_vlan_rx_filtering, + return dsa_user_vlan_for_each(dev, dsa_user_host_vlan_rx_filtering, &ctx); } -static int dsa_slave_unsync_mc(struct net_device *dev, - const unsigned char *addr) +static int dsa_user_unsync_mc(struct net_device *dev, + const unsigned char *addr) { - struct net_device *master = dsa_slave_to_master(dev); - struct dsa_port *dp = dsa_slave_to_port(dev); + struct net_device *conduit = dsa_user_to_conduit(dev); + struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_host_vlan_rx_filtering_ctx ctx = { .dev = dev, .addr = addr, .event = DSA_MC_DEL, }; - dev_mc_del(master, addr); + dev_mc_del(conduit, addr); if (!dsa_switch_supports_mc_filtering(dp->ds)) return 0; - return dsa_slave_vlan_for_each(dev, dsa_slave_host_vlan_rx_filtering, + return dsa_user_vlan_for_each(dev, dsa_user_host_vlan_rx_filtering, &ctx); } -void dsa_slave_sync_ha(struct net_device *dev) +void dsa_user_sync_ha(struct net_device *dev) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch *ds = dp->ds; struct netdev_hw_addr *ha; netif_addr_lock_bh(dev); netdev_for_each_synced_mc_addr(ha, dev) - dsa_slave_sync_mc(dev, ha->addr); + dsa_user_sync_mc(dev, ha->addr); netdev_for_each_synced_uc_addr(ha, dev) - dsa_slave_sync_uc(dev, ha->addr); + dsa_user_sync_uc(dev, ha->addr); netif_addr_unlock_bh(dev); @@ -294,19 +294,19 @@ void dsa_slave_sync_ha(struct net_device *dev) dsa_flush_workqueue(); } -void dsa_slave_unsync_ha(struct net_device *dev) +void dsa_user_unsync_ha(struct net_device *dev) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch *ds = dp->ds; struct netdev_hw_addr *ha; netif_addr_lock_bh(dev); netdev_for_each_synced_uc_addr(ha, dev) - dsa_slave_unsync_uc(dev, ha->addr); + dsa_user_unsync_uc(dev, ha->addr); netdev_for_each_synced_mc_addr(ha, dev) - dsa_slave_unsync_mc(dev, ha->addr); + dsa_user_unsync_mc(dev, ha->addr); netif_addr_unlock_bh(dev); @@ -315,8 +315,8 @@ void dsa_slave_unsync_ha(struct net_device *dev) dsa_flush_workqueue(); } -/* slave mii_bus handling ***************************************************/ -static int dsa_slave_phy_read(struct mii_bus *bus, int addr, int reg) +/* user mii_bus handling ***************************************************/ +static int dsa_user_phy_read(struct mii_bus *bus, int addr, int reg) { struct dsa_switch *ds = bus->priv; @@ -326,7 +326,7 @@ static int dsa_slave_phy_read(struct mii_bus *bus, int addr, int reg) return 0xffff; } -static int dsa_slave_phy_write(struct mii_bus *bus, int addr, int reg, u16 val) +static int dsa_user_phy_write(struct mii_bus *bus, int addr, int reg, u16 val) { struct dsa_switch *ds = bus->priv; @@ -336,35 +336,35 @@ static int dsa_slave_phy_write(struct mii_bus *bus, int addr, int reg, u16 val) return 0; } -void dsa_slave_mii_bus_init(struct dsa_switch *ds) +void dsa_user_mii_bus_init(struct dsa_switch *ds) { - ds->slave_mii_bus->priv = (void *)ds; - ds->slave_mii_bus->name = "dsa slave smi"; - ds->slave_mii_bus->read = dsa_slave_phy_read; - ds->slave_mii_bus->write = dsa_slave_phy_write; - snprintf(ds->slave_mii_bus->id, MII_BUS_ID_SIZE, "dsa-%d.%d", + ds->user_mii_bus->priv = (void *)ds; + ds->user_mii_bus->name = "dsa user smi"; + ds->user_mii_bus->read = dsa_user_phy_read; + ds->user_mii_bus->write = dsa_user_phy_write; + snprintf(ds->user_mii_bus->id, MII_BUS_ID_SIZE, "dsa-%d.%d", ds->dst->index, ds->index); - ds->slave_mii_bus->parent = ds->dev; - ds->slave_mii_bus->phy_mask = 
~ds->phys_mii_mask; + ds->user_mii_bus->parent = ds->dev; + ds->user_mii_bus->phy_mask = ~ds->phys_mii_mask; } -/* slave device handling ****************************************************/ -static int dsa_slave_get_iflink(const struct net_device *dev) +/* user device handling ****************************************************/ +static int dsa_user_get_iflink(const struct net_device *dev) { - return dsa_slave_to_master(dev)->ifindex; + return dsa_user_to_conduit(dev)->ifindex; } -static int dsa_slave_open(struct net_device *dev) +static int dsa_user_open(struct net_device *dev) { - struct net_device *master = dsa_slave_to_master(dev); - struct dsa_port *dp = dsa_slave_to_port(dev); + struct net_device *conduit = dsa_user_to_conduit(dev); + struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch *ds = dp->ds; int err; - err = dev_open(master, NULL); + err = dev_open(conduit, NULL); if (err < 0) { - netdev_err(dev, "failed to open master %s\n", master->name); + netdev_err(dev, "failed to open conduit %s\n", conduit->name); goto out; } @@ -374,8 +374,8 @@ static int dsa_slave_open(struct net_device *dev) goto out; } - if (!ether_addr_equal(dev->dev_addr, master->dev_addr)) { - err = dev_uc_add(master, dev->dev_addr); + if (!ether_addr_equal(dev->dev_addr, conduit->dev_addr)) { + err = dev_uc_add(conduit, dev->dev_addr); if (err < 0) goto del_host_addr; } @@ -387,8 +387,8 @@ static int dsa_slave_open(struct net_device *dev) return 0; del_unicast: - if (!ether_addr_equal(dev->dev_addr, master->dev_addr)) - dev_uc_del(master, dev->dev_addr); + if (!ether_addr_equal(dev->dev_addr, conduit->dev_addr)) + dev_uc_del(conduit, dev->dev_addr); del_host_addr: if (dsa_switch_supports_uc_filtering(ds)) dsa_port_standalone_host_fdb_del(dp, dev->dev_addr, 0); @@ -396,16 +396,16 @@ out: return err; } -static int dsa_slave_close(struct net_device *dev) +static int dsa_user_close(struct net_device *dev) { - struct net_device *master = dsa_slave_to_master(dev); - struct dsa_port *dp = dsa_slave_to_port(dev); + struct net_device *conduit = dsa_user_to_conduit(dev); + struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch *ds = dp->ds; dsa_port_disable_rt(dp); - if (!ether_addr_equal(dev->dev_addr, master->dev_addr)) - dev_uc_del(master, dev->dev_addr); + if (!ether_addr_equal(dev->dev_addr, conduit->dev_addr)) + dev_uc_del(conduit, dev->dev_addr); if (dsa_switch_supports_uc_filtering(ds)) dsa_port_standalone_host_fdb_del(dp, dev->dev_addr, 0); @@ -413,43 +413,43 @@ static int dsa_slave_close(struct net_device *dev) return 0; } -static void dsa_slave_manage_host_flood(struct net_device *dev) +static void dsa_user_manage_host_flood(struct net_device *dev) { bool mc = dev->flags & (IFF_PROMISC | IFF_ALLMULTI); - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); bool uc = dev->flags & IFF_PROMISC; dsa_port_set_host_flood(dp, uc, mc); } -static void dsa_slave_change_rx_flags(struct net_device *dev, int change) +static void dsa_user_change_rx_flags(struct net_device *dev, int change) { - struct net_device *master = dsa_slave_to_master(dev); - struct dsa_port *dp = dsa_slave_to_port(dev); + struct net_device *conduit = dsa_user_to_conduit(dev); + struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch *ds = dp->ds; if (change & IFF_ALLMULTI) - dev_set_allmulti(master, + dev_set_allmulti(conduit, dev->flags & IFF_ALLMULTI ? 1 : -1); if (change & IFF_PROMISC) - dev_set_promiscuity(master, + dev_set_promiscuity(conduit, dev->flags & IFF_PROMISC ? 
1 : -1); if (dsa_switch_supports_uc_filtering(ds) && dsa_switch_supports_mc_filtering(ds)) - dsa_slave_manage_host_flood(dev); + dsa_user_manage_host_flood(dev); } -static void dsa_slave_set_rx_mode(struct net_device *dev) +static void dsa_user_set_rx_mode(struct net_device *dev) { - __dev_mc_sync(dev, dsa_slave_sync_mc, dsa_slave_unsync_mc); - __dev_uc_sync(dev, dsa_slave_sync_uc, dsa_slave_unsync_uc); + __dev_mc_sync(dev, dsa_user_sync_mc, dsa_user_unsync_mc); + __dev_uc_sync(dev, dsa_user_sync_uc, dsa_user_unsync_uc); } -static int dsa_slave_set_mac_address(struct net_device *dev, void *a) +static int dsa_user_set_mac_address(struct net_device *dev, void *a) { - struct net_device *master = dsa_slave_to_master(dev); - struct dsa_port *dp = dsa_slave_to_port(dev); + struct net_device *conduit = dsa_user_to_conduit(dev); + struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch *ds = dp->ds; struct sockaddr *addr = a; int err; @@ -457,8 +457,15 @@ static int dsa_slave_set_mac_address(struct net_device *dev, void *a) if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; + if (ds->ops->port_set_mac_address) { + err = ds->ops->port_set_mac_address(ds, dp->index, + addr->sa_data); + if (err) + return err; + } + /* If the port is down, the address isn't synced yet to hardware or - * to the DSA master, so there is nothing to change. + * to the DSA conduit, so there is nothing to change. */ if (!(dev->flags & IFF_UP)) goto out_change_dev_addr; @@ -469,14 +476,14 @@ static int dsa_slave_set_mac_address(struct net_device *dev, void *a) return err; } - if (!ether_addr_equal(addr->sa_data, master->dev_addr)) { - err = dev_uc_add(master, addr->sa_data); + if (!ether_addr_equal(addr->sa_data, conduit->dev_addr)) { + err = dev_uc_add(conduit, addr->sa_data); if (err < 0) goto del_unicast; } - if (!ether_addr_equal(dev->dev_addr, master->dev_addr)) - dev_uc_del(master, dev->dev_addr); + if (!ether_addr_equal(dev->dev_addr, conduit->dev_addr)) + dev_uc_del(conduit, dev->dev_addr); if (dsa_switch_supports_uc_filtering(ds)) dsa_port_standalone_host_fdb_del(dp, dev->dev_addr, 0); @@ -493,7 +500,7 @@ del_unicast: return err; } -struct dsa_slave_dump_ctx { +struct dsa_user_dump_ctx { struct net_device *dev; struct sk_buff *skb; struct netlink_callback *cb; @@ -501,10 +508,10 @@ struct dsa_slave_dump_ctx { }; static int -dsa_slave_port_fdb_do_dump(const unsigned char *addr, u16 vid, - bool is_static, void *data) +dsa_user_port_fdb_do_dump(const unsigned char *addr, u16 vid, + bool is_static, void *data) { - struct dsa_slave_dump_ctx *dump = data; + struct dsa_user_dump_ctx *dump = data; u32 portid = NETLINK_CB(dump->cb->skb).portid; u32 seq = dump->cb->nlh->nlmsg_seq; struct nlmsghdr *nlh; @@ -545,12 +552,12 @@ nla_put_failure: } static int -dsa_slave_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, - struct net_device *dev, struct net_device *filter_dev, - int *idx) +dsa_user_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, + struct net_device *dev, struct net_device *filter_dev, + int *idx) { - struct dsa_port *dp = dsa_slave_to_port(dev); - struct dsa_slave_dump_ctx dump = { + struct dsa_port *dp = dsa_user_to_port(dev); + struct dsa_user_dump_ctx dump = { .dev = dev, .skb = skb, .cb = cb, @@ -558,15 +565,15 @@ dsa_slave_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, }; int err; - err = dsa_port_fdb_dump(dp, dsa_slave_port_fdb_do_dump, &dump); + err = dsa_port_fdb_dump(dp, dsa_user_port_fdb_do_dump, &dump); *idx = dump.idx; return err; } -static int 
dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +static int dsa_user_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { - struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_user_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->dp->ds; int port = p->dp->index; @@ -585,11 +592,11 @@ static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) return phylink_mii_ioctl(p->dp->pl, ifr, cmd); } -static int dsa_slave_port_attr_set(struct net_device *dev, const void *ctx, - const struct switchdev_attr *attr, - struct netlink_ext_ack *extack) +static int dsa_user_port_attr_set(struct net_device *dev, const void *ctx, + const struct switchdev_attr *attr, + struct netlink_ext_ack *extack) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); int ret; if (ctx && ctx != dp) @@ -656,13 +663,13 @@ static int dsa_slave_port_attr_set(struct net_device *dev, const void *ctx, /* Must be called under rcu_read_lock() */ static int -dsa_slave_vlan_check_for_8021q_uppers(struct net_device *slave, - const struct switchdev_obj_port_vlan *vlan) +dsa_user_vlan_check_for_8021q_uppers(struct net_device *user, + const struct switchdev_obj_port_vlan *vlan) { struct net_device *upper_dev; struct list_head *iter; - netdev_for_each_upper_dev_rcu(slave, upper_dev, iter) { + netdev_for_each_upper_dev_rcu(user, upper_dev, iter) { u16 vid; if (!is_vlan_dev(upper_dev)) @@ -676,11 +683,11 @@ dsa_slave_vlan_check_for_8021q_uppers(struct net_device *slave, return 0; } -static int dsa_slave_vlan_add(struct net_device *dev, - const struct switchdev_obj *obj, - struct netlink_ext_ack *extack) +static int dsa_user_vlan_add(struct net_device *dev, + const struct switchdev_obj *obj, + struct netlink_ext_ack *extack) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); struct switchdev_obj_port_vlan *vlan; int err; @@ -696,7 +703,7 @@ static int dsa_slave_vlan_add(struct net_device *dev, */ if (br_vlan_enabled(dsa_port_bridge_dev_get(dp))) { rcu_read_lock(); - err = dsa_slave_vlan_check_for_8021q_uppers(dev, vlan); + err = dsa_user_vlan_check_for_8021q_uppers(dev, vlan); rcu_read_unlock(); if (err) { NL_SET_ERR_MSG_MOD(extack, @@ -711,11 +718,11 @@ static int dsa_slave_vlan_add(struct net_device *dev, /* Offload a VLAN installed on the bridge or on a foreign interface by * installing it as a VLAN towards the CPU port. 
*/ -static int dsa_slave_host_vlan_add(struct net_device *dev, - const struct switchdev_obj *obj, - struct netlink_ext_ack *extack) +static int dsa_user_host_vlan_add(struct net_device *dev, + const struct switchdev_obj *obj, + struct netlink_ext_ack *extack) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); struct switchdev_obj_port_vlan vlan; /* Do nothing if this is a software bridge */ @@ -737,11 +744,11 @@ static int dsa_slave_host_vlan_add(struct net_device *dev, return dsa_port_host_vlan_add(dp, &vlan, extack); } -static int dsa_slave_port_obj_add(struct net_device *dev, const void *ctx, - const struct switchdev_obj *obj, - struct netlink_ext_ack *extack) +static int dsa_user_port_obj_add(struct net_device *dev, const void *ctx, + const struct switchdev_obj *obj, + struct netlink_ext_ack *extack) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); int err; if (ctx && ctx != dp) @@ -762,9 +769,9 @@ static int dsa_slave_port_obj_add(struct net_device *dev, const void *ctx, break; case SWITCHDEV_OBJ_ID_PORT_VLAN: if (dsa_port_offloads_bridge_port(dp, obj->orig_dev)) - err = dsa_slave_vlan_add(dev, obj, extack); + err = dsa_user_vlan_add(dev, obj, extack); else - err = dsa_slave_host_vlan_add(dev, obj, extack); + err = dsa_user_host_vlan_add(dev, obj, extack); break; case SWITCHDEV_OBJ_ID_MRP: if (!dsa_port_offloads_bridge_dev(dp, obj->orig_dev)) @@ -787,10 +794,10 @@ static int dsa_slave_port_obj_add(struct net_device *dev, const void *ctx, return err; } -static int dsa_slave_vlan_del(struct net_device *dev, - const struct switchdev_obj *obj) +static int dsa_user_vlan_del(struct net_device *dev, + const struct switchdev_obj *obj) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); struct switchdev_obj_port_vlan *vlan; if (dsa_port_skip_vlan_configuration(dp)) @@ -801,10 +808,10 @@ static int dsa_slave_vlan_del(struct net_device *dev, return dsa_port_vlan_del(dp, vlan); } -static int dsa_slave_host_vlan_del(struct net_device *dev, - const struct switchdev_obj *obj) +static int dsa_user_host_vlan_del(struct net_device *dev, + const struct switchdev_obj *obj) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); struct switchdev_obj_port_vlan *vlan; /* Do nothing if this is a software bridge */ @@ -819,10 +826,10 @@ static int dsa_slave_host_vlan_del(struct net_device *dev, return dsa_port_host_vlan_del(dp, vlan); } -static int dsa_slave_port_obj_del(struct net_device *dev, const void *ctx, - const struct switchdev_obj *obj) +static int dsa_user_port_obj_del(struct net_device *dev, const void *ctx, + const struct switchdev_obj *obj) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); int err; if (ctx && ctx != dp) @@ -843,9 +850,9 @@ static int dsa_slave_port_obj_del(struct net_device *dev, const void *ctx, break; case SWITCHDEV_OBJ_ID_PORT_VLAN: if (dsa_port_offloads_bridge_port(dp, obj->orig_dev)) - err = dsa_slave_vlan_del(dev, obj); + err = dsa_user_vlan_del(dev, obj); else - err = dsa_slave_host_vlan_del(dev, obj); + err = dsa_user_host_vlan_del(dev, obj); break; case SWITCHDEV_OBJ_ID_MRP: if (!dsa_port_offloads_bridge_dev(dp, obj->orig_dev)) @@ -868,11 +875,11 @@ static int dsa_slave_port_obj_del(struct net_device *dev, const void *ctx, return err; } -static inline netdev_tx_t dsa_slave_netpoll_send_skb(struct net_device *dev, - struct sk_buff *skb) +static 
inline netdev_tx_t dsa_user_netpoll_send_skb(struct net_device *dev, + struct sk_buff *skb) { #ifdef CONFIG_NET_POLL_CONTROLLER - struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_user_priv *p = netdev_priv(dev); return netpoll_send_skb(p->netpoll, skb); #else @@ -881,7 +888,7 @@ static inline netdev_tx_t dsa_slave_netpoll_send_skb(struct net_device *dev, #endif } -static void dsa_skb_tx_timestamp(struct dsa_slave_priv *p, +static void dsa_skb_tx_timestamp(struct dsa_user_priv *p, struct sk_buff *skb) { struct dsa_switch *ds = p->dp->ds; @@ -901,12 +908,12 @@ netdev_tx_t dsa_enqueue_skb(struct sk_buff *skb, struct net_device *dev) * tag to be successfully transmitted */ if (unlikely(netpoll_tx_running(dev))) - return dsa_slave_netpoll_send_skb(dev, skb); + return dsa_user_netpoll_send_skb(dev, skb); /* Queue the SKB for transmission on the parent interface, but * do not modify its EtherType */ - skb->dev = dsa_slave_to_master(dev); + skb->dev = dsa_user_to_conduit(dev); dev_queue_xmit(skb); return NETDEV_TX_OK; @@ -920,7 +927,7 @@ static int dsa_realloc_skb(struct sk_buff *skb, struct net_device *dev) /* For tail taggers, we need to pad short frames ourselves, to ensure * that the tail tag does not fail at its role of being at the end of - * the packet, once the master interface pads the frame. Account for + * the packet, once the conduit interface pads the frame. Account for * that pad length here, and pad later. */ if (unlikely(needed_tailroom && skb->len < ETH_ZLEN)) @@ -937,9 +944,9 @@ static int dsa_realloc_skb(struct sk_buff *skb, struct net_device *dev) GFP_ATOMIC); } -static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t dsa_user_xmit(struct sk_buff *skb, struct net_device *dev) { - struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_user_priv *p = netdev_priv(dev); struct sk_buff *nskb; dev_sw_netstats_tx_add(dev, 1, skb->len); @@ -974,17 +981,17 @@ static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev) /* ethtool operations *******************************************************/ -static void dsa_slave_get_drvinfo(struct net_device *dev, - struct ethtool_drvinfo *drvinfo) +static void dsa_user_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *drvinfo) { strscpy(drvinfo->driver, "dsa", sizeof(drvinfo->driver)); strscpy(drvinfo->fw_version, "N/A", sizeof(drvinfo->fw_version)); strscpy(drvinfo->bus_info, "platform", sizeof(drvinfo->bus_info)); } -static int dsa_slave_get_regs_len(struct net_device *dev) +static int dsa_user_get_regs_len(struct net_device *dev) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch *ds = dp->ds; if (ds->ops->get_regs_len) @@ -994,25 +1001,25 @@ static int dsa_slave_get_regs_len(struct net_device *dev) } static void -dsa_slave_get_regs(struct net_device *dev, struct ethtool_regs *regs, void *_p) +dsa_user_get_regs(struct net_device *dev, struct ethtool_regs *regs, void *_p) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch *ds = dp->ds; if (ds->ops->get_regs) ds->ops->get_regs(ds, dp->index, regs, _p); } -static int dsa_slave_nway_reset(struct net_device *dev) +static int dsa_user_nway_reset(struct net_device *dev) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); return phylink_ethtool_nway_reset(dp->pl); } -static int dsa_slave_get_eeprom_len(struct net_device *dev) +static int 
dsa_user_get_eeprom_len(struct net_device *dev) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch *ds = dp->ds; if (ds->cd && ds->cd->eeprom_len) @@ -1024,10 +1031,10 @@ static int dsa_slave_get_eeprom_len(struct net_device *dev) return 0; } -static int dsa_slave_get_eeprom(struct net_device *dev, - struct ethtool_eeprom *eeprom, u8 *data) +static int dsa_user_get_eeprom(struct net_device *dev, + struct ethtool_eeprom *eeprom, u8 *data) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch *ds = dp->ds; if (ds->ops->get_eeprom) @@ -1036,10 +1043,10 @@ static int dsa_slave_get_eeprom(struct net_device *dev, return -EOPNOTSUPP; } -static int dsa_slave_set_eeprom(struct net_device *dev, - struct ethtool_eeprom *eeprom, u8 *data) +static int dsa_user_set_eeprom(struct net_device *dev, + struct ethtool_eeprom *eeprom, u8 *data) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch *ds = dp->ds; if (ds->ops->set_eeprom) @@ -1048,10 +1055,10 @@ static int dsa_slave_set_eeprom(struct net_device *dev, return -EOPNOTSUPP; } -static void dsa_slave_get_strings(struct net_device *dev, - uint32_t stringset, uint8_t *data) +static void dsa_user_get_strings(struct net_device *dev, + uint32_t stringset, uint8_t *data) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch *ds = dp->ds; if (stringset == ETH_SS_STATS) { @@ -1070,11 +1077,11 @@ static void dsa_slave_get_strings(struct net_device *dev, } -static void dsa_slave_get_ethtool_stats(struct net_device *dev, - struct ethtool_stats *stats, - uint64_t *data) +static void dsa_user_get_ethtool_stats(struct net_device *dev, + struct ethtool_stats *stats, + uint64_t *data) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch *ds = dp->ds; struct pcpu_sw_netstats *s; unsigned int start; @@ -1100,9 +1107,9 @@ static void dsa_slave_get_ethtool_stats(struct net_device *dev, ds->ops->get_ethtool_stats(ds, dp->index, data + 4); } -static int dsa_slave_get_sset_count(struct net_device *dev, int sset) +static int dsa_user_get_sset_count(struct net_device *dev, int sset) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch *ds = dp->ds; if (sset == ETH_SS_STATS) { @@ -1122,20 +1129,20 @@ static int dsa_slave_get_sset_count(struct net_device *dev, int sset) return -EOPNOTSUPP; } -static void dsa_slave_get_eth_phy_stats(struct net_device *dev, - struct ethtool_eth_phy_stats *phy_stats) +static void dsa_user_get_eth_phy_stats(struct net_device *dev, + struct ethtool_eth_phy_stats *phy_stats) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch *ds = dp->ds; if (ds->ops->get_eth_phy_stats) ds->ops->get_eth_phy_stats(ds, dp->index, phy_stats); } -static void dsa_slave_get_eth_mac_stats(struct net_device *dev, - struct ethtool_eth_mac_stats *mac_stats) +static void dsa_user_get_eth_mac_stats(struct net_device *dev, + struct ethtool_eth_mac_stats *mac_stats) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch *ds = dp->ds; if (ds->ops->get_eth_mac_stats) @@ -1143,10 +1150,10 @@ static void dsa_slave_get_eth_mac_stats(struct net_device *dev, } static void 
-dsa_slave_get_eth_ctrl_stats(struct net_device *dev, - struct ethtool_eth_ctrl_stats *ctrl_stats) +dsa_user_get_eth_ctrl_stats(struct net_device *dev, + struct ethtool_eth_ctrl_stats *ctrl_stats) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch *ds = dp->ds; if (ds->ops->get_eth_ctrl_stats) @@ -1154,21 +1161,21 @@ dsa_slave_get_eth_ctrl_stats(struct net_device *dev, } static void -dsa_slave_get_rmon_stats(struct net_device *dev, - struct ethtool_rmon_stats *rmon_stats, - const struct ethtool_rmon_hist_range **ranges) +dsa_user_get_rmon_stats(struct net_device *dev, + struct ethtool_rmon_stats *rmon_stats, + const struct ethtool_rmon_hist_range **ranges) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch *ds = dp->ds; if (ds->ops->get_rmon_stats) ds->ops->get_rmon_stats(ds, dp->index, rmon_stats, ranges); } -static void dsa_slave_net_selftest(struct net_device *ndev, - struct ethtool_test *etest, u64 *buf) +static void dsa_user_net_selftest(struct net_device *ndev, + struct ethtool_test *etest, u64 *buf) { - struct dsa_port *dp = dsa_slave_to_port(ndev); + struct dsa_port *dp = dsa_user_to_port(ndev); struct dsa_switch *ds = dp->ds; if (ds->ops->self_test) { @@ -1179,10 +1186,10 @@ static void dsa_slave_net_selftest(struct net_device *ndev, net_selftest(ndev, etest, buf); } -static int dsa_slave_get_mm(struct net_device *dev, - struct ethtool_mm_state *state) +static int dsa_user_get_mm(struct net_device *dev, + struct ethtool_mm_state *state) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch *ds = dp->ds; if (!ds->ops->get_mm) @@ -1191,10 +1198,10 @@ static int dsa_slave_get_mm(struct net_device *dev, return ds->ops->get_mm(ds, dp->index, state); } -static int dsa_slave_set_mm(struct net_device *dev, struct ethtool_mm_cfg *cfg, - struct netlink_ext_ack *extack) +static int dsa_user_set_mm(struct net_device *dev, struct ethtool_mm_cfg *cfg, + struct netlink_ext_ack *extack) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch *ds = dp->ds; if (!ds->ops->set_mm) @@ -1203,19 +1210,19 @@ static int dsa_slave_set_mm(struct net_device *dev, struct ethtool_mm_cfg *cfg, return ds->ops->set_mm(ds, dp->index, cfg, extack); } -static void dsa_slave_get_mm_stats(struct net_device *dev, - struct ethtool_mm_stats *stats) +static void dsa_user_get_mm_stats(struct net_device *dev, + struct ethtool_mm_stats *stats) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch *ds = dp->ds; if (ds->ops->get_mm_stats) ds->ops->get_mm_stats(ds, dp->index, stats); } -static void dsa_slave_get_wol(struct net_device *dev, struct ethtool_wolinfo *w) +static void dsa_user_get_wol(struct net_device *dev, struct ethtool_wolinfo *w) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch *ds = dp->ds; phylink_ethtool_get_wol(dp->pl, w); @@ -1224,9 +1231,9 @@ static void dsa_slave_get_wol(struct net_device *dev, struct ethtool_wolinfo *w) ds->ops->get_wol(ds, dp->index, w); } -static int dsa_slave_set_wol(struct net_device *dev, struct ethtool_wolinfo *w) +static int dsa_user_set_wol(struct net_device *dev, struct ethtool_wolinfo *w) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch *ds = 
dp->ds; int ret = -EOPNOTSUPP; @@ -1238,9 +1245,9 @@ static int dsa_slave_set_wol(struct net_device *dev, struct ethtool_wolinfo *w) return ret; } -static int dsa_slave_set_eee(struct net_device *dev, struct ethtool_eee *e) +static int dsa_user_set_eee(struct net_device *dev, struct ethtool_eee *e) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch *ds = dp->ds; int ret; @@ -1258,9 +1265,9 @@ static int dsa_slave_set_eee(struct net_device *dev, struct ethtool_eee *e) return phylink_ethtool_set_eee(dp->pl, e); } -static int dsa_slave_get_eee(struct net_device *dev, struct ethtool_eee *e) +static int dsa_user_get_eee(struct net_device *dev, struct ethtool_eee *e) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch *ds = dp->ds; int ret; @@ -1278,54 +1285,54 @@ static int dsa_slave_get_eee(struct net_device *dev, struct ethtool_eee *e) return phylink_ethtool_get_eee(dp->pl, e); } -static int dsa_slave_get_link_ksettings(struct net_device *dev, - struct ethtool_link_ksettings *cmd) +static int dsa_user_get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *cmd) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); return phylink_ethtool_ksettings_get(dp->pl, cmd); } -static int dsa_slave_set_link_ksettings(struct net_device *dev, - const struct ethtool_link_ksettings *cmd) +static int dsa_user_set_link_ksettings(struct net_device *dev, + const struct ethtool_link_ksettings *cmd) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); return phylink_ethtool_ksettings_set(dp->pl, cmd); } -static void dsa_slave_get_pause_stats(struct net_device *dev, - struct ethtool_pause_stats *pause_stats) +static void dsa_user_get_pause_stats(struct net_device *dev, + struct ethtool_pause_stats *pause_stats) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch *ds = dp->ds; if (ds->ops->get_pause_stats) ds->ops->get_pause_stats(ds, dp->index, pause_stats); } -static void dsa_slave_get_pauseparam(struct net_device *dev, - struct ethtool_pauseparam *pause) +static void dsa_user_get_pauseparam(struct net_device *dev, + struct ethtool_pauseparam *pause) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); phylink_ethtool_get_pauseparam(dp->pl, pause); } -static int dsa_slave_set_pauseparam(struct net_device *dev, - struct ethtool_pauseparam *pause) +static int dsa_user_set_pauseparam(struct net_device *dev, + struct ethtool_pauseparam *pause) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); return phylink_ethtool_set_pauseparam(dp->pl, pause); } #ifdef CONFIG_NET_POLL_CONTROLLER -static int dsa_slave_netpoll_setup(struct net_device *dev, - struct netpoll_info *ni) +static int dsa_user_netpoll_setup(struct net_device *dev, + struct netpoll_info *ni) { - struct net_device *master = dsa_slave_to_master(dev); - struct dsa_slave_priv *p = netdev_priv(dev); + struct net_device *conduit = dsa_user_to_conduit(dev); + struct dsa_user_priv *p = netdev_priv(dev); struct netpoll *netpoll; int err = 0; @@ -1333,7 +1340,7 @@ static int dsa_slave_netpoll_setup(struct net_device *dev, if (!netpoll) return -ENOMEM; - err = __netpoll_setup(netpoll, master); + err = __netpoll_setup(netpoll, conduit); if (err) { kfree(netpoll); goto out; @@ -1344,9 
+1351,9 @@ out: return err; } -static void dsa_slave_netpoll_cleanup(struct net_device *dev) +static void dsa_user_netpoll_cleanup(struct net_device *dev) { - struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_user_priv *p = netdev_priv(dev); struct netpoll *netpoll = p->netpoll; if (!netpoll) @@ -1357,15 +1364,15 @@ static void dsa_slave_netpoll_cleanup(struct net_device *dev) __netpoll_free(netpoll); } -static void dsa_slave_poll_controller(struct net_device *dev) +static void dsa_user_poll_controller(struct net_device *dev) { } #endif static struct dsa_mall_tc_entry * -dsa_slave_mall_tc_entry_find(struct net_device *dev, unsigned long cookie) +dsa_user_mall_tc_entry_find(struct net_device *dev, unsigned long cookie) { - struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_user_priv *p = netdev_priv(dev); struct dsa_mall_tc_entry *mall_tc_entry; list_for_each_entry(mall_tc_entry, &p->mall_tc_list, list) @@ -1376,13 +1383,13 @@ dsa_slave_mall_tc_entry_find(struct net_device *dev, unsigned long cookie) } static int -dsa_slave_add_cls_matchall_mirred(struct net_device *dev, - struct tc_cls_matchall_offload *cls, - bool ingress) +dsa_user_add_cls_matchall_mirred(struct net_device *dev, + struct tc_cls_matchall_offload *cls, + bool ingress) { struct netlink_ext_ack *extack = cls->common.extack; - struct dsa_port *dp = dsa_slave_to_port(dev); - struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_port *dp = dsa_user_to_port(dev); + struct dsa_user_priv *p = netdev_priv(dev); struct dsa_mall_mirror_tc_entry *mirror; struct dsa_mall_tc_entry *mall_tc_entry; struct dsa_switch *ds = dp->ds; @@ -1402,7 +1409,7 @@ dsa_slave_add_cls_matchall_mirred(struct net_device *dev, if (!act->dev) return -EINVAL; - if (!dsa_slave_dev_check(act->dev)) + if (!dsa_user_dev_check(act->dev)) return -EOPNOTSUPP; mall_tc_entry = kzalloc(sizeof(*mall_tc_entry), GFP_KERNEL); @@ -1413,7 +1420,7 @@ dsa_slave_add_cls_matchall_mirred(struct net_device *dev, mall_tc_entry->type = DSA_PORT_MALL_MIRROR; mirror = &mall_tc_entry->mirror; - to_dp = dsa_slave_to_port(act->dev); + to_dp = dsa_user_to_port(act->dev); mirror->to_local_port = to_dp->index; mirror->ingress = ingress; @@ -1430,13 +1437,13 @@ dsa_slave_add_cls_matchall_mirred(struct net_device *dev, } static int -dsa_slave_add_cls_matchall_police(struct net_device *dev, - struct tc_cls_matchall_offload *cls, - bool ingress) +dsa_user_add_cls_matchall_police(struct net_device *dev, + struct tc_cls_matchall_offload *cls, + bool ingress) { struct netlink_ext_ack *extack = cls->common.extack; - struct dsa_port *dp = dsa_slave_to_port(dev); - struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_port *dp = dsa_user_to_port(dev); + struct dsa_user_priv *p = netdev_priv(dev); struct dsa_mall_policer_tc_entry *policer; struct dsa_mall_tc_entry *mall_tc_entry; struct dsa_switch *ds = dp->ds; @@ -1490,31 +1497,31 @@ dsa_slave_add_cls_matchall_police(struct net_device *dev, return err; } -static int dsa_slave_add_cls_matchall(struct net_device *dev, - struct tc_cls_matchall_offload *cls, - bool ingress) +static int dsa_user_add_cls_matchall(struct net_device *dev, + struct tc_cls_matchall_offload *cls, + bool ingress) { int err = -EOPNOTSUPP; if (cls->common.protocol == htons(ETH_P_ALL) && flow_offload_has_one_action(&cls->rule->action) && cls->rule->action.entries[0].id == FLOW_ACTION_MIRRED) - err = dsa_slave_add_cls_matchall_mirred(dev, cls, ingress); + err = dsa_user_add_cls_matchall_mirred(dev, cls, ingress); else if 
(flow_offload_has_one_action(&cls->rule->action) && cls->rule->action.entries[0].id == FLOW_ACTION_POLICE) - err = dsa_slave_add_cls_matchall_police(dev, cls, ingress); + err = dsa_user_add_cls_matchall_police(dev, cls, ingress); return err; } -static void dsa_slave_del_cls_matchall(struct net_device *dev, - struct tc_cls_matchall_offload *cls) +static void dsa_user_del_cls_matchall(struct net_device *dev, + struct tc_cls_matchall_offload *cls) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_mall_tc_entry *mall_tc_entry; struct dsa_switch *ds = dp->ds; - mall_tc_entry = dsa_slave_mall_tc_entry_find(dev, cls->cookie); + mall_tc_entry = dsa_user_mall_tc_entry_find(dev, cls->cookie); if (!mall_tc_entry) return; @@ -1537,29 +1544,29 @@ static void dsa_slave_del_cls_matchall(struct net_device *dev, kfree(mall_tc_entry); } -static int dsa_slave_setup_tc_cls_matchall(struct net_device *dev, - struct tc_cls_matchall_offload *cls, - bool ingress) +static int dsa_user_setup_tc_cls_matchall(struct net_device *dev, + struct tc_cls_matchall_offload *cls, + bool ingress) { if (cls->common.chain_index) return -EOPNOTSUPP; switch (cls->command) { case TC_CLSMATCHALL_REPLACE: - return dsa_slave_add_cls_matchall(dev, cls, ingress); + return dsa_user_add_cls_matchall(dev, cls, ingress); case TC_CLSMATCHALL_DESTROY: - dsa_slave_del_cls_matchall(dev, cls); + dsa_user_del_cls_matchall(dev, cls); return 0; default: return -EOPNOTSUPP; } } -static int dsa_slave_add_cls_flower(struct net_device *dev, - struct flow_cls_offload *cls, - bool ingress) +static int dsa_user_add_cls_flower(struct net_device *dev, + struct flow_cls_offload *cls, + bool ingress) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch *ds = dp->ds; int port = dp->index; @@ -1569,11 +1576,11 @@ static int dsa_slave_add_cls_flower(struct net_device *dev, return ds->ops->cls_flower_add(ds, port, cls, ingress); } -static int dsa_slave_del_cls_flower(struct net_device *dev, - struct flow_cls_offload *cls, - bool ingress) +static int dsa_user_del_cls_flower(struct net_device *dev, + struct flow_cls_offload *cls, + bool ingress) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch *ds = dp->ds; int port = dp->index; @@ -1583,11 +1590,11 @@ static int dsa_slave_del_cls_flower(struct net_device *dev, return ds->ops->cls_flower_del(ds, port, cls, ingress); } -static int dsa_slave_stats_cls_flower(struct net_device *dev, - struct flow_cls_offload *cls, - bool ingress) +static int dsa_user_stats_cls_flower(struct net_device *dev, + struct flow_cls_offload *cls, + bool ingress) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch *ds = dp->ds; int port = dp->index; @@ -1597,24 +1604,24 @@ static int dsa_slave_stats_cls_flower(struct net_device *dev, return ds->ops->cls_flower_stats(ds, port, cls, ingress); } -static int dsa_slave_setup_tc_cls_flower(struct net_device *dev, - struct flow_cls_offload *cls, - bool ingress) +static int dsa_user_setup_tc_cls_flower(struct net_device *dev, + struct flow_cls_offload *cls, + bool ingress) { switch (cls->command) { case FLOW_CLS_REPLACE: - return dsa_slave_add_cls_flower(dev, cls, ingress); + return dsa_user_add_cls_flower(dev, cls, ingress); case FLOW_CLS_DESTROY: - return dsa_slave_del_cls_flower(dev, cls, ingress); + return dsa_user_del_cls_flower(dev, cls, ingress); 
case FLOW_CLS_STATS: - return dsa_slave_stats_cls_flower(dev, cls, ingress); + return dsa_user_stats_cls_flower(dev, cls, ingress); default: return -EOPNOTSUPP; } } -static int dsa_slave_setup_tc_block_cb(enum tc_setup_type type, void *type_data, - void *cb_priv, bool ingress) +static int dsa_user_setup_tc_block_cb(enum tc_setup_type type, void *type_data, + void *cb_priv, bool ingress) { struct net_device *dev = cb_priv; @@ -1623,46 +1630,46 @@ static int dsa_slave_setup_tc_block_cb(enum tc_setup_type type, void *type_data, switch (type) { case TC_SETUP_CLSMATCHALL: - return dsa_slave_setup_tc_cls_matchall(dev, type_data, ingress); + return dsa_user_setup_tc_cls_matchall(dev, type_data, ingress); case TC_SETUP_CLSFLOWER: - return dsa_slave_setup_tc_cls_flower(dev, type_data, ingress); + return dsa_user_setup_tc_cls_flower(dev, type_data, ingress); default: return -EOPNOTSUPP; } } -static int dsa_slave_setup_tc_block_cb_ig(enum tc_setup_type type, - void *type_data, void *cb_priv) +static int dsa_user_setup_tc_block_cb_ig(enum tc_setup_type type, + void *type_data, void *cb_priv) { - return dsa_slave_setup_tc_block_cb(type, type_data, cb_priv, true); + return dsa_user_setup_tc_block_cb(type, type_data, cb_priv, true); } -static int dsa_slave_setup_tc_block_cb_eg(enum tc_setup_type type, - void *type_data, void *cb_priv) +static int dsa_user_setup_tc_block_cb_eg(enum tc_setup_type type, + void *type_data, void *cb_priv) { - return dsa_slave_setup_tc_block_cb(type, type_data, cb_priv, false); + return dsa_user_setup_tc_block_cb(type, type_data, cb_priv, false); } -static LIST_HEAD(dsa_slave_block_cb_list); +static LIST_HEAD(dsa_user_block_cb_list); -static int dsa_slave_setup_tc_block(struct net_device *dev, - struct flow_block_offload *f) +static int dsa_user_setup_tc_block(struct net_device *dev, + struct flow_block_offload *f) { struct flow_block_cb *block_cb; flow_setup_cb_t *cb; if (f->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS) - cb = dsa_slave_setup_tc_block_cb_ig; + cb = dsa_user_setup_tc_block_cb_ig; else if (f->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS) - cb = dsa_slave_setup_tc_block_cb_eg; + cb = dsa_user_setup_tc_block_cb_eg; else return -EOPNOTSUPP; - f->driver_block_list = &dsa_slave_block_cb_list; + f->driver_block_list = &dsa_user_block_cb_list; switch (f->command) { case FLOW_BLOCK_BIND: - if (flow_block_cb_is_busy(cb, dev, &dsa_slave_block_cb_list)) + if (flow_block_cb_is_busy(cb, dev, &dsa_user_block_cb_list)) return -EBUSY; block_cb = flow_block_cb_alloc(cb, dev, dev, NULL); @@ -1670,7 +1677,7 @@ static int dsa_slave_setup_tc_block(struct net_device *dev, return PTR_ERR(block_cb); flow_block_cb_add(block_cb, f); - list_add_tail(&block_cb->driver_list, &dsa_slave_block_cb_list); + list_add_tail(&block_cb->driver_list, &dsa_user_block_cb_list); return 0; case FLOW_BLOCK_UNBIND: block_cb = flow_block_cb_lookup(f->block, cb, dev); @@ -1685,28 +1692,28 @@ static int dsa_slave_setup_tc_block(struct net_device *dev, } } -static int dsa_slave_setup_ft_block(struct dsa_switch *ds, int port, - void *type_data) +static int dsa_user_setup_ft_block(struct dsa_switch *ds, int port, + void *type_data) { - struct net_device *master = dsa_port_to_master(dsa_to_port(ds, port)); + struct net_device *conduit = dsa_port_to_conduit(dsa_to_port(ds, port)); - if (!master->netdev_ops->ndo_setup_tc) + if (!conduit->netdev_ops->ndo_setup_tc) return -EOPNOTSUPP; - return master->netdev_ops->ndo_setup_tc(master, TC_SETUP_FT, type_data); + return 
conduit->netdev_ops->ndo_setup_tc(conduit, TC_SETUP_FT, type_data); } -static int dsa_slave_setup_tc(struct net_device *dev, enum tc_setup_type type, - void *type_data) +static int dsa_user_setup_tc(struct net_device *dev, enum tc_setup_type type, + void *type_data) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch *ds = dp->ds; switch (type) { case TC_SETUP_BLOCK: - return dsa_slave_setup_tc_block(dev, type_data); + return dsa_user_setup_tc_block(dev, type_data); case TC_SETUP_FT: - return dsa_slave_setup_ft_block(ds, dp->index, type_data); + return dsa_user_setup_ft_block(ds, dp->index, type_data); default: break; } @@ -1717,10 +1724,10 @@ static int dsa_slave_setup_tc(struct net_device *dev, enum tc_setup_type type, return ds->ops->port_setup_tc(ds, dp->index, type, type_data); } -static int dsa_slave_get_rxnfc(struct net_device *dev, - struct ethtool_rxnfc *nfc, u32 *rule_locs) +static int dsa_user_get_rxnfc(struct net_device *dev, + struct ethtool_rxnfc *nfc, u32 *rule_locs) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch *ds = dp->ds; if (!ds->ops->get_rxnfc) @@ -1729,10 +1736,10 @@ static int dsa_slave_get_rxnfc(struct net_device *dev, return ds->ops->get_rxnfc(ds, dp->index, nfc, rule_locs); } -static int dsa_slave_set_rxnfc(struct net_device *dev, - struct ethtool_rxnfc *nfc) +static int dsa_user_set_rxnfc(struct net_device *dev, + struct ethtool_rxnfc *nfc) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch *ds = dp->ds; if (!ds->ops->set_rxnfc) @@ -1741,10 +1748,10 @@ static int dsa_slave_set_rxnfc(struct net_device *dev, return ds->ops->set_rxnfc(ds, dp->index, nfc); } -static int dsa_slave_get_ts_info(struct net_device *dev, - struct ethtool_ts_info *ts) +static int dsa_user_get_ts_info(struct net_device *dev, + struct ethtool_ts_info *ts) { - struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_user_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->dp->ds; if (!ds->ops->get_ts_info) @@ -1753,10 +1760,10 @@ static int dsa_slave_get_ts_info(struct net_device *dev, return ds->ops->get_ts_info(ds, p->dp->index, ts); } -static int dsa_slave_vlan_rx_add_vid(struct net_device *dev, __be16 proto, - u16 vid) +static int dsa_user_vlan_rx_add_vid(struct net_device *dev, __be16 proto, + u16 vid) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); struct switchdev_obj_port_vlan vlan = { .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN, .vid = vid, @@ -1803,15 +1810,15 @@ static int dsa_slave_vlan_rx_add_vid(struct net_device *dev, __be16 proto, if (dsa_switch_supports_mc_filtering(ds)) { netdev_for_each_synced_mc_addr(ha, dev) { - dsa_slave_schedule_standalone_work(dev, DSA_MC_ADD, - ha->addr, vid); + dsa_user_schedule_standalone_work(dev, DSA_MC_ADD, + ha->addr, vid); } } if (dsa_switch_supports_uc_filtering(ds)) { netdev_for_each_synced_uc_addr(ha, dev) { - dsa_slave_schedule_standalone_work(dev, DSA_UC_ADD, - ha->addr, vid); + dsa_user_schedule_standalone_work(dev, DSA_UC_ADD, + ha->addr, vid); } } @@ -1828,10 +1835,10 @@ rollback: return ret; } -static int dsa_slave_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, - u16 vid) +static int dsa_user_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, + u16 vid) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); struct switchdev_obj_port_vlan vlan = { 
.vid = vid, /* This API only allows programming tagged, non-PVID VIDs */ @@ -1867,15 +1874,15 @@ static int dsa_slave_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, if (dsa_switch_supports_mc_filtering(ds)) { netdev_for_each_synced_mc_addr(ha, dev) { - dsa_slave_schedule_standalone_work(dev, DSA_MC_DEL, - ha->addr, vid); + dsa_user_schedule_standalone_work(dev, DSA_MC_DEL, + ha->addr, vid); } } if (dsa_switch_supports_uc_filtering(ds)) { netdev_for_each_synced_uc_addr(ha, dev) { - dsa_slave_schedule_standalone_work(dev, DSA_UC_DEL, - ha->addr, vid); + dsa_user_schedule_standalone_work(dev, DSA_UC_DEL, + ha->addr, vid); } } @@ -1886,18 +1893,18 @@ static int dsa_slave_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, return 0; } -static int dsa_slave_restore_vlan(struct net_device *vdev, int vid, void *arg) +static int dsa_user_restore_vlan(struct net_device *vdev, int vid, void *arg) { __be16 proto = vdev ? vlan_dev_vlan_proto(vdev) : htons(ETH_P_8021Q); - return dsa_slave_vlan_rx_add_vid(arg, proto, vid); + return dsa_user_vlan_rx_add_vid(arg, proto, vid); } -static int dsa_slave_clear_vlan(struct net_device *vdev, int vid, void *arg) +static int dsa_user_clear_vlan(struct net_device *vdev, int vid, void *arg) { __be16 proto = vdev ? vlan_dev_vlan_proto(vdev) : htons(ETH_P_8021Q); - return dsa_slave_vlan_rx_kill_vid(arg, proto, vid); + return dsa_user_vlan_rx_kill_vid(arg, proto, vid); } /* Keep the VLAN RX filtering list in sync with the hardware only if VLAN @@ -1931,26 +1938,26 @@ static int dsa_slave_clear_vlan(struct net_device *vdev, int vid, void *arg) * - the bridge VLANs * - the 8021q upper VLANs */ -int dsa_slave_manage_vlan_filtering(struct net_device *slave, - bool vlan_filtering) +int dsa_user_manage_vlan_filtering(struct net_device *user, + bool vlan_filtering) { int err; if (vlan_filtering) { - slave->features |= NETIF_F_HW_VLAN_CTAG_FILTER; + user->features |= NETIF_F_HW_VLAN_CTAG_FILTER; - err = vlan_for_each(slave, dsa_slave_restore_vlan, slave); + err = vlan_for_each(user, dsa_user_restore_vlan, user); if (err) { - vlan_for_each(slave, dsa_slave_clear_vlan, slave); - slave->features &= ~NETIF_F_HW_VLAN_CTAG_FILTER; + vlan_for_each(user, dsa_user_clear_vlan, user); + user->features &= ~NETIF_F_HW_VLAN_CTAG_FILTER; return err; } } else { - err = vlan_for_each(slave, dsa_slave_clear_vlan, slave); + err = vlan_for_each(user, dsa_user_clear_vlan, user); if (err) return err; - slave->features &= ~NETIF_F_HW_VLAN_CTAG_FILTER; + user->features &= ~NETIF_F_HW_VLAN_CTAG_FILTER; } return 0; @@ -2021,7 +2028,7 @@ static void dsa_bridge_mtu_normalization(struct dsa_port *dp) list_for_each_entry(dst, &dsa_tree_list, list) { list_for_each_entry(other_dp, &dst->ports, list) { struct dsa_hw_port *hw_port; - struct net_device *slave; + struct net_device *user; if (other_dp->type != DSA_PORT_TYPE_USER) continue; @@ -2032,17 +2039,17 @@ static void dsa_bridge_mtu_normalization(struct dsa_port *dp) if (!other_dp->ds->mtu_enforcement_ingress) continue; - slave = other_dp->slave; + user = other_dp->user; - if (min_mtu > slave->mtu) - min_mtu = slave->mtu; + if (min_mtu > user->mtu) + min_mtu = user->mtu; hw_port = kzalloc(sizeof(*hw_port), GFP_KERNEL); if (!hw_port) goto out; - hw_port->dev = slave; - hw_port->old_mtu = slave->mtu; + hw_port->dev = user; + hw_port->old_mtu = user->mtu; list_add(&hw_port->list, &hw_port_list); } @@ -2052,7 +2059,7 @@ static void dsa_bridge_mtu_normalization(struct dsa_port *dp) * interface's MTU first, regardless of whether the intention of the * 
user was to raise or lower it. */ - err = dsa_hw_port_list_set_mtu(&hw_port_list, dp->slave->mtu); + err = dsa_hw_port_list_set_mtu(&hw_port_list, dp->user->mtu); if (!err) goto out; @@ -2066,16 +2073,16 @@ out: dsa_hw_port_list_free(&hw_port_list); } -int dsa_slave_change_mtu(struct net_device *dev, int new_mtu) +int dsa_user_change_mtu(struct net_device *dev, int new_mtu) { - struct net_device *master = dsa_slave_to_master(dev); - struct dsa_port *dp = dsa_slave_to_port(dev); + struct net_device *conduit = dsa_user_to_conduit(dev); + struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_port *cpu_dp = dp->cpu_dp; struct dsa_switch *ds = dp->ds; struct dsa_port *other_dp; int largest_mtu = 0; - int new_master_mtu; - int old_master_mtu; + int new_conduit_mtu; + int old_conduit_mtu; int mtu_limit; int overhead; int cpu_mtu; @@ -2085,44 +2092,44 @@ int dsa_slave_change_mtu(struct net_device *dev, int new_mtu) return -EOPNOTSUPP; dsa_tree_for_each_user_port(other_dp, ds->dst) { - int slave_mtu; + int user_mtu; - /* During probe, this function will be called for each slave + /* During probe, this function will be called for each user * device, while not all of them have been allocated. That's * ok, it doesn't change what the maximum is, so ignore it. */ - if (!other_dp->slave) + if (!other_dp->user) continue; /* Pretend that we already applied the setting, which we * actually haven't (still haven't done all integrity checks) */ if (dp == other_dp) - slave_mtu = new_mtu; + user_mtu = new_mtu; else - slave_mtu = other_dp->slave->mtu; + user_mtu = other_dp->user->mtu; - if (largest_mtu < slave_mtu) - largest_mtu = slave_mtu; + if (largest_mtu < user_mtu) + largest_mtu = user_mtu; } overhead = dsa_tag_protocol_overhead(cpu_dp->tag_ops); - mtu_limit = min_t(int, master->max_mtu, dev->max_mtu + overhead); - old_master_mtu = master->mtu; - new_master_mtu = largest_mtu + overhead; - if (new_master_mtu > mtu_limit) + mtu_limit = min_t(int, conduit->max_mtu, dev->max_mtu + overhead); + old_conduit_mtu = conduit->mtu; + new_conduit_mtu = largest_mtu + overhead; + if (new_conduit_mtu > mtu_limit) return -ERANGE; - /* If the master MTU isn't over limit, there's no need to check the CPU + /* If the conduit MTU isn't over limit, there's no need to check the CPU * MTU, since that surely isn't either. */ cpu_mtu = largest_mtu; /* Start applying stuff */ - if (new_master_mtu != old_master_mtu) { - err = dev_set_mtu(master, new_master_mtu); + if (new_conduit_mtu != old_conduit_mtu) { + err = dev_set_mtu(conduit, new_conduit_mtu); if (err < 0) - goto out_master_failed; + goto out_conduit_failed; /* We only need to propagate the MTU of the CPU port to * upstream switches, so emit a notifier which updates them. 
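
For readers tracking the dsa_user_change_mtu() hunks above: the conduit interface must carry the largest user-port MTU plus the tagging protocol overhead, and is itself capped by its own max_mtu. Below is a minimal standalone sketch of that arithmetic, with hypothetical constants standing in for live netdev state (largest_user_mtu, overhead, and both max_mtu values are illustrative assumptions, not kernel API):

/* gcc -o mtu_sketch mtu_sketch.c && ./mtu_sketch */
#include <stdio.h>

static int min_int(int a, int b)
{
	return a < b ? a : b;
}

int main(void)
{
	int largest_user_mtu = 9000; /* largest MTU among all user ports (assumed) */
	int overhead = 8;            /* tagger headroom + tailroom (assumed) */
	int conduit_max_mtu = 9216;  /* conduit->max_mtu (assumed) */
	int user_max_mtu = 9208;     /* dev->max_mtu of the user port (assumed) */

	/* The conduit MTU must fit the largest tagged user frame, but may
	 * not exceed what either the conduit or the user port supports.
	 */
	int mtu_limit = min_int(conduit_max_mtu, user_max_mtu + overhead);
	int new_conduit_mtu = largest_user_mtu + overhead;

	if (new_conduit_mtu > mtu_limit) {
		printf("reject with -ERANGE (%d > %d)\n",
		       new_conduit_mtu, mtu_limit);
		return 1;
	}

	printf("conduit MTU: %d = user %d + tag overhead %d\n",
	       new_conduit_mtu, largest_user_mtu, overhead);
	return 0;
}
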
@@ -2143,19 +2150,19 @@ int dsa_slave_change_mtu(struct net_device *dev, int new_mtu) return 0; out_port_failed: - if (new_master_mtu != old_master_mtu) - dsa_port_mtu_change(cpu_dp, old_master_mtu - overhead); + if (new_conduit_mtu != old_conduit_mtu) + dsa_port_mtu_change(cpu_dp, old_conduit_mtu - overhead); out_cpu_failed: - if (new_master_mtu != old_master_mtu) - dev_set_mtu(master, old_master_mtu); -out_master_failed: + if (new_conduit_mtu != old_conduit_mtu) + dev_set_mtu(conduit, old_conduit_mtu); +out_conduit_failed: return err; } static int __maybe_unused -dsa_slave_dcbnl_set_default_prio(struct net_device *dev, struct dcb_app *app) +dsa_user_dcbnl_set_default_prio(struct net_device *dev, struct dcb_app *app) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch *ds = dp->ds; unsigned long mask, new_prio; int err, port = dp->index; @@ -2180,9 +2187,9 @@ dsa_slave_dcbnl_set_default_prio(struct net_device *dev, struct dcb_app *app) } static int __maybe_unused -dsa_slave_dcbnl_add_dscp_prio(struct net_device *dev, struct dcb_app *app) +dsa_user_dcbnl_add_dscp_prio(struct net_device *dev, struct dcb_app *app) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch *ds = dp->ds; unsigned long mask, new_prio; int err, port = dp->index; @@ -2213,29 +2220,29 @@ dsa_slave_dcbnl_add_dscp_prio(struct net_device *dev, struct dcb_app *app) return 0; } -static int __maybe_unused dsa_slave_dcbnl_ieee_setapp(struct net_device *dev, - struct dcb_app *app) +static int __maybe_unused dsa_user_dcbnl_ieee_setapp(struct net_device *dev, + struct dcb_app *app) { switch (app->selector) { case IEEE_8021QAZ_APP_SEL_ETHERTYPE: switch (app->protocol) { case 0: - return dsa_slave_dcbnl_set_default_prio(dev, app); + return dsa_user_dcbnl_set_default_prio(dev, app); default: return -EOPNOTSUPP; } break; case IEEE_8021QAZ_APP_SEL_DSCP: - return dsa_slave_dcbnl_add_dscp_prio(dev, app); + return dsa_user_dcbnl_add_dscp_prio(dev, app); default: return -EOPNOTSUPP; } } static int __maybe_unused -dsa_slave_dcbnl_del_default_prio(struct net_device *dev, struct dcb_app *app) +dsa_user_dcbnl_del_default_prio(struct net_device *dev, struct dcb_app *app) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch *ds = dp->ds; unsigned long mask, new_prio; int err, port = dp->index; @@ -2260,9 +2267,9 @@ dsa_slave_dcbnl_del_default_prio(struct net_device *dev, struct dcb_app *app) } static int __maybe_unused -dsa_slave_dcbnl_del_dscp_prio(struct net_device *dev, struct dcb_app *app) +dsa_user_dcbnl_del_dscp_prio(struct net_device *dev, struct dcb_app *app) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch *ds = dp->ds; int err, port = dp->index; u8 dscp = app->protocol; @@ -2283,20 +2290,20 @@ dsa_slave_dcbnl_del_dscp_prio(struct net_device *dev, struct dcb_app *app) return 0; } -static int __maybe_unused dsa_slave_dcbnl_ieee_delapp(struct net_device *dev, - struct dcb_app *app) +static int __maybe_unused dsa_user_dcbnl_ieee_delapp(struct net_device *dev, + struct dcb_app *app) { switch (app->selector) { case IEEE_8021QAZ_APP_SEL_ETHERTYPE: switch (app->protocol) { case 0: - return dsa_slave_dcbnl_del_default_prio(dev, app); + return dsa_user_dcbnl_del_default_prio(dev, app); default: return -EOPNOTSUPP; } break; case IEEE_8021QAZ_APP_SEL_DSCP: - return 
dsa_slave_dcbnl_del_dscp_prio(dev, app); + return dsa_user_dcbnl_del_dscp_prio(dev, app); default: return -EOPNOTSUPP; } @@ -2305,9 +2312,9 @@ static int __maybe_unused dsa_slave_dcbnl_ieee_delapp(struct net_device *dev, /* Pre-populate the DCB application priority table with the priorities * configured during switch setup, which we read from hardware here. */ -static int dsa_slave_dcbnl_init(struct net_device *dev) +static int dsa_user_dcbnl_init(struct net_device *dev) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch *ds = dp->ds; int port = dp->index; int err; @@ -2355,49 +2362,49 @@ static int dsa_slave_dcbnl_init(struct net_device *dev) return 0; } -static const struct ethtool_ops dsa_slave_ethtool_ops = { - .get_drvinfo = dsa_slave_get_drvinfo, - .get_regs_len = dsa_slave_get_regs_len, - .get_regs = dsa_slave_get_regs, - .nway_reset = dsa_slave_nway_reset, +static const struct ethtool_ops dsa_user_ethtool_ops = { + .get_drvinfo = dsa_user_get_drvinfo, + .get_regs_len = dsa_user_get_regs_len, + .get_regs = dsa_user_get_regs, + .nway_reset = dsa_user_nway_reset, .get_link = ethtool_op_get_link, - .get_eeprom_len = dsa_slave_get_eeprom_len, - .get_eeprom = dsa_slave_get_eeprom, - .set_eeprom = dsa_slave_set_eeprom, - .get_strings = dsa_slave_get_strings, - .get_ethtool_stats = dsa_slave_get_ethtool_stats, - .get_sset_count = dsa_slave_get_sset_count, - .get_eth_phy_stats = dsa_slave_get_eth_phy_stats, - .get_eth_mac_stats = dsa_slave_get_eth_mac_stats, - .get_eth_ctrl_stats = dsa_slave_get_eth_ctrl_stats, - .get_rmon_stats = dsa_slave_get_rmon_stats, - .set_wol = dsa_slave_set_wol, - .get_wol = dsa_slave_get_wol, - .set_eee = dsa_slave_set_eee, - .get_eee = dsa_slave_get_eee, - .get_link_ksettings = dsa_slave_get_link_ksettings, - .set_link_ksettings = dsa_slave_set_link_ksettings, - .get_pause_stats = dsa_slave_get_pause_stats, - .get_pauseparam = dsa_slave_get_pauseparam, - .set_pauseparam = dsa_slave_set_pauseparam, - .get_rxnfc = dsa_slave_get_rxnfc, - .set_rxnfc = dsa_slave_set_rxnfc, - .get_ts_info = dsa_slave_get_ts_info, - .self_test = dsa_slave_net_selftest, - .get_mm = dsa_slave_get_mm, - .set_mm = dsa_slave_set_mm, - .get_mm_stats = dsa_slave_get_mm_stats, + .get_eeprom_len = dsa_user_get_eeprom_len, + .get_eeprom = dsa_user_get_eeprom, + .set_eeprom = dsa_user_set_eeprom, + .get_strings = dsa_user_get_strings, + .get_ethtool_stats = dsa_user_get_ethtool_stats, + .get_sset_count = dsa_user_get_sset_count, + .get_eth_phy_stats = dsa_user_get_eth_phy_stats, + .get_eth_mac_stats = dsa_user_get_eth_mac_stats, + .get_eth_ctrl_stats = dsa_user_get_eth_ctrl_stats, + .get_rmon_stats = dsa_user_get_rmon_stats, + .set_wol = dsa_user_set_wol, + .get_wol = dsa_user_get_wol, + .set_eee = dsa_user_set_eee, + .get_eee = dsa_user_get_eee, + .get_link_ksettings = dsa_user_get_link_ksettings, + .set_link_ksettings = dsa_user_set_link_ksettings, + .get_pause_stats = dsa_user_get_pause_stats, + .get_pauseparam = dsa_user_get_pauseparam, + .set_pauseparam = dsa_user_set_pauseparam, + .get_rxnfc = dsa_user_get_rxnfc, + .set_rxnfc = dsa_user_set_rxnfc, + .get_ts_info = dsa_user_get_ts_info, + .self_test = dsa_user_net_selftest, + .get_mm = dsa_user_get_mm, + .set_mm = dsa_user_set_mm, + .get_mm_stats = dsa_user_get_mm_stats, }; -static const struct dcbnl_rtnl_ops __maybe_unused dsa_slave_dcbnl_ops = { - .ieee_setapp = dsa_slave_dcbnl_ieee_setapp, - .ieee_delapp = dsa_slave_dcbnl_ieee_delapp, +static const struct dcbnl_rtnl_ops 
__maybe_unused dsa_user_dcbnl_ops = { + .ieee_setapp = dsa_user_dcbnl_ieee_setapp, + .ieee_delapp = dsa_user_dcbnl_ieee_delapp, }; -static void dsa_slave_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *s) +static void dsa_user_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *s) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch *ds = dp->ds; if (ds->ops->get_stats64) @@ -2406,43 +2413,43 @@ static void dsa_slave_get_stats64(struct net_device *dev, dev_get_tstats64(dev, s); } -static int dsa_slave_fill_forward_path(struct net_device_path_ctx *ctx, - struct net_device_path *path) +static int dsa_user_fill_forward_path(struct net_device_path_ctx *ctx, + struct net_device_path *path) { - struct dsa_port *dp = dsa_slave_to_port(ctx->dev); - struct net_device *master = dsa_port_to_master(dp); + struct dsa_port *dp = dsa_user_to_port(ctx->dev); + struct net_device *conduit = dsa_port_to_conduit(dp); struct dsa_port *cpu_dp = dp->cpu_dp; path->dev = ctx->dev; path->type = DEV_PATH_DSA; path->dsa.proto = cpu_dp->tag_ops->proto; path->dsa.port = dp->index; - ctx->dev = master; + ctx->dev = conduit; return 0; } -static const struct net_device_ops dsa_slave_netdev_ops = { - .ndo_open = dsa_slave_open, - .ndo_stop = dsa_slave_close, - .ndo_start_xmit = dsa_slave_xmit, - .ndo_change_rx_flags = dsa_slave_change_rx_flags, - .ndo_set_rx_mode = dsa_slave_set_rx_mode, - .ndo_set_mac_address = dsa_slave_set_mac_address, - .ndo_fdb_dump = dsa_slave_fdb_dump, - .ndo_eth_ioctl = dsa_slave_ioctl, - .ndo_get_iflink = dsa_slave_get_iflink, +static const struct net_device_ops dsa_user_netdev_ops = { + .ndo_open = dsa_user_open, + .ndo_stop = dsa_user_close, + .ndo_start_xmit = dsa_user_xmit, + .ndo_change_rx_flags = dsa_user_change_rx_flags, + .ndo_set_rx_mode = dsa_user_set_rx_mode, + .ndo_set_mac_address = dsa_user_set_mac_address, + .ndo_fdb_dump = dsa_user_fdb_dump, + .ndo_eth_ioctl = dsa_user_ioctl, + .ndo_get_iflink = dsa_user_get_iflink, #ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_netpoll_setup = dsa_slave_netpoll_setup, - .ndo_netpoll_cleanup = dsa_slave_netpoll_cleanup, - .ndo_poll_controller = dsa_slave_poll_controller, + .ndo_netpoll_setup = dsa_user_netpoll_setup, + .ndo_netpoll_cleanup = dsa_user_netpoll_cleanup, + .ndo_poll_controller = dsa_user_poll_controller, #endif - .ndo_setup_tc = dsa_slave_setup_tc, - .ndo_get_stats64 = dsa_slave_get_stats64, - .ndo_vlan_rx_add_vid = dsa_slave_vlan_rx_add_vid, - .ndo_vlan_rx_kill_vid = dsa_slave_vlan_rx_kill_vid, - .ndo_change_mtu = dsa_slave_change_mtu, - .ndo_fill_forward_path = dsa_slave_fill_forward_path, + .ndo_setup_tc = dsa_user_setup_tc, + .ndo_get_stats64 = dsa_user_get_stats64, + .ndo_vlan_rx_add_vid = dsa_user_vlan_rx_add_vid, + .ndo_vlan_rx_kill_vid = dsa_user_vlan_rx_kill_vid, + .ndo_change_mtu = dsa_user_change_mtu, + .ndo_fill_forward_path = dsa_user_fill_forward_path, }; static struct device_type dsa_type = { @@ -2458,8 +2465,8 @@ void dsa_port_phylink_mac_change(struct dsa_switch *ds, int port, bool up) } EXPORT_SYMBOL_GPL(dsa_port_phylink_mac_change); -static void dsa_slave_phylink_fixed_state(struct phylink_config *config, - struct phylink_link_state *state) +static void dsa_user_phylink_fixed_state(struct phylink_config *config, + struct phylink_link_state *state) { struct dsa_port *dp = container_of(config, struct dsa_port, pl_config); struct dsa_switch *ds = dp->ds; @@ -2470,33 +2477,33 @@ static void dsa_slave_phylink_fixed_state(struct 
phylink_config *config, ds->ops->phylink_fixed_state(ds, dp->index, state); } -/* slave device setup *******************************************************/ -static int dsa_slave_phy_connect(struct net_device *slave_dev, int addr, - u32 flags) +/* user device setup *******************************************************/ +static int dsa_user_phy_connect(struct net_device *user_dev, int addr, + u32 flags) { - struct dsa_port *dp = dsa_slave_to_port(slave_dev); + struct dsa_port *dp = dsa_user_to_port(user_dev); struct dsa_switch *ds = dp->ds; - slave_dev->phydev = mdiobus_get_phy(ds->slave_mii_bus, addr); - if (!slave_dev->phydev) { - netdev_err(slave_dev, "no phy at %d\n", addr); + user_dev->phydev = mdiobus_get_phy(ds->user_mii_bus, addr); + if (!user_dev->phydev) { + netdev_err(user_dev, "no phy at %d\n", addr); return -ENODEV; } - slave_dev->phydev->dev_flags |= flags; + user_dev->phydev->dev_flags |= flags; - return phylink_connect_phy(dp->pl, slave_dev->phydev); + return phylink_connect_phy(dp->pl, user_dev->phydev); } -static int dsa_slave_phy_setup(struct net_device *slave_dev) +static int dsa_user_phy_setup(struct net_device *user_dev) { - struct dsa_port *dp = dsa_slave_to_port(slave_dev); + struct dsa_port *dp = dsa_user_to_port(user_dev); struct device_node *port_dn = dp->dn; struct dsa_switch *ds = dp->ds; u32 phy_flags = 0; int ret; - dp->pl_config.dev = &slave_dev->dev; + dp->pl_config.dev = &user_dev->dev; dp->pl_config.type = PHYLINK_NETDEV; /* The get_fixed_state callback takes precedence over polling the @@ -2504,7 +2511,7 @@ static int dsa_slave_phy_setup(struct net_device *slave_dev) * this if the switch provides such a callback. */ if (ds->ops->phylink_fixed_state) { - dp->pl_config.get_fixed_state = dsa_slave_phylink_fixed_state; + dp->pl_config.get_fixed_state = dsa_user_phylink_fixed_state; dp->pl_config.poll_fixed_state = true; } @@ -2516,14 +2523,14 @@ static int dsa_slave_phy_setup(struct net_device *slave_dev) phy_flags = ds->ops->get_phy_flags(ds, dp->index); ret = phylink_of_phy_connect(dp->pl, port_dn, phy_flags); - if (ret == -ENODEV && ds->slave_mii_bus) { + if (ret == -ENODEV && ds->user_mii_bus) { /* We could not connect to a designated PHY or SFP, so try to * use the switch internal MDIO bus instead */ - ret = dsa_slave_phy_connect(slave_dev, dp->index, phy_flags); + ret = dsa_user_phy_connect(user_dev, dp->index, phy_flags); } if (ret) { - netdev_err(slave_dev, "failed to connect to PHY: %pe\n", + netdev_err(user_dev, "failed to connect to PHY: %pe\n", ERR_PTR(ret)); dsa_port_phylink_destroy(dp); } @@ -2531,42 +2538,42 @@ static int dsa_slave_phy_setup(struct net_device *slave_dev) return ret; } -void dsa_slave_setup_tagger(struct net_device *slave) +void dsa_user_setup_tagger(struct net_device *user) { - struct dsa_port *dp = dsa_slave_to_port(slave); - struct net_device *master = dsa_port_to_master(dp); - struct dsa_slave_priv *p = netdev_priv(slave); + struct dsa_port *dp = dsa_user_to_port(user); + struct net_device *conduit = dsa_port_to_conduit(dp); + struct dsa_user_priv *p = netdev_priv(user); const struct dsa_port *cpu_dp = dp->cpu_dp; const struct dsa_switch *ds = dp->ds; - slave->needed_headroom = cpu_dp->tag_ops->needed_headroom; - slave->needed_tailroom = cpu_dp->tag_ops->needed_tailroom; - /* Try to save one extra realloc later in the TX path (in the master) - * by also inheriting the master's needed headroom and tailroom. 
+ user->needed_headroom = cpu_dp->tag_ops->needed_headroom; + user->needed_tailroom = cpu_dp->tag_ops->needed_tailroom; + /* Try to save one extra realloc later in the TX path (in the conduit) + * by also inheriting the conduit's needed headroom and tailroom. * The 8021q driver also does this. */ - slave->needed_headroom += master->needed_headroom; - slave->needed_tailroom += master->needed_tailroom; + user->needed_headroom += conduit->needed_headroom; + user->needed_tailroom += conduit->needed_tailroom; p->xmit = cpu_dp->tag_ops->xmit; - slave->features = master->vlan_features | NETIF_F_HW_TC; - slave->hw_features |= NETIF_F_HW_TC; - slave->features |= NETIF_F_LLTX; - if (slave->needed_tailroom) - slave->features &= ~(NETIF_F_SG | NETIF_F_FRAGLIST); + user->features = conduit->vlan_features | NETIF_F_HW_TC; + user->hw_features |= NETIF_F_HW_TC; + user->features |= NETIF_F_LLTX; + if (user->needed_tailroom) + user->features &= ~(NETIF_F_SG | NETIF_F_FRAGLIST); if (ds->needs_standalone_vlan_filtering) - slave->features |= NETIF_F_HW_VLAN_CTAG_FILTER; + user->features |= NETIF_F_HW_VLAN_CTAG_FILTER; } -int dsa_slave_suspend(struct net_device *slave_dev) +int dsa_user_suspend(struct net_device *user_dev) { - struct dsa_port *dp = dsa_slave_to_port(slave_dev); + struct dsa_port *dp = dsa_user_to_port(user_dev); - if (!netif_running(slave_dev)) + if (!netif_running(user_dev)) return 0; - netif_device_detach(slave_dev); + netif_device_detach(user_dev); rtnl_lock(); phylink_stop(dp->pl); @@ -2575,14 +2582,14 @@ int dsa_slave_suspend(struct net_device *slave_dev) return 0; } -int dsa_slave_resume(struct net_device *slave_dev) +int dsa_user_resume(struct net_device *user_dev) { - struct dsa_port *dp = dsa_slave_to_port(slave_dev); + struct dsa_port *dp = dsa_user_to_port(user_dev); - if (!netif_running(slave_dev)) + if (!netif_running(user_dev)) return 0; - netif_device_attach(slave_dev); + netif_device_attach(user_dev); rtnl_lock(); phylink_start(dp->pl); @@ -2591,12 +2598,12 @@ int dsa_slave_resume(struct net_device *slave_dev) return 0; } -int dsa_slave_create(struct dsa_port *port) +int dsa_user_create(struct dsa_port *port) { - struct net_device *master = dsa_port_to_master(port); + struct net_device *conduit = dsa_port_to_conduit(port); struct dsa_switch *ds = port->ds; - struct net_device *slave_dev; - struct dsa_slave_priv *p; + struct net_device *user_dev; + struct dsa_user_priv *p; const char *name; int assign_type; int ret; @@ -2612,55 +2619,55 @@ int dsa_slave_create(struct dsa_port *port) assign_type = NET_NAME_ENUM; } - slave_dev = alloc_netdev_mqs(sizeof(struct dsa_slave_priv), name, - assign_type, ether_setup, - ds->num_tx_queues, 1); - if (slave_dev == NULL) + user_dev = alloc_netdev_mqs(sizeof(struct dsa_user_priv), name, + assign_type, ether_setup, + ds->num_tx_queues, 1); + if (user_dev == NULL) return -ENOMEM; - slave_dev->rtnl_link_ops = &dsa_link_ops; - slave_dev->ethtool_ops = &dsa_slave_ethtool_ops; + user_dev->rtnl_link_ops = &dsa_link_ops; + user_dev->ethtool_ops = &dsa_user_ethtool_ops; #if IS_ENABLED(CONFIG_DCB) - slave_dev->dcbnl_ops = &dsa_slave_dcbnl_ops; + user_dev->dcbnl_ops = &dsa_user_dcbnl_ops; #endif if (!is_zero_ether_addr(port->mac)) - eth_hw_addr_set(slave_dev, port->mac); + eth_hw_addr_set(user_dev, port->mac); else - eth_hw_addr_inherit(slave_dev, master); - slave_dev->priv_flags |= IFF_NO_QUEUE; + eth_hw_addr_inherit(user_dev, conduit); + user_dev->priv_flags |= IFF_NO_QUEUE; if (dsa_switch_supports_uc_filtering(ds)) - slave_dev->priv_flags |= 
IFF_UNICAST_FLT; - slave_dev->netdev_ops = &dsa_slave_netdev_ops; + user_dev->priv_flags |= IFF_UNICAST_FLT; + user_dev->netdev_ops = &dsa_user_netdev_ops; if (ds->ops->port_max_mtu) - slave_dev->max_mtu = ds->ops->port_max_mtu(ds, port->index); - SET_NETDEV_DEVTYPE(slave_dev, &dsa_type); - - SET_NETDEV_DEV(slave_dev, port->ds->dev); - SET_NETDEV_DEVLINK_PORT(slave_dev, &port->devlink_port); - slave_dev->dev.of_node = port->dn; - slave_dev->vlan_features = master->vlan_features; - - p = netdev_priv(slave_dev); - slave_dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); - if (!slave_dev->tstats) { - free_netdev(slave_dev); + user_dev->max_mtu = ds->ops->port_max_mtu(ds, port->index); + SET_NETDEV_DEVTYPE(user_dev, &dsa_type); + + SET_NETDEV_DEV(user_dev, port->ds->dev); + SET_NETDEV_DEVLINK_PORT(user_dev, &port->devlink_port); + user_dev->dev.of_node = port->dn; + user_dev->vlan_features = conduit->vlan_features; + + p = netdev_priv(user_dev); + user_dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); + if (!user_dev->tstats) { + free_netdev(user_dev); return -ENOMEM; } - ret = gro_cells_init(&p->gcells, slave_dev); + ret = gro_cells_init(&p->gcells, user_dev); if (ret) goto out_free; p->dp = port; INIT_LIST_HEAD(&p->mall_tc_list); - port->slave = slave_dev; - dsa_slave_setup_tagger(slave_dev); + port->user = user_dev; + dsa_user_setup_tagger(user_dev); - netif_carrier_off(slave_dev); + netif_carrier_off(user_dev); - ret = dsa_slave_phy_setup(slave_dev); + ret = dsa_user_phy_setup(user_dev); if (ret) { - netdev_err(slave_dev, + netdev_err(user_dev, "error %d setting up PHY for tree %d, switch %d, port %d\n", ret, ds->dst->index, ds->index, port->index); goto out_gcells; @@ -2668,23 +2675,23 @@ int dsa_slave_create(struct dsa_port *port) rtnl_lock(); - ret = dsa_slave_change_mtu(slave_dev, ETH_DATA_LEN); + ret = dsa_user_change_mtu(user_dev, ETH_DATA_LEN); if (ret && ret != -EOPNOTSUPP) dev_warn(ds->dev, "nonfatal error %d setting MTU to %d on port %d\n", ret, ETH_DATA_LEN, port->index); - ret = register_netdevice(slave_dev); + ret = register_netdevice(user_dev); if (ret) { - netdev_err(master, "error %d registering interface %s\n", - ret, slave_dev->name); + netdev_err(conduit, "error %d registering interface %s\n", + ret, user_dev->name); rtnl_unlock(); goto out_phy; } if (IS_ENABLED(CONFIG_DCB)) { - ret = dsa_slave_dcbnl_init(slave_dev); + ret = dsa_user_dcbnl_init(user_dev); if (ret) { - netdev_err(slave_dev, + netdev_err(user_dev, "failed to initialize DCB: %pe\n", ERR_PTR(ret)); rtnl_unlock(); @@ -2692,7 +2699,7 @@ int dsa_slave_create(struct dsa_port *port) } } - ret = netdev_upper_dev_link(master, slave_dev, NULL); + ret = netdev_upper_dev_link(conduit, user_dev, NULL); rtnl_unlock(); @@ -2702,7 +2709,7 @@ int dsa_slave_create(struct dsa_port *port) return 0; out_unregister: - unregister_netdev(slave_dev); + unregister_netdev(user_dev); out_phy: rtnl_lock(); phylink_disconnect_phy(p->dp->pl); @@ -2711,124 +2718,125 @@ out_phy: out_gcells: gro_cells_destroy(&p->gcells); out_free: - free_percpu(slave_dev->tstats); - free_netdev(slave_dev); - port->slave = NULL; + free_percpu(user_dev->tstats); + free_netdev(user_dev); + port->user = NULL; return ret; } -void dsa_slave_destroy(struct net_device *slave_dev) +void dsa_user_destroy(struct net_device *user_dev) { - struct net_device *master = dsa_slave_to_master(slave_dev); - struct dsa_port *dp = dsa_slave_to_port(slave_dev); - struct dsa_slave_priv *p = netdev_priv(slave_dev); + struct net_device *conduit = 
dsa_user_to_conduit(user_dev); + struct dsa_port *dp = dsa_user_to_port(user_dev); + struct dsa_user_priv *p = netdev_priv(user_dev); - netif_carrier_off(slave_dev); + netif_carrier_off(user_dev); rtnl_lock(); - netdev_upper_dev_unlink(master, slave_dev); - unregister_netdevice(slave_dev); + netdev_upper_dev_unlink(conduit, user_dev); + unregister_netdevice(user_dev); phylink_disconnect_phy(dp->pl); rtnl_unlock(); dsa_port_phylink_destroy(dp); gro_cells_destroy(&p->gcells); - free_percpu(slave_dev->tstats); - free_netdev(slave_dev); + free_percpu(user_dev->tstats); + free_netdev(user_dev); } -int dsa_slave_change_master(struct net_device *dev, struct net_device *master, +int dsa_user_change_conduit(struct net_device *dev, struct net_device *conduit, struct netlink_ext_ack *extack) { - struct net_device *old_master = dsa_slave_to_master(dev); - struct dsa_port *dp = dsa_slave_to_port(dev); + struct net_device *old_conduit = dsa_user_to_conduit(dev); + struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch *ds = dp->ds; struct net_device *upper; struct list_head *iter; int err; - if (master == old_master) + if (conduit == old_conduit) return 0; - if (!ds->ops->port_change_master) { + if (!ds->ops->port_change_conduit) { NL_SET_ERR_MSG_MOD(extack, - "Driver does not support changing DSA master"); + "Driver does not support changing DSA conduit"); return -EOPNOTSUPP; } - if (!netdev_uses_dsa(master)) { + if (!netdev_uses_dsa(conduit)) { NL_SET_ERR_MSG_MOD(extack, - "Interface not eligible as DSA master"); + "Interface not eligible as DSA conduit"); return -EOPNOTSUPP; } - netdev_for_each_upper_dev_rcu(master, upper, iter) { - if (dsa_slave_dev_check(upper)) + netdev_for_each_upper_dev_rcu(conduit, upper, iter) { + if (dsa_user_dev_check(upper)) continue; if (netif_is_bridge_master(upper)) continue; - NL_SET_ERR_MSG_MOD(extack, "Cannot join master with unknown uppers"); + NL_SET_ERR_MSG_MOD(extack, "Cannot join conduit with unknown uppers"); return -EOPNOTSUPP; } - /* Since we allow live-changing the DSA master, plus we auto-open the - * DSA master when the user port opens => we need to ensure that the - * new DSA master is open too. + /* Since we allow live-changing the DSA conduit, plus we auto-open the + * DSA conduit when the user port opens => we need to ensure that the + * new DSA conduit is open too. */ if (dev->flags & IFF_UP) { - err = dev_open(master, extack); + err = dev_open(conduit, extack); if (err) return err; } - netdev_upper_dev_unlink(old_master, dev); + netdev_upper_dev_unlink(old_conduit, dev); - err = netdev_upper_dev_link(master, dev, extack); + err = netdev_upper_dev_link(conduit, dev, extack); if (err) - goto out_revert_old_master_unlink; + goto out_revert_old_conduit_unlink; - err = dsa_port_change_master(dp, master, extack); + err = dsa_port_change_conduit(dp, conduit, extack); if (err) - goto out_revert_master_link; + goto out_revert_conduit_link; /* Update the MTU of the new CPU port through cross-chip notifiers */ - err = dsa_slave_change_mtu(dev, dev->mtu); + err = dsa_user_change_mtu(dev, dev->mtu); if (err && err != -EOPNOTSUPP) { netdev_warn(dev, - "nonfatal error updating MTU with new master: %pe\n", + "nonfatal error updating MTU with new conduit: %pe\n", ERR_PTR(err)); } /* If the port doesn't have its own MAC address and relies on the DSA - * master's one, inherit it again from the new DSA master. + * conduit's one, inherit it again from the new DSA conduit. 
*/ if (is_zero_ether_addr(dp->mac)) - eth_hw_addr_inherit(dev, master); + eth_hw_addr_inherit(dev, conduit); return 0; -out_revert_master_link: - netdev_upper_dev_unlink(master, dev); -out_revert_old_master_unlink: - netdev_upper_dev_link(old_master, dev, NULL); +out_revert_conduit_link: + netdev_upper_dev_unlink(conduit, dev); +out_revert_old_conduit_unlink: + netdev_upper_dev_link(old_conduit, dev, NULL); return err; } -bool dsa_slave_dev_check(const struct net_device *dev) +bool dsa_user_dev_check(const struct net_device *dev) { - return dev->netdev_ops == &dsa_slave_netdev_ops; + return dev->netdev_ops == &dsa_user_netdev_ops; } -EXPORT_SYMBOL_GPL(dsa_slave_dev_check); +EXPORT_SYMBOL_GPL(dsa_user_dev_check); -static int dsa_slave_changeupper(struct net_device *dev, - struct netdev_notifier_changeupper_info *info) +static int dsa_user_changeupper(struct net_device *dev, + struct netdev_notifier_changeupper_info *info) { - struct dsa_port *dp = dsa_slave_to_port(dev); struct netlink_ext_ack *extack; int err = NOTIFY_DONE; + struct dsa_port *dp; - if (!dsa_slave_dev_check(dev)) + if (!dsa_user_dev_check(dev)) return err; + dp = dsa_user_to_port(dev); extack = netdev_notifier_info_to_extack(&info->info); if (netif_is_bridge_master(info->upper_dev)) { @@ -2862,7 +2870,7 @@ static int dsa_slave_changeupper(struct net_device *dev, } } else if (is_hsr_master(info->upper_dev)) { if (info->linking) { - err = dsa_port_hsr_join(dp, info->upper_dev); + err = dsa_port_hsr_join(dp, info->upper_dev, extack); if (err == -EOPNOTSUPP) { NL_SET_ERR_MSG_WEAK_MOD(extack, "Offloading not supported"); @@ -2878,28 +2886,30 @@ static int dsa_slave_changeupper(struct net_device *dev, return err; } -static int dsa_slave_prechangeupper(struct net_device *dev, - struct netdev_notifier_changeupper_info *info) +static int dsa_user_prechangeupper(struct net_device *dev, + struct netdev_notifier_changeupper_info *info) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp; - if (!dsa_slave_dev_check(dev)) + if (!dsa_user_dev_check(dev)) return NOTIFY_DONE; + dp = dsa_user_to_port(dev); + if (netif_is_bridge_master(info->upper_dev) && !info->linking) dsa_port_pre_bridge_leave(dp, info->upper_dev); else if (netif_is_lag_master(info->upper_dev) && !info->linking) dsa_port_pre_lag_leave(dp, info->upper_dev); - /* dsa_port_pre_hsr_leave is not yet necessary since hsr cannot be - * meaningfully enslaved to a bridge yet + /* dsa_port_pre_hsr_leave is not yet necessary since hsr devices cannot + * be meaningfully placed under a bridge yet */ return NOTIFY_DONE; } static int -dsa_slave_lag_changeupper(struct net_device *dev, - struct netdev_notifier_changeupper_info *info) +dsa_user_lag_changeupper(struct net_device *dev, + struct netdev_notifier_changeupper_info *info) { struct net_device *lower; struct list_head *iter; @@ -2910,15 +2920,15 @@ dsa_slave_lag_changeupper(struct net_device *dev, return err; netdev_for_each_lower_dev(dev, lower, iter) { - if (!dsa_slave_dev_check(lower)) + if (!dsa_user_dev_check(lower)) continue; - dp = dsa_slave_to_port(lower); + dp = dsa_user_to_port(lower); if (!dp->lag) /* Software LAG */ continue; - err = dsa_slave_changeupper(lower, info); + err = dsa_user_changeupper(lower, info); if (notifier_to_errno(err)) break; } @@ -2926,12 +2936,12 @@ dsa_slave_lag_changeupper(struct net_device *dev, return err; } -/* Same as dsa_slave_lag_changeupper() except that it calls - * dsa_slave_prechangeupper() +/* Same as dsa_user_lag_changeupper() except that it calls + *
dsa_user_prechangeupper() */ static int -dsa_slave_lag_prechangeupper(struct net_device *dev, - struct netdev_notifier_changeupper_info *info) +dsa_user_lag_prechangeupper(struct net_device *dev, + struct netdev_notifier_changeupper_info *info) { struct net_device *lower; struct list_head *iter; @@ -2942,15 +2952,15 @@ dsa_slave_lag_prechangeupper(struct net_device *dev, return err; netdev_for_each_lower_dev(dev, lower, iter) { - if (!dsa_slave_dev_check(lower)) + if (!dsa_user_dev_check(lower)) continue; - dp = dsa_slave_to_port(lower); + dp = dsa_user_to_port(lower); if (!dp->lag) /* Software LAG */ continue; - err = dsa_slave_prechangeupper(lower, info); + err = dsa_user_prechangeupper(lower, info); if (notifier_to_errno(err)) break; } @@ -2963,7 +2973,7 @@ dsa_prevent_bridging_8021q_upper(struct net_device *dev, struct netdev_notifier_changeupper_info *info) { struct netlink_ext_ack *ext_ack; - struct net_device *slave, *br; + struct net_device *user, *br; struct dsa_port *dp; ext_ack = netdev_notifier_info_to_extack(&info->info); @@ -2971,11 +2981,11 @@ dsa_prevent_bridging_8021q_upper(struct net_device *dev, if (!is_vlan_dev(dev)) return NOTIFY_DONE; - slave = vlan_dev_real_dev(dev); - if (!dsa_slave_dev_check(slave)) + user = vlan_dev_real_dev(dev); + if (!dsa_user_dev_check(user)) return NOTIFY_DONE; - dp = dsa_slave_to_port(slave); + dp = dsa_user_to_port(user); br = dsa_port_bridge_dev_get(dp); if (!br) return NOTIFY_DONE; @@ -2984,7 +2994,7 @@ dsa_prevent_bridging_8021q_upper(struct net_device *dev, if (br_vlan_enabled(br) && netif_is_bridge_master(info->upper_dev) && info->linking) { NL_SET_ERR_MSG_MOD(ext_ack, - "Cannot enslave VLAN device into VLAN aware bridge"); + "Cannot make VLAN device join VLAN-aware bridge"); return notifier_from_errno(-EINVAL); } @@ -2992,10 +3002,10 @@ dsa_prevent_bridging_8021q_upper(struct net_device *dev, } static int -dsa_slave_check_8021q_upper(struct net_device *dev, - struct netdev_notifier_changeupper_info *info) +dsa_user_check_8021q_upper(struct net_device *dev, + struct netdev_notifier_changeupper_info *info) { - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); struct net_device *br = dsa_port_bridge_dev_get(dp); struct bridge_vlan_info br_info; struct netlink_ext_ack *extack; @@ -3023,17 +3033,17 @@ dsa_slave_check_8021q_upper(struct net_device *dev, } static int -dsa_slave_prechangeupper_sanity_check(struct net_device *dev, - struct netdev_notifier_changeupper_info *info) +dsa_user_prechangeupper_sanity_check(struct net_device *dev, + struct netdev_notifier_changeupper_info *info) { struct dsa_switch *ds; struct dsa_port *dp; int err; - if (!dsa_slave_dev_check(dev)) + if (!dsa_user_dev_check(dev)) return dsa_prevent_bridging_8021q_upper(dev, info); - dp = dsa_slave_to_port(dev); + dp = dsa_user_to_port(dev); ds = dp->ds; if (ds->ops->port_prechangeupper) { @@ -3043,17 +3053,17 @@ dsa_slave_prechangeupper_sanity_check(struct net_device *dev, } if (is_vlan_dev(info->upper_dev)) - return dsa_slave_check_8021q_upper(dev, info); + return dsa_user_check_8021q_upper(dev, info); return NOTIFY_DONE; } -/* To be eligible as a DSA master, a LAG must have all lower interfaces be - * eligible DSA masters. Additionally, all LAG slaves must be DSA masters of +/* To be eligible as a DSA conduit, a LAG must have all lower interfaces be + * eligible DSA conduits. Additionally, all LAG slaves must be DSA conduits of * switches in the same switch tree. 
*/ -static int dsa_lag_master_validate(struct net_device *lag_dev, - struct netlink_ext_ack *extack) +static int dsa_lag_conduit_validate(struct net_device *lag_dev, + struct netlink_ext_ack *extack) { struct net_device *lower1, *lower2; struct list_head *iter1, *iter2; @@ -3063,7 +3073,7 @@ static int dsa_lag_master_validate(struct net_device *lag_dev, if (!netdev_uses_dsa(lower1) || !netdev_uses_dsa(lower2)) { NL_SET_ERR_MSG_MOD(extack, - "All LAG ports must be eligible as DSA masters"); + "All LAG ports must be eligible as DSA conduits"); return notifier_from_errno(-EINVAL); } @@ -3073,7 +3083,7 @@ static int dsa_lag_master_validate(struct net_device *lag_dev, if (!dsa_port_tree_same(lower1->dsa_ptr, lower2->dsa_ptr)) { NL_SET_ERR_MSG_MOD(extack, - "LAG contains DSA masters of disjoint switch trees"); + "LAG contains DSA conduits of disjoint switch trees"); return notifier_from_errno(-EINVAL); } } @@ -3083,41 +3093,41 @@ static int dsa_lag_master_validate(struct net_device *lag_dev, } static int -dsa_master_prechangeupper_sanity_check(struct net_device *master, - struct netdev_notifier_changeupper_info *info) +dsa_conduit_prechangeupper_sanity_check(struct net_device *conduit, + struct netdev_notifier_changeupper_info *info) { struct netlink_ext_ack *extack = netdev_notifier_info_to_extack(&info->info); - if (!netdev_uses_dsa(master)) + if (!netdev_uses_dsa(conduit)) return NOTIFY_DONE; if (!info->linking) return NOTIFY_DONE; /* Allow DSA switch uppers */ - if (dsa_slave_dev_check(info->upper_dev)) + if (dsa_user_dev_check(info->upper_dev)) return NOTIFY_DONE; - /* Allow bridge uppers of DSA masters, subject to further + /* Allow bridge uppers of DSA conduits, subject to further * restrictions in dsa_bridge_prechangelower_sanity_check() */ if (netif_is_bridge_master(info->upper_dev)) return NOTIFY_DONE; /* Allow LAG uppers, subject to further restrictions in - * dsa_lag_master_prechangelower_sanity_check() + * dsa_lag_conduit_prechangelower_sanity_check() */ if (netif_is_lag_master(info->upper_dev)) - return dsa_lag_master_validate(info->upper_dev, extack); + return dsa_lag_conduit_validate(info->upper_dev, extack); NL_SET_ERR_MSG_MOD(extack, - "DSA master cannot join unknown upper interfaces"); + "DSA conduit cannot join unknown upper interfaces"); return notifier_from_errno(-EBUSY); } static int -dsa_lag_master_prechangelower_sanity_check(struct net_device *dev, - struct netdev_notifier_changeupper_info *info) +dsa_lag_conduit_prechangelower_sanity_check(struct net_device *dev, + struct netdev_notifier_changeupper_info *info) { struct netlink_ext_ack *extack = netdev_notifier_info_to_extack(&info->info); struct net_device *lag_dev = info->upper_dev; @@ -3132,14 +3142,14 @@ dsa_lag_master_prechangelower_sanity_check(struct net_device *dev, if (!netdev_uses_dsa(dev)) { NL_SET_ERR_MSG(extack, - "Only DSA masters can join a LAG DSA master"); + "Only DSA conduits can join a LAG DSA conduit"); return notifier_from_errno(-EINVAL); } netdev_for_each_lower_dev(lag_dev, lower, iter) { if (!dsa_port_tree_same(dev->dsa_ptr, lower->dsa_ptr)) { NL_SET_ERR_MSG(extack, - "Interface is DSA master for a different switch tree than this LAG"); + "Interface is DSA conduit for a different switch tree than this LAG"); return notifier_from_errno(-EINVAL); } @@ -3149,13 +3159,13 @@ dsa_lag_master_prechangelower_sanity_check(struct net_device *dev, return NOTIFY_DONE; } -/* Don't allow bridging of DSA masters, since the bridge layer rx_handler +/* Don't allow bridging of DSA conduits, since the bridge layer 
rx_handler * prevents the DSA fake ethertype handler to be invoked, so we don't get the * chance to strip off and parse the DSA switch tag protocol header (the bridge * layer just returns RX_HANDLER_CONSUMED, stopping RX processing for these * frames). * The only case where that would not be an issue is when bridging can already - * be offloaded, such as when the DSA master is itself a DSA or plain switchdev + * be offloaded, such as when the DSA conduit is itself a DSA or plain switchdev * port, and is bridged only with other ports from the same hardware device. */ static int @@ -3181,7 +3191,7 @@ dsa_bridge_prechangelower_sanity_check(struct net_device *new_lower, if (!netdev_port_same_parent_id(lower, new_lower)) { NL_SET_ERR_MSG(extack, - "Cannot do software bridging with a DSA master"); + "Cannot do software bridging with a DSA conduit"); return notifier_from_errno(-EINVAL); } } @@ -3189,45 +3199,45 @@ dsa_bridge_prechangelower_sanity_check(struct net_device *new_lower, return NOTIFY_DONE; } -static void dsa_tree_migrate_ports_from_lag_master(struct dsa_switch_tree *dst, - struct net_device *lag_dev) +static void dsa_tree_migrate_ports_from_lag_conduit(struct dsa_switch_tree *dst, + struct net_device *lag_dev) { - struct net_device *new_master = dsa_tree_find_first_master(dst); + struct net_device *new_conduit = dsa_tree_find_first_conduit(dst); struct dsa_port *dp; int err; dsa_tree_for_each_user_port(dp, dst) { - if (dsa_port_to_master(dp) != lag_dev) + if (dsa_port_to_conduit(dp) != lag_dev) continue; - err = dsa_slave_change_master(dp->slave, new_master, NULL); + err = dsa_user_change_conduit(dp->user, new_conduit, NULL); if (err) { - netdev_err(dp->slave, - "failed to restore master to %s: %pe\n", - new_master->name, ERR_PTR(err)); + netdev_err(dp->user, + "failed to restore conduit to %s: %pe\n", + new_conduit->name, ERR_PTR(err)); } } } -static int dsa_master_lag_join(struct net_device *master, - struct net_device *lag_dev, - struct netdev_lag_upper_info *uinfo, - struct netlink_ext_ack *extack) +static int dsa_conduit_lag_join(struct net_device *conduit, + struct net_device *lag_dev, + struct netdev_lag_upper_info *uinfo, + struct netlink_ext_ack *extack) { - struct dsa_port *cpu_dp = master->dsa_ptr; + struct dsa_port *cpu_dp = conduit->dsa_ptr; struct dsa_switch_tree *dst = cpu_dp->dst; struct dsa_port *dp; int err; - err = dsa_master_lag_setup(lag_dev, cpu_dp, uinfo, extack); + err = dsa_conduit_lag_setup(lag_dev, cpu_dp, uinfo, extack); if (err) return err; dsa_tree_for_each_user_port(dp, dst) { - if (dsa_port_to_master(dp) != master) + if (dsa_port_to_conduit(dp) != conduit) continue; - err = dsa_slave_change_master(dp->slave, lag_dev, extack); + err = dsa_user_change_conduit(dp->user, lag_dev, extack); if (err) goto restore; } @@ -3236,24 +3246,24 @@ static int dsa_master_lag_join(struct net_device *master, restore: dsa_tree_for_each_user_port_continue_reverse(dp, dst) { - if (dsa_port_to_master(dp) != lag_dev) + if (dsa_port_to_conduit(dp) != lag_dev) continue; - err = dsa_slave_change_master(dp->slave, master, NULL); + err = dsa_user_change_conduit(dp->user, conduit, NULL); if (err) { - netdev_err(dp->slave, - "failed to restore master to %s: %pe\n", - master->name, ERR_PTR(err)); + netdev_err(dp->user, + "failed to restore conduit to %s: %pe\n", + conduit->name, ERR_PTR(err)); } } - dsa_master_lag_teardown(lag_dev, master->dsa_ptr); + dsa_conduit_lag_teardown(lag_dev, conduit->dsa_ptr); return err; } -static void dsa_master_lag_leave(struct net_device *master, - 
struct net_device *lag_dev) +static void dsa_conduit_lag_leave(struct net_device *conduit, + struct net_device *lag_dev) { struct dsa_port *dp, *cpu_dp = lag_dev->dsa_ptr; struct dsa_switch_tree *dst = cpu_dp->dst; @@ -3270,10 +3280,10 @@ static void dsa_master_lag_leave(struct net_device *master, if (new_cpu_dp) { /* Update the CPU port of the user ports still under the LAG - * so that dsa_port_to_master() continues to work properly + * so that dsa_port_to_conduit() continues to work properly */ dsa_tree_for_each_user_port(dp, dst) - if (dsa_port_to_master(dp) == lag_dev) + if (dsa_port_to_conduit(dp) == lag_dev) dp->cpu_dp = new_cpu_dp; /* Update the index of the virtual CPU port to match the lowest @@ -3282,20 +3292,20 @@ static void dsa_master_lag_leave(struct net_device *master, lag_dev->dsa_ptr = new_cpu_dp; wmb(); } else { - /* If the LAG DSA master has no ports left, migrate back all + /* If the LAG DSA conduit has no ports left, migrate back all * user ports to the first physical CPU port */ - dsa_tree_migrate_ports_from_lag_master(dst, lag_dev); + dsa_tree_migrate_ports_from_lag_conduit(dst, lag_dev); } - /* This DSA master has left its LAG in any case, so let + /* This DSA conduit has left its LAG in any case, so let * the CPU port leave the hardware LAG as well */ - dsa_master_lag_teardown(lag_dev, master->dsa_ptr); + dsa_conduit_lag_teardown(lag_dev, conduit->dsa_ptr); } -static int dsa_master_changeupper(struct net_device *dev, - struct netdev_notifier_changeupper_info *info) +static int dsa_conduit_changeupper(struct net_device *dev, + struct netdev_notifier_changeupper_info *info) { struct netlink_ext_ack *extack; int err = NOTIFY_DONE; @@ -3307,11 +3317,11 @@ static int dsa_master_changeupper(struct net_device *dev, if (netif_is_lag_master(info->upper_dev)) { if (info->linking) { - err = dsa_master_lag_join(dev, info->upper_dev, - info->upper_info, extack); + err = dsa_conduit_lag_join(dev, info->upper_dev, + info->upper_info, extack); err = notifier_from_errno(err); } else { - dsa_master_lag_leave(dev, info->upper_dev); + dsa_conduit_lag_leave(dev, info->upper_dev); err = NOTIFY_OK; } } @@ -3319,8 +3329,8 @@ static int dsa_master_changeupper(struct net_device *dev, return err; } -static int dsa_slave_netdevice_event(struct notifier_block *nb, - unsigned long event, void *ptr) +static int dsa_user_netdevice_event(struct notifier_block *nb, + unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); @@ -3329,15 +3339,15 @@ static int dsa_slave_netdevice_event(struct notifier_block *nb, struct netdev_notifier_changeupper_info *info = ptr; int err; - err = dsa_slave_prechangeupper_sanity_check(dev, info); + err = dsa_user_prechangeupper_sanity_check(dev, info); if (notifier_to_errno(err)) return err; - err = dsa_master_prechangeupper_sanity_check(dev, info); + err = dsa_conduit_prechangeupper_sanity_check(dev, info); if (notifier_to_errno(err)) return err; - err = dsa_lag_master_prechangelower_sanity_check(dev, info); + err = dsa_lag_conduit_prechangelower_sanity_check(dev, info); if (notifier_to_errno(err)) return err; @@ -3345,11 +3355,11 @@ static int dsa_slave_netdevice_event(struct notifier_block *nb, if (notifier_to_errno(err)) return err; - err = dsa_slave_prechangeupper(dev, ptr); + err = dsa_user_prechangeupper(dev, ptr); if (notifier_to_errno(err)) return err; - err = dsa_slave_lag_prechangeupper(dev, ptr); + err = dsa_user_lag_prechangeupper(dev, ptr); if (notifier_to_errno(err)) return err; @@ -3358,15 +3368,15 @@ static int 
dsa_slave_netdevice_event(struct notifier_block *nb, case NETDEV_CHANGEUPPER: { int err; - err = dsa_slave_changeupper(dev, ptr); + err = dsa_user_changeupper(dev, ptr); if (notifier_to_errno(err)) return err; - err = dsa_slave_lag_changeupper(dev, ptr); + err = dsa_user_lag_changeupper(dev, ptr); if (notifier_to_errno(err)) return err; - err = dsa_master_changeupper(dev, ptr); + err = dsa_conduit_changeupper(dev, ptr); if (notifier_to_errno(err)) return err; @@ -3377,13 +3387,13 @@ static int dsa_slave_netdevice_event(struct notifier_block *nb, struct dsa_port *dp; int err = 0; - if (dsa_slave_dev_check(dev)) { - dp = dsa_slave_to_port(dev); + if (dsa_user_dev_check(dev)) { + dp = dsa_user_to_port(dev); err = dsa_port_lag_change(dp, info->lower_state_info); } - /* Mirror LAG port events on DSA masters that are in + /* Mirror LAG port events on DSA conduits that are in * a LAG towards their respective switch CPU ports */ if (netdev_uses_dsa(dev)) { @@ -3396,28 +3406,28 @@ static int dsa_slave_netdevice_event(struct notifier_block *nb, } case NETDEV_CHANGE: case NETDEV_UP: { - /* Track state of master port. - * DSA driver may require the master port (and indirectly + /* Track state of conduit port. + * DSA driver may require the conduit port (and indirectly * the tagger) to be available for some special operation. */ if (netdev_uses_dsa(dev)) { struct dsa_port *cpu_dp = dev->dsa_ptr; struct dsa_switch_tree *dst = cpu_dp->ds->dst; - /* Track when the master port is UP */ - dsa_tree_master_oper_state_change(dst, dev, - netif_oper_up(dev)); + /* Track when the conduit port is UP */ + dsa_tree_conduit_oper_state_change(dst, dev, + netif_oper_up(dev)); - /* Track when the master port is ready and can accept + /* Track when the conduit port is ready and can accept * packets. * NETDEV_UP event is not enough to flag a port as ready. * We also have to wait for linkwatch_do_dev to dev_activate * and emit a NETDEV_CHANGE event. - * We check if a master port is ready by checking if the dev + * We check if a conduit port is ready by checking if the dev * has a qdisc assigned and is not noop.
*/ - dsa_tree_master_admin_state_change(dst, dev, - !qdisc_tx_is_noop(dev)); + dsa_tree_conduit_admin_state_change(dst, dev, + !qdisc_tx_is_noop(dev)); return NOTIFY_OK; } @@ -3435,7 +3445,7 @@ static int dsa_slave_netdevice_event(struct notifier_block *nb, cpu_dp = dev->dsa_ptr; dst = cpu_dp->ds->dst; - dsa_tree_master_admin_state_change(dst, dev, false); + dsa_tree_conduit_admin_state_change(dst, dev, false); list_for_each_entry(dp, &dst->ports, list) { if (!dsa_port_is_user(dp)) @@ -3444,7 +3454,7 @@ static int dsa_slave_netdevice_event(struct notifier_block *nb, if (dp->cpu_dp != cpu_dp) continue; - list_add(&dp->slave->close_list, &close_list); + list_add(&dp->user->close_list, &close_list); } dev_close_many(&close_list, true); @@ -3470,7 +3480,7 @@ dsa_fdb_offload_notify(struct dsa_switchdev_event_work *switchdev_work) switchdev_work->orig_dev, &info.info, NULL); } -static void dsa_slave_switchdev_event_work(struct work_struct *work) +static void dsa_user_switchdev_event_work(struct work_struct *work) { struct dsa_switchdev_event_work *switchdev_work = container_of(work, struct dsa_switchdev_event_work, work); @@ -3481,7 +3491,7 @@ static void dsa_slave_switchdev_event_work(struct work_struct *work) struct dsa_port *dp; int err; - dp = dsa_slave_to_port(dev); + dp = dsa_user_to_port(dev); ds = dp->ds; switch (switchdev_work->event) { @@ -3523,7 +3533,7 @@ static void dsa_slave_switchdev_event_work(struct work_struct *work) static bool dsa_foreign_dev_check(const struct net_device *dev, const struct net_device *foreign_dev) { - const struct dsa_port *dp = dsa_slave_to_port(dev); + const struct dsa_port *dp = dsa_user_to_port(dev); struct dsa_switch_tree *dst = dp->ds->dst; if (netif_is_bridge_master(foreign_dev)) @@ -3536,13 +3546,13 @@ static bool dsa_foreign_dev_check(const struct net_device *dev, return true; } -static int dsa_slave_fdb_event(struct net_device *dev, - struct net_device *orig_dev, - unsigned long event, const void *ctx, - const struct switchdev_notifier_fdb_info *fdb_info) +static int dsa_user_fdb_event(struct net_device *dev, + struct net_device *orig_dev, + unsigned long event, const void *ctx, + const struct switchdev_notifier_fdb_info *fdb_info) { struct dsa_switchdev_event_work *switchdev_work; - struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_port *dp = dsa_user_to_port(dev); bool host_addr = fdb_info->is_local; struct dsa_switch *ds = dp->ds; @@ -3591,7 +3601,7 @@ static int dsa_slave_fdb_event(struct net_device *dev, orig_dev->name, fdb_info->addr, fdb_info->vid, host_addr ? 
" as host address" : ""); - INIT_WORK(&switchdev_work->work, dsa_slave_switchdev_event_work); + INIT_WORK(&switchdev_work->work, dsa_user_switchdev_event_work); switchdev_work->event = event; switchdev_work->dev = dev; switchdev_work->orig_dev = orig_dev; @@ -3606,8 +3616,8 @@ static int dsa_slave_fdb_event(struct net_device *dev, } /* Called under rcu_read_lock() */ -static int dsa_slave_switchdev_event(struct notifier_block *unused, - unsigned long event, void *ptr) +static int dsa_user_switchdev_event(struct notifier_block *unused, + unsigned long event, void *ptr) { struct net_device *dev = switchdev_notifier_info_to_dev(ptr); int err; @@ -3615,15 +3625,15 @@ static int dsa_slave_switchdev_event(struct notifier_block *unused, switch (event) { case SWITCHDEV_PORT_ATTR_SET: err = switchdev_handle_port_attr_set(dev, ptr, - dsa_slave_dev_check, - dsa_slave_port_attr_set); + dsa_user_dev_check, + dsa_user_port_attr_set); return notifier_from_errno(err); case SWITCHDEV_FDB_ADD_TO_DEVICE: case SWITCHDEV_FDB_DEL_TO_DEVICE: err = switchdev_handle_fdb_event_to_device(dev, event, ptr, - dsa_slave_dev_check, + dsa_user_dev_check, dsa_foreign_dev_check, - dsa_slave_fdb_event); + dsa_user_fdb_event); return notifier_from_errno(err); default: return NOTIFY_DONE; @@ -3632,8 +3642,8 @@ static int dsa_slave_switchdev_event(struct notifier_block *unused, return NOTIFY_OK; } -static int dsa_slave_switchdev_blocking_event(struct notifier_block *unused, - unsigned long event, void *ptr) +static int dsa_user_switchdev_blocking_event(struct notifier_block *unused, + unsigned long event, void *ptr) { struct net_device *dev = switchdev_notifier_info_to_dev(ptr); int err; @@ -3641,52 +3651,52 @@ static int dsa_slave_switchdev_blocking_event(struct notifier_block *unused, switch (event) { case SWITCHDEV_PORT_OBJ_ADD: err = switchdev_handle_port_obj_add_foreign(dev, ptr, - dsa_slave_dev_check, + dsa_user_dev_check, dsa_foreign_dev_check, - dsa_slave_port_obj_add); + dsa_user_port_obj_add); return notifier_from_errno(err); case SWITCHDEV_PORT_OBJ_DEL: err = switchdev_handle_port_obj_del_foreign(dev, ptr, - dsa_slave_dev_check, + dsa_user_dev_check, dsa_foreign_dev_check, - dsa_slave_port_obj_del); + dsa_user_port_obj_del); return notifier_from_errno(err); case SWITCHDEV_PORT_ATTR_SET: err = switchdev_handle_port_attr_set(dev, ptr, - dsa_slave_dev_check, - dsa_slave_port_attr_set); + dsa_user_dev_check, + dsa_user_port_attr_set); return notifier_from_errno(err); } return NOTIFY_DONE; } -static struct notifier_block dsa_slave_nb __read_mostly = { - .notifier_call = dsa_slave_netdevice_event, +static struct notifier_block dsa_user_nb __read_mostly = { + .notifier_call = dsa_user_netdevice_event, }; -struct notifier_block dsa_slave_switchdev_notifier = { - .notifier_call = dsa_slave_switchdev_event, +struct notifier_block dsa_user_switchdev_notifier = { + .notifier_call = dsa_user_switchdev_event, }; -struct notifier_block dsa_slave_switchdev_blocking_notifier = { - .notifier_call = dsa_slave_switchdev_blocking_event, +struct notifier_block dsa_user_switchdev_blocking_notifier = { + .notifier_call = dsa_user_switchdev_blocking_event, }; -int dsa_slave_register_notifier(void) +int dsa_user_register_notifier(void) { struct notifier_block *nb; int err; - err = register_netdevice_notifier(&dsa_slave_nb); + err = register_netdevice_notifier(&dsa_user_nb); if (err) return err; - err = register_switchdev_notifier(&dsa_slave_switchdev_notifier); + err = register_switchdev_notifier(&dsa_user_switchdev_notifier); if (err) goto 
err_switchdev_nb; - nb = &dsa_slave_switchdev_blocking_notifier; + nb = &dsa_user_switchdev_blocking_notifier; err = register_switchdev_blocking_notifier(nb); if (err) goto err_switchdev_blocking_nb; @@ -3694,27 +3704,27 @@ int dsa_slave_register_notifier(void) return 0; err_switchdev_blocking_nb: - unregister_switchdev_notifier(&dsa_slave_switchdev_notifier); + unregister_switchdev_notifier(&dsa_user_switchdev_notifier); err_switchdev_nb: - unregister_netdevice_notifier(&dsa_slave_nb); + unregister_netdevice_notifier(&dsa_user_nb); return err; } -void dsa_slave_unregister_notifier(void) +void dsa_user_unregister_notifier(void) { struct notifier_block *nb; int err; - nb = &dsa_slave_switchdev_blocking_notifier; + nb = &dsa_user_switchdev_blocking_notifier; err = unregister_switchdev_blocking_notifier(nb); if (err) pr_err("DSA: failed to unregister switchdev blocking notifier (%d)\n", err); - err = unregister_switchdev_notifier(&dsa_slave_switchdev_notifier); + err = unregister_switchdev_notifier(&dsa_user_switchdev_notifier); if (err) pr_err("DSA: failed to unregister switchdev notifier (%d)\n", err); - err = unregister_netdevice_notifier(&dsa_slave_nb); + err = unregister_netdevice_notifier(&dsa_user_nb); if (err) - pr_err("DSA: failed to unregister slave notifier (%d)\n", err); + pr_err("DSA: failed to unregister user notifier (%d)\n", err); } diff --git a/net/dsa/user.h b/net/dsa/user.h new file mode 100644 index 0000000000..996069130b --- /dev/null +++ b/net/dsa/user.h @@ -0,0 +1,69 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#ifndef __DSA_USER_H +#define __DSA_USER_H + +#include <linux/if_bridge.h> +#include <linux/if_vlan.h> +#include <linux/list.h> +#include <linux/netpoll.h> +#include <linux/types.h> +#include <net/dsa.h> +#include <net/gro_cells.h> + +struct net_device; +struct netlink_ext_ack; + +extern struct notifier_block dsa_user_switchdev_notifier; +extern struct notifier_block dsa_user_switchdev_blocking_notifier; + +struct dsa_user_priv { + /* Copy of CPU port xmit for faster access in user transmit hot path */ + struct sk_buff * (*xmit)(struct sk_buff *skb, + struct net_device *dev); + + struct gro_cells gcells; + + /* DSA port data, such as switch, port index, etc. 
*/ + struct dsa_port *dp; + +#ifdef CONFIG_NET_POLL_CONTROLLER + struct netpoll *netpoll; +#endif + + /* TC context */ + struct list_head mall_tc_list; +}; + +void dsa_user_mii_bus_init(struct dsa_switch *ds); +int dsa_user_create(struct dsa_port *dp); +void dsa_user_destroy(struct net_device *user_dev); +int dsa_user_suspend(struct net_device *user_dev); +int dsa_user_resume(struct net_device *user_dev); +int dsa_user_register_notifier(void); +void dsa_user_unregister_notifier(void); +void dsa_user_sync_ha(struct net_device *dev); +void dsa_user_unsync_ha(struct net_device *dev); +void dsa_user_setup_tagger(struct net_device *user); +int dsa_user_change_mtu(struct net_device *dev, int new_mtu); +int dsa_user_change_conduit(struct net_device *dev, struct net_device *conduit, + struct netlink_ext_ack *extack); +int dsa_user_manage_vlan_filtering(struct net_device *dev, + bool vlan_filtering); + +static inline struct dsa_port *dsa_user_to_port(const struct net_device *dev) +{ + struct dsa_user_priv *p = netdev_priv(dev); + + return p->dp; +} + +static inline struct net_device * +dsa_user_to_conduit(const struct net_device *dev) +{ + struct dsa_port *dp = dsa_user_to_port(dev); + + return dsa_port_to_conduit(dp); +} + +#endif diff --git a/net/ethtool/common.c b/net/ethtool/common.c index f5598c5f50..b4419fb6df 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -685,3 +685,24 @@ ethtool_params_from_link_mode(struct ethtool_link_ksettings *link_ksettings, link_ksettings->base.duplex = link_info->duplex; } EXPORT_SYMBOL_GPL(ethtool_params_from_link_mode); + +/** + * ethtool_forced_speed_maps_init + * @maps: Pointer to an array of Ethtool forced speed maps + * @size: Array size + * + * Initialize an array of Ethtool forced speed maps to Ethtool link modes. This + * should be called during driver module init. + */ +void +ethtool_forced_speed_maps_init(struct ethtool_forced_speed_map *maps, u32 size) +{ + for (u32 i = 0; i < size; i++) { + struct ethtool_forced_speed_map *map = &maps[i]; + + linkmode_set_bit_array(map->cap_arr, map->arr_size, map->caps); + map->cap_arr = NULL; + map->arr_size = 0; + } +} +EXPORT_SYMBOL_GPL(ethtool_forced_speed_maps_init); diff --git a/net/handshake/genl.c b/net/handshake/genl.c index 233be5cbfe..f55d14d7b7 100644 --- a/net/handshake/genl.c +++ b/net/handshake/genl.c @@ -18,7 +18,7 @@ static const struct nla_policy handshake_accept_nl_policy[HANDSHAKE_A_ACCEPT_HAN /* HANDSHAKE_CMD_DONE - do */ static const struct nla_policy handshake_done_nl_policy[HANDSHAKE_A_DONE_REMOTE_AUTH + 1] = { [HANDSHAKE_A_DONE_STATUS] = { .type = NLA_U32, }, - [HANDSHAKE_A_DONE_SOCKFD] = { .type = NLA_U32, }, + [HANDSHAKE_A_DONE_SOCKFD] = { .type = NLA_S32, }, [HANDSHAKE_A_DONE_REMOTE_AUTH] = { .type = NLA_U32, }, }; diff --git a/net/handshake/handshake-test.c b/net/handshake/handshake-test.c index 16ed7bfd29..34fd1d9b2d 100644 --- a/net/handshake/handshake-test.c +++ b/net/handshake/handshake-test.c @@ -471,7 +471,10 @@ static void handshake_req_destroy_test1(struct kunit *test) handshake_req_cancel(sock->sk); /* Act */ - fput(filp); + /* Ensure the close/release/put process has run to + * completion before checking the result.
+ */ + __fput_sync(filp); /* Assert */ KUNIT_EXPECT_PTR_EQ(test, handshake_req_destroy_test, req); diff --git a/net/handshake/netlink.c b/net/handshake/netlink.c index 80c7302692..89637e7328 100644 --- a/net/handshake/netlink.c +++ b/net/handshake/netlink.c @@ -143,7 +143,7 @@ int handshake_nl_done_doit(struct sk_buff *skb, struct genl_info *info) if (GENL_REQ_ATTR_CHECK(info, HANDSHAKE_A_DONE_SOCKFD)) return -EINVAL; - fd = nla_get_u32(info->attrs[HANDSHAKE_A_DONE_SOCKFD]); + fd = nla_get_s32(info->attrs[HANDSHAKE_A_DONE_SOCKFD]); sock = sockfd_lookup(fd, &err); if (!sock) diff --git a/net/handshake/tlshd.c b/net/handshake/tlshd.c index bbfb4095dd..d697f68c59 100644 --- a/net/handshake/tlshd.c +++ b/net/handshake/tlshd.c @@ -173,9 +173,9 @@ static int tls_handshake_put_certificate(struct sk_buff *msg, if (!entry_attr) return -EMSGSIZE; - if (nla_put_u32(msg, HANDSHAKE_A_X509_CERT, + if (nla_put_s32(msg, HANDSHAKE_A_X509_CERT, treq->th_certificate) || - nla_put_u32(msg, HANDSHAKE_A_X509_PRIVKEY, + nla_put_s32(msg, HANDSHAKE_A_X509_PRIVKEY, treq->th_privkey)) { nla_nest_cancel(msg, entry_attr); return -EMSGSIZE; @@ -214,7 +214,7 @@ static int tls_handshake_accept(struct handshake_req *req, goto out_cancel; ret = -EMSGSIZE; - ret = nla_put_u32(msg, HANDSHAKE_A_ACCEPT_SOCKFD, fd); + ret = nla_put_s32(msg, HANDSHAKE_A_ACCEPT_SOCKFD, fd); if (ret < 0) goto out_cancel; ret = nla_put_u32(msg, HANDSHAKE_A_ACCEPT_MESSAGE_TYPE, treq->th_type); diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index 306f942c3b..dd4b5f0aa1 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -291,7 +291,7 @@ static void send_hsr_supervision_frame(struct hsr_port *master, skb = hsr_init_skb(master); if (!skb) { - WARN_ONCE(1, "HSR: Could not send supervision frame\n"); + netdev_warn_once(master->dev, "HSR: Could not send supervision frame\n"); return; } @@ -338,7 +338,7 @@ static void send_prp_supervision_frame(struct hsr_port *master, skb = hsr_init_skb(master); if (!skb) { - WARN_ONCE(1, "PRP: Could not send supervision frame\n"); + netdev_warn_once(master->dev, "PRP: Could not send supervision frame\n"); return; } diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 2dfb12230f..8e94ed7c56 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -741,10 +741,27 @@ config DEFAULT_TCP_CONG default "bbr" if DEFAULT_BBR default "cubic" +config TCP_SIGPOOL + tristate + +config TCP_AO + bool "TCP: Authentication Option (RFC5925)" + select CRYPTO + select TCP_SIGPOOL + depends on 64BIT && IPV6 != m # seq-number extension needs WRITE_ONCE(u64) + help + TCP-AO specifies the use of stronger Message Authentication Codes (MACs), + protects against replays for long-lived TCP connections, and + provides more details on the association of security with TCP + connections than TCP MD5 (See RFC5925) + + If unsure, say N. + config TCP_MD5SIG bool "TCP: MD5 Signature Option support (RFC2385)" select CRYPTO select CRYPTO_MD5 + select TCP_SIGPOOL help RFC2385 specifies a method of giving MD5 protection to TCP sessions. Its main (only?) 
use is to protect BGP sessions between core routers diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index b18ba8ef93..e144a02a6a 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -62,12 +62,14 @@ obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o +obj-$(CONFIG_TCP_SIGPOOL) += tcp_sigpool.o obj-$(CONFIG_NET_SOCK_MSG) += tcp_bpf.o obj-$(CONFIG_BPF_SYSCALL) += udp_bpf.o obj-$(CONFIG_NETLABEL) += cipso_ipv4.o obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \ xfrm4_output.o xfrm4_protocol.o +obj-$(CONFIG_TCP_AO) += tcp_ao.o ifeq ($(CONFIG_BPF_JIT),y) obj-$(CONFIG_BPF_SYSCALL) += bpf_tcp_ca.o diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 1c58bd72e1..e59962f34c 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1628,10 +1628,12 @@ EXPORT_SYMBOL(inet_current_timestamp); int inet_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) { - if (sk->sk_family == AF_INET) + unsigned int family = READ_ONCE(sk->sk_family); + + if (family == AF_INET) return ip_recv_error(sk, msg, len, addr_len); #if IS_ENABLED(CONFIG_IPV6) - if (sk->sk_family == AF_INET6) + if (family == AF_INET6) return pingv6_ops.ipv6_recv_error(sk, msg, len, addr_len); #endif return -EINVAL; diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 015c0f4ec5..a2e6e1fdf8 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -1,8 +1,8 @@ // SPDX-License-Identifier: GPL-2.0-only #define pr_fmt(fmt) "IPsec: " fmt -#include <crypto/algapi.h> #include <crypto/hash.h> +#include <crypto/utils.h> #include <linux/err.h> #include <linux/module.h> #include <linux/slab.h> @@ -27,9 +27,7 @@ static void *ah_alloc_tmp(struct crypto_ahash *ahash, int nfrags, { unsigned int len; - len = size + crypto_ahash_digestsize(ahash) + - (crypto_ahash_alignmask(ahash) & - ~(crypto_tfm_ctx_alignment() - 1)); + len = size + crypto_ahash_digestsize(ahash); len = ALIGN(len, crypto_tfm_ctx_alignment()); @@ -46,10 +44,9 @@ static inline u8 *ah_tmp_auth(void *tmp, unsigned int offset) return tmp + offset; } -static inline u8 *ah_tmp_icv(struct crypto_ahash *ahash, void *tmp, - unsigned int offset) +static inline u8 *ah_tmp_icv(void *tmp, unsigned int offset) { - return PTR_ALIGN((u8 *)tmp + offset, crypto_ahash_alignmask(ahash) + 1); + return tmp + offset; } static inline struct ahash_request *ah_tmp_req(struct crypto_ahash *ahash, @@ -129,7 +126,7 @@ static void ah_output_done(void *data, int err) int ihl = ip_hdrlen(skb); iph = AH_SKB_CB(skb)->tmp; - icv = ah_tmp_icv(ahp->ahash, iph, ihl); + icv = ah_tmp_icv(iph, ihl); memcpy(ah->auth_data, icv, ahp->icv_trunc_len); top_iph->tos = iph->tos; @@ -182,7 +179,7 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb) if (!iph) goto out; seqhi = (__be32 *)((char *)iph + ihl); - icv = ah_tmp_icv(ahash, seqhi, seqhi_len); + icv = ah_tmp_icv(seqhi, seqhi_len); req = ah_tmp_req(ahash, icv); sg = ah_req_sg(ahash, req); seqhisg = sg + nfrags; @@ -279,7 +276,7 @@ static void ah_input_done(void *data, int err) work_iph = AH_SKB_CB(skb)->tmp; auth_data = ah_tmp_auth(work_iph, ihl); - icv = ah_tmp_icv(ahp->ahash, auth_data, ahp->icv_trunc_len); + icv = ah_tmp_icv(auth_data, ahp->icv_trunc_len); err = crypto_memneq(icv, auth_data, ahp->icv_trunc_len) ? 
-EBADMSG : 0; if (err) @@ -374,7 +371,7 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb) seqhi = (__be32 *)((char *)work_iph + ihl); auth_data = ah_tmp_auth(seqhi, seqhi_len); - icv = ah_tmp_icv(ahash, auth_data, ahp->icv_trunc_len); + icv = ah_tmp_icv(auth_data, ahp->icv_trunc_len); req = ah_tmp_req(ahash, icv); sg = ah_req_sg(ahash, req); seqhisg = sg + nfrags; diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 9456f5bb35..0d0d725b46 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1125,7 +1125,8 @@ static int arp_req_get(struct arpreq *r, struct net_device *dev) if (neigh) { if (!(READ_ONCE(neigh->nud_state) & NUD_NOARP)) { read_lock_bh(&neigh->lock); - memcpy(r->arp_ha.sa_data, neigh->ha, dev->addr_len); + memcpy(r->arp_ha.sa_data, neigh->ha, + min(dev->addr_len, sizeof(r->arp_ha.sa_data_min))); r->arp_flags = arp_state_to_flags(neigh); read_unlock_bh(&neigh->lock); r->arp_ha.sa_family = dev->type; diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index cb5dbee9e0..2cc50cbfc2 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -39,11 +39,11 @@ int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len saddr = inet->inet_saddr; if (ipv4_is_multicast(usin->sin_addr.s_addr)) { if (!oif || netif_index_is_l3_master(sock_net(sk), oif)) - oif = inet->mc_index; + oif = READ_ONCE(inet->mc_index); if (!saddr) - saddr = inet->mc_addr; + saddr = READ_ONCE(inet->mc_addr); } else if (!oif) { - oif = inet->uc_index; + oif = READ_ONCE(inet->uc_index); } fl4 = &inet->cork.fl.u.ip4; rt = ip_route_connect(fl4, usin->sin_addr.s_addr, saddr, oif, diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index ca0ff15dc8..bc74f131fe 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1825,6 +1825,21 @@ done: return err; } +/* Combine dev_addr_genid and dev_base_seq to detect changes. + */ +static u32 inet_base_seq(const struct net *net) +{ + u32 res = atomic_read(&net->ipv4.dev_addr_genid) + + net->dev_base_seq; + + /* Must not return 0 (see nl_dump_check_consistent()). + * Chose a value far away from 0. 
+ */ + if (!res) + res = 0x80000000; + return res; +} + static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) { const struct nlmsghdr *nlh = cb->nlh; @@ -1876,8 +1891,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) idx = 0; head = &tgt_net->dev_index_head[h]; rcu_read_lock(); - cb->seq = atomic_read(&tgt_net->ipv4.dev_addr_genid) ^ - tgt_net->dev_base_seq; + cb->seq = inet_base_seq(tgt_net); hlist_for_each_entry_rcu(dev, head, index_hlist) { if (idx < s_idx) goto cont; @@ -2278,8 +2292,7 @@ static int inet_netconf_dump_devconf(struct sk_buff *skb, idx = 0; head = &net->dev_index_head[h]; rcu_read_lock(); - cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^ - net->dev_base_seq; + cb->seq = inet_base_seq(net); hlist_for_each_entry_rcu(dev, head, index_hlist) { if (idx < s_idx) goto cont; diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c index 10e96ed6c9..b3271957ad 100644 --- a/net/ipv4/esp4_offload.c +++ b/net/ipv4/esp4_offload.c @@ -33,6 +33,7 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head, int offset = skb_gro_offset(skb); struct xfrm_offload *xo; struct xfrm_state *x; + int encap_type = 0; __be32 seq; __be32 spi; @@ -70,6 +71,9 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head, xo->flags |= XFRM_GRO; + if (NAPI_GRO_CB(skb)->proto == IPPROTO_UDP) + encap_type = UDP_ENCAP_ESPINUDP; + XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL; XFRM_SPI_SKB_CB(skb)->family = AF_INET; XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr); @@ -77,7 +81,7 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head, /* We don't need to handle errors from xfrm_input, it does all * the error handling and frees the resources on error. */ - xfrm_input(skb, IPPROTO_ESP, spi, -2); + xfrm_input(skb, IPPROTO_ESP, spi, encap_type); return ERR_PTR(-EINPROGRESS); out_reset: diff --git a/net/ipv4/fou_bpf.c b/net/ipv4/fou_bpf.c index 3760a14b6b..4da03bf45c 100644 --- a/net/ipv4/fou_bpf.c +++ b/net/ipv4/fou_bpf.c @@ -22,9 +22,7 @@ enum bpf_fou_encap_type { FOU_BPF_ENCAP_GUE, }; -__diag_push(); -__diag_ignore_all("-Wmissing-prototypes", - "Global functions as their definitions will be in BTF"); +__bpf_kfunc_start_defs(); /* bpf_skb_set_fou_encap - Set FOU encap parameters * @@ -100,7 +98,7 @@ __bpf_kfunc int bpf_skb_get_fou_encap(struct __sk_buff *skb_ctx, return 0; } -__diag_pop() +__bpf_kfunc_end_defs(); BTF_SET8_START(fou_kfunc_set) BTF_ID_FLAGS(func, bpf_skb_set_fou_encap) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index b8607763d1..e63a3bf996 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -517,7 +517,7 @@ static struct rtable *icmp_route_lookup(struct net *net, } else return rt; - err = xfrm_decode_session_reverse(skb_in, flowi4_to_flowi(&fl4_dec), AF_INET); + err = xfrm_decode_session_reverse(net, skb_in, flowi4_to_flowi(&fl4_dec), AF_INET); if (err) goto relookup_failed; diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index d515881d02..efeeca2b13 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -2946,8 +2946,6 @@ static struct ip_sf_list *igmp_mcf_get_next(struct seq_file *seq, struct ip_sf_l continue; state->im = rcu_dereference(state->idev->mc_list); } - if (!state->im) - break; spin_lock_bh(&state->im->lock); psf = state->im->sources; } diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index e13a844334..7d0e7aaa71 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -134,7 +134,7 @@ int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb, * hence this 
needs to be included regardless of socket family. */ if (ext & (1 << (INET_DIAG_TOS - 1))) - if (nla_put_u8(skb, INET_DIAG_TOS, inet->tos) < 0) + if (nla_put_u8(skb, INET_DIAG_TOS, READ_ONCE(inet->tos)) < 0) goto errout; #if IS_ENABLED(CONFIG_IPV6) @@ -165,7 +165,7 @@ int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb, * For cgroup2 classid is always zero. */ if (!classid) - classid = sk->sk_priority; + classid = READ_ONCE(sk->sk_priority); if (nla_put_u32(skb, INET_DIAG_CLASS_ID, classid)) goto errout; @@ -1481,5 +1481,6 @@ static void __exit inet_diag_exit(void) module_init(inet_diag_init); module_exit(inet_diag_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("INET/INET6: socket monitoring via SOCK_DIAG"); MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2 /* AF_INET */); MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 10 /* AF_INET6 */); diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index a532f749e4..9456bf9e27 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -1131,10 +1131,33 @@ ok: return 0; error: + if (sk_hashed(sk)) { + spinlock_t *lock = inet_ehash_lockp(hinfo, sk->sk_hash); + + sock_prot_inuse_add(net, sk->sk_prot, -1); + + spin_lock(lock); + sk_nulls_del_node_init_rcu(sk); + spin_unlock(lock); + + sk->sk_hash = 0; + inet_sk(sk)->inet_sport = 0; + inet_sk(sk)->inet_num = 0; + + if (tw) + inet_twsk_bind_unhash(tw, hinfo); + } + spin_unlock(&head2->lock); if (tb_created) inet_bind_bucket_destroy(hinfo->bind_bucket_cachep, tb); - spin_unlock_bh(&head->lock); + spin_unlock(&head->lock); + + if (tw) + inet_twsk_deschedule_put(tw); + + local_bh_enable(); + return -ENOMEM; } diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 66fac1216d..8b65f12583 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -66,8 +66,6 @@ static int ip_forward_finish(struct net *net, struct sock *sk, struct sk_buff *s { struct ip_options *opt = &(IPCB(skb)->opt); - __IP_INC_STATS(net, IPSTATS_MIB_OUTFORWDATAGRAMS); - #ifdef CONFIG_NET_SWITCHDEV if (skb->offload_l3_fwd_mark) { consume_skb(skb); @@ -130,6 +128,8 @@ int ip_forward(struct sk_buff *skb) if (opt->is_strictroute && rt->rt_uses_gateway) goto sr_failed; + __IP_INC_STATS(net, IPSTATS_MIB_OUTFORWDATAGRAMS); + IPCB(skb)->flags |= IPSKB_FORWARDED; mtu = ip_dst_mtu_maybe_forward(&rt->dst, true); if (ip_exceeds_mtu(skb, mtu)) { diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 4ab877cf6d..41537d18ee 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -101,6 +101,8 @@ int __ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb) { struct iphdr *iph = ip_hdr(skb); + IP_INC_STATS(net, IPSTATS_MIB_OUTREQUESTS); + iph_set_totlen(iph, skb->len); ip_send_check(iph); @@ -544,7 +546,7 @@ EXPORT_SYMBOL(__ip_queue_xmit); int ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl) { - return __ip_queue_xmit(sk, skb, fl, inet_sk(sk)->tos); + return __ip_queue_xmit(sk, skb, fl, READ_ONCE(inet_sk(sk)->tos)); } EXPORT_SYMBOL(ip_queue_xmit); @@ -1285,6 +1287,12 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork, if (unlikely(!rt)) return -EFAULT; + cork->fragsize = ip_sk_use_pmtu(sk) ? + dst_mtu(&rt->dst) : READ_ONCE(rt->dst.dev->mtu); + + if (!inetdev_valid_mtu(cork->fragsize)) + return -ENETUNREACH; + /* * setup for corking. */ @@ -1301,12 +1309,6 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork, cork->addr = ipc->addr; } - cork->fragsize = ip_sk_use_pmtu(sk) ? 
- dst_mtu(&rt->dst) : READ_ONCE(rt->dst.dev->mtu); - - if (!inetdev_valid_mtu(cork->fragsize)) - return -ENETUNREACH; - cork->gso_size = ipc->gso_size; cork->dst = &rt->dst; @@ -1387,8 +1389,8 @@ struct sk_buff *__ip_make_skb(struct sock *sk, struct ip_options *opt = NULL; struct rtable *rt = (struct rtable *)cork->dst; struct iphdr *iph; + u8 pmtudisc, ttl; __be16 df = 0; - __u8 ttl; skb = __skb_dequeue(queue); if (!skb) @@ -1418,8 +1420,9 @@ struct sk_buff *__ip_make_skb(struct sock *sk, /* DF bit is set when we want to see DF on outgoing frames. * If ignore_df is set too, we still allow to fragment this frame * locally. */ - if (inet->pmtudisc == IP_PMTUDISC_DO || - inet->pmtudisc == IP_PMTUDISC_PROBE || + pmtudisc = READ_ONCE(inet->pmtudisc); + if (pmtudisc == IP_PMTUDISC_DO || + pmtudisc == IP_PMTUDISC_PROBE || (skb->len <= dst_mtu(&rt->dst) && ip_dont_fragment(sk, &rt->dst))) df = htons(IP_DF); @@ -1430,14 +1433,14 @@ struct sk_buff *__ip_make_skb(struct sock *sk, if (cork->ttl != 0) ttl = cork->ttl; else if (rt->rt_type == RTN_MULTICAST) - ttl = inet->mc_ttl; + ttl = READ_ONCE(inet->mc_ttl); else ttl = ip_select_ttl(inet, &rt->dst); iph = ip_hdr(skb); iph->version = 4; iph->ihl = 5; - iph->tos = (cork->tos != -1) ? cork->tos : inet->tos; + iph->tos = (cork->tos != -1) ? cork->tos : READ_ONCE(inet->tos); iph->frag_off = df; iph->ttl = ttl; iph->protocol = sk->sk_protocol; @@ -1449,7 +1452,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk, ip_options_build(skb, opt, cork->addr, rt); } - skb->priority = (cork->tos != -1) ? cork->priority: sk->sk_priority; + skb->priority = (cork->tos != -1) ? cork->priority: READ_ONCE(sk->sk_priority); skb->mark = cork->mark; skb->tstamp = cork->transmit_time; /* diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index cce9cb25f3..8a88e705d8 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -587,12 +587,14 @@ out: void __ip_sock_set_tos(struct sock *sk, int val) { + u8 old_tos = inet_sk(sk)->tos; + if (sk->sk_type == SOCK_STREAM) { val &= ~INET_ECN_MASK; - val |= inet_sk(sk)->tos & INET_ECN_MASK; + val |= old_tos & INET_ECN_MASK; } - if (inet_sk(sk)->tos != val) { - inet_sk(sk)->tos = val; + if (old_tos != val) { + WRITE_ONCE(inet_sk(sk)->tos, val); WRITE_ONCE(sk->sk_priority, rt_tos2priority(val)); sk_dst_reset(sk); } @@ -600,9 +602,9 @@ void __ip_sock_set_tos(struct sock *sk, int val) void ip_sock_set_tos(struct sock *sk, int val) { - lock_sock(sk); + sockopt_lock_sock(sk); __ip_sock_set_tos(sk, val); - release_sock(sk); + sockopt_release_sock(sk); } EXPORT_SYMBOL(ip_sock_set_tos); @@ -622,9 +624,7 @@ int ip_sock_set_mtu_discover(struct sock *sk, int val) { if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_OMIT) return -EINVAL; - lock_sock(sk); - inet_sk(sk)->pmtudisc = val; - release_sock(sk); + WRITE_ONCE(inet_sk(sk)->pmtudisc, val); return 0; } EXPORT_SYMBOL(ip_sock_set_mtu_discover); @@ -1039,6 +1039,22 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname, WRITE_ONCE(inet->min_ttl, val); return 0; + case IP_MULTICAST_TTL: + if (sk->sk_type == SOCK_STREAM) + return -EINVAL; + if (optlen < 1) + return -EINVAL; + if (val == -1) + val = 1; + if (val < 0 || val > 255) + return -EINVAL; + WRITE_ONCE(inet->mc_ttl, val); + return 0; + case IP_MTU_DISCOVER: + return ip_sock_set_mtu_discover(sk, val); + case IP_TOS: /* This sets both TOS and Precedence */ + ip_sock_set_tos(sk, val); + return 0; } err = 0; @@ -1093,25 +1109,6 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname, } } break; - case IP_TOS: /* 
This sets both TOS and Precedence */ - __ip_sock_set_tos(sk, val); - break; - case IP_MTU_DISCOVER: - if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_OMIT) - goto e_inval; - inet->pmtudisc = val; - break; - case IP_MULTICAST_TTL: - if (sk->sk_type == SOCK_STREAM) - goto e_inval; - if (optlen < 1) - goto e_inval; - if (val == -1) - val = 1; - if (val < 0 || val > 255) - goto e_inval; - inet->mc_ttl = val; - break; case IP_UNICAST_IF: { struct net_device *dev = NULL; @@ -1123,7 +1120,7 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname, ifindex = (__force int)ntohl((__force __be32)val); if (ifindex == 0) { - inet->uc_index = 0; + WRITE_ONCE(inet->uc_index, 0); err = 0; break; } @@ -1140,7 +1137,7 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname, if (sk->sk_bound_dev_if && midx != sk->sk_bound_dev_if) break; - inet->uc_index = ifindex; + WRITE_ONCE(inet->uc_index, ifindex); err = 0; break; } @@ -1178,8 +1175,8 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname, if (!mreq.imr_ifindex) { if (mreq.imr_address.s_addr == htonl(INADDR_ANY)) { - inet->mc_index = 0; - inet->mc_addr = 0; + WRITE_ONCE(inet->mc_index, 0); + WRITE_ONCE(inet->mc_addr, 0); err = 0; break; } @@ -1204,8 +1201,8 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname, midx != sk->sk_bound_dev_if) break; - inet->mc_index = mreq.imr_ifindex; - inet->mc_addr = mreq.imr_address.s_addr; + WRITE_ONCE(inet->mc_index, mreq.imr_ifindex); + WRITE_ONCE(inet->mc_addr, mreq.imr_address.s_addr); err = 0; break; } @@ -1369,12 +1366,13 @@ e_inval: * ipv4_pktinfo_prepare - transfer some info from rtable to skb * @sk: socket * @skb: buffer + * @drop_dst: if true, drops skb dst * * To support IP_CMSG_PKTINFO option, we store rt_iif and specific * destination in skb->cb[] before dst drop. * This way, receiver doesn't make cache line misses to read rtable. 
*/ -void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb) +void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb, bool drop_dst) { struct in_pktinfo *pktinfo = PKTINFO_SKB_CB(skb); bool prepare = inet_test_bit(PKTINFO, sk) || @@ -1403,7 +1401,8 @@ void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb) pktinfo->ipi_ifindex = 0; pktinfo->ipi_spec_dst.s_addr = 0; } - skb_dst_drop(skb); + if (drop_dst) + skb_dst_drop(skb); } int ip_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, @@ -1592,27 +1591,29 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname, case IP_MINTTL: val = READ_ONCE(inet->min_ttl); goto copyval; - } - - if (needs_rtnl) - rtnl_lock(); - sockopt_lock_sock(sk); - - switch (optname) { + case IP_MULTICAST_TTL: + val = READ_ONCE(inet->mc_ttl); + goto copyval; + case IP_MTU_DISCOVER: + val = READ_ONCE(inet->pmtudisc); + goto copyval; + case IP_TOS: + val = READ_ONCE(inet->tos); + goto copyval; case IP_OPTIONS: { unsigned char optbuf[sizeof(struct ip_options)+40]; struct ip_options *opt = (struct ip_options *)optbuf; struct ip_options_rcu *inet_opt; - inet_opt = rcu_dereference_protected(inet->inet_opt, - lockdep_sock_is_held(sk)); + rcu_read_lock(); + inet_opt = rcu_dereference(inet->inet_opt); opt->optlen = 0; if (inet_opt) memcpy(optbuf, &inet_opt->opt, sizeof(struct ip_options) + inet_opt->opt.optlen); - sockopt_release_sock(sk); + rcu_read_unlock(); if (opt->optlen == 0) { len = 0; @@ -1628,12 +1629,6 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname, return -EFAULT; return 0; } - case IP_TOS: - val = inet->tos; - break; - case IP_MTU_DISCOVER: - val = inet->pmtudisc; - break; case IP_MTU: { struct dst_entry *dst; @@ -1643,24 +1638,55 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname, val = dst_mtu(dst); dst_release(dst); } - if (!val) { - sockopt_release_sock(sk); + if (!val) return -ENOTCONN; + goto copyval; + } + case IP_PKTOPTIONS: + { + struct msghdr msg; + + if (sk->sk_type != SOCK_STREAM) + return -ENOPROTOOPT; + + if (optval.is_kernel) { + msg.msg_control_is_user = false; + msg.msg_control = optval.kernel; + } else { + msg.msg_control_is_user = true; + msg.msg_control_user = optval.user; } - break; + msg.msg_controllen = len; + msg.msg_flags = in_compat_syscall() ? 
MSG_CMSG_COMPAT : 0; + + if (inet_test_bit(PKTINFO, sk)) { + struct in_pktinfo info; + + info.ipi_addr.s_addr = READ_ONCE(inet->inet_rcv_saddr); + info.ipi_spec_dst.s_addr = READ_ONCE(inet->inet_rcv_saddr); + info.ipi_ifindex = READ_ONCE(inet->mc_index); + put_cmsg(&msg, SOL_IP, IP_PKTINFO, sizeof(info), &info); + } + if (inet_test_bit(TTL, sk)) { + int hlim = READ_ONCE(inet->mc_ttl); + + put_cmsg(&msg, SOL_IP, IP_TTL, sizeof(hlim), &hlim); + } + if (inet_test_bit(TOS, sk)) { + int tos = READ_ONCE(inet->rcv_tos); + put_cmsg(&msg, SOL_IP, IP_TOS, sizeof(tos), &tos); + } + len -= msg.msg_controllen; + return copy_to_sockptr(optlen, &len, sizeof(int)); } - case IP_MULTICAST_TTL: - val = inet->mc_ttl; - break; case IP_UNICAST_IF: - val = (__force int)htonl((__u32) inet->uc_index); - break; + val = (__force int)htonl((__u32) READ_ONCE(inet->uc_index)); + goto copyval; case IP_MULTICAST_IF: { struct in_addr addr; len = min_t(unsigned int, len, sizeof(struct in_addr)); - addr.s_addr = inet->mc_addr; - sockopt_release_sock(sk); + addr.s_addr = READ_ONCE(inet->mc_addr); if (copy_to_sockptr(optlen, &len, sizeof(int))) return -EFAULT; @@ -1668,6 +1694,13 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname, return -EFAULT; return 0; } + } + + if (needs_rtnl) + rtnl_lock(); + sockopt_lock_sock(sk); + + switch (optname) { case IP_MSFILTER: { struct ip_msfilter msf; @@ -1690,44 +1723,6 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname, else err = ip_get_mcast_msfilter(sk, optval, optlen, len); goto out; - case IP_PKTOPTIONS: - { - struct msghdr msg; - - sockopt_release_sock(sk); - - if (sk->sk_type != SOCK_STREAM) - return -ENOPROTOOPT; - - if (optval.is_kernel) { - msg.msg_control_is_user = false; - msg.msg_control = optval.kernel; - } else { - msg.msg_control_is_user = true; - msg.msg_control_user = optval.user; - } - msg.msg_controllen = len; - msg.msg_flags = in_compat_syscall() ? 
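
The relocated IP_PKTOPTIONS path above replays per-packet metadata (IP_PKTINFO, IP_TTL, IP_TOS) to the caller as control messages. From the application side the same metadata is requested with IP_PKTINFO and read back via recvmsg(); a minimal UDP receiver sketch (error handling trimmed, port number is a placeholder):

#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>

int main(void)
{
        int fd = socket(AF_INET, SOCK_DGRAM, 0), one = 1;
        struct sockaddr_in sin = { .sin_family = AF_INET,
                                   .sin_port = htons(5000) };
        char payload[1500], cbuf[256];
        struct iovec iov = { payload, sizeof(payload) };
        struct msghdr msg = { .msg_iov = &iov, .msg_iovlen = 1,
                              .msg_control = cbuf,
                              .msg_controllen = sizeof(cbuf) };
        struct cmsghdr *cm;

        setsockopt(fd, IPPROTO_IP, IP_PKTINFO, &one, sizeof(one));
        bind(fd, (struct sockaddr *)&sin, sizeof(sin));

        if (recvmsg(fd, &msg, 0) < 0)
                return 1;
        for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) {
                if (cm->cmsg_level == IPPROTO_IP &&
                    cm->cmsg_type == IP_PKTINFO) {
                        struct in_pktinfo pi;

                        memcpy(&pi, CMSG_DATA(cm), sizeof(pi));
                        printf("ifindex %d dst %s\n", pi.ipi_ifindex,
                               inet_ntoa(pi.ipi_addr));
                }
        }
        return 0;
}
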
MSG_CMSG_COMPAT : 0; - - if (inet_test_bit(PKTINFO, sk)) { - struct in_pktinfo info; - - info.ipi_addr.s_addr = inet->inet_rcv_saddr; - info.ipi_spec_dst.s_addr = inet->inet_rcv_saddr; - info.ipi_ifindex = inet->mc_index; - put_cmsg(&msg, SOL_IP, IP_PKTINFO, sizeof(info), &info); - } - if (inet_test_bit(TTL, sk)) { - int hlim = inet->mc_ttl; - put_cmsg(&msg, SOL_IP, IP_TTL, sizeof(hlim), &hlim); - } - if (inet_test_bit(TOS, sk)) { - int tos = inet->rcv_tos; - put_cmsg(&msg, SOL_IP, IP_TOS, sizeof(tos), &tos); - } - len -= msg.msg_controllen; - return copy_to_sockptr(optlen, &len, sizeof(int)); - } case IP_LOCAL_PORT_RANGE: val = inet->local_port_range.hi << 16 | inet->local_port_range.lo; break; diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 586b1b3e35..80ccd6661a 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -332,7 +332,7 @@ static int iptunnel_pmtud_build_icmpv6(struct sk_buff *skb, int mtu) }; skb_reset_network_header(skb); - csum = csum_partial(icmp6h, len, 0); + csum = skb_checksum(skb, skb_transport_offset(skb), len, 0); icmp6h->icmp6_cksum = csum_ipv6_magic(&nip6h->saddr, &nip6h->daddr, len, IPPROTO_ICMPV6, csum); diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index d1e7d0ceb7..9ab9b3ebe0 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -288,11 +288,11 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) switch (skb->protocol) { case htons(ETH_P_IP): memset(IPCB(skb), 0, sizeof(*IPCB(skb))); - xfrm_decode_session(skb, &fl, AF_INET); + xfrm_decode_session(dev_net(dev), skb, &fl, AF_INET); break; case htons(ETH_P_IPV6): memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); - xfrm_decode_session(skb, &fl, AF_INET6); + xfrm_decode_session(dev_net(dev), skb, &fl, AF_INET6); break; default: goto tx_err; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 0063a23725..e49242706b 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1073,7 +1073,7 @@ static int ipmr_cache_report(const struct mr_table *mrt, msg = (struct igmpmsg *)skb_network_header(skb); msg->im_vif = vifi; msg->im_vif_hi = vifi >> 8; - ipv4_pktinfo_prepare(mroute_sk, pkt); + ipv4_pktinfo_prepare(mroute_sk, pkt, false); memcpy(skb->cb, pkt->cb, sizeof(skb->cb)); /* Add our header */ igmp = skb_put(skb, sizeof(struct igmphdr)); diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index bd13516548..591a273780 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -62,7 +62,7 @@ int ip_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb, un #ifdef CONFIG_XFRM if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) && - xfrm_decode_session(skb, flowi4_to_flowi(&fl4), AF_INET) == 0) { + xfrm_decode_session(net, skb, flowi4_to_flowi(&fl4), AF_INET) == 0) { struct dst_entry *dst = skb_dst(skb); skb_dst_set(skb, NULL); dst = xfrm_lookup(net, dst, flowi4_to_flowi(&fl4), sk, 0); diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index 3abb430af9..385d945d8e 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -36,12 +36,12 @@ static const struct xt_table packet_mangler = { static unsigned int ipt_mangle_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - unsigned int ret; + unsigned int ret, verdict; const struct iphdr *iph; - u_int8_t tos; __be32 saddr, daddr; - u_int32_t mark; + u32 mark; int err; + u8 tos; /* Save things which could affect route */ mark = skb->mark; @@ -51,8 +51,9 @@ ipt_mangle_out(void *priv, struct 
sk_buff *skb, const struct nf_hook_state *stat tos = iph->tos; ret = ipt_do_table(priv, skb, state); + verdict = ret & NF_VERDICT_MASK; /* Reroute for ANY change. */ - if (ret != NF_DROP && ret != NF_STOLEN) { + if (verdict != NF_DROP && verdict != NF_STOLEN) { iph = ip_hdr(skb); if (iph->saddr != saddr || diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c index 56f6ecc434..4d42d0756f 100644 --- a/net/ipv4/netfilter/iptable_nat.c +++ b/net/ipv4/netfilter/iptable_nat.c @@ -170,3 +170,4 @@ module_init(iptable_nat_init); module_exit(iptable_nat_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("iptables legacy nat table"); diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index ca5e5b2158..0e7f53964d 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -108,3 +108,4 @@ static void __exit iptable_raw_fini(void) module_init(iptable_raw_init); module_exit(iptable_raw_fini); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("iptables legacy raw table"); diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index 265b39bc43..482e733c33 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -186,3 +186,4 @@ module_init(nf_defrag_init); module_exit(nf_defrag_fini); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("IPv4 defragmentation support"); diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.asn1 b/net/ipv4/netfilter/nf_nat_snmp_basic.asn1 index 24b73268f3..dc2cc57941 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.asn1 +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.asn1 @@ -1,3 +1,11 @@ +-- SPDX-License-Identifier: BSD-3-Clause +-- +-- Copyright (C) 1990, 2002 IETF Trust and the persons identified as authors +-- of the code +-- +-- https://www.rfc-editor.org/rfc/rfc1157#section-4 +-- https://www.rfc-editor.org/rfc/rfc3416#section-3 + Message ::= SEQUENCE { version diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c index fc761915c5..04504b2b51 100644 --- a/net/ipv4/netfilter/nf_reject_ipv4.c +++ b/net/ipv4/netfilter/nf_reject_ipv4.c @@ -339,3 +339,4 @@ void nf_send_unreach(struct sk_buff *skb_in, int code, int hook) EXPORT_SYMBOL_GPL(nf_send_unreach); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("IPv4 packet rejection core"); diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 4cb0c896ca..823306487a 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -551,7 +551,7 @@ void ping_err(struct sk_buff *skb, int offset, u32 info) case ICMP_DEST_UNREACH: if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */ ipv4_sk_update_pmtu(skb, sk, info); - if (inet_sock->pmtudisc != IP_PMTUDISC_DONT) { + if (READ_ONCE(inet_sock->pmtudisc) != IP_PMTUDISC_DONT) { err = EMSGSIZE; harderr = 1; break; @@ -581,7 +581,7 @@ void ping_err(struct sk_buff *skb, int offset, u32 info) * 4.1.3.3. 
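
The iptable_mangle fix above masks ipt_do_table()'s return with NF_VERDICT_MASK before comparing it to NF_DROP/NF_STOLEN, because an xt verdict can carry extra payload in its upper bits (an errno for a drop, a queue number for NF_QUEUE), so the raw value need not equal the bare verdict constant. A demonstration; NF_DROP_ERR() is reproduced from the kernel's internal netfilter header as I remember it, so treat its exact form as an assumption:

#include <errno.h>
#include <stdio.h>

#define NF_DROP         0
#define NF_STOLEN       2
#define NF_VERDICT_MASK 0x000000ff
/* drop verdict carrying an errno in the high bits (kernel-internal helper) */
#define NF_DROP_ERR(x)  ((unsigned int)(((-(x)) << 16) | NF_DROP))

int main(void)
{
        unsigned int ret = NF_DROP_ERR(-EPERM);        /* 0x10000: still a drop */
        unsigned int verdict = ret & NF_VERDICT_MASK;

        printf("raw != NF_DROP: %d\n", ret != NF_DROP);        /* 1: old test misfires  */
        printf("masked == NF_DROP: %d\n", verdict == NF_DROP); /* 1: fixed test is right */
        return 0;
}
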
*/ if ((family == AF_INET && !inet_test_bit(RECVERR, sk)) || - (family == AF_INET6 && !inet6_sk(sk)->recverr)) { + (family == AF_INET6 && !inet6_test_bit(RECVERR6, sk))) { if (!harderr || sk->sk_state != TCP_ESTABLISHED) goto out; } else { @@ -773,11 +773,11 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (ipv4_is_multicast(daddr)) { if (!ipc.oif || netif_index_is_l3_master(sock_net(sk), ipc.oif)) - ipc.oif = inet->mc_index; + ipc.oif = READ_ONCE(inet->mc_index); if (!saddr) - saddr = inet->mc_addr; + saddr = READ_ONCE(inet->mc_addr); } else if (!ipc.oif) - ipc.oif = inet->uc_index; + ipc.oif = READ_ONCE(inet->uc_index); flowi4_init_output(&fl4, ipc.oif, ipc.sockc.mark, tos, scope, sk->sk_protocol, inet_sk_flowi_flags(sk), faddr, @@ -899,7 +899,6 @@ int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, #if IS_ENABLED(CONFIG_IPV6) } else if (family == AF_INET6) { - struct ipv6_pinfo *np = inet6_sk(sk); struct ipv6hdr *ip6 = ipv6_hdr(skb); DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name); @@ -908,7 +907,7 @@ int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, sin6->sin6_port = 0; sin6->sin6_addr = ip6->saddr; sin6->sin6_flowinfo = 0; - if (np->sndflow) + if (inet6_test_bit(SNDFLOW, sk)) sin6->sin6_flowinfo = ip6_flowinfo(ip6); sin6->sin6_scope_id = ipv6_iface_scope_id(&sin6->sin6_addr, diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index eaf1d3113b..5f4654ebff 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -83,7 +83,7 @@ static const struct snmp_mib snmp4_ipstats_list[] = { SNMP_MIB_ITEM("InUnknownProtos", IPSTATS_MIB_INUNKNOWNPROTOS), SNMP_MIB_ITEM("InDiscards", IPSTATS_MIB_INDISCARDS), SNMP_MIB_ITEM("InDelivers", IPSTATS_MIB_INDELIVERS), - SNMP_MIB_ITEM("OutRequests", IPSTATS_MIB_OUTPKTS), + SNMP_MIB_ITEM("OutRequests", IPSTATS_MIB_OUTREQUESTS), SNMP_MIB_ITEM("OutDiscards", IPSTATS_MIB_OUTDISCARDS), SNMP_MIB_ITEM("OutNoRoutes", IPSTATS_MIB_OUTNOROUTES), SNMP_MIB_ITEM("ReasmTimeout", IPSTATS_MIB_REASMTIMEOUT), @@ -93,6 +93,7 @@ static const struct snmp_mib snmp4_ipstats_list[] = { SNMP_MIB_ITEM("FragOKs", IPSTATS_MIB_FRAGOKS), SNMP_MIB_ITEM("FragFails", IPSTATS_MIB_FRAGFAILS), SNMP_MIB_ITEM("FragCreates", IPSTATS_MIB_FRAGCREATES), + SNMP_MIB_ITEM("OutTransmits", IPSTATS_MIB_OUTPKTS), SNMP_MIB_SENTINEL }; @@ -298,6 +299,11 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPMigrateReqSuccess", LINUX_MIB_TCPMIGRATEREQSUCCESS), SNMP_MIB_ITEM("TCPMigrateReqFailure", LINUX_MIB_TCPMIGRATEREQFAILURE), SNMP_MIB_ITEM("TCPPLBRehash", LINUX_MIB_TCPPLBREHASH), + SNMP_MIB_ITEM("TCPAORequired", LINUX_MIB_TCPAOREQUIRED), + SNMP_MIB_ITEM("TCPAOBad", LINUX_MIB_TCPAOBAD), + SNMP_MIB_ITEM("TCPAOKeyNotFound", LINUX_MIB_TCPAOKEYNOTFOUND), + SNMP_MIB_ITEM("TCPAOGood", LINUX_MIB_TCPAOGOOD), + SNMP_MIB_ITEM("TCPAODroppedIcmps", LINUX_MIB_TCPAODROPPEDICMPS), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 4b5db5d1ed..aea89326c6 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -239,7 +239,7 @@ static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info) if (code > NR_ICMP_UNREACH) break; if (code == ICMP_FRAG_NEEDED) { - harderr = inet->pmtudisc != IP_PMTUDISC_DONT; + harderr = READ_ONCE(inet->pmtudisc) != IP_PMTUDISC_DONT; err = EMSGSIZE; } else { err = icmp_err_convert[code].errno; @@ -292,7 +292,7 @@ static int raw_rcv_skb(struct sock *sk, struct sk_buff *skb) /* Charge it to the socket. 
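
Both ping_err() and raw_err() above turn an ICMP "fragmentation needed" into a hard EMSGSIZE only when the socket has path-MTU discovery enabled, and IP_MTU_DISCOVER itself is now read and written locklessly. From userspace the knob looks like the sketch below; address and port are placeholders, and note loopback's large MTU may let this particular send through, so the EMSGSIZE is what you would see on a typical constrained path:

#define _GNU_SOURCE
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>

int main(void)
{
        int fd = socket(AF_INET, SOCK_DGRAM, 0);
        int val = IP_PMTUDISC_DO;       /* set DF, never fragment locally */
        char big[65000] = { 0 };
        struct sockaddr_in dst = { .sin_family = AF_INET,
                                   .sin_port = htons(9) }; /* placeholder */

        dst.sin_addr.s_addr = htonl(0x7f000001); /* 127.0.0.1, placeholder */
        setsockopt(fd, IPPROTO_IP, IP_MTU_DISCOVER, &val, sizeof(val));

        /* With PMTUD active, a datagram larger than the path MTU fails
         * with EMSGSIZE instead of being fragmented. */
        if (sendto(fd, big, sizeof(big), 0,
                   (struct sockaddr *)&dst, sizeof(dst)) < 0)
                printf("sendto: %s\n", strerror(errno));
        return 0;
}
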
*/ - ipv4_pktinfo_prepare(sk, skb); + ipv4_pktinfo_prepare(sk, skb, true); if (sock_queue_rcv_skb_reason(sk, skb, &reason) < 0) { kfree_skb_reason(skb, reason); return NET_RX_DROP; @@ -482,7 +482,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) int free = 0; __be32 daddr; __be32 saddr; - int err; + int uc_index, err; struct ip_options_data opt_copy; struct raw_frag_vec rfv; int hdrincl; @@ -576,24 +576,25 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) tos = get_rttos(&ipc, inet); scope = ip_sendmsg_scope(inet, &ipc, msg); + uc_index = READ_ONCE(inet->uc_index); if (ipv4_is_multicast(daddr)) { if (!ipc.oif || netif_index_is_l3_master(sock_net(sk), ipc.oif)) - ipc.oif = inet->mc_index; + ipc.oif = READ_ONCE(inet->mc_index); if (!saddr) - saddr = inet->mc_addr; + saddr = READ_ONCE(inet->mc_addr); } else if (!ipc.oif) { - ipc.oif = inet->uc_index; - } else if (ipv4_is_lbcast(daddr) && inet->uc_index) { + ipc.oif = uc_index; + } else if (ipv4_is_lbcast(daddr) && uc_index) { /* oif is set, packet is to local broadcast * and uc_index is set. oif is most likely set * by sk_bound_dev_if. If uc_index != oif check if the * oif is an L3 master and uc_index is an L3 slave. * If so, we want to allow the send using the uc_index. */ - if (ipc.oif != inet->uc_index && + if (ipc.oif != uc_index && ipc.oif == l3mdev_master_ifindex_by_index(sock_net(sk), - inet->uc_index)) { - ipc.oif = inet->uc_index; + uc_index)) { + ipc.oif = uc_index; } } diff --git a/net/ipv4/raw_diag.c b/net/ipv4/raw_diag.c index 63a40e4b67..fe2140c837 100644 --- a/net/ipv4/raw_diag.c +++ b/net/ipv4/raw_diag.c @@ -257,5 +257,6 @@ static void __exit raw_diag_exit(void) module_init(raw_diag_init); module_exit(raw_diag_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("RAW socket monitoring via SOCK_DIAG"); MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-255 /* AF_INET - IPPROTO_RAW */); MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 10-255 /* AF_INET6 - IPPROTO_RAW */); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 3bad9aa066..16615d107c 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1632,7 +1632,7 @@ struct rtable *rt_dst_alloc(struct net_device *dev, { struct rtable *rt; - rt = dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK, + rt = dst_alloc(&ipv4_dst_ops, dev, DST_OBSOLETE_FORCE_CHK, (noxfrm ? 
DST_NOXFRM : 0)); if (rt) { @@ -1660,7 +1660,7 @@ struct rtable *rt_dst_clone(struct net_device *dev, struct rtable *rt) { struct rtable *new_rt; - new_rt = dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK, + new_rt = dst_alloc(&ipv4_dst_ops, dev, DST_OBSOLETE_FORCE_CHK, rt->dst.flags); if (new_rt) { @@ -2834,7 +2834,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or struct rtable *ort = (struct rtable *) dst_orig; struct rtable *rt; - rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, DST_OBSOLETE_DEAD, 0); + rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, DST_OBSOLETE_DEAD, 0); if (rt) { struct dst_entry *new = &rt->dst; @@ -2885,54 +2885,6 @@ struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4, } EXPORT_SYMBOL_GPL(ip_route_output_flow); -struct rtable *ip_route_output_tunnel(struct sk_buff *skb, - struct net_device *dev, - struct net *net, __be32 *saddr, - const struct ip_tunnel_info *info, - u8 protocol, bool use_cache) -{ -#ifdef CONFIG_DST_CACHE - struct dst_cache *dst_cache; -#endif - struct rtable *rt = NULL; - struct flowi4 fl4; - __u8 tos; - -#ifdef CONFIG_DST_CACHE - dst_cache = (struct dst_cache *)&info->dst_cache; - if (use_cache) { - rt = dst_cache_get_ip4(dst_cache, saddr); - if (rt) - return rt; - } -#endif - memset(&fl4, 0, sizeof(fl4)); - fl4.flowi4_mark = skb->mark; - fl4.flowi4_proto = protocol; - fl4.daddr = info->key.u.ipv4.dst; - fl4.saddr = info->key.u.ipv4.src; - tos = info->key.tos; - fl4.flowi4_tos = RT_TOS(tos); - - rt = ip_route_output_key(net, &fl4); - if (IS_ERR(rt)) { - netdev_dbg(dev, "no route to %pI4\n", &fl4.daddr); - return ERR_PTR(-ENETUNREACH); - } - if (rt->dst.dev == dev) { /* is this necessary? */ - netdev_dbg(dev, "circular route to %pI4\n", &fl4.daddr); - ip_rt_put(rt); - return ERR_PTR(-ELOOP); - } -#ifdef CONFIG_DST_CACHE - if (use_cache) - dst_cache_set_ip4(dst_cache, &rt->dst, fl4.saddr); -#endif - *saddr = fl4.saddr; - return rt; -} -EXPORT_SYMBOL_GPL(ip_route_output_tunnel); - /* called with rcu_read_lock held */ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, struct rtable *rt, u32 table_id, struct flowi4 *fl4, diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 3b4dafefb4..d37282c06e 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -51,6 +51,14 @@ static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport, count, &syncookie_secret[c]); } +/* Convert one nsec 64bit timestamp to ts (ms or usec resolution) */ +static u64 tcp_ns_to_ts(bool usec_ts, u64 val) +{ + if (usec_ts) + return div_u64(val, NSEC_PER_USEC); + + return div_u64(val, NSEC_PER_MSEC); +} /* * when syncookies are in effect and tcp timestamps are enabled we encode @@ -62,7 +70,7 @@ static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport, u64 cookie_init_timestamp(struct request_sock *req, u64 now) { const struct inet_request_sock *ireq = inet_rsk(req); - u64 ts, ts_now = tcp_ns_to_ts(now); + u64 ts, ts_now = tcp_ns_to_ts(false, now); u32 options = 0; options = ireq->wscale_ok ? 
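
The new tcp_ns_to_ts() above picks the timestamp tick: 1 us per tick when usec timestamps are negotiated, 1 ms otherwise. cookie_init_timestamp() then rounds the tick count down so its low TSBITS bits can carry the encoded options, never letting the result exceed the real clock. A userspace model; TSBITS is 6 in the kernel source as far as I recall, so treat that value as an assumption:

#include <stdio.h>
#include <stdint.h>

#define NSEC_PER_USEC 1000ULL
#define NSEC_PER_MSEC 1000000ULL
#define TSBITS        6 /* low bits of the cookie timestamp encode options */

/* Convert one nanosecond timestamp to TCP timestamp ticks (ms or us). */
static uint64_t tcp_ns_to_ts(int usec_ts, uint64_t ns)
{
        return ns / (usec_ts ? NSEC_PER_USEC : NSEC_PER_MSEC);
}

int main(void)
{
        uint64_t now_ns = 123456789012ULL;       /* arbitrary example clock */
        uint64_t ts_now = tcp_ns_to_ts(0, now_ns);
        uint64_t options = 0x15;                 /* wscale/sack/ecn, example */

        /* cookie_init_timestamp(): plant options in the low TSBITS bits,
         * stepping one whole TSBITS window back if that overshoots now. */
        uint64_t ts = (ts_now & ~((1ULL << TSBITS) - 1)) | options;
        if (ts > ts_now)
                ts -= 1ULL << TSBITS;

        printf("ms ticks=%llu usec ticks=%llu cookie_ts=%llu\n",
               (unsigned long long)ts_now,
               (unsigned long long)tcp_ns_to_ts(1, now_ns),
               (unsigned long long)ts);
        return 0;
}
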
ireq->snd_wscale : TS_OPT_WSCALE_MASK; @@ -76,7 +84,9 @@ u64 cookie_init_timestamp(struct request_sock *req, u64 now) if (ts > ts_now) ts -= (1UL << TSBITS); - return ts * (NSEC_PER_SEC / TCP_TS_HZ); + if (tcp_rsk(req)->req_usec_ts) + return ts * NSEC_PER_USEC; + return ts * NSEC_PER_MSEC; } @@ -296,6 +306,8 @@ struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops, treq->af_specific = af_ops; treq->syn_tos = TCP_SKB_CB(skb)->ip_dsfield; + treq->req_usec_ts = false; + #if IS_ENABLED(CONFIG_MPTCP) treq->is_mptcp = sk_is_mptcp(sk); if (treq->is_mptcp) { @@ -332,6 +344,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) __u8 rcv_wscale; struct flowi4 fl4; u32 tsoff = 0; + int l3index; if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies) || !th->ack || th->rst) @@ -393,6 +406,9 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) ireq->ir_iif = inet_request_bound_dev_if(sk, skb); + l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif); + tcp_ao_syncookie(sk, skb, treq, AF_INET, l3index); + /* We throwed the options of the initial SYN away, so we hope * the ACK carries the same options again (see RFC1122 4.2.3.8) */ diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 6ac890b407..f63a545a73 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -1367,6 +1367,15 @@ static struct ctl_table ipv4_net_table[] = { .extra1 = SYSCTL_ZERO, }, { + .procname = "tcp_backlog_ack_defer", + .data = &init_net.ipv4.sysctl_tcp_backlog_ack_defer, + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, + { .procname = "tcp_reflect_tos", .data = &init_net.ipv4.sysctl_tcp_reflect_tos, .maxlen = sizeof(u8), @@ -1489,6 +1498,14 @@ static struct ctl_table ipv4_net_table[] = { .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, + { + .procname = "tcp_pingpong_thresh", + .data = &init_net.ipv4.sysctl_tcp_pingpong_thresh, + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ONE, + }, { } }; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index fb417aee86..b30ef770a6 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1786,7 +1786,17 @@ static skb_frag_t *skb_advance_to_frag(struct sk_buff *skb, u32 offset_skb, static bool can_map_frag(const skb_frag_t *frag) { - return skb_frag_size(frag) == PAGE_SIZE && !skb_frag_off(frag); + struct page *page; + + if (skb_frag_size(frag) != PAGE_SIZE || skb_frag_off(frag)) + return false; + + page = skb_frag_page(frag); + + if (PageCompound(page) || page->mapping) + return false; + + return true; } static int find_next_mappable_frag(const skb_frag_t *frag, @@ -3610,6 +3620,35 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname, __tcp_sock_set_quickack(sk, val); break; + case TCP_AO_REPAIR: + if (!tcp_can_repair_sock(sk)) { + err = -EPERM; + break; + } + err = tcp_ao_set_repair(sk, optval, optlen); + break; +#ifdef CONFIG_TCP_AO + case TCP_AO_ADD_KEY: + case TCP_AO_DEL_KEY: + case TCP_AO_INFO: { + /* If this is the first TCP-AO setsockopt() on the socket, + * sk_state has to be LISTEN or CLOSE. Allow TCP_REPAIR + * in any state. 
+ */ + if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) + goto ao_parse; + if (rcu_dereference_protected(tcp_sk(sk)->ao_info, + lockdep_sock_is_held(sk))) + goto ao_parse; + if (tp->repair) + goto ao_parse; + err = -EISCONN; + break; +ao_parse: + err = tp->af_specific->ao_parse(sk, optname, optval, optlen); + break; + } +#endif #ifdef CONFIG_TCP_MD5SIG case TCP_MD5SIG: case TCP_MD5SIG_EXT: @@ -3648,10 +3687,16 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname, tp->fastopen_no_cookie = val; break; case TCP_TIMESTAMP: - if (!tp->repair) + if (!tp->repair) { err = -EPERM; - else - WRITE_ONCE(tp->tsoffset, val - tcp_time_stamp_raw()); + break; + } + /* val is an opaque field, + * and low order bit contains usec_ts enable bit. + * Its a best effort, and we do not care if user makes an error. + */ + tp->tcp_usec_ts = val & 1; + WRITE_ONCE(tp->tsoffset, val - tcp_clock_ts(tp->tcp_usec_ts)); break; case TCP_REPAIR_WINDOW: err = tcp_repair_set_window(tp, optval, optlen); @@ -3773,10 +3818,12 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_options |= TCPI_OPT_ECN_SEEN; if (tp->syn_data_acked) info->tcpi_options |= TCPI_OPT_SYN_DATA; + if (tp->tcp_usec_ts) + info->tcpi_options |= TCPI_OPT_USEC_TS; info->tcpi_rto = jiffies_to_usecs(icsk->icsk_rto); - info->tcpi_ato = jiffies_to_usecs(min(icsk->icsk_ack.ato, - tcp_delack_max(sk))); + info->tcpi_ato = jiffies_to_usecs(min_t(u32, icsk->icsk_ack.ato, + tcp_delack_max(sk))); info->tcpi_snd_mss = tp->mss_cache; info->tcpi_rcv_mss = icsk->icsk_ack.rcv_mss; @@ -3832,6 +3879,13 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_rcv_wnd = tp->rcv_wnd; info->tcpi_rehash = tp->plb_rehash + tp->timeout_rehash; info->tcpi_fastopen_client_fail = tp->fastopen_client_fail; + + info->tcpi_total_rto = tp->total_rto; + info->tcpi_total_rto_recoveries = tp->total_rto_recoveries; + info->tcpi_total_rto_time = tp->total_rto_time; + if (tp->rto_stamp) + info->tcpi_total_rto_time += tcp_clock_ms() - tp->rto_stamp; + unlock_sock_fast(sk, slow); } EXPORT_SYMBOL_GPL(tcp_get_info); @@ -4155,7 +4209,11 @@ int do_tcp_getsockopt(struct sock *sk, int level, break; case TCP_TIMESTAMP: - val = tcp_time_stamp_raw() + READ_ONCE(tp->tsoffset); + val = tcp_clock_ts(tp->tcp_usec_ts) + READ_ONCE(tp->tsoffset); + if (tp->tcp_usec_ts) + val |= 1; + else + val &= ~1; break; case TCP_NOTSENT_LOWAT: val = READ_ONCE(tp->notsent_lowat); @@ -4265,6 +4323,23 @@ zerocopy_rcv_out: return err; } #endif + case TCP_AO_REPAIR: + if (!tcp_can_repair_sock(sk)) + return -EPERM; + return tcp_ao_get_repair(sk, optval, optlen); + case TCP_AO_GET_KEYS: + case TCP_AO_INFO: { + int err; + + sockopt_lock_sock(sk); + if (optname == TCP_AO_GET_KEYS) + err = tcp_ao_get_mkts(sk, optval, optlen); + else + err = tcp_ao_get_sock_info(sk, optval, optlen); + sockopt_release_sock(sk); + + return err; + } default: return -ENOPROTOOPT; } @@ -4303,141 +4378,52 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, EXPORT_SYMBOL(tcp_getsockopt); #ifdef CONFIG_TCP_MD5SIG -static DEFINE_PER_CPU(struct tcp_md5sig_pool, tcp_md5sig_pool); -static DEFINE_MUTEX(tcp_md5sig_mutex); -static bool tcp_md5sig_pool_populated = false; - -static void __tcp_alloc_md5sig_pool(void) -{ - struct crypto_ahash *hash; - int cpu; - - hash = crypto_alloc_ahash("md5", 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(hash)) - return; - - for_each_possible_cpu(cpu) { - void *scratch = per_cpu(tcp_md5sig_pool, cpu).scratch; - struct ahash_request *req; - - if (!scratch) { - 
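
tcp_get_info() above reports the time spent in RTO recoveries as a closed sum (tp->total_rto_time) plus the still-open interval (now minus tp->rto_stamp) when a recovery is in progress. The same "accumulator plus open stopwatch" pattern in isolation; clock_ms() is a stand-in for tcp_clock_ms():

#include <stdio.h>
#include <stdint.h>
#include <time.h>

static uint32_t clock_ms(void)  /* stand-in for tcp_clock_ms() */
{
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        return (uint32_t)(ts.tv_sec * 1000 + ts.tv_nsec / 1000000);
}

struct recovery_stats {
        uint32_t total_ms; /* closed intervals, like total_rto_time */
        uint32_t stamp;    /* open interval start, like rto_stamp; 0 = idle */
};

static void recovery_start(struct recovery_stats *s) { s->stamp = clock_ms(); }

static void recovery_end(struct recovery_stats *s)
{
        s->total_ms += clock_ms() - s->stamp;
        s->stamp = 0;
}

/* what tcp_get_info() reports: closed sum plus any open interval */
static uint32_t recovery_total(const struct recovery_stats *s)
{
        return s->total_ms + (s->stamp ? clock_ms() - s->stamp : 0);
}

int main(void)
{
        struct recovery_stats s = { 0, 0 };

        recovery_start(&s);
        recovery_end(&s);
        printf("total %u ms\n", recovery_total(&s));
        return 0;
}
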
scratch = kmalloc_node(sizeof(union tcp_md5sum_block) + - sizeof(struct tcphdr), - GFP_KERNEL, - cpu_to_node(cpu)); - if (!scratch) - return; - per_cpu(tcp_md5sig_pool, cpu).scratch = scratch; - } - if (per_cpu(tcp_md5sig_pool, cpu).md5_req) - continue; - - req = ahash_request_alloc(hash, GFP_KERNEL); - if (!req) - return; - - ahash_request_set_callback(req, 0, NULL, NULL); - - per_cpu(tcp_md5sig_pool, cpu).md5_req = req; - } - /* before setting tcp_md5sig_pool_populated, we must commit all writes - * to memory. See smp_rmb() in tcp_get_md5sig_pool() - */ - smp_wmb(); - /* Paired with READ_ONCE() from tcp_alloc_md5sig_pool() - * and tcp_get_md5sig_pool(). - */ - WRITE_ONCE(tcp_md5sig_pool_populated, true); -} +int tcp_md5_sigpool_id = -1; +EXPORT_SYMBOL_GPL(tcp_md5_sigpool_id); -bool tcp_alloc_md5sig_pool(void) +int tcp_md5_alloc_sigpool(void) { - /* Paired with WRITE_ONCE() from __tcp_alloc_md5sig_pool() */ - if (unlikely(!READ_ONCE(tcp_md5sig_pool_populated))) { - mutex_lock(&tcp_md5sig_mutex); - - if (!tcp_md5sig_pool_populated) - __tcp_alloc_md5sig_pool(); + size_t scratch_size; + int ret; - mutex_unlock(&tcp_md5sig_mutex); + scratch_size = sizeof(union tcp_md5sum_block) + sizeof(struct tcphdr); + ret = tcp_sigpool_alloc_ahash("md5", scratch_size); + if (ret >= 0) { + /* As long as any md5 sigpool was allocated, the return + * id would stay the same. Re-write the id only for the case + * when previously all MD5 keys were deleted and this call + * allocates the first MD5 key, which may return a different + * sigpool id than was used previously. + */ + WRITE_ONCE(tcp_md5_sigpool_id, ret); /* Avoids the compiler potentially being smart here */ + return 0; } - /* Paired with WRITE_ONCE() from __tcp_alloc_md5sig_pool() */ - return READ_ONCE(tcp_md5sig_pool_populated); + return ret; } -EXPORT_SYMBOL(tcp_alloc_md5sig_pool); - -/** - * tcp_get_md5sig_pool - get md5sig_pool for this user - * - * We use percpu structure, so if we succeed, we exit with preemption - * and BH disabled, to make sure another thread or softirq handling - * wont try to get same context. - */ -struct tcp_md5sig_pool *tcp_get_md5sig_pool(void) +void tcp_md5_release_sigpool(void) { - local_bh_disable(); - - /* Paired with WRITE_ONCE() from __tcp_alloc_md5sig_pool() */ - if (READ_ONCE(tcp_md5sig_pool_populated)) { - /* coupled with smp_wmb() in __tcp_alloc_md5sig_pool() */ - smp_rmb(); - return this_cpu_ptr(&tcp_md5sig_pool); - } - local_bh_enable(); - return NULL; + tcp_sigpool_release(READ_ONCE(tcp_md5_sigpool_id)); } -EXPORT_SYMBOL(tcp_get_md5sig_pool); -int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp, - const struct sk_buff *skb, unsigned int header_len) +void tcp_md5_add_sigpool(void) { - struct scatterlist sg; - const struct tcphdr *tp = tcp_hdr(skb); - struct ahash_request *req = hp->md5_req; - unsigned int i; - const unsigned int head_data_len = skb_headlen(skb) > header_len ? 
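
The per-CPU MD5 pool is replaced by the shared tcp_sigpool: callers allocate by algorithm name, take one reference per user (key), and release when the last user goes away, at which point the slot can be reused and a later allocation may return a different id (hence the WRITE_ONCE of tcp_md5_sigpool_id above). A toy registry modelling only that get/put contract; tcp_sigpool itself is kernel code and this is not its implementation:

#include <stdio.h>
#include <string.h>

struct sigpool { char alg[16]; int users; };
static struct sigpool pools[4];

static int sigpool_alloc(const char *alg)  /* ~tcp_sigpool_alloc_ahash() */
{
        int free_slot = -1;

        for (int i = 0; i < 4; i++) {
                if (pools[i].users && !strcmp(pools[i].alg, alg)) {
                        pools[i].users++;  /* existing pool: just ref it */
                        return i;
                }
                if (!pools[i].users && free_slot < 0)
                        free_slot = i;
        }
        if (free_slot >= 0) {
                snprintf(pools[free_slot].alg, 16, "%s", alg);
                pools[free_slot].users = 1;
        }
        return free_slot;
}

static void sigpool_get(int id)     { pools[id].users++; }
static void sigpool_release(int id) { pools[id].users--; }

int main(void)
{
        int id = sigpool_alloc("md5");  /* tcp_md5_alloc_sigpool() */

        sigpool_get(id);                /* tcp_md5_add_sigpool(): another key */
        sigpool_release(id);            /* a key deleted */
        sigpool_release(id);            /* last ref: slot becomes reusable */
        printf("pool %d users %d\n", id, pools[id].users);
        return 0;
}
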
- skb_headlen(skb) - header_len : 0; - const struct skb_shared_info *shi = skb_shinfo(skb); - struct sk_buff *frag_iter; - - sg_init_table(&sg, 1); - - sg_set_buf(&sg, ((u8 *) tp) + header_len, head_data_len); - ahash_request_set_crypt(req, &sg, NULL, head_data_len); - if (crypto_ahash_update(req)) - return 1; - - for (i = 0; i < shi->nr_frags; ++i) { - const skb_frag_t *f = &shi->frags[i]; - unsigned int offset = skb_frag_off(f); - struct page *page = skb_frag_page(f) + (offset >> PAGE_SHIFT); - - sg_set_page(&sg, page, skb_frag_size(f), - offset_in_page(offset)); - ahash_request_set_crypt(req, &sg, NULL, skb_frag_size(f)); - if (crypto_ahash_update(req)) - return 1; - } - - skb_walk_frags(skb, frag_iter) - if (tcp_md5_hash_skb_data(hp, frag_iter, 0)) - return 1; - - return 0; + tcp_sigpool_get(READ_ONCE(tcp_md5_sigpool_id)); } -EXPORT_SYMBOL(tcp_md5_hash_skb_data); -int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, const struct tcp_md5sig_key *key) +int tcp_md5_hash_key(struct tcp_sigpool *hp, + const struct tcp_md5sig_key *key) { u8 keylen = READ_ONCE(key->keylen); /* paired with WRITE_ONCE() in tcp_md5_do_add */ struct scatterlist sg; sg_init_one(&sg, key->key, keylen); - ahash_request_set_crypt(hp->md5_req, &sg, NULL, keylen); + ahash_request_set_crypt(hp->req, &sg, NULL, keylen); - /* We use data_race() because tcp_md5_do_add() might change key->key under us */ - return data_race(crypto_ahash_update(hp->md5_req)); + /* We use data_race() because tcp_md5_do_add() might change + * key->key under us + */ + return data_race(crypto_ahash_update(hp->req)); } EXPORT_SYMBOL(tcp_md5_hash_key); @@ -4445,42 +4431,24 @@ EXPORT_SYMBOL(tcp_md5_hash_key); enum skb_drop_reason tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, const void *saddr, const void *daddr, - int family, int dif, int sdif) + int family, int l3index, const __u8 *hash_location) { - /* - * This gets called for each TCP segment that arrives - * so we want to be efficient. + /* This gets called for each TCP segment that has TCP-MD5 option. * We have 3 drop cases: * o No MD5 hash and one expected. * o MD5 hash and we're not expecting one. * o MD5 hash and its wrong. */ - const __u8 *hash_location = NULL; - struct tcp_md5sig_key *hash_expected; - const struct tcphdr *th = tcp_hdr(skb); const struct tcp_sock *tp = tcp_sk(sk); - int genhash, l3index; + struct tcp_md5sig_key *key; u8 newhash[16]; + int genhash; - /* sdif set, means packet ingressed via a device - * in an L3 domain and dif is set to the l3mdev - */ - l3index = sdif ? dif : 0; - - hash_expected = tcp_md5_do_lookup(sk, l3index, saddr, family); - hash_location = tcp_parse_md5sig_option(th); + key = tcp_md5_do_lookup(sk, l3index, saddr, family); - /* We've parsed the options - do we have a hash? */ - if (!hash_expected && !hash_location) - return SKB_NOT_DROPPED_YET; - - if (hash_expected && !hash_location) { - NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND); - return SKB_DROP_REASON_TCP_MD5NOTFOUND; - } - - if (!hash_expected && hash_location) { + if (!key && hash_location) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED); + tcp_hash_fail("Unexpected MD5 Hash found", family, skb, ""); return SKB_DROP_REASON_TCP_MD5UNEXPECTED; } @@ -4489,27 +4457,26 @@ tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, * IPv4-mapped case. 
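
After the refactor the caller parses the MD5 option once and hands hash_location in, and tcp_inbound_md5_hash() distinguishes the classic cases: no key and no hash (accept), hash but no key (unexpected), key but no hash (not found; in this version that check appears to live in the caller), and key plus hash (recompute and compare). The decision table as a compact function:

#include <stdbool.h>
#include <stdio.h>

enum verdict { OK, DROP_NOTFOUND, DROP_UNEXPECTED, VERIFY };

static enum verdict md5_classify(bool key_configured, bool hash_in_segment)
{
        if (!key_configured && !hash_in_segment)
                return OK;              /* MD5 not in use on this path */
        if (!key_configured)
                return DROP_UNEXPECTED; /* LINUX_MIB_TCPMD5UNEXPECTED */
        if (!hash_in_segment)
                return DROP_NOTFOUND;   /* LINUX_MIB_TCPMD5NOTFOUND */
        return VERIFY;                  /* recompute, memcmp 16 bytes */
}

int main(void)
{
        printf("%d %d %d %d\n",
               md5_classify(false, false), md5_classify(false, true),
               md5_classify(true, false), md5_classify(true, true));
        return 0;
}
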
*/ if (family == AF_INET) - genhash = tcp_v4_md5_hash_skb(newhash, - hash_expected, - NULL, skb); + genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, skb); else - genhash = tp->af_specific->calc_md5_hash(newhash, - hash_expected, + genhash = tp->af_specific->calc_md5_hash(newhash, key, NULL, skb); - if (genhash || memcmp(hash_location, newhash, 16) != 0) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE); if (family == AF_INET) { - net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s L3 index %d\n", - saddr, ntohs(th->source), - daddr, ntohs(th->dest), - genhash ? " tcp_v4_calc_md5_hash failed" - : "", l3index); + tcp_hash_fail("MD5 Hash failed", AF_INET, skb, "%s L3 index %d", + genhash ? "tcp_v4_calc_md5_hash failed" + : "", l3index); } else { - net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u L3 index %d\n", - genhash ? "failed" : "mismatch", - saddr, ntohs(th->source), - daddr, ntohs(th->dest), l3index); + if (genhash) { + tcp_hash_fail("MD5 Hash failed", + AF_INET6, skb, "L3 index %d", + l3index); + } else { + tcp_hash_fail("MD5 Hash mismatch", + AF_INET6, skb, "L3 index %d", + l3index); + } } return SKB_DROP_REASON_TCP_MD5FAILURE; } diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c new file mode 100644 index 0000000000..f8308d3f56 --- /dev/null +++ b/net/ipv4/tcp_ao.c @@ -0,0 +1,2396 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * INET An implementation of the TCP Authentication Option (TCP-AO). + * See RFC5925. + * + * Authors: Dmitry Safonov <dima@arista.com> + * Francesco Ruggeri <fruggeri@arista.com> + * Salam Noureddine <noureddine@arista.com> + */ +#define pr_fmt(fmt) "TCP: " fmt + +#include <crypto/hash.h> +#include <linux/inetdevice.h> +#include <linux/tcp.h> + +#include <net/tcp.h> +#include <net/ipv6.h> +#include <net/icmp.h> + +DEFINE_STATIC_KEY_DEFERRED_FALSE(tcp_ao_needed, HZ); + +int tcp_ao_calc_traffic_key(struct tcp_ao_key *mkt, u8 *key, void *ctx, + unsigned int len, struct tcp_sigpool *hp) +{ + struct scatterlist sg; + int ret; + + if (crypto_ahash_setkey(crypto_ahash_reqtfm(hp->req), + mkt->key, mkt->keylen)) + goto clear_hash; + + ret = crypto_ahash_init(hp->req); + if (ret) + goto clear_hash; + + sg_init_one(&sg, ctx, len); + ahash_request_set_crypt(hp->req, &sg, key, len); + crypto_ahash_update(hp->req); + + ret = crypto_ahash_final(hp->req); + if (ret) + goto clear_hash; + + return 0; +clear_hash: + memset(key, 0, tcp_ao_digest_size(mkt)); + return 1; +} + +bool tcp_ao_ignore_icmp(const struct sock *sk, int family, int type, int code) +{ + bool ignore_icmp = false; + struct tcp_ao_info *ao; + + if (!static_branch_unlikely(&tcp_ao_needed.key)) + return false; + + /* RFC5925, 7.8: + * >> A TCP-AO implementation MUST default to ignore incoming ICMPv4 + * messages of Type 3 (destination unreachable), Codes 2-4 (protocol + * unreachable, port unreachable, and fragmentation needed -- ’hard + * errors’), and ICMPv6 Type 1 (destination unreachable), Code 1 + * (administratively prohibited) and Code 4 (port unreachable) intended + * for connections in synchronized states (ESTABLISHED, FIN-WAIT-1, FIN- + * WAIT-2, CLOSE-WAIT, CLOSING, LAST-ACK, TIME-WAIT) that match MKTs. 
+ */ + if (family == AF_INET) { + if (type != ICMP_DEST_UNREACH) + return false; + if (code < ICMP_PROT_UNREACH || code > ICMP_FRAG_NEEDED) + return false; + } else { + if (type != ICMPV6_DEST_UNREACH) + return false; + if (code != ICMPV6_ADM_PROHIBITED && code != ICMPV6_PORT_UNREACH) + return false; + } + + rcu_read_lock(); + switch (sk->sk_state) { + case TCP_TIME_WAIT: + ao = rcu_dereference(tcp_twsk(sk)->ao_info); + break; + case TCP_SYN_SENT: + case TCP_SYN_RECV: + case TCP_LISTEN: + case TCP_NEW_SYN_RECV: + /* RFC5925 specifies to ignore ICMPs *only* on connections + * in synchronized states. + */ + rcu_read_unlock(); + return false; + default: + ao = rcu_dereference(tcp_sk(sk)->ao_info); + } + + if (ao && !ao->accept_icmps) { + ignore_icmp = true; + __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAODROPPEDICMPS); + atomic64_inc(&ao->counters.dropped_icmp); + } + rcu_read_unlock(); + + return ignore_icmp; +} + +/* Optimized version of tcp_ao_do_lookup(): only for sockets for which + * it's known that the keys in ao_info are matching peer's + * family/address/VRF/etc. + */ +struct tcp_ao_key *tcp_ao_established_key(struct tcp_ao_info *ao, + int sndid, int rcvid) +{ + struct tcp_ao_key *key; + + hlist_for_each_entry_rcu(key, &ao->head, node) { + if ((sndid >= 0 && key->sndid != sndid) || + (rcvid >= 0 && key->rcvid != rcvid)) + continue; + return key; + } + + return NULL; +} + +static int ipv4_prefix_cmp(const struct in_addr *addr1, + const struct in_addr *addr2, + unsigned int prefixlen) +{ + __be32 mask = inet_make_mask(prefixlen); + __be32 a1 = addr1->s_addr & mask; + __be32 a2 = addr2->s_addr & mask; + + if (a1 == a2) + return 0; + return memcmp(&a1, &a2, sizeof(a1)); +} + +static int __tcp_ao_key_cmp(const struct tcp_ao_key *key, int l3index, + const union tcp_ao_addr *addr, u8 prefixlen, + int family, int sndid, int rcvid) +{ + if (sndid >= 0 && key->sndid != sndid) + return (key->sndid > sndid) ? 1 : -1; + if (rcvid >= 0 && key->rcvid != rcvid) + return (key->rcvid > rcvid) ? 1 : -1; + if (l3index >= 0 && (key->keyflags & TCP_AO_KEYF_IFINDEX)) { + if (key->l3index != l3index) + return (key->l3index > l3index) ? 1 : -1; + } + + if (family == AF_UNSPEC) + return 0; + if (key->family != family) + return (key->family > family) ? 
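
The RFC 5925 7.8 filter implemented by tcp_ao_ignore_icmp() above reduces to a small type/code predicate once the state checks and the per-socket accept_icmps override are set aside. Standalone, with the uapi ICMP constants spelled out:

#include <stdbool.h>
#include <stdio.h>

#define ICMP_DEST_UNREACH      3
#define ICMP_PROT_UNREACH      2
#define ICMP_FRAG_NEEDED       4
#define ICMPV6_DEST_UNREACH    1
#define ICMPV6_ADM_PROHIBITED  1
#define ICMPV6_PORT_UNREACH    4

/* true when the ICMP message is one a TCP-AO connection ignores by
 * default ("hard errors"); family is 4 or 6 here for brevity. */
static bool ao_hard_error(int family, int type, int code)
{
        if (family == 4)
                return type == ICMP_DEST_UNREACH &&
                       code >= ICMP_PROT_UNREACH && code <= ICMP_FRAG_NEEDED;
        return type == ICMPV6_DEST_UNREACH &&
               (code == ICMPV6_ADM_PROHIBITED || code == ICMPV6_PORT_UNREACH);
}

int main(void)
{
        printf("%d\n", ao_hard_error(4, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED)); /* 1 */
        printf("%d\n", ao_hard_error(4, ICMP_DEST_UNREACH, 5));                /* 0 */
        return 0;
}
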
1 : -1; + + if (family == AF_INET) { + if (ntohl(key->addr.a4.s_addr) == INADDR_ANY) + return 0; + if (ntohl(addr->a4.s_addr) == INADDR_ANY) + return 0; + return ipv4_prefix_cmp(&key->addr.a4, &addr->a4, prefixlen); +#if IS_ENABLED(CONFIG_IPV6) + } else { + if (ipv6_addr_any(&key->addr.a6) || ipv6_addr_any(&addr->a6)) + return 0; + if (ipv6_prefix_equal(&key->addr.a6, &addr->a6, prefixlen)) + return 0; + return memcmp(&key->addr.a6, &addr->a6, sizeof(addr->a6)); +#endif + } + return -1; +} + +static int tcp_ao_key_cmp(const struct tcp_ao_key *key, int l3index, + const union tcp_ao_addr *addr, u8 prefixlen, + int family, int sndid, int rcvid) +{ +#if IS_ENABLED(CONFIG_IPV6) + if (family == AF_INET6 && ipv6_addr_v4mapped(&addr->a6)) { + __be32 addr4 = addr->a6.s6_addr32[3]; + + return __tcp_ao_key_cmp(key, l3index, + (union tcp_ao_addr *)&addr4, + prefixlen, AF_INET, sndid, rcvid); + } +#endif + return __tcp_ao_key_cmp(key, l3index, addr, + prefixlen, family, sndid, rcvid); +} + +static struct tcp_ao_key *__tcp_ao_do_lookup(const struct sock *sk, int l3index, + const union tcp_ao_addr *addr, int family, u8 prefix, + int sndid, int rcvid) +{ + struct tcp_ao_key *key; + struct tcp_ao_info *ao; + + if (!static_branch_unlikely(&tcp_ao_needed.key)) + return NULL; + + ao = rcu_dereference_check(tcp_sk(sk)->ao_info, + lockdep_sock_is_held(sk)); + if (!ao) + return NULL; + + hlist_for_each_entry_rcu(key, &ao->head, node) { + u8 prefixlen = min(prefix, key->prefixlen); + + if (!tcp_ao_key_cmp(key, l3index, addr, prefixlen, + family, sndid, rcvid)) + return key; + } + return NULL; +} + +struct tcp_ao_key *tcp_ao_do_lookup(const struct sock *sk, int l3index, + const union tcp_ao_addr *addr, + int family, int sndid, int rcvid) +{ + return __tcp_ao_do_lookup(sk, l3index, addr, family, U8_MAX, sndid, rcvid); +} + +static struct tcp_ao_info *tcp_ao_alloc_info(gfp_t flags) +{ + struct tcp_ao_info *ao; + + ao = kzalloc(sizeof(*ao), flags); + if (!ao) + return NULL; + INIT_HLIST_HEAD(&ao->head); + refcount_set(&ao->refcnt, 1); + + return ao; +} + +static void tcp_ao_link_mkt(struct tcp_ao_info *ao, struct tcp_ao_key *mkt) +{ + hlist_add_head_rcu(&mkt->node, &ao->head); +} + +static struct tcp_ao_key *tcp_ao_copy_key(struct sock *sk, + struct tcp_ao_key *key) +{ + struct tcp_ao_key *new_key; + + new_key = sock_kmalloc(sk, tcp_ao_sizeof_key(key), + GFP_ATOMIC); + if (!new_key) + return NULL; + + *new_key = *key; + INIT_HLIST_NODE(&new_key->node); + tcp_sigpool_get(new_key->tcp_sigpool_id); + atomic64_set(&new_key->pkt_good, 0); + atomic64_set(&new_key->pkt_bad, 0); + + return new_key; +} + +static void tcp_ao_key_free_rcu(struct rcu_head *head) +{ + struct tcp_ao_key *key = container_of(head, struct tcp_ao_key, rcu); + + tcp_sigpool_release(key->tcp_sigpool_id); + kfree_sensitive(key); +} + +void tcp_ao_destroy_sock(struct sock *sk, bool twsk) +{ + struct tcp_ao_info *ao; + struct tcp_ao_key *key; + struct hlist_node *n; + + if (twsk) { + ao = rcu_dereference_protected(tcp_twsk(sk)->ao_info, 1); + tcp_twsk(sk)->ao_info = NULL; + } else { + ao = rcu_dereference_protected(tcp_sk(sk)->ao_info, 1); + tcp_sk(sk)->ao_info = NULL; + } + + if (!ao || !refcount_dec_and_test(&ao->refcnt)) + return; + + hlist_for_each_entry_safe(key, n, &ao->head, node) { + hlist_del_rcu(&key->node); + if (!twsk) + atomic_sub(tcp_ao_sizeof_key(key), &sk->sk_omem_alloc); + call_rcu(&key->rcu, tcp_ao_key_free_rcu); + } + + kfree_rcu(ao, rcu); + static_branch_slow_dec_deferred(&tcp_ao_needed); +} + +void tcp_ao_time_wait(struct 
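
Key lookup above matches peer addresses under a prefix: ipv4_prefix_cmp() masks both addresses with inet_make_mask(), and __tcp_ao_do_lookup() walks the keys with prefixlen = min(requested prefix, key->prefixlen). The masking logic by itself, with a userspace rendition of inet_make_mask():

#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>

/* userspace rendition of the kernel's inet_make_mask() */
static uint32_t make_mask_be(int prefixlen)
{
        return prefixlen ? htonl(~0u << (32 - prefixlen)) : 0;
}

/* compare two network-byte-order addresses under a common prefix */
static int prefix_match(uint32_t a1_be, uint32_t a2_be, int prefixlen)
{
        uint32_t mask = make_mask_be(prefixlen);

        return (a1_be & mask) == (a2_be & mask);
}

int main(void)
{
        uint32_t a = inet_addr("10.0.1.17"), b = inet_addr("10.0.1.200");

        printf("/24 match: %d\n", prefix_match(a, b, 24)); /* 1 */
        printf("/32 match: %d\n", prefix_match(a, b, 32)); /* 0 */
        return 0;
}
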
tcp_timewait_sock *tcptw, struct tcp_sock *tp) +{ + struct tcp_ao_info *ao_info = rcu_dereference_protected(tp->ao_info, 1); + + if (ao_info) { + struct tcp_ao_key *key; + struct hlist_node *n; + int omem = 0; + + hlist_for_each_entry_safe(key, n, &ao_info->head, node) { + omem += tcp_ao_sizeof_key(key); + } + + refcount_inc(&ao_info->refcnt); + atomic_sub(omem, &(((struct sock *)tp)->sk_omem_alloc)); + rcu_assign_pointer(tcptw->ao_info, ao_info); + } else { + tcptw->ao_info = NULL; + } +} + +/* 4 tuple and ISNs are expected in NBO */ +static int tcp_v4_ao_calc_key(struct tcp_ao_key *mkt, u8 *key, + __be32 saddr, __be32 daddr, + __be16 sport, __be16 dport, + __be32 sisn, __be32 disn) +{ + /* See RFC5926 3.1.1 */ + struct kdf_input_block { + u8 counter; + u8 label[6]; + struct tcp4_ao_context ctx; + __be16 outlen; + } __packed * tmp; + struct tcp_sigpool hp; + int err; + + err = tcp_sigpool_start(mkt->tcp_sigpool_id, &hp); + if (err) + return err; + + tmp = hp.scratch; + tmp->counter = 1; + memcpy(tmp->label, "TCP-AO", 6); + tmp->ctx.saddr = saddr; + tmp->ctx.daddr = daddr; + tmp->ctx.sport = sport; + tmp->ctx.dport = dport; + tmp->ctx.sisn = sisn; + tmp->ctx.disn = disn; + tmp->outlen = htons(tcp_ao_digest_size(mkt) * 8); /* in bits */ + + err = tcp_ao_calc_traffic_key(mkt, key, tmp, sizeof(*tmp), &hp); + tcp_sigpool_end(&hp); + + return err; +} + +int tcp_v4_ao_calc_key_sk(struct tcp_ao_key *mkt, u8 *key, + const struct sock *sk, + __be32 sisn, __be32 disn, bool send) +{ + if (send) + return tcp_v4_ao_calc_key(mkt, key, sk->sk_rcv_saddr, + sk->sk_daddr, htons(sk->sk_num), + sk->sk_dport, sisn, disn); + else + return tcp_v4_ao_calc_key(mkt, key, sk->sk_daddr, + sk->sk_rcv_saddr, sk->sk_dport, + htons(sk->sk_num), disn, sisn); +} + +static int tcp_ao_calc_key_sk(struct tcp_ao_key *mkt, u8 *key, + const struct sock *sk, + __be32 sisn, __be32 disn, bool send) +{ + if (mkt->family == AF_INET) + return tcp_v4_ao_calc_key_sk(mkt, key, sk, sisn, disn, send); +#if IS_ENABLED(CONFIG_IPV6) + else if (mkt->family == AF_INET6) + return tcp_v6_ao_calc_key_sk(mkt, key, sk, sisn, disn, send); +#endif + else + return -EOPNOTSUPP; +} + +int tcp_v4_ao_calc_key_rsk(struct tcp_ao_key *mkt, u8 *key, + struct request_sock *req) +{ + struct inet_request_sock *ireq = inet_rsk(req); + + return tcp_v4_ao_calc_key(mkt, key, + ireq->ir_loc_addr, ireq->ir_rmt_addr, + htons(ireq->ir_num), ireq->ir_rmt_port, + htonl(tcp_rsk(req)->snt_isn), + htonl(tcp_rsk(req)->rcv_isn)); +} + +static int tcp_v4_ao_calc_key_skb(struct tcp_ao_key *mkt, u8 *key, + const struct sk_buff *skb, + __be32 sisn, __be32 disn) +{ + const struct iphdr *iph = ip_hdr(skb); + const struct tcphdr *th = tcp_hdr(skb); + + return tcp_v4_ao_calc_key(mkt, key, iph->saddr, iph->daddr, + th->source, th->dest, sisn, disn); +} + +static int tcp_ao_calc_key_skb(struct tcp_ao_key *mkt, u8 *key, + const struct sk_buff *skb, + __be32 sisn, __be32 disn, int family) +{ + if (family == AF_INET) + return tcp_v4_ao_calc_key_skb(mkt, key, skb, sisn, disn); +#if IS_ENABLED(CONFIG_IPV6) + else if (family == AF_INET6) + return tcp_v6_ao_calc_key_skb(mkt, key, skb, sisn, disn); +#endif + return -EAFNOSUPPORT; +} + +static int tcp_v4_ao_hash_pseudoheader(struct tcp_sigpool *hp, + __be32 daddr, __be32 saddr, + int nbytes) +{ + struct tcp4_pseudohdr *bp; + struct scatterlist sg; + + bp = hp->scratch; + bp->saddr = saddr; + bp->daddr = daddr; + bp->pad = 0; + bp->protocol = IPPROTO_TCP; + bp->len = cpu_to_be16(nbytes); + + sg_init_one(&sg, bp, sizeof(*bp)); + 
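
tcp_v4_ao_calc_key() above feeds a fixed KDF input block (RFC 5926, 3.1.1) to HMAC over the master key: a one-byte counter, the literal "TCP-AO" label, the 4-tuple plus both ISNs in network byte order, and the output length in bits. The layout reproduced as a standalone packed struct; the tcp4_ao_context field order is inferred from the assignments in the diff:

#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>

/* connection context, all fields in network byte order */
struct tcp4_ao_context {
        uint32_t saddr, daddr;
        uint16_t sport, dport;
        uint32_t sisn, disn;
} __attribute__((packed));

/* RFC 5926 3.1.1 KDF input: counter || "TCP-AO" || context || outlen_bits */
struct kdf_input_block {
        uint8_t counter;
        uint8_t label[6];
        struct tcp4_ao_context ctx;
        uint16_t outlen;
} __attribute__((packed));

int main(void)
{
        struct kdf_input_block blk = { .counter = 1 };

        memcpy(blk.label, "TCP-AO", 6);
        blk.ctx.saddr = inet_addr("192.0.2.1");  /* example 4-tuple */
        blk.ctx.daddr = inet_addr("192.0.2.2");
        blk.ctx.sport = htons(12345);
        blk.ctx.dport = htons(179);
        blk.ctx.sisn  = htonl(1000);
        blk.ctx.disn  = htonl(2000);
        blk.outlen    = htons(20 * 8); /* e.g. 20-byte traffic key, in bits */

        printf("KDF input is %zu bytes\n", sizeof(blk)); /* 1+6+20+2 = 29 */
        return 0;
}
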
ahash_request_set_crypt(hp->req, &sg, NULL, sizeof(*bp)); + return crypto_ahash_update(hp->req); +} + +static int tcp_ao_hash_pseudoheader(unsigned short int family, + const struct sock *sk, + const struct sk_buff *skb, + struct tcp_sigpool *hp, int nbytes) +{ + const struct tcphdr *th = tcp_hdr(skb); + + /* TODO: Can we rely on checksum being zero to mean outbound pkt? */ + if (!th->check) { + if (family == AF_INET) + return tcp_v4_ao_hash_pseudoheader(hp, sk->sk_daddr, + sk->sk_rcv_saddr, skb->len); +#if IS_ENABLED(CONFIG_IPV6) + else if (family == AF_INET6) + return tcp_v6_ao_hash_pseudoheader(hp, &sk->sk_v6_daddr, + &sk->sk_v6_rcv_saddr, skb->len); +#endif + else + return -EAFNOSUPPORT; + } + + if (family == AF_INET) { + const struct iphdr *iph = ip_hdr(skb); + + return tcp_v4_ao_hash_pseudoheader(hp, iph->daddr, + iph->saddr, skb->len); +#if IS_ENABLED(CONFIG_IPV6) + } else if (family == AF_INET6) { + const struct ipv6hdr *iph = ipv6_hdr(skb); + + return tcp_v6_ao_hash_pseudoheader(hp, &iph->daddr, + &iph->saddr, skb->len); +#endif + } + return -EAFNOSUPPORT; +} + +u32 tcp_ao_compute_sne(u32 next_sne, u32 next_seq, u32 seq) +{ + u32 sne = next_sne; + + if (before(seq, next_seq)) { + if (seq > next_seq) + sne--; + } else { + if (seq < next_seq) + sne++; + } + + return sne; +} + +/* tcp_ao_hash_sne(struct tcp_sigpool *hp) + * @hp - used for hashing + * @sne - sne value + */ +static int tcp_ao_hash_sne(struct tcp_sigpool *hp, u32 sne) +{ + struct scatterlist sg; + __be32 *bp; + + bp = (__be32 *)hp->scratch; + *bp = htonl(sne); + + sg_init_one(&sg, bp, sizeof(*bp)); + ahash_request_set_crypt(hp->req, &sg, NULL, sizeof(*bp)); + return crypto_ahash_update(hp->req); +} + +static int tcp_ao_hash_header(struct tcp_sigpool *hp, + const struct tcphdr *th, + bool exclude_options, u8 *hash, + int hash_offset, int hash_len) +{ + int err, len = th->doff << 2; + struct scatterlist sg; + u8 *hdr = hp->scratch; + + /* We are not allowed to change tcphdr, make a local copy */ + if (exclude_options) { + len = sizeof(*th) + sizeof(struct tcp_ao_hdr) + hash_len; + memcpy(hdr, th, sizeof(*th)); + memcpy(hdr + sizeof(*th), + (u8 *)th + hash_offset - sizeof(struct tcp_ao_hdr), + sizeof(struct tcp_ao_hdr)); + memset(hdr + sizeof(*th) + sizeof(struct tcp_ao_hdr), + 0, hash_len); + ((struct tcphdr *)hdr)->check = 0; + } else { + len = th->doff << 2; + memcpy(hdr, th, len); + /* zero out tcp-ao hash */ + ((struct tcphdr *)hdr)->check = 0; + memset(hdr + hash_offset, 0, hash_len); + } + + sg_init_one(&sg, hdr, len); + ahash_request_set_crypt(hp->req, &sg, NULL, len); + err = crypto_ahash_update(hp->req); + WARN_ON_ONCE(err != 0); + return err; +} + +int tcp_ao_hash_hdr(unsigned short int family, char *ao_hash, + struct tcp_ao_key *key, const u8 *tkey, + const union tcp_ao_addr *daddr, + const union tcp_ao_addr *saddr, + const struct tcphdr *th, u32 sne) +{ + int tkey_len = tcp_ao_digest_size(key); + int hash_offset = ao_hash - (char *)th; + struct tcp_sigpool hp; + void *hash_buf = NULL; + + hash_buf = kmalloc(tkey_len, GFP_ATOMIC); + if (!hash_buf) + goto clear_hash_noput; + + if (tcp_sigpool_start(key->tcp_sigpool_id, &hp)) + goto clear_hash_noput; + + if (crypto_ahash_setkey(crypto_ahash_reqtfm(hp.req), tkey, tkey_len)) + goto clear_hash; + + if (crypto_ahash_init(hp.req)) + goto clear_hash; + + if (tcp_ao_hash_sne(&hp, sne)) + goto clear_hash; + if (family == AF_INET) { + if (tcp_v4_ao_hash_pseudoheader(&hp, daddr->a4.s_addr, + saddr->a4.s_addr, th->doff * 4)) + goto clear_hash; +#if IS_ENABLED(CONFIG_IPV6) + 
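
tcp_ao_compute_sne() above maintains the Sequence Number Extension: the upper 32 bits of a virtual 64-bit sequence number, stepped down for a segment from before the last wrap and up for one from after it, judged by combining the kernel's signed-distance before() with a plain numeric comparison. A standalone rendition with both wrap cases exercised:

#include <stdio.h>
#include <stdint.h>

/* kernel's before(): signed 32-bit distance handles wraparound */
static int before(uint32_t s1, uint32_t s2)
{
        return (int32_t)(s1 - s2) < 0;
}

/* pick the SNE for a segment at @seq, given the connection is around
 * @next_seq with extension @next_sne */
static uint32_t compute_sne(uint32_t next_sne, uint32_t next_seq, uint32_t seq)
{
        uint32_t sne = next_sne;

        if (before(seq, next_seq)) {
                if (seq > next_seq)  /* logically older, numerically bigger: */
                        sne--;       /* segment predates the last wrap       */
        } else {
                if (seq < next_seq)  /* logically newer, numerically smaller: */
                        sne++;       /* the 32-bit counter wrapped            */
        }
        return sne;
}

int main(void)
{
        /* connection just wrapped: next SEQ is 0x10, extension is 5 */
        printf("%u\n", compute_sne(5, 0x10, 0xfffffff0u));  /* 4: pre-wrap seg  */
        printf("%u\n", compute_sne(5, 0xfffffff0u, 0x10));  /* 6: post-wrap seg */
        return 0;
}
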
} else if (family == AF_INET6) { + if (tcp_v6_ao_hash_pseudoheader(&hp, &daddr->a6, + &saddr->a6, th->doff * 4)) + goto clear_hash; +#endif + } else { + WARN_ON_ONCE(1); + goto clear_hash; + } + if (tcp_ao_hash_header(&hp, th, + !!(key->keyflags & TCP_AO_KEYF_EXCLUDE_OPT), + ao_hash, hash_offset, tcp_ao_maclen(key))) + goto clear_hash; + ahash_request_set_crypt(hp.req, NULL, hash_buf, 0); + if (crypto_ahash_final(hp.req)) + goto clear_hash; + + memcpy(ao_hash, hash_buf, tcp_ao_maclen(key)); + tcp_sigpool_end(&hp); + kfree(hash_buf); + return 0; + +clear_hash: + tcp_sigpool_end(&hp); +clear_hash_noput: + memset(ao_hash, 0, tcp_ao_maclen(key)); + kfree(hash_buf); + return 1; +} + +int tcp_ao_hash_skb(unsigned short int family, + char *ao_hash, struct tcp_ao_key *key, + const struct sock *sk, const struct sk_buff *skb, + const u8 *tkey, int hash_offset, u32 sne) +{ + const struct tcphdr *th = tcp_hdr(skb); + int tkey_len = tcp_ao_digest_size(key); + struct tcp_sigpool hp; + void *hash_buf = NULL; + + hash_buf = kmalloc(tkey_len, GFP_ATOMIC); + if (!hash_buf) + goto clear_hash_noput; + + if (tcp_sigpool_start(key->tcp_sigpool_id, &hp)) + goto clear_hash_noput; + + if (crypto_ahash_setkey(crypto_ahash_reqtfm(hp.req), tkey, tkey_len)) + goto clear_hash; + + /* For now use sha1 by default. Depends on alg in tcp_ao_key */ + if (crypto_ahash_init(hp.req)) + goto clear_hash; + + if (tcp_ao_hash_sne(&hp, sne)) + goto clear_hash; + if (tcp_ao_hash_pseudoheader(family, sk, skb, &hp, skb->len)) + goto clear_hash; + if (tcp_ao_hash_header(&hp, th, + !!(key->keyflags & TCP_AO_KEYF_EXCLUDE_OPT), + ao_hash, hash_offset, tcp_ao_maclen(key))) + goto clear_hash; + if (tcp_sigpool_hash_skb_data(&hp, skb, th->doff << 2)) + goto clear_hash; + ahash_request_set_crypt(hp.req, NULL, hash_buf, 0); + if (crypto_ahash_final(hp.req)) + goto clear_hash; + + memcpy(ao_hash, hash_buf, tcp_ao_maclen(key)); + tcp_sigpool_end(&hp); + kfree(hash_buf); + return 0; + +clear_hash: + tcp_sigpool_end(&hp); +clear_hash_noput: + memset(ao_hash, 0, tcp_ao_maclen(key)); + kfree(hash_buf); + return 1; +} + +int tcp_v4_ao_hash_skb(char *ao_hash, struct tcp_ao_key *key, + const struct sock *sk, const struct sk_buff *skb, + const u8 *tkey, int hash_offset, u32 sne) +{ + return tcp_ao_hash_skb(AF_INET, ao_hash, key, sk, skb, + tkey, hash_offset, sne); +} + +int tcp_v4_ao_synack_hash(char *ao_hash, struct tcp_ao_key *ao_key, + struct request_sock *req, const struct sk_buff *skb, + int hash_offset, u32 sne) +{ + void *hash_buf = NULL; + int err; + + hash_buf = kmalloc(tcp_ao_digest_size(ao_key), GFP_ATOMIC); + if (!hash_buf) + return -ENOMEM; + + err = tcp_v4_ao_calc_key_rsk(ao_key, hash_buf, req); + if (err) + goto out; + + err = tcp_ao_hash_skb(AF_INET, ao_hash, ao_key, req_to_sk(req), skb, + hash_buf, hash_offset, sne); +out: + kfree(hash_buf); + return err; +} + +struct tcp_ao_key *tcp_v4_ao_lookup_rsk(const struct sock *sk, + struct request_sock *req, + int sndid, int rcvid) +{ + struct inet_request_sock *ireq = inet_rsk(req); + union tcp_ao_addr *addr = (union tcp_ao_addr *)&ireq->ir_rmt_addr; + int l3index; + + l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif); + return tcp_ao_do_lookup(sk, l3index, addr, AF_INET, sndid, rcvid); +} + +struct tcp_ao_key *tcp_v4_ao_lookup(const struct sock *sk, struct sock *addr_sk, + int sndid, int rcvid) +{ + int l3index = l3mdev_master_ifindex_by_index(sock_net(sk), + addr_sk->sk_bound_dev_if); + union tcp_ao_addr *addr = (union tcp_ao_addr *)&addr_sk->sk_daddr; + + return 
tcp_ao_do_lookup(sk, l3index, addr, AF_INET, sndid, rcvid); +} + +int tcp_ao_prepare_reset(const struct sock *sk, struct sk_buff *skb, + const struct tcp_ao_hdr *aoh, int l3index, u32 seq, + struct tcp_ao_key **key, char **traffic_key, + bool *allocated_traffic_key, u8 *keyid, u32 *sne) +{ + const struct tcphdr *th = tcp_hdr(skb); + struct tcp_ao_info *ao_info; + + *allocated_traffic_key = false; + /* If there's no socket - than initial sisn/disn are unknown. + * Drop the segment. RFC5925 (7.7) advises to require graceful + * restart [RFC4724]. Alternatively, the RFC5925 advises to + * save/restore traffic keys before/after reboot. + * Linux TCP-AO support provides TCP_AO_ADD_KEY and TCP_AO_REPAIR + * options to restore a socket post-reboot. + */ + if (!sk) + return -ENOTCONN; + + if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_NEW_SYN_RECV)) { + unsigned int family = READ_ONCE(sk->sk_family); + union tcp_ao_addr *addr; + __be32 disn, sisn; + + if (sk->sk_state == TCP_NEW_SYN_RECV) { + struct request_sock *req = inet_reqsk(sk); + + sisn = htonl(tcp_rsk(req)->rcv_isn); + disn = htonl(tcp_rsk(req)->snt_isn); + *sne = tcp_ao_compute_sne(0, tcp_rsk(req)->snt_isn, seq); + } else { + sisn = th->seq; + disn = 0; + } + if (IS_ENABLED(CONFIG_IPV6) && family == AF_INET6) + addr = (union tcp_md5_addr *)&ipv6_hdr(skb)->saddr; + else + addr = (union tcp_md5_addr *)&ip_hdr(skb)->saddr; +#if IS_ENABLED(CONFIG_IPV6) + if (family == AF_INET6 && ipv6_addr_v4mapped(&sk->sk_v6_daddr)) + family = AF_INET; +#endif + + sk = sk_const_to_full_sk(sk); + ao_info = rcu_dereference(tcp_sk(sk)->ao_info); + if (!ao_info) + return -ENOENT; + *key = tcp_ao_do_lookup(sk, l3index, addr, family, + -1, aoh->rnext_keyid); + if (!*key) + return -ENOENT; + *traffic_key = kmalloc(tcp_ao_digest_size(*key), GFP_ATOMIC); + if (!*traffic_key) + return -ENOMEM; + *allocated_traffic_key = true; + if (tcp_ao_calc_key_skb(*key, *traffic_key, skb, + sisn, disn, family)) + return -1; + *keyid = (*key)->rcvid; + } else { + struct tcp_ao_key *rnext_key; + u32 snd_basis; + + if (sk->sk_state == TCP_TIME_WAIT) { + ao_info = rcu_dereference(tcp_twsk(sk)->ao_info); + snd_basis = tcp_twsk(sk)->tw_snd_nxt; + } else { + ao_info = rcu_dereference(tcp_sk(sk)->ao_info); + snd_basis = tcp_sk(sk)->snd_una; + } + if (!ao_info) + return -ENOENT; + + *key = tcp_ao_established_key(ao_info, aoh->rnext_keyid, -1); + if (!*key) + return -ENOENT; + *traffic_key = snd_other_key(*key); + rnext_key = READ_ONCE(ao_info->rnext_key); + *keyid = rnext_key->rcvid; + *sne = tcp_ao_compute_sne(READ_ONCE(ao_info->snd_sne), + snd_basis, seq); + } + return 0; +} + +int tcp_ao_transmit_skb(struct sock *sk, struct sk_buff *skb, + struct tcp_ao_key *key, struct tcphdr *th, + __u8 *hash_location) +{ + struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); + struct tcp_sock *tp = tcp_sk(sk); + struct tcp_ao_info *ao; + void *tkey_buf = NULL; + u8 *traffic_key; + u32 sne; + + ao = rcu_dereference_protected(tcp_sk(sk)->ao_info, + lockdep_sock_is_held(sk)); + traffic_key = snd_other_key(key); + if (unlikely(tcb->tcp_flags & TCPHDR_SYN)) { + __be32 disn; + + if (!(tcb->tcp_flags & TCPHDR_ACK)) { + disn = 0; + tkey_buf = kmalloc(tcp_ao_digest_size(key), GFP_ATOMIC); + if (!tkey_buf) + return -ENOMEM; + traffic_key = tkey_buf; + } else { + disn = ao->risn; + } + tp->af_specific->ao_calc_key_sk(key, traffic_key, + sk, ao->lisn, disn, true); + } + sne = tcp_ao_compute_sne(READ_ONCE(ao->snd_sne), READ_ONCE(tp->snd_una), + ntohl(th->seq)); + tp->af_specific->calc_ao_hash(hash_location, key, sk, skb, 
traffic_key, + hash_location - (u8 *)th, sne); + kfree(tkey_buf); + return 0; +} + +static struct tcp_ao_key *tcp_ao_inbound_lookup(unsigned short int family, + const struct sock *sk, const struct sk_buff *skb, + int sndid, int rcvid, int l3index) +{ + if (family == AF_INET) { + const struct iphdr *iph = ip_hdr(skb); + + return tcp_ao_do_lookup(sk, l3index, + (union tcp_ao_addr *)&iph->saddr, + AF_INET, sndid, rcvid); + } else { + const struct ipv6hdr *iph = ipv6_hdr(skb); + + return tcp_ao_do_lookup(sk, l3index, + (union tcp_ao_addr *)&iph->saddr, + AF_INET6, sndid, rcvid); + } +} + +void tcp_ao_syncookie(struct sock *sk, const struct sk_buff *skb, + struct tcp_request_sock *treq, + unsigned short int family, int l3index) +{ + const struct tcphdr *th = tcp_hdr(skb); + const struct tcp_ao_hdr *aoh; + struct tcp_ao_key *key; + + treq->used_tcp_ao = false; + + if (tcp_parse_auth_options(th, NULL, &aoh) || !aoh) + return; + + key = tcp_ao_inbound_lookup(family, sk, skb, -1, aoh->keyid, l3index); + if (!key) + /* Key not found, continue without TCP-AO */ + return; + + treq->ao_rcv_next = aoh->keyid; + treq->ao_keyid = aoh->rnext_keyid; + treq->used_tcp_ao = true; +} + +static enum skb_drop_reason +tcp_ao_verify_hash(const struct sock *sk, const struct sk_buff *skb, + unsigned short int family, struct tcp_ao_info *info, + const struct tcp_ao_hdr *aoh, struct tcp_ao_key *key, + u8 *traffic_key, u8 *phash, u32 sne, int l3index) +{ + u8 maclen = aoh->length - sizeof(struct tcp_ao_hdr); + const struct tcphdr *th = tcp_hdr(skb); + void *hash_buf = NULL; + + if (maclen != tcp_ao_maclen(key)) { + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOBAD); + atomic64_inc(&info->counters.pkt_bad); + atomic64_inc(&key->pkt_bad); + tcp_hash_fail("AO hash wrong length", family, skb, + "%u != %d L3index: %d", maclen, + tcp_ao_maclen(key), l3index); + return SKB_DROP_REASON_TCP_AOFAILURE; + } + + hash_buf = kmalloc(tcp_ao_digest_size(key), GFP_ATOMIC); + if (!hash_buf) + return SKB_DROP_REASON_NOT_SPECIFIED; + + /* XXX: make it per-AF callback? 
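+ * (the TX path already dispatches through tp->af_specific->calc_ao_hash,
+ * see tcp_ao_transmit_skb() above, so RX verification could plausibly
+ * reuse the same per-family hook instead of this generic helper)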
*/ + tcp_ao_hash_skb(family, hash_buf, key, sk, skb, traffic_key, + (phash - (u8 *)th), sne); + if (memcmp(phash, hash_buf, maclen)) { + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOBAD); + atomic64_inc(&info->counters.pkt_bad); + atomic64_inc(&key->pkt_bad); + tcp_hash_fail("AO hash mismatch", family, skb, + "L3index: %d", l3index); + kfree(hash_buf); + return SKB_DROP_REASON_TCP_AOFAILURE; + } + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOGOOD); + atomic64_inc(&info->counters.pkt_good); + atomic64_inc(&key->pkt_good); + kfree(hash_buf); + return SKB_NOT_DROPPED_YET; +} + +enum skb_drop_reason +tcp_inbound_ao_hash(struct sock *sk, const struct sk_buff *skb, + unsigned short int family, const struct request_sock *req, + int l3index, const struct tcp_ao_hdr *aoh) +{ + const struct tcphdr *th = tcp_hdr(skb); + u8 *phash = (u8 *)(aoh + 1); /* hash goes just after the header */ + struct tcp_ao_info *info; + enum skb_drop_reason ret; + struct tcp_ao_key *key; + __be32 sisn, disn; + u8 *traffic_key; + u32 sne = 0; + + info = rcu_dereference(tcp_sk(sk)->ao_info); + if (!info) { + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOKEYNOTFOUND); + tcp_hash_fail("AO key not found", family, skb, + "keyid: %u L3index: %d", aoh->keyid, l3index); + return SKB_DROP_REASON_TCP_AOUNEXPECTED; + } + + if (unlikely(th->syn)) { + sisn = th->seq; + disn = 0; + } + + /* Fast-path */ + if (likely((1 << sk->sk_state) & TCP_AO_ESTABLISHED)) { + enum skb_drop_reason err; + struct tcp_ao_key *current_key; + + /* Check if this socket's rnext_key matches the keyid in the + * packet. If not, we look up the key based on the keyid + * matching the rcvid in the MKT. + */ + key = READ_ONCE(info->rnext_key); + if (key->rcvid != aoh->keyid) { + key = tcp_ao_established_key(info, -1, aoh->keyid); + if (!key) + goto key_not_found; + } + + /* Delayed retransmitted SYN */ + if (unlikely(th->syn && !th->ack)) + goto verify_hash; + + sne = tcp_ao_compute_sne(info->rcv_sne, tcp_sk(sk)->rcv_nxt, + ntohl(th->seq)); + /* Established socket, traffic keys are cached */ + traffic_key = rcv_other_key(key); + err = tcp_ao_verify_hash(sk, skb, family, info, aoh, key, + traffic_key, phash, sne, l3index); + if (err) + return err; + current_key = READ_ONCE(info->current_key); + /* Key rotation: the peer asks us to use a new key (RNext) */ + if (unlikely(aoh->rnext_keyid != current_key->sndid)) { + /* If the key is not found, we do nothing. */ + key = tcp_ao_established_key(info, aoh->rnext_keyid, -1); + if (key) + /* pairs with tcp_ao_del_cmd */ + WRITE_ONCE(info->current_key, key); + } + return SKB_NOT_DROPPED_YET; + } + + /* Lookup key based on peer address and keyid.
+ * current_key and rnext_key must not be used on tcp listen + * sockets as otherwise: + * - request sockets would race on those key pointers + * - tcp_ao_del_cmd() allows async key removal + */ + key = tcp_ao_inbound_lookup(family, sk, skb, -1, aoh->keyid, l3index); + if (!key) + goto key_not_found; + + if (th->syn && !th->ack) + goto verify_hash; + + if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_NEW_SYN_RECV)) { + /* Make the initial syn the likely case here */ + if (unlikely(req)) { + sne = tcp_ao_compute_sne(0, tcp_rsk(req)->rcv_isn, + ntohl(th->seq)); + sisn = htonl(tcp_rsk(req)->rcv_isn); + disn = htonl(tcp_rsk(req)->snt_isn); + } else if (unlikely(th->ack && !th->syn)) { + /* Possible syncookie packet */ + sisn = htonl(ntohl(th->seq) - 1); + disn = htonl(ntohl(th->ack_seq) - 1); + sne = tcp_ao_compute_sne(0, ntohl(sisn), + ntohl(th->seq)); + } else if (unlikely(!th->syn)) { + /* no way to figure out initial sisn/disn - drop */ + return SKB_DROP_REASON_TCP_FLAGS; + } + } else if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { + disn = info->lisn; + if (th->syn || th->rst) + sisn = th->seq; + else + sisn = info->risn; + } else { + WARN_ONCE(1, "TCP-AO: Unexpected sk_state %d", sk->sk_state); + return SKB_DROP_REASON_TCP_AOFAILURE; + } +verify_hash: + traffic_key = kmalloc(tcp_ao_digest_size(key), GFP_ATOMIC); + if (!traffic_key) + return SKB_DROP_REASON_NOT_SPECIFIED; + tcp_ao_calc_key_skb(key, traffic_key, skb, sisn, disn, family); + ret = tcp_ao_verify_hash(sk, skb, family, info, aoh, key, + traffic_key, phash, sne, l3index); + kfree(traffic_key); + return ret; + +key_not_found: + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOKEYNOTFOUND); + atomic64_inc(&info->counters.key_not_found); + tcp_hash_fail("Requested by the peer AO key id not found", + family, skb, "L3index: %d", l3index); + return SKB_DROP_REASON_TCP_AOKEYNOTFOUND; +} + +static int tcp_ao_cache_traffic_keys(const struct sock *sk, + struct tcp_ao_info *ao, + struct tcp_ao_key *ao_key) +{ + u8 *traffic_key = snd_other_key(ao_key); + int ret; + + ret = tcp_ao_calc_key_sk(ao_key, traffic_key, sk, + ao->lisn, ao->risn, true); + if (ret) + return ret; + + traffic_key = rcv_other_key(ao_key); + ret = tcp_ao_calc_key_sk(ao_key, traffic_key, sk, + ao->lisn, ao->risn, false); + return ret; +} + +void tcp_ao_connect_init(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct tcp_ao_info *ao_info; + union tcp_ao_addr *addr; + struct tcp_ao_key *key; + int family, l3index; + + ao_info = rcu_dereference_protected(tp->ao_info, + lockdep_sock_is_held(sk)); + if (!ao_info) + return; + + /* Remove all keys that don't match the peer */ + family = sk->sk_family; + if (family == AF_INET) + addr = (union tcp_ao_addr *)&sk->sk_daddr; +#if IS_ENABLED(CONFIG_IPV6) + else if (family == AF_INET6) + addr = (union tcp_ao_addr *)&sk->sk_v6_daddr; +#endif + else + return; + l3index = l3mdev_master_ifindex_by_index(sock_net(sk), + sk->sk_bound_dev_if); + + hlist_for_each_entry_rcu(key, &ao_info->head, node) { + if (!tcp_ao_key_cmp(key, l3index, addr, key->prefixlen, family, -1, -1)) + continue; + + if (key == ao_info->current_key) + ao_info->current_key = NULL; + if (key == ao_info->rnext_key) + ao_info->rnext_key = NULL; + hlist_del_rcu(&key->node); + atomic_sub(tcp_ao_sizeof_key(key), &sk->sk_omem_alloc); + call_rcu(&key->rcu, tcp_ao_key_free_rcu); + } + + key = tp->af_specific->ao_lookup(sk, sk, -1, -1); + if (key) { + /* if current_key or rnext_key were not provided, + * use the first key matching the peer + */ + if 
(!ao_info->current_key) + ao_info->current_key = key; + if (!ao_info->rnext_key) + ao_info->rnext_key = key; + tp->tcp_header_len += tcp_ao_len_aligned(key); + + ao_info->lisn = htonl(tp->write_seq); + ao_info->snd_sne = 0; + } else { + /* Can't happen: tcp_connect() verifies that there's + * at least one tcp-ao key that matches the remote peer. + */ + WARN_ON_ONCE(1); + rcu_assign_pointer(tp->ao_info, NULL); + kfree(ao_info); + } +} + +void tcp_ao_established(struct sock *sk) +{ + struct tcp_ao_info *ao; + struct tcp_ao_key *key; + + ao = rcu_dereference_protected(tcp_sk(sk)->ao_info, + lockdep_sock_is_held(sk)); + if (!ao) + return; + + hlist_for_each_entry_rcu(key, &ao->head, node) + tcp_ao_cache_traffic_keys(sk, ao, key); +} + +void tcp_ao_finish_connect(struct sock *sk, struct sk_buff *skb) +{ + struct tcp_ao_info *ao; + struct tcp_ao_key *key; + + ao = rcu_dereference_protected(tcp_sk(sk)->ao_info, + lockdep_sock_is_held(sk)); + if (!ao) + return; + + WRITE_ONCE(ao->risn, tcp_hdr(skb)->seq); + ao->rcv_sne = 0; + + hlist_for_each_entry_rcu(key, &ao->head, node) + tcp_ao_cache_traffic_keys(sk, ao, key); +} + +int tcp_ao_copy_all_matching(const struct sock *sk, struct sock *newsk, + struct request_sock *req, struct sk_buff *skb, + int family) +{ + struct tcp_ao_key *key, *new_key, *first_key; + struct tcp_ao_info *new_ao, *ao; + struct hlist_node *key_head; + int l3index, ret = -ENOMEM; + union tcp_ao_addr *addr; + bool match = false; + + ao = rcu_dereference(tcp_sk(sk)->ao_info); + if (!ao) + return 0; + + /* New socket without TCP-AO on it */ + if (!tcp_rsk_used_ao(req)) + return 0; + + new_ao = tcp_ao_alloc_info(GFP_ATOMIC); + if (!new_ao) + return -ENOMEM; + new_ao->lisn = htonl(tcp_rsk(req)->snt_isn); + new_ao->risn = htonl(tcp_rsk(req)->rcv_isn); + new_ao->ao_required = ao->ao_required; + new_ao->accept_icmps = ao->accept_icmps; + + if (family == AF_INET) { + addr = (union tcp_ao_addr *)&newsk->sk_daddr; +#if IS_ENABLED(CONFIG_IPV6) + } else if (family == AF_INET6) { + addr = (union tcp_ao_addr *)&newsk->sk_v6_daddr; +#endif + } else { + ret = -EAFNOSUPPORT; + goto free_ao; + } + l3index = l3mdev_master_ifindex_by_index(sock_net(newsk), + newsk->sk_bound_dev_if); + + hlist_for_each_entry_rcu(key, &ao->head, node) { + if (tcp_ao_key_cmp(key, l3index, addr, key->prefixlen, family, -1, -1)) + continue; + + new_key = tcp_ao_copy_key(newsk, key); + if (!new_key) + goto free_and_exit; + + tcp_ao_cache_traffic_keys(newsk, new_ao, new_key); + tcp_ao_link_mkt(new_ao, new_key); + match = true; + } + + if (!match) { + /* RFC5925 (7.4.1) specifies that the TCP-AO status + * of a connection is determined on the initial SYN. + * At this point the connection was TCP-AO enabled, so + * it can't switch to being unsigned if peer's key + * disappears on the listening socket. 
+ */ + ret = -EKEYREJECTED; + goto free_and_exit; + } + + if (!static_key_fast_inc_not_disabled(&tcp_ao_needed.key.key)) { + ret = -EUSERS; + goto free_and_exit; + } + + key_head = rcu_dereference(hlist_first_rcu(&new_ao->head)); + first_key = hlist_entry_safe(key_head, struct tcp_ao_key, node); + + key = tcp_ao_established_key(new_ao, tcp_rsk(req)->ao_keyid, -1); + if (key) + new_ao->current_key = key; + else + new_ao->current_key = first_key; + + /* set rnext_key */ + key = tcp_ao_established_key(new_ao, -1, tcp_rsk(req)->ao_rcv_next); + if (key) + new_ao->rnext_key = key; + else + new_ao->rnext_key = first_key; + + sk_gso_disable(newsk); + rcu_assign_pointer(tcp_sk(newsk)->ao_info, new_ao); + + return 0; + +free_and_exit: + hlist_for_each_entry_safe(key, key_head, &new_ao->head, node) { + hlist_del(&key->node); + tcp_sigpool_release(key->tcp_sigpool_id); + atomic_sub(tcp_ao_sizeof_key(key), &newsk->sk_omem_alloc); + kfree_sensitive(key); + } +free_ao: + kfree(new_ao); + return ret; +} + +static bool tcp_ao_can_set_current_rnext(struct sock *sk) +{ + /* There aren't current/rnext keys on TCP_LISTEN sockets */ + if (sk->sk_state == TCP_LISTEN) + return false; + return true; +} + +static int tcp_ao_verify_ipv4(struct sock *sk, struct tcp_ao_add *cmd, + union tcp_ao_addr **addr) +{ + struct sockaddr_in *sin = (struct sockaddr_in *)&cmd->addr; + struct inet_sock *inet = inet_sk(sk); + + if (sin->sin_family != AF_INET) + return -EINVAL; + + /* Currently matching is not performed on port (or port ranges) */ + if (sin->sin_port != 0) + return -EINVAL; + + /* Check prefix and trailing 0's in addr */ + if (cmd->prefix != 0) { + __be32 mask; + + if (ntohl(sin->sin_addr.s_addr) == INADDR_ANY) + return -EINVAL; + if (cmd->prefix > 32) + return -EINVAL; + + mask = inet_make_mask(cmd->prefix); + if (sin->sin_addr.s_addr & ~mask) + return -EINVAL; + + /* Check that MKT address is consistent with socket */ + if (ntohl(inet->inet_daddr) != INADDR_ANY && + (inet->inet_daddr & mask) != sin->sin_addr.s_addr) + return -EINVAL; + } else { + if (ntohl(sin->sin_addr.s_addr) != INADDR_ANY) + return -EINVAL; + } + + *addr = (union tcp_ao_addr *)&sin->sin_addr; + return 0; +} + +static int tcp_ao_parse_crypto(struct tcp_ao_add *cmd, struct tcp_ao_key *key) +{ + unsigned int syn_tcp_option_space; + bool is_kdf_aes_128_cmac = false; + struct crypto_ahash *tfm; + struct tcp_sigpool hp; + void *tmp_key = NULL; + int err; + + /* RFC5926, 3.1.1.2. KDF_AES_128_CMAC */ + if (!strcmp("cmac(aes128)", cmd->alg_name)) { + strscpy(cmd->alg_name, "cmac(aes)", sizeof(cmd->alg_name)); + is_kdf_aes_128_cmac = (cmd->keylen != 16); + tmp_key = kmalloc(cmd->keylen, GFP_KERNEL); + if (!tmp_key) + return -ENOMEM; + } + + key->maclen = cmd->maclen ?: 12; /* 12 is the default in RFC5925 */ + + /* Check: maclen + tcp-ao header <= (MAX_TCP_OPTION_SPACE - mss + * - tstamp (including sackperm) + * - wscale), + * see tcp_syn_options(), tcp_synack_options(), commit 33ad798c924b. + * + * In order to allow D-SACK with TCP-AO, the header size should be: + * (MAX_TCP_OPTION_SPACE - TCPOLEN_TSTAMP_ALIGNED + * - TCPOLEN_SACK_BASE_ALIGNED + * - 2 * TCPOLEN_SACK_PERBLOCK) = 8 (maclen = 4), + * see tcp_established_options(). + * + * RFC5925, 2.2: + * Typical MACs are 96-128 bits (12-16 bytes), but any length + * that fits in the header of the segment being authenticated + * is allowed. 
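+ *
+ * With the SYN-side deductions made below, that works out to:
+ * 40 (MAX_TCP_OPTION_SPACE) - 4 (TCPOLEN_MSS_ALIGNED)
+ * - 12 (TCPOLEN_TSTAMP_ALIGNED) - 4 (TCPOLEN_WSCALE_ALIGNED) = 20
+ * bytes left for TCP-AO: the default 12-byte MAC plus the 4-byte
+ * option header gives a 16-byte option, which fits.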
+ * + * RFC5925, 7.6: + * TCP-AO continues to consume 16 bytes in non-SYN segments, + * leaving a total of 24 bytes for other options, of which + * the timestamp consumes 10. This leaves 14 bytes, of which 10 + * are used for a single SACK block. When two SACK blocks are used, + * such as to handle D-SACK, a smaller TCP-AO MAC would be required + * to make room for the additional SACK block (i.e., to leave 18 + * bytes for the D-SACK variant of the SACK option) [RFC2883]. + * Note that D-SACK is not supportable in TCP MD5 in the presence + * of timestamps, because TCP MD5’s MAC length is fixed and too + * large to leave sufficient option space. + */ + syn_tcp_option_space = MAX_TCP_OPTION_SPACE; + syn_tcp_option_space -= TCPOLEN_MSS_ALIGNED; + syn_tcp_option_space -= TCPOLEN_TSTAMP_ALIGNED; + syn_tcp_option_space -= TCPOLEN_WSCALE_ALIGNED; + if (tcp_ao_len_aligned(key) > syn_tcp_option_space) { + err = -EMSGSIZE; + goto err_kfree; + } + + key->keylen = cmd->keylen; + memcpy(key->key, cmd->key, cmd->keylen); + + err = tcp_sigpool_start(key->tcp_sigpool_id, &hp); + if (err) + goto err_kfree; + + tfm = crypto_ahash_reqtfm(hp.req); + if (is_kdf_aes_128_cmac) { + void *scratch = hp.scratch; + struct scatterlist sg; + + memcpy(tmp_key, cmd->key, cmd->keylen); + sg_init_one(&sg, tmp_key, cmd->keylen); + + /* Using zero-key of 16 bytes as described in RFC5926 */ + memset(scratch, 0, 16); + err = crypto_ahash_setkey(tfm, scratch, 16); + if (err) + goto err_pool_end; + + err = crypto_ahash_init(hp.req); + if (err) + goto err_pool_end; + + ahash_request_set_crypt(hp.req, &sg, key->key, cmd->keylen); + err = crypto_ahash_update(hp.req); + if (err) + goto err_pool_end; + + err |= crypto_ahash_final(hp.req); + if (err) + goto err_pool_end; + key->keylen = 16; + } + + err = crypto_ahash_setkey(tfm, key->key, key->keylen); + if (err) + goto err_pool_end; + + tcp_sigpool_end(&hp); + kfree_sensitive(tmp_key); + + if (tcp_ao_maclen(key) > key->digest_size) + return -EINVAL; + + return 0; + +err_pool_end: + tcp_sigpool_end(&hp); +err_kfree: + kfree_sensitive(tmp_key); + return err; +} + +#if IS_ENABLED(CONFIG_IPV6) +static int tcp_ao_verify_ipv6(struct sock *sk, struct tcp_ao_add *cmd, + union tcp_ao_addr **paddr, + unsigned short int *family) +{ + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd->addr; + struct in6_addr *addr = &sin6->sin6_addr; + u8 prefix = cmd->prefix; + + if (sin6->sin6_family != AF_INET6) + return -EINVAL; + + /* Currently matching is not performed on port (or port ranges) */ + if (sin6->sin6_port != 0) + return -EINVAL; + + /* Check prefix and trailing 0's in addr */ + if (cmd->prefix != 0 && ipv6_addr_v4mapped(addr)) { + __be32 addr4 = addr->s6_addr32[3]; + __be32 mask; + + if (prefix > 32 || ntohl(addr4) == INADDR_ANY) + return -EINVAL; + + mask = inet_make_mask(prefix); + if (addr4 & ~mask) + return -EINVAL; + + /* Check that MKT address is consistent with socket */ + if (!ipv6_addr_any(&sk->sk_v6_daddr)) { + __be32 daddr4 = sk->sk_v6_daddr.s6_addr32[3]; + + if (!ipv6_addr_v4mapped(&sk->sk_v6_daddr)) + return -EINVAL; + if ((daddr4 & mask) != addr4) + return -EINVAL; + } + + *paddr = (union tcp_ao_addr *)&addr->s6_addr32[3]; + *family = AF_INET; + return 0; + } else if (cmd->prefix != 0) { + struct in6_addr pfx; + + if (ipv6_addr_any(addr) || prefix > 128) + return -EINVAL; + + ipv6_addr_prefix(&pfx, addr, prefix); + if (ipv6_addr_cmp(&pfx, addr)) + return -EINVAL; + + /* Check that MKT address is consistent with socket */ + if (!ipv6_addr_any(&sk->sk_v6_daddr) && + 
!ipv6_prefix_equal(&sk->sk_v6_daddr, addr, prefix)) + + return -EINVAL; + } else { + if (!ipv6_addr_any(addr)) + return -EINVAL; + } + + *paddr = (union tcp_ao_addr *)addr; + return 0; +} +#else +static int tcp_ao_verify_ipv6(struct sock *sk, struct tcp_ao_add *cmd, + union tcp_ao_addr **paddr, + unsigned short int *family) +{ + return -EOPNOTSUPP; +} +#endif + +static struct tcp_ao_info *setsockopt_ao_info(struct sock *sk) +{ + if (sk_fullsock(sk)) { + return rcu_dereference_protected(tcp_sk(sk)->ao_info, + lockdep_sock_is_held(sk)); + } else if (sk->sk_state == TCP_TIME_WAIT) { + return rcu_dereference_protected(tcp_twsk(sk)->ao_info, + lockdep_sock_is_held(sk)); + } + return ERR_PTR(-ESOCKTNOSUPPORT); +} + +static struct tcp_ao_info *getsockopt_ao_info(struct sock *sk) +{ + if (sk_fullsock(sk)) + return rcu_dereference(tcp_sk(sk)->ao_info); + else if (sk->sk_state == TCP_TIME_WAIT) + return rcu_dereference(tcp_twsk(sk)->ao_info); + + return ERR_PTR(-ESOCKTNOSUPPORT); +} + +#define TCP_AO_KEYF_ALL (TCP_AO_KEYF_IFINDEX | TCP_AO_KEYF_EXCLUDE_OPT) +#define TCP_AO_GET_KEYF_VALID (TCP_AO_KEYF_IFINDEX) + +static struct tcp_ao_key *tcp_ao_key_alloc(struct sock *sk, + struct tcp_ao_add *cmd) +{ + const char *algo = cmd->alg_name; + unsigned int digest_size; + struct crypto_ahash *tfm; + struct tcp_ao_key *key; + struct tcp_sigpool hp; + int err, pool_id; + size_t size; + + /* Force null-termination of alg_name */ + cmd->alg_name[ARRAY_SIZE(cmd->alg_name) - 1] = '\0'; + + /* RFC5926, 3.1.1.2. KDF_AES_128_CMAC */ + if (!strcmp("cmac(aes128)", algo)) + algo = "cmac(aes)"; + + /* Full TCP header (th->doff << 2) should fit into scratch area, + * see tcp_ao_hash_header(). + */ + pool_id = tcp_sigpool_alloc_ahash(algo, 60); + if (pool_id < 0) + return ERR_PTR(pool_id); + + err = tcp_sigpool_start(pool_id, &hp); + if (err) + goto err_free_pool; + + tfm = crypto_ahash_reqtfm(hp.req); + digest_size = crypto_ahash_digestsize(tfm); + tcp_sigpool_end(&hp); + + size = sizeof(struct tcp_ao_key) + (digest_size << 1); + key = sock_kmalloc(sk, size, GFP_KERNEL); + if (!key) { + err = -ENOMEM; + goto err_free_pool; + } + + key->tcp_sigpool_id = pool_id; + key->digest_size = digest_size; + return key; + +err_free_pool: + tcp_sigpool_release(pool_id); + return ERR_PTR(err); +} + +static int tcp_ao_add_cmd(struct sock *sk, unsigned short int family, + sockptr_t optval, int optlen) +{ + struct tcp_ao_info *ao_info; + union tcp_ao_addr *addr; + struct tcp_ao_key *key; + struct tcp_ao_add cmd; + int ret, l3index = 0; + bool first = false; + + if (optlen < sizeof(cmd)) + return -EINVAL; + + ret = copy_struct_from_sockptr(&cmd, sizeof(cmd), optval, optlen); + if (ret) + return ret; + + if (cmd.keylen > TCP_AO_MAXKEYLEN) + return -EINVAL; + + if (cmd.reserved != 0 || cmd.reserved2 != 0) + return -EINVAL; + + if (family == AF_INET) + ret = tcp_ao_verify_ipv4(sk, &cmd, &addr); + else + ret = tcp_ao_verify_ipv6(sk, &cmd, &addr, &family); + if (ret) + return ret; + + if (cmd.keyflags & ~TCP_AO_KEYF_ALL) + return -EINVAL; + + if (cmd.set_current || cmd.set_rnext) { + if (!tcp_ao_can_set_current_rnext(sk)) + return -EINVAL; + } + + if (cmd.ifindex && !(cmd.keyflags & TCP_AO_KEYF_IFINDEX)) + return -EINVAL; + + /* For cmd.tcp_ifindex = 0 the key will apply to the default VRF */ + if (cmd.keyflags & TCP_AO_KEYF_IFINDEX && cmd.ifindex) { + int bound_dev_if = READ_ONCE(sk->sk_bound_dev_if); + struct net_device *dev; + + rcu_read_lock(); + dev = dev_get_by_index_rcu(sock_net(sk), cmd.ifindex); + if (dev && netif_is_l3_master(dev)) + 
l3index = dev->ifindex; + rcu_read_unlock(); + + if (!dev || !l3index) + return -EINVAL; + + if (!bound_dev_if || bound_dev_if != cmd.ifindex) { + /* tcp_ao_established_key() doesn't expect having + * a non-peer-matching key on an established TCP-AO + * connection. + */ + if (!((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))) + return -EINVAL; + } + + /* It's still possible to bind after adding keys or even + * re-bind to a different dev (with CAP_NET_RAW). + * So, no reason to return error here, rather try to be + * nice and warn the user. + */ + if (bound_dev_if && bound_dev_if != cmd.ifindex) + net_warn_ratelimited("AO key ifindex %d != sk bound ifindex %d\n", + cmd.ifindex, bound_dev_if); + } + + /* Don't allow keys for peers that have a matching TCP-MD5 key */ + if (cmd.keyflags & TCP_AO_KEYF_IFINDEX) { + /* Non-_exact version of tcp_md5_do_lookup() will + * also match keys that aren't bound to a specific VRF + * (that will make them match an AO key with + * sysctl_tcp_l3dev_accept = 1). + */ + if (tcp_md5_do_lookup(sk, l3index, addr, family)) + return -EKEYREJECTED; + } else { + if (tcp_md5_do_lookup_any_l3index(sk, addr, family)) + return -EKEYREJECTED; + } + + ao_info = setsockopt_ao_info(sk); + if (IS_ERR(ao_info)) + return PTR_ERR(ao_info); + + if (!ao_info) { + ao_info = tcp_ao_alloc_info(GFP_KERNEL); + if (!ao_info) + return -ENOMEM; + first = true; + } else { + /* Check that neither RecvID nor SendID match any + * existing key for the peer, RFC5925 3.1: + * > The IDs of MKTs MUST NOT overlap where their + * > TCP connection identifiers overlap. + */ + if (__tcp_ao_do_lookup(sk, l3index, addr, family, cmd.prefix, -1, cmd.rcvid)) + return -EEXIST; + if (__tcp_ao_do_lookup(sk, l3index, addr, family, + cmd.prefix, cmd.sndid, -1)) + return -EEXIST; + } + + key = tcp_ao_key_alloc(sk, &cmd); + if (IS_ERR(key)) { + ret = PTR_ERR(key); + goto err_free_ao; + } + + INIT_HLIST_NODE(&key->node); + memcpy(&key->addr, addr, (family == AF_INET) ? sizeof(struct in_addr) : + sizeof(struct in6_addr)); + key->prefixlen = cmd.prefix; + key->family = family; + key->keyflags = cmd.keyflags; + key->sndid = cmd.sndid; + key->rcvid = cmd.rcvid; + key->l3index = l3index; + atomic64_set(&key->pkt_good, 0); + atomic64_set(&key->pkt_bad, 0); + + ret = tcp_ao_parse_crypto(&cmd, key); + if (ret < 0) + goto err_free_sock; + + if (!((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))) { + tcp_ao_cache_traffic_keys(sk, ao_info, key); + if (first) { + ao_info->current_key = key; + ao_info->rnext_key = key; + } + } + + tcp_ao_link_mkt(ao_info, key); + if (first) { + if (!static_branch_inc(&tcp_ao_needed.key)) { + ret = -EUSERS; + goto err_free_sock; + } + sk_gso_disable(sk); + rcu_assign_pointer(tcp_sk(sk)->ao_info, ao_info); + } + + if (cmd.set_current) + WRITE_ONCE(ao_info->current_key, key); + if (cmd.set_rnext) + WRITE_ONCE(ao_info->rnext_key, key); + return 0; + +err_free_sock: + atomic_sub(tcp_ao_sizeof_key(key), &sk->sk_omem_alloc); + tcp_sigpool_release(key->tcp_sigpool_id); + kfree_sensitive(key); +err_free_ao: + if (first) + kfree(ao_info); + return ret; +} + +static int tcp_ao_delete_key(struct sock *sk, struct tcp_ao_info *ao_info, + bool del_async, struct tcp_ao_key *key, + struct tcp_ao_key *new_current, + struct tcp_ao_key *new_rnext) +{ + int err; + + hlist_del_rcu(&key->node); + + /* Support for async delete on listening sockets: as they don't + * need current_key/rnext_key maintenance, we don't need to check + * them and we can just free all resources in RCU fashion.
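+ * (Request sockets on a listener only take the per-peer lookup path
+ * in tcp_inbound_ao_hash(), never current_key/rnext_key, so there
+ * is nothing to re-point before freeing.)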
+ */ + if (del_async) { + atomic_sub(tcp_ao_sizeof_key(key), &sk->sk_omem_alloc); + call_rcu(&key->rcu, tcp_ao_key_free_rcu); + return 0; + } + + /* At this moment another CPU could have looked this key up + * while it was unlinked from the list. Wait for RCU grace period, + * after which the key is off-list and can't be looked up again; + * the rx path [just before RCU came] might have used it and set it + * as current_key (very unlikely). + * Free the key with next RCU grace period (in case it was + * current_key before tcp_ao_current_rnext() might have + * changed it in forced-delete). + */ + synchronize_rcu(); + if (new_current) + WRITE_ONCE(ao_info->current_key, new_current); + if (new_rnext) + WRITE_ONCE(ao_info->rnext_key, new_rnext); + + if (unlikely(READ_ONCE(ao_info->current_key) == key || + READ_ONCE(ao_info->rnext_key) == key)) { + err = -EBUSY; + goto add_key; + } + + atomic_sub(tcp_ao_sizeof_key(key), &sk->sk_omem_alloc); + call_rcu(&key->rcu, tcp_ao_key_free_rcu); + + return 0; +add_key: + hlist_add_head_rcu(&key->node, &ao_info->head); + return err; +} + +#define TCP_AO_DEL_KEYF_ALL (TCP_AO_KEYF_IFINDEX) +static int tcp_ao_del_cmd(struct sock *sk, unsigned short int family, + sockptr_t optval, int optlen) +{ + struct tcp_ao_key *key, *new_current = NULL, *new_rnext = NULL; + int err, addr_len, l3index = 0; + struct tcp_ao_info *ao_info; + union tcp_ao_addr *addr; + struct tcp_ao_del cmd; + __u8 prefix; + u16 port; + + if (optlen < sizeof(cmd)) + return -EINVAL; + + err = copy_struct_from_sockptr(&cmd, sizeof(cmd), optval, optlen); + if (err) + return err; + + if (cmd.reserved != 0 || cmd.reserved2 != 0) + return -EINVAL; + + if (cmd.set_current || cmd.set_rnext) { + if (!tcp_ao_can_set_current_rnext(sk)) + return -EINVAL; + } + + if (cmd.keyflags & ~TCP_AO_DEL_KEYF_ALL) + return -EINVAL; + + /* No sanity check for TCP_AO_KEYF_IFINDEX as if a VRF + * was destroyed, there still should be a way to delete keys + * that were bound to that l3intf. So, fail late at lookup stage + * if there is no key for that ifindex. + */ + if (cmd.ifindex && !(cmd.keyflags & TCP_AO_KEYF_IFINDEX)) + return -EINVAL; + + ao_info = setsockopt_ao_info(sk); + if (IS_ERR(ao_info)) + return PTR_ERR(ao_info); + if (!ao_info) + return -ENOENT; + + /* For sockets in TCP_CLOSED it's possible to set keys that aren't + * matching the future peer (address/VRF/etc), + * tcp_ao_connect_init() will choose a correct matching MKT + * if there's any.
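+ *
+ * A userspace removal request is then simply (a sketch; the IDs,
+ * prefix and address below are illustrative, not defaults):
+ *	struct tcp_ao_del del = { .sndid = 100, .rcvid = 100,
+ *				  .prefix = 32, .addr = peer_addr };
+ *	setsockopt(sk, IPPROTO_TCP, TCP_AO_DEL_KEY, &del, sizeof(del));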
+ */ + if (cmd.set_current) { + new_current = tcp_ao_established_key(ao_info, cmd.current_key, -1); + if (!new_current) + return -ENOENT; + } + if (cmd.set_rnext) { + new_rnext = tcp_ao_established_key(ao_info, -1, cmd.rnext); + if (!new_rnext) + return -ENOENT; + } + if (cmd.del_async && sk->sk_state != TCP_LISTEN) + return -EINVAL; + + if (family == AF_INET) { + struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.addr; + + addr = (union tcp_ao_addr *)&sin->sin_addr; + addr_len = sizeof(struct in_addr); + port = ntohs(sin->sin_port); + } else { + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.addr; + struct in6_addr *addr6 = &sin6->sin6_addr; + + if (ipv6_addr_v4mapped(addr6)) { + addr = (union tcp_ao_addr *)&addr6->s6_addr32[3]; + addr_len = sizeof(struct in_addr); + family = AF_INET; + } else { + addr = (union tcp_ao_addr *)addr6; + addr_len = sizeof(struct in6_addr); + } + port = ntohs(sin6->sin6_port); + } + prefix = cmd.prefix; + + /* Currently matching is not performed on port (or port ranges) */ + if (port != 0) + return -EINVAL; + + /* We could choose a random present key here for current/rnext + * but that's less predictable. Let's be strict and not + * allow removing a key that's in use. RFC5925 doesn't + * specify how to coordinate key removal, but says: + * "It is presumed that an MKT affecting a particular + * connection cannot be destroyed during an active connection" + */ + hlist_for_each_entry_rcu(key, &ao_info->head, node) { + if (cmd.sndid != key->sndid || + cmd.rcvid != key->rcvid) + continue; + + if (family != key->family || + prefix != key->prefixlen || + memcmp(addr, &key->addr, addr_len)) + continue; + + if ((cmd.keyflags & TCP_AO_KEYF_IFINDEX) != + (key->keyflags & TCP_AO_KEYF_IFINDEX)) + continue; + + if (key->l3index != l3index) + continue; + + if (key == new_current || key == new_rnext) + continue; + + return tcp_ao_delete_key(sk, ao_info, cmd.del_async, key, + new_current, new_rnext); + } + return -ENOENT; +} + +/* cmd.ao_required makes a socket TCP-AO only. + * Don't allow any md5 keys for any l3intf on the socket together with it. + * Restricting it early in setsockopt() removes a check for + * ao_info->ao_required on inbound tcp segment fast-path.
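+ *
+ * E.g. a listener that must only accept signed peers would do
+ * (sketch):
+ *	struct tcp_ao_info_opt info = { .ao_required = 1 };
+ *	setsockopt(sk, IPPROTO_TCP, TCP_AO_INFO, &info, sizeof(info));
+ * which tcp_ao_info_cmd() below rejects with EKEYREJECTED if a
+ * TCP-MD5 key is already installed on the socket.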
+ */ +static int tcp_ao_required_verify(struct sock *sk) +{ +#ifdef CONFIG_TCP_MD5SIG + const struct tcp_md5sig_info *md5sig; + + if (!static_branch_unlikely(&tcp_md5_needed.key)) + return 0; + + md5sig = rcu_dereference_check(tcp_sk(sk)->md5sig_info, + lockdep_sock_is_held(sk)); + if (!md5sig) + return 0; + + if (rcu_dereference_check(hlist_first_rcu(&md5sig->head), + lockdep_sock_is_held(sk))) + return 1; +#endif + return 0; +} + +static int tcp_ao_info_cmd(struct sock *sk, unsigned short int family, + sockptr_t optval, int optlen) +{ + struct tcp_ao_key *new_current = NULL, *new_rnext = NULL; + struct tcp_ao_info *ao_info; + struct tcp_ao_info_opt cmd; + bool first = false; + int err; + + if (optlen < sizeof(cmd)) + return -EINVAL; + + err = copy_struct_from_sockptr(&cmd, sizeof(cmd), optval, optlen); + if (err) + return err; + + if (cmd.set_current || cmd.set_rnext) { + if (!tcp_ao_can_set_current_rnext(sk)) + return -EINVAL; + } + + if (cmd.reserved != 0 || cmd.reserved2 != 0) + return -EINVAL; + + ao_info = setsockopt_ao_info(sk); + if (IS_ERR(ao_info)) + return PTR_ERR(ao_info); + if (!ao_info) { + if (!((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))) + return -EINVAL; + ao_info = tcp_ao_alloc_info(GFP_KERNEL); + if (!ao_info) + return -ENOMEM; + first = true; + } + + if (cmd.ao_required && tcp_ao_required_verify(sk)) { + err = -EKEYREJECTED; + goto out; + } + + /* For sockets in TCP_CLOSED it's possible to set keys that aren't + * matching the future peer (address/port/VRF/etc), + * tcp_ao_connect_init() will choose a correct matching MKT + * if there's any. + */ + if (cmd.set_current) { + new_current = tcp_ao_established_key(ao_info, cmd.current_key, -1); + if (!new_current) { + err = -ENOENT; + goto out; + } + } + if (cmd.set_rnext) { + new_rnext = tcp_ao_established_key(ao_info, -1, cmd.rnext); + if (!new_rnext) { + err = -ENOENT; + goto out; + } + } + if (cmd.set_counters) { + atomic64_set(&ao_info->counters.pkt_good, cmd.pkt_good); + atomic64_set(&ao_info->counters.pkt_bad, cmd.pkt_bad); + atomic64_set(&ao_info->counters.key_not_found, cmd.pkt_key_not_found); + atomic64_set(&ao_info->counters.ao_required, cmd.pkt_ao_required); + atomic64_set(&ao_info->counters.dropped_icmp, cmd.pkt_dropped_icmp); + } + + ao_info->ao_required = cmd.ao_required; + ao_info->accept_icmps = cmd.accept_icmps; + if (new_current) + WRITE_ONCE(ao_info->current_key, new_current); + if (new_rnext) + WRITE_ONCE(ao_info->rnext_key, new_rnext); + if (first) { + if (!static_branch_inc(&tcp_ao_needed.key)) { + err = -EUSERS; + goto out; + } + sk_gso_disable(sk); + rcu_assign_pointer(tcp_sk(sk)->ao_info, ao_info); + } + return 0; +out: + if (first) + kfree(ao_info); + return err; +} + +int tcp_parse_ao(struct sock *sk, int cmd, unsigned short int family, + sockptr_t optval, int optlen) +{ + if (WARN_ON_ONCE(family != AF_INET && family != AF_INET6)) + return -EAFNOSUPPORT; + + switch (cmd) { + case TCP_AO_ADD_KEY: + return tcp_ao_add_cmd(sk, family, optval, optlen); + case TCP_AO_DEL_KEY: + return tcp_ao_del_cmd(sk, family, optval, optlen); + case TCP_AO_INFO: + return tcp_ao_info_cmd(sk, family, optval, optlen); + default: + WARN_ON_ONCE(1); + return -EINVAL; + } +} + +int tcp_v4_parse_ao(struct sock *sk, int cmd, sockptr_t optval, int optlen) +{ + return tcp_parse_ao(sk, cmd, AF_INET, optval, optlen); +} + +/* tcp_ao_copy_mkts_to_user(ao_info, optval, optlen) + * + * @ao_info: struct tcp_ao_info on the socket that + * getsockopt(TCP_AO_GET_KEYS) is executed on + * @optval: pointer to array of tcp_ao_getsockopt
structures in user space. + * Must be != NULL. + * @optlen: pointer to size of tcp_ao_getsockopt structure. + * Must be != NULL. + * + * Return value: 0 on success, a negative error number otherwise. + * + * optval points to an array of tcp_ao_getsockopt structures in user space. + * optval[0] is used as both input and output to getsockopt. It determines + * which keys are returned by the kernel. + * optval[0].nkeys is the size of the array in user space. On return it contains + * the number of keys matching the search criteria. + * If tcp_ao_getsockopt::get_all is set, then all keys in the socket are + * returned, otherwise only keys matching <addr, prefix, sndid, rcvid> + * in optval[0] are returned. + * optlen is also used as both input and output. The user provides the size + * of struct tcp_ao_getsockopt in user space, and the kernel returns the size + * of the structure in kernel space. + * The size of struct tcp_ao_getsockopt may differ between user and kernel. + * There are three cases to consider: + * * If usize == ksize, then keys are copied verbatim. + * * If usize < ksize, then the userspace has passed an old struct to a + * newer kernel. The rest of the trailing bytes in optval[0] + * (ksize - usize) are interpreted as 0 by the kernel. + * * If usize > ksize, then the userspace has passed a new struct to an + * older kernel. The trailing bytes unknown to the kernel (usize - ksize) + * are checked to ensure they are zeroed, otherwise -E2BIG is returned. + * On return the kernel fills in min(usize, ksize) in each entry of the array. + * The layout of the fields in the user and kernel structures is expected to + * be the same (including in the 32bit vs 64bit case). + */ +static int tcp_ao_copy_mkts_to_user(struct tcp_ao_info *ao_info, + sockptr_t optval, sockptr_t optlen) +{ + struct tcp_ao_getsockopt opt_in, opt_out; + struct tcp_ao_key *key, *current_key; + bool do_address_matching = true; + union tcp_ao_addr *addr = NULL; + int err, l3index, user_len; + unsigned int max_keys; /* maximum number of keys to copy to user */ + size_t out_offset = 0; + size_t bytes_to_write; /* number of bytes to write to user level */ + u32 matched_keys; /* keys from ao_info matched so far */ + int optlen_out; + __be16 port = 0; + + if (copy_from_sockptr(&user_len, optlen, sizeof(int))) + return -EFAULT; + + if (user_len <= 0) + return -EINVAL; + + memset(&opt_in, 0, sizeof(struct tcp_ao_getsockopt)); + err = copy_struct_from_sockptr(&opt_in, sizeof(opt_in), + optval, user_len); + if (err < 0) + return err; + + if (opt_in.pkt_good || opt_in.pkt_bad) + return -EINVAL; + if (opt_in.keyflags & ~TCP_AO_GET_KEYF_VALID) + return -EINVAL; + if (opt_in.ifindex && !(opt_in.keyflags & TCP_AO_KEYF_IFINDEX)) + return -EINVAL; + + if (opt_in.reserved != 0) + return -EINVAL; + + max_keys = opt_in.nkeys; + l3index = (opt_in.keyflags & TCP_AO_KEYF_IFINDEX) ? 
opt_in.ifindex : -1; + + if (opt_in.get_all || opt_in.is_current || opt_in.is_rnext) { + if (opt_in.get_all && (opt_in.is_current || opt_in.is_rnext)) + return -EINVAL; + do_address_matching = false; + } + + switch (opt_in.addr.ss_family) { + case AF_INET: { + struct sockaddr_in *sin; + __be32 mask; + + sin = (struct sockaddr_in *)&opt_in.addr; + port = sin->sin_port; + addr = (union tcp_ao_addr *)&sin->sin_addr; + + if (opt_in.prefix > 32) + return -EINVAL; + + if (ntohl(sin->sin_addr.s_addr) == INADDR_ANY && + opt_in.prefix != 0) + return -EINVAL; + + mask = inet_make_mask(opt_in.prefix); + if (sin->sin_addr.s_addr & ~mask) + return -EINVAL; + + break; + } + case AF_INET6: { + struct sockaddr_in6 *sin6; + struct in6_addr *addr6; + + sin6 = (struct sockaddr_in6 *)&opt_in.addr; + addr = (union tcp_ao_addr *)&sin6->sin6_addr; + addr6 = &sin6->sin6_addr; + port = sin6->sin6_port; + + /* We don't have to change family and @addr here if + * ipv6_addr_v4mapped() like in key adding: + * tcp_ao_key_cmp() does it. Do the sanity checks though. + */ + if (opt_in.prefix != 0) { + if (ipv6_addr_v4mapped(addr6)) { + __be32 mask, addr4 = addr6->s6_addr32[3]; + + if (opt_in.prefix > 32 || + ntohl(addr4) == INADDR_ANY) + return -EINVAL; + mask = inet_make_mask(opt_in.prefix); + if (addr4 & ~mask) + return -EINVAL; + } else { + struct in6_addr pfx; + + if (ipv6_addr_any(addr6) || + opt_in.prefix > 128) + return -EINVAL; + + ipv6_addr_prefix(&pfx, addr6, opt_in.prefix); + if (ipv6_addr_cmp(&pfx, addr6)) + return -EINVAL; + } + } else if (!ipv6_addr_any(addr6)) { + return -EINVAL; + } + break; + } + case 0: + if (!do_address_matching) + break; + fallthrough; + default: + return -EAFNOSUPPORT; + } + + if (!do_address_matching) { + /* We could just ignore those, but let's do stricter checks */ + if (addr || port) + return -EINVAL; + if (opt_in.prefix || opt_in.sndid || opt_in.rcvid) + return -EINVAL; + } + + bytes_to_write = min_t(int, user_len, sizeof(struct tcp_ao_getsockopt)); + matched_keys = 0; + /* May change in RX, while we're dumping, pre-fetch it */ + current_key = READ_ONCE(ao_info->current_key); + + hlist_for_each_entry_rcu(key, &ao_info->head, node) { + if (opt_in.get_all) + goto match; + + if (opt_in.is_current || opt_in.is_rnext) { + if (opt_in.is_current && key == current_key) + goto match; + if (opt_in.is_rnext && key == ao_info->rnext_key) + goto match; + continue; + } + + if (tcp_ao_key_cmp(key, l3index, addr, opt_in.prefix, + opt_in.addr.ss_family, + opt_in.sndid, opt_in.rcvid) != 0) + continue; +match: + matched_keys++; + if (matched_keys > max_keys) + continue; + + memset(&opt_out, 0, sizeof(struct tcp_ao_getsockopt)); + + if (key->family == AF_INET) { + struct sockaddr_in *sin_out = (struct sockaddr_in *)&opt_out.addr; + + sin_out->sin_family = key->family; + sin_out->sin_port = 0; + memcpy(&sin_out->sin_addr, &key->addr, sizeof(struct in_addr)); + } else { + struct sockaddr_in6 *sin6_out = (struct sockaddr_in6 *)&opt_out.addr; + + sin6_out->sin6_family = key->family; + sin6_out->sin6_port = 0; + memcpy(&sin6_out->sin6_addr, &key->addr, sizeof(struct in6_addr)); + } + opt_out.sndid = key->sndid; + opt_out.rcvid = key->rcvid; + opt_out.prefix = key->prefixlen; + opt_out.keyflags = key->keyflags; + opt_out.is_current = (key == current_key); + opt_out.is_rnext = (key == ao_info->rnext_key); + opt_out.nkeys = 0; + opt_out.maclen = key->maclen; + opt_out.keylen = key->keylen; + opt_out.ifindex = key->l3index; + opt_out.pkt_good = atomic64_read(&key->pkt_good); + opt_out.pkt_bad = 
atomic64_read(&key->pkt_bad); + memcpy(&opt_out.key, key->key, key->keylen); + tcp_sigpool_algo(key->tcp_sigpool_id, opt_out.alg_name, 64); + + /* Copy key to user */ + if (copy_to_sockptr_offset(optval, out_offset, + &opt_out, bytes_to_write)) + return -EFAULT; + out_offset += user_len; + } + + optlen_out = (int)sizeof(struct tcp_ao_getsockopt); + if (copy_to_sockptr(optlen, &optlen_out, sizeof(int))) + return -EFAULT; + + out_offset = offsetof(struct tcp_ao_getsockopt, nkeys); + if (copy_to_sockptr_offset(optval, out_offset, + &matched_keys, sizeof(u32))) + return -EFAULT; + + return 0; +} + +int tcp_ao_get_mkts(struct sock *sk, sockptr_t optval, sockptr_t optlen) +{ + struct tcp_ao_info *ao_info; + + ao_info = setsockopt_ao_info(sk); + if (IS_ERR(ao_info)) + return PTR_ERR(ao_info); + if (!ao_info) + return -ENOENT; + + return tcp_ao_copy_mkts_to_user(ao_info, optval, optlen); +} + +int tcp_ao_get_sock_info(struct sock *sk, sockptr_t optval, sockptr_t optlen) +{ + struct tcp_ao_info_opt out, in = {}; + struct tcp_ao_key *current_key; + struct tcp_ao_info *ao; + int err, len; + + if (copy_from_sockptr(&len, optlen, sizeof(int))) + return -EFAULT; + + if (len <= 0) + return -EINVAL; + + /* Copying this "in" only to check ::reserved, ::reserved2, + * that may be needed to extend (struct tcp_ao_info_opt) and + * what getsockopt() provides in future. + */ + err = copy_struct_from_sockptr(&in, sizeof(in), optval, len); + if (err) + return err; + + if (in.reserved != 0 || in.reserved2 != 0) + return -EINVAL; + + ao = setsockopt_ao_info(sk); + if (IS_ERR(ao)) + return PTR_ERR(ao); + if (!ao) + return -ENOENT; + + memset(&out, 0, sizeof(out)); + out.ao_required = ao->ao_required; + out.accept_icmps = ao->accept_icmps; + out.pkt_good = atomic64_read(&ao->counters.pkt_good); + out.pkt_bad = atomic64_read(&ao->counters.pkt_bad); + out.pkt_key_not_found = atomic64_read(&ao->counters.key_not_found); + out.pkt_ao_required = atomic64_read(&ao->counters.ao_required); + out.pkt_dropped_icmp = atomic64_read(&ao->counters.dropped_icmp); + + current_key = READ_ONCE(ao->current_key); + if (current_key) { + out.set_current = 1; + out.current_key = current_key->sndid; + } + if (ao->rnext_key) { + out.set_rnext = 1; + out.rnext = ao->rnext_key->rcvid; + } + + if (copy_to_sockptr(optval, &out, min_t(int, len, sizeof(out)))) + return -EFAULT; + + return 0; +} + +int tcp_ao_set_repair(struct sock *sk, sockptr_t optval, unsigned int optlen) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct tcp_ao_repair cmd; + struct tcp_ao_key *key; + struct tcp_ao_info *ao; + int err; + + if (optlen < sizeof(cmd)) + return -EINVAL; + + err = copy_struct_from_sockptr(&cmd, sizeof(cmd), optval, optlen); + if (err) + return err; + + if (!tp->repair) + return -EPERM; + + ao = setsockopt_ao_info(sk); + if (IS_ERR(ao)) + return PTR_ERR(ao); + if (!ao) + return -ENOENT; + + WRITE_ONCE(ao->lisn, cmd.snt_isn); + WRITE_ONCE(ao->risn, cmd.rcv_isn); + WRITE_ONCE(ao->snd_sne, cmd.snd_sne); + WRITE_ONCE(ao->rcv_sne, cmd.rcv_sne); + + hlist_for_each_entry_rcu(key, &ao->head, node) + tcp_ao_cache_traffic_keys(sk, ao, key); + + return 0; +} + +int tcp_ao_get_repair(struct sock *sk, sockptr_t optval, sockptr_t optlen) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct tcp_ao_repair opt; + struct tcp_ao_info *ao; + int len; + + if (copy_from_sockptr(&len, optlen, sizeof(int))) + return -EFAULT; + + if (len <= 0) + return -EINVAL; + + if (!tp->repair) + return -EPERM; + + rcu_read_lock(); + ao = getsockopt_ao_info(sk); + if (IS_ERR_OR_NULL(ao)) { + 
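+ /* ao is NULL when TCP-AO was never set up on this socket and
+  * an ERR_PTR for socket states that can't carry ao_info;
+  * drop the RCU read lock before returning either way.
+  */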
rcu_read_unlock(); + return ao ? PTR_ERR(ao) : -ENOENT; + } + + opt.snt_isn = ao->lisn; + opt.rcv_isn = ao->risn; + opt.snd_sne = READ_ONCE(ao->snd_sne); + opt.rcv_sne = READ_ONCE(ao->rcv_sne); + rcu_read_unlock(); + + if (copy_to_sockptr(optval, &opt, min_t(int, len, sizeof(opt)))) + return -EFAULT; + return 0; +} diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index 146792cd26..22358032dd 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -258,7 +258,7 @@ static unsigned long bbr_bw_to_pacing_rate(struct sock *sk, u32 bw, int gain) u64 rate = bw; rate = bbr_rate_bytes_per_sec(sk, rate, gain); - rate = min_t(u64, rate, sk->sk_max_pacing_rate); + rate = min_t(u64, rate, READ_ONCE(sk->sk_max_pacing_rate)); return rate; } @@ -278,7 +278,8 @@ static void bbr_init_pacing_rate_from_rtt(struct sock *sk) } bw = (u64)tcp_snd_cwnd(tp) * BW_UNIT; do_div(bw, rtt_us); - sk->sk_pacing_rate = bbr_bw_to_pacing_rate(sk, bw, bbr_high_gain); + WRITE_ONCE(sk->sk_pacing_rate, + bbr_bw_to_pacing_rate(sk, bw, bbr_high_gain)); } /* Pace using current bw estimate and a gain factor. */ @@ -290,14 +291,14 @@ static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain) if (unlikely(!bbr->has_seen_rtt && tp->srtt_us)) bbr_init_pacing_rate_from_rtt(sk); - if (bbr_full_bw_reached(sk) || rate > sk->sk_pacing_rate) - sk->sk_pacing_rate = rate; + if (bbr_full_bw_reached(sk) || rate > READ_ONCE(sk->sk_pacing_rate)) + WRITE_ONCE(sk->sk_pacing_rate, rate); } /* override sysctl_tcp_min_tso_segs */ __bpf_kfunc static u32 bbr_min_tso_segs(struct sock *sk) { - return sk->sk_pacing_rate < (bbr_min_tso_rate >> 3) ? 1 : 2; + return READ_ONCE(sk->sk_pacing_rate) < (bbr_min_tso_rate >> 3) ? 1 : 2; } static u32 bbr_tso_segs_goal(struct sock *sk) @@ -309,7 +310,7 @@ static u32 bbr_tso_segs_goal(struct sock *sk) * driver provided sk_gso_max_size. 
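 * For instance, at a ~12.5 MB/s pacing rate (100 Mbit/s) with the
 * default sk_pacing_shift of 10, this budgets ~12 KB per burst,
 * i.e. roughly 8 full-sized segments at a 1448-byte MSS.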
*/ bytes = min_t(unsigned long, - sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift), + READ_ONCE(sk->sk_pacing_rate) >> READ_ONCE(sk->sk_pacing_shift), GSO_LEGACY_MAX_SIZE - 1 - MAX_TCP_HEADER); segs = max_t(u32, bytes / tp->mss_cache, bbr_min_tso_segs(sk)); diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index 01b50fa791..4cbe4b4442 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -247,4 +247,5 @@ static void __exit tcp_diag_exit(void) module_init(tcp_diag_init); module_exit(tcp_diag_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("TCP socket monitoring via SOCK_DIAG"); MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-6 /* AF_INET - IPPROTO_TCP */); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index e6c4929549..701cb87043 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -693,6 +693,23 @@ new_measure: tp->rcv_rtt_est.time = tp->tcp_mstamp; } +static s32 tcp_rtt_tsopt_us(const struct tcp_sock *tp) +{ + u32 delta, delta_us; + + delta = tcp_time_stamp_ts(tp) - tp->rx_opt.rcv_tsecr; + if (tp->tcp_usec_ts) + return delta; + + if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) { + if (!delta) + delta = 1; + delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ); + return delta_us; + } + return -1; +} + static inline void tcp_rcv_rtt_measure_ts(struct sock *sk, const struct sk_buff *skb) { @@ -704,15 +721,10 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk, if (TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss) { - u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr; - u32 delta_us; - - if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) { - if (!delta) - delta = 1; - delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ); - tcp_rcv_rtt_update(tp, delta_us, 0); - } + s32 delta = tcp_rtt_tsopt_us(tp); + + if (delta >= 0) + tcp_rcv_rtt_update(tp, delta, 0); } } @@ -778,6 +790,16 @@ new_measure: tp->rcvq_space.time = tp->tcp_mstamp; } +static void tcp_save_lrcv_flowlabel(struct sock *sk, const struct sk_buff *skb) +{ +#if IS_ENABLED(CONFIG_IPV6) + struct inet_connection_sock *icsk = inet_csk(sk); + + if (skb->protocol == htons(ETH_P_IPV6)) + icsk->icsk_ack.lrcv_flowlabel = ntohl(ip6_flowlabel(ipv6_hdr(skb))); +#endif +} + /* There is something which you must keep in mind when you analyze the * behavior of the tp->ato delayed ack timeout interval. When a * connection starts up, we want to ack as quickly as possible. The @@ -826,6 +848,7 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb) } } icsk->icsk_ack.lrcvtime = now; + tcp_save_lrcv_flowlabel(sk, skb); tcp_ecn_check_ce(sk, skb); @@ -940,8 +963,8 @@ static void tcp_update_pacing_rate(struct sock *sk) * without any lock. We want to make sure compiler wont store * intermediate values in this location. */ - WRITE_ONCE(sk->sk_pacing_rate, min_t(u64, rate, - sk->sk_max_pacing_rate)); + WRITE_ONCE(sk->sk_pacing_rate, + min_t(u64, rate, READ_ONCE(sk->sk_max_pacing_rate))); } /* Calculate rto without backoff. 
This is the second half of Van Jacobson's @@ -2101,6 +2124,10 @@ void tcp_clear_retrans(struct tcp_sock *tp) tp->undo_marker = 0; tp->undo_retrans = -1; tp->sacked_out = 0; + tp->rto_stamp = 0; + tp->total_rto = 0; + tp->total_rto_recoveries = 0; + tp->total_rto_time = 0; } static inline void tcp_init_undo(struct tcp_sock *tp) @@ -2428,7 +2455,7 @@ static bool tcp_skb_spurious_retrans(const struct tcp_sock *tp, const struct sk_buff *skb) { return (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS) && - tcp_tsopt_ecr_before(tp, tcp_skb_timestamp(skb)); + tcp_tsopt_ecr_before(tp, tcp_skb_timestamp_ts(tp->tcp_usec_ts, skb)); } /* Nothing was retransmitted or returned timestamp is less @@ -2839,6 +2866,14 @@ void tcp_enter_recovery(struct sock *sk, bool ece_ack) tcp_set_ca_state(sk, TCP_CA_Recovery); } +static void tcp_update_rto_time(struct tcp_sock *tp) +{ + if (tp->rto_stamp) { + tp->total_rto_time += tcp_time_stamp_ms(tp) - tp->rto_stamp; + tp->rto_stamp = 0; + } +} + /* Process an ACK in CA_Loss state. Move to CA_Open if lost data are * recovered or spurious. Otherwise retransmits more on partial ACKs. */ @@ -3043,6 +3078,8 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una, break; case TCP_CA_Loss: tcp_process_loss(sk, flag, num_dupack, rexmit); + if (icsk->icsk_ca_state != TCP_CA_Loss) + tcp_update_rto_time(tp); tcp_identify_packet_loss(sk, ack_flag); if (!(icsk->icsk_ca_state == TCP_CA_Open || (*ack_flag & FLAG_LOST_RETRANS))) @@ -3122,17 +3159,10 @@ static bool tcp_ack_update_rtt(struct sock *sk, const int flag, * left edge of the send window. * See draft-ietf-tcplw-high-performance-00, section 3.3. */ - if (seq_rtt_us < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && - flag & FLAG_ACKED) { - u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr; - - if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) { - if (!delta) - delta = 1; - seq_rtt_us = delta * (USEC_PER_SEC / TCP_TS_HZ); - ca_rtt_us = seq_rtt_us; - } - } + if (seq_rtt_us < 0 && tp->rx_opt.saw_tstamp && + tp->rx_opt.rcv_tsecr && flag & FLAG_ACKED) + seq_rtt_us = ca_rtt_us = tcp_rtt_tsopt_us(tp); + rs->rtt_us = ca_rtt_us; /* RTT of last (S)ACKed packet (or -1) */ if (seq_rtt_us < 0) return false; @@ -3542,6 +3572,21 @@ static inline bool tcp_may_update_window(const struct tcp_sock *tp, (ack_seq == tp->snd_wl1 && (nwin > tp->snd_wnd || !nwin)); } +static void tcp_snd_sne_update(struct tcp_sock *tp, u32 ack) +{ +#ifdef CONFIG_TCP_AO + struct tcp_ao_info *ao; + + if (!static_branch_unlikely(&tcp_ao_needed.key)) + return; + + ao = rcu_dereference_protected(tp->ao_info, + lockdep_sock_is_held((struct sock *)tp)); + if (ao && ack < tp->snd_una) + ao->snd_sne++; +#endif +} + /* If we update tp->snd_una, also update tp->bytes_acked */ static void tcp_snd_una_update(struct tcp_sock *tp, u32 ack) { @@ -3549,9 +3594,25 @@ static void tcp_snd_una_update(struct tcp_sock *tp, u32 ack) sock_owned_by_me((struct sock *)tp); tp->bytes_acked += delta; + tcp_snd_sne_update(tp, ack); tp->snd_una = ack; } +static void tcp_rcv_sne_update(struct tcp_sock *tp, u32 seq) +{ +#ifdef CONFIG_TCP_AO + struct tcp_ao_info *ao; + + if (!static_branch_unlikely(&tcp_ao_needed.key)) + return; + + ao = rcu_dereference_protected(tp->ao_info, + lockdep_sock_is_held((struct sock *)tp)); + if (ao && seq < tp->rcv_nxt) + ao->rcv_sne++; +#endif +} + /* If we update tp->rcv_nxt, also update tp->bytes_received */ static void tcp_rcv_nxt_update(struct tcp_sock *tp, u32 seq) { @@ -3559,6 +3620,7 @@ static void tcp_rcv_nxt_update(struct tcp_sock 
*tp, u32 seq) sock_owned_by_me((struct sock *)tp); tp->bytes_received += delta; + tcp_rcv_sne_update(tp, seq); WRITE_ONCE(tp->rcv_nxt, seq); } @@ -4229,39 +4291,58 @@ static bool tcp_fast_parse_options(const struct net *net, return true; } -#ifdef CONFIG_TCP_MD5SIG +#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) /* - * Parse MD5 Signature option + * Parse Signature options */ -const u8 *tcp_parse_md5sig_option(const struct tcphdr *th) +int tcp_do_parse_auth_options(const struct tcphdr *th, + const u8 **md5_hash, const u8 **ao_hash) { int length = (th->doff << 2) - sizeof(*th); const u8 *ptr = (const u8 *)(th + 1); + unsigned int minlen = TCPOLEN_MD5SIG; + + if (IS_ENABLED(CONFIG_TCP_AO)) + minlen = sizeof(struct tcp_ao_hdr) + 1; + + *md5_hash = NULL; + *ao_hash = NULL; /* If not enough data remaining, we can short cut */ - while (length >= TCPOLEN_MD5SIG) { + while (length >= minlen) { int opcode = *ptr++; int opsize; switch (opcode) { case TCPOPT_EOL: - return NULL; + return 0; case TCPOPT_NOP: length--; continue; default: opsize = *ptr++; if (opsize < 2 || opsize > length) - return NULL; - if (opcode == TCPOPT_MD5SIG) - return opsize == TCPOLEN_MD5SIG ? ptr : NULL; + return -EINVAL; + if (opcode == TCPOPT_MD5SIG) { + if (opsize != TCPOLEN_MD5SIG) + return -EINVAL; + if (unlikely(*md5_hash || *ao_hash)) + return -EEXIST; + *md5_hash = ptr; + } else if (opcode == TCPOPT_AO) { + if (opsize <= sizeof(struct tcp_ao_hdr)) + return -EINVAL; + if (unlikely(*md5_hash || *ao_hash)) + return -EEXIST; + *ao_hash = ptr; + } } ptr += opsize - 2; length -= opsize; } - return NULL; + return 0; } -EXPORT_SYMBOL(tcp_parse_md5sig_option); +EXPORT_SYMBOL(tcp_do_parse_auth_options); #endif /* Sorry, PAWS as specified is broken wrt. pure-ACKs -DaveM @@ -4287,6 +4368,23 @@ EXPORT_SYMBOL(tcp_parse_md5sig_option); * up to bandwidth of 18Gigabit/sec. 8) ] */ +/* Estimates max number of increments of remote peer TSval in + * a replay window (based on our current RTO estimation). + */ +static u32 tcp_tsval_replay(const struct sock *sk) +{ + /* If we use usec TS resolution, + * then expect the remote peer to use the same resolution. + */ + if (tcp_sk(sk)->tcp_usec_ts) + return inet_csk(sk)->icsk_rto * (USEC_PER_SEC / HZ); + + /* RFC 7323 recommends a TSval clock between 1ms and 1sec. + * We know that some OS (including old linux) can use 1200 Hz. + */ + return inet_csk(sk)->icsk_rto * 1200 / HZ; +} + static int tcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb) { const struct tcp_sock *tp = tcp_sk(sk); @@ -4294,7 +4392,7 @@ static int tcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb) u32 seq = TCP_SKB_CB(skb)->seq; u32 ack = TCP_SKB_CB(skb)->ack_seq; - return (/* 1. Pure ACK with correct sequence number. */ + return /* 1. Pure ACK with correct sequence number. */ (th->ack && seq == TCP_SKB_CB(skb)->end_seq && seq == tp->rcv_nxt) && /* 2. ... and duplicate ACK. */ @@ -4304,7 +4402,8 @@ static int tcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb) !tcp_may_update_window(tp, ack, seq, ntohs(th->window) << tp->rx_opt.snd_wscale) && /* 4. ... and sits in replay window. 
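 * (e.g. with a 200 ms RTO, usec-resolution timestamps allow a
 * 200000-tick window, while tcp_tsval_replay() above grants 240
 * ticks to peers whose TSval clock may run at up to 1.2 kHz)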
*/ - (s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) <= (inet_csk(sk)->icsk_rto * 1024) / HZ); + (s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) <= + tcp_tsval_replay(sk); } static inline bool tcp_paws_discard(const struct sock *sk, @@ -4504,12 +4603,23 @@ static void tcp_rcv_spurious_retrans(struct sock *sk, const struct sk_buff *skb) { /* When the ACK path fails or drops most ACKs, the sender would * timeout and spuriously retransmit the same segment repeatedly. - * The receiver remembers and reflects via DSACKs. Leverage the - * DSACK state and change the txhash to re-route speculatively. + * If it seems our ACKs are not reaching the other side, + * based on receiving a duplicate data segment with new flowlabel + * (suggesting the sender suffered an RTO), and we are not already + * repathing due to our own RTO, then rehash the socket to repath our + * packets. */ - if (TCP_SKB_CB(skb)->seq == tcp_sk(sk)->duplicate_sack[0].start_seq && +#if IS_ENABLED(CONFIG_IPV6) + if (inet_csk(sk)->icsk_ca_state != TCP_CA_Loss && + skb->protocol == htons(ETH_P_IPV6) && + (tcp_sk(sk)->inet_conn.icsk_ack.lrcv_flowlabel != + ntohl(ip6_flowlabel(ipv6_hdr(skb)))) && sk_rethink_txhash(sk)) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDUPLICATEDATAREHASH); + + /* Save last flowlabel after a spurious retrans. */ + tcp_save_lrcv_flowlabel(sk, skb); +#endif } static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb) @@ -4826,6 +4936,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) u32 seq, end_seq; bool fragstolen; + tcp_save_lrcv_flowlabel(sk, skb); tcp_ecn_check_ce(sk, skb); if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) { @@ -5571,6 +5682,14 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible) tcp_in_quickack_mode(sk) || /* Protocol state mandates a one-time immediate ACK */ inet_csk(sk)->icsk_ack.pending & ICSK_ACK_NOW) { + /* If we are running from __release_sock() in user context, + * Defer the ack until tcp_release_cb(). + */ + if (sock_owned_by_user_nocheck(sk) && + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_backlog_ack_defer)) { + set_bit(TCP_ACK_DEFERRED, &sk->sk_tsq_flags); + return; + } send_now: tcp_send_ack(sk); return; @@ -6105,6 +6224,7 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb) struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); + tcp_ao_finish_connect(sk, skb); tcp_set_state(sk, TCP_ESTABLISHED); icsk->icsk_ack.lrcvtime = tcp_jiffies32; @@ -6253,7 +6373,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && !between(tp->rx_opt.rcv_tsecr, tp->retrans_stamp, - tcp_time_stamp(tp))) { + tcp_time_stamp_ts(tp))) { NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSACTIVEREJECTED); goto reset_and_undo; @@ -6390,6 +6510,16 @@ consume: * simultaneous connect with crossed SYNs. * Particularly, it can be connect to self. 
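 * For TCP-AO the peer's ISN only becomes known at this point, so
 * the added block below (re)initializes ao->risn and the receive
 * SNE before the socket moves to TCP_SYN_RECV.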
*/ +#ifdef CONFIG_TCP_AO + struct tcp_ao_info *ao; + + ao = rcu_dereference_protected(tp->ao_info, + lockdep_sock_is_held(sk)); + if (ao) { + WRITE_ONCE(ao->risn, th->seq); + ao->rcv_sne = 0; + } +#endif tcp_set_state(sk, TCP_SYN_RECV); if (tp->rx_opt.saw_tstamp) { @@ -6464,6 +6594,7 @@ static void tcp_rcv_synrecv_state_fastopen(struct sock *sk) tcp_try_undo_recovery(sk); /* Reset rtx states to prevent spurious retransmits_timed_out() */ + tcp_update_rto_time(tp); tp->retrans_stamp = 0; inet_csk(sk)->icsk_retransmits = 0; @@ -6601,6 +6732,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) skb); WRITE_ONCE(tp->copied_seq, tp->rcv_nxt); } + tcp_ao_established(sk); smp_mb(); tcp_set_state(sk, TCP_ESTABLISHED); sk->sk_state_change(sk); @@ -6977,6 +7109,10 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, struct flowi fl; u8 syncookies; +#ifdef CONFIG_TCP_AO + const struct tcp_ao_hdr *aoh; +#endif + syncookies = READ_ONCE(net->ipv4.sysctl_tcp_syncookies); /* TW buckets are converted to open requests without @@ -7001,6 +7137,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, req->syncookie = want_cookie; tcp_rsk(req)->af_specific = af_ops; tcp_rsk(req)->ts_off = 0; + tcp_rsk(req)->req_usec_ts = false; #if IS_ENABLED(CONFIG_MPTCP) tcp_rsk(req)->is_mptcp = 0; #endif @@ -7028,9 +7165,10 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, if (!dst) goto drop_and_free; - if (tmp_opt.tstamp_ok) + if (tmp_opt.tstamp_ok) { + tcp_rsk(req)->req_usec_ts = dst_tcp_usec_ts(dst); tcp_rsk(req)->ts_off = af_ops->init_ts_off(net, skb); - + } if (!want_cookie && !isn) { int max_syn_backlog = READ_ONCE(net->ipv4.sysctl_max_syn_backlog); @@ -7062,6 +7200,18 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, inet_rsk(req)->ecn_ok = 0; } +#ifdef CONFIG_TCP_AO + if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh)) + goto drop_and_release; /* Invalid TCP options */ + if (aoh) { + tcp_rsk(req)->used_tcp_ao = true; + tcp_rsk(req)->ao_rcv_next = aoh->keyid; + tcp_rsk(req)->ao_keyid = aoh->rnext_keyid; + + } else { + tcp_rsk(req)->used_tcp_ao = false; + } +#endif tcp_rsk(req)->snt_isn = isn; tcp_rsk(req)->txhash = net_tx_rndhash(); tcp_rsk(req)->syn_tos = TCP_SKB_CB(skb)->ip_dsfield; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index c7ffab37a3..0c50c5a32b 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -296,6 +296,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) rt = NULL; goto failure; } + tp->tcp_usec_ts = dst_tcp_usec_ts(&rt->dst); /* OK, now commit destination to socket. */ sk->sk_gso_type = SKB_GSO_TCPV4; sk_setup_caps(sk, &rt->dst); @@ -493,6 +494,8 @@ int tcp_v4_err(struct sk_buff *skb, u32 info) return -ENOENT; } if (sk->sk_state == TCP_TIME_WAIT) { + /* To increase the counter of ignored icmps for TCP-AO */ + tcp_ao_ignore_icmp(sk, AF_INET, type, code); inet_twsk_put(inet_twsk(sk)); return 0; } @@ -506,6 +509,11 @@ int tcp_v4_err(struct sk_buff *skb, u32 info) return 0; } + if (tcp_ao_ignore_icmp(sk, AF_INET, type, code)) { + sock_put(sk); + return 0; + } + bh_lock_sock(sk); /* If too many ICMPs get dropped on busy * servers this needs to be solved differently. 
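Further down, tcp_v4_ao_sign_reset() assembles the leading 32-bit word of the AO option for a signed RST by hand. TCP-AO is option kind 29 (RFC 5925), followed by length, KeyID and RNextKeyID; note that the RST signs with the key the peer asked for, so the peer's RNextKeyID lands in the KeyID byte. A sketch of the packing (all values illustrative):

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>

#define TCPOPT_AO 29

/* Kind | Length | KeyID | RNextKeyID, as in reply_options[0] below. */
static uint32_t ao_first_word(uint8_t opt_len, uint8_t keyid, uint8_t rnext)
{
        return htonl((uint32_t)TCPOPT_AO << 24 | (uint32_t)opt_len << 16 |
                     (uint32_t)keyid << 8 | rnext);
}

int main(void)
{
        /* 4-byte AO header plus a 12-byte MAC: option length 16 */
        uint32_t w = ao_first_word(16, 7, 5);
        const uint8_t *b = (const uint8_t *)&w;

        printf("%02x %02x %02x %02x\n", b[0], b[1], b[2], b[3]); /* 1d 10 07 05 */
        return 0;
}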
@@ -656,6 +664,52 @@ void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb) } EXPORT_SYMBOL(tcp_v4_send_check); +#define REPLY_OPTIONS_LEN (MAX_TCP_OPTION_SPACE / sizeof(__be32)) + +static bool tcp_v4_ao_sign_reset(const struct sock *sk, struct sk_buff *skb, + const struct tcp_ao_hdr *aoh, + struct ip_reply_arg *arg, struct tcphdr *reply, + __be32 reply_options[REPLY_OPTIONS_LEN]) +{ +#ifdef CONFIG_TCP_AO + int sdif = tcp_v4_sdif(skb); + int dif = inet_iif(skb); + int l3index = sdif ? dif : 0; + bool allocated_traffic_key; + struct tcp_ao_key *key; + char *traffic_key; + bool drop = true; + u32 ao_sne = 0; + u8 keyid; + + rcu_read_lock(); + if (tcp_ao_prepare_reset(sk, skb, aoh, l3index, ntohl(reply->seq), + &key, &traffic_key, &allocated_traffic_key, + &keyid, &ao_sne)) + goto out; + + reply_options[0] = htonl((TCPOPT_AO << 24) | (tcp_ao_len(key) << 16) | + (aoh->rnext_keyid << 8) | keyid); + arg->iov[0].iov_len += tcp_ao_len_aligned(key); + reply->doff = arg->iov[0].iov_len / 4; + + if (tcp_ao_hash_hdr(AF_INET, (char *)&reply_options[1], + key, traffic_key, + (union tcp_ao_addr *)&ip_hdr(skb)->saddr, + (union tcp_ao_addr *)&ip_hdr(skb)->daddr, + reply, ao_sne)) + goto out; + drop = false; +out: + rcu_read_unlock(); + if (allocated_traffic_key) + kfree(traffic_key); + return drop; +#else + return true; +#endif +} + /* * This routine will send an RST to the other tcp. * @@ -669,26 +723,21 @@ EXPORT_SYMBOL(tcp_v4_send_check); * Exception: precedence violation. We do not implement it in any case. */ -#ifdef CONFIG_TCP_MD5SIG -#define OPTION_BYTES TCPOLEN_MD5SIG_ALIGNED -#else -#define OPTION_BYTES sizeof(__be32) -#endif - static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) { const struct tcphdr *th = tcp_hdr(skb); struct { struct tcphdr th; - __be32 opt[OPTION_BYTES / sizeof(__be32)]; + __be32 opt[REPLY_OPTIONS_LEN]; } rep; + const __u8 *md5_hash_location = NULL; + const struct tcp_ao_hdr *aoh; struct ip_reply_arg arg; #ifdef CONFIG_TCP_MD5SIG struct tcp_md5sig_key *key = NULL; - const __u8 *hash_location = NULL; unsigned char newhash[16]; - int genhash; struct sock *sk1 = NULL; + int genhash; #endif u64 transmit_time = 0; struct sock *ctl_sk; @@ -725,9 +774,16 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) arg.iov[0].iov_len = sizeof(rep.th); net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev); + + /* Invalid TCP option size or twice included auth */ + if (tcp_parse_auth_options(tcp_hdr(skb), &md5_hash_location, &aoh)) + return; + + if (aoh && tcp_v4_ao_sign_reset(sk, skb, aoh, &arg, &rep.th, rep.opt)) + return; + #ifdef CONFIG_TCP_MD5SIG rcu_read_lock(); - hash_location = tcp_parse_md5sig_option(th); if (sk && sk_fullsock(sk)) { const union tcp_md5_addr *addr; int l3index; @@ -738,7 +794,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) l3index = tcp_v4_sdif(skb) ? 
inet_iif(skb) : 0; addr = (union tcp_md5_addr *)&ip_hdr(skb)->saddr; key = tcp_md5_do_lookup(sk, l3index, addr, AF_INET); - } else if (hash_location) { + } else if (md5_hash_location) { const union tcp_md5_addr *addr; int sdif = tcp_v4_sdif(skb); int dif = inet_iif(skb); @@ -770,7 +826,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, skb); - if (genhash || memcmp(hash_location, newhash, 16) != 0) + if (genhash || memcmp(md5_hash_location, newhash, 16) != 0) goto out; } @@ -828,7 +884,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ? inet_twsk(sk)->tw_mark : sk->sk_mark; ctl_sk->sk_priority = (sk->sk_state == TCP_TIME_WAIT) ? - inet_twsk(sk)->tw_priority : sk->sk_priority; + inet_twsk(sk)->tw_priority : READ_ONCE(sk->sk_priority); transmit_time = tcp_transmit_time(sk); xfrm_sk_clone_policy(ctl_sk, sk); txhash = (sk->sk_state == TCP_TIME_WAIT) ? @@ -862,17 +918,13 @@ out: static void tcp_v4_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 tsval, u32 tsecr, int oif, - struct tcp_md5sig_key *key, + struct tcp_key *key, int reply_flags, u8 tos, u32 txhash) { const struct tcphdr *th = tcp_hdr(skb); struct { struct tcphdr th; - __be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2) -#ifdef CONFIG_TCP_MD5SIG - + (TCPOLEN_MD5SIG_ALIGNED >> 2) -#endif - ]; + __be32 opt[(MAX_TCP_OPTION_SPACE >> 2)]; } rep; struct net *net = sock_net(sk); struct ip_reply_arg arg; @@ -903,7 +955,7 @@ static void tcp_v4_send_ack(const struct sock *sk, rep.th.window = htons(win); #ifdef CONFIG_TCP_MD5SIG - if (key) { + if (tcp_key_is_md5(key)) { int offset = (tsecr) ? 3 : 0; rep.opt[offset++] = htonl((TCPOPT_NOP << 24) | @@ -914,10 +966,28 @@ static void tcp_v4_send_ack(const struct sock *sk, rep.th.doff = arg.iov[0].iov_len/4; tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset], - key, ip_hdr(skb)->saddr, + key->md5_key, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, &rep.th); } #endif +#ifdef CONFIG_TCP_AO + if (tcp_key_is_ao(key)) { + int offset = (tsecr) ? 
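From here on, the v4 reply helpers (tcp_v4_send_ack() and its callers) take one struct tcp_key and branch on its type, instead of threading an MD5 key pointer plus ad-hoc AO arguments. A stand-alone model of that dispatch; the field layout is illustrative, not the kernel struct:

#include <stdio.h>

enum tcp_key_type { TCP_KEY_NONE = 0, TCP_KEY_MD5, TCP_KEY_AO };

struct tcp_key {
        enum tcp_key_type type;
        const void *md5_key;
        const void *ao_key, *traffic_key;
        unsigned int sne;
        unsigned char rcv_next;
};

static int tcp_key_is_md5(const struct tcp_key *k) { return k->type == TCP_KEY_MD5; }
static int tcp_key_is_ao(const struct tcp_key *k)  { return k->type == TCP_KEY_AO; }

static void send_ack(const struct tcp_key *key)
{
        if (tcp_key_is_md5(key))
                puts("append 18-byte MD5 option, hash the pseudo-header");
        else if (tcp_key_is_ao(key))
                puts("append AO option, hash with traffic key and SNE");
        else
                puts("no signature option");
}

int main(void)
{
        struct tcp_key none = { .type = TCP_KEY_NONE };
        struct tcp_key ao   = { .type = TCP_KEY_AO };

        send_ack(&none);
        send_ack(&ao);
        return 0;
}

In the kernel the two predicates compile down to static-branch checks, so the common unsigned path stays free of the extra comparisons.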
3 : 0; + + rep.opt[offset++] = htonl((TCPOPT_AO << 24) | + (tcp_ao_len(key->ao_key) << 16) | + (key->ao_key->sndid << 8) | + key->rcv_next); + arg.iov[0].iov_len += tcp_ao_len_aligned(key->ao_key); + rep.th.doff = arg.iov[0].iov_len / 4; + + tcp_ao_hash_hdr(AF_INET, (char *)&rep.opt[offset], + key->ao_key, key->traffic_key, + (union tcp_ao_addr *)&ip_hdr(skb)->saddr, + (union tcp_ao_addr *)&ip_hdr(skb)->daddr, + &rep.th, key->sne); + } +#endif arg.flags = reply_flags; arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr, ip_hdr(skb)->saddr, /* XXX */ @@ -950,18 +1020,53 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) { struct inet_timewait_sock *tw = inet_twsk(sk); struct tcp_timewait_sock *tcptw = tcp_twsk(sk); + struct tcp_key key = {}; +#ifdef CONFIG_TCP_AO + struct tcp_ao_info *ao_info; + + if (static_branch_unlikely(&tcp_ao_needed.key)) { + /* FIXME: the segment to-be-acked is not verified yet */ + ao_info = rcu_dereference(tcptw->ao_info); + if (ao_info) { + const struct tcp_ao_hdr *aoh; + + if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh)) { + inet_twsk_put(tw); + return; + } + + if (aoh) + key.ao_key = tcp_ao_established_key(ao_info, aoh->rnext_keyid, -1); + } + } + if (key.ao_key) { + struct tcp_ao_key *rnext_key; + + key.traffic_key = snd_other_key(key.ao_key); + key.sne = READ_ONCE(ao_info->snd_sne); + rnext_key = READ_ONCE(ao_info->rnext_key); + key.rcv_next = rnext_key->rcvid; + key.type = TCP_KEY_AO; +#else + if (0) { +#endif +#ifdef CONFIG_TCP_MD5SIG + } else if (static_branch_unlikely(&tcp_md5_needed.key)) { + key.md5_key = tcp_twsk_md5_key(tcptw); + if (key.md5_key) + key.type = TCP_KEY_MD5; +#endif + } tcp_v4_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, - tcp_time_stamp_raw() + tcptw->tw_ts_offset, + tcp_tw_tsval(tcptw), tcptw->tw_ts_recent, - tw->tw_bound_dev_if, - tcp_twsk_md5_key(tcptw), + tw->tw_bound_dev_if, &key, tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0, tw->tw_tos, - tw->tw_txhash - ); + tw->tw_txhash); inet_twsk_put(tw); } @@ -969,8 +1074,7 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, struct request_sock *req) { - const union tcp_md5_addr *addr; - int l3index; + struct tcp_key key = {}; /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV * sk->sk_state == TCP_SYN_RECV -> for Fast Open. @@ -978,23 +1082,77 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq = (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt; +#ifdef CONFIG_TCP_AO + if (static_branch_unlikely(&tcp_ao_needed.key) && + tcp_rsk_used_ao(req)) { + const union tcp_md5_addr *addr; + const struct tcp_ao_hdr *aoh; + int l3index; + + /* Invalid TCP option size or twice included auth */ + if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh)) + return; + if (!aoh) + return; + + addr = (union tcp_md5_addr *)&ip_hdr(skb)->saddr; + l3index = tcp_v4_sdif(skb) ? inet_iif(skb) : 0; + key.ao_key = tcp_ao_do_lookup(sk, l3index, addr, AF_INET, + aoh->rnext_keyid, -1); + if (unlikely(!key.ao_key)) { + /* Send ACK with any matching MKT for the peer */ + key.ao_key = tcp_ao_do_lookup(sk, l3index, addr, AF_INET, -1, -1); + /* Matching key disappeared (user removed the key?) + * let the handshake timeout. 
+ */ + if (!key.ao_key) { + net_info_ratelimited("TCP-AO key for (%pI4, %d)->(%pI4, %d) suddenly disappeared, won't ACK new connection\n", + addr, + ntohs(tcp_hdr(skb)->source), + &ip_hdr(skb)->daddr, + ntohs(tcp_hdr(skb)->dest)); + return; + } + } + key.traffic_key = kmalloc(tcp_ao_digest_size(key.ao_key), GFP_ATOMIC); + if (!key.traffic_key) + return; + + key.type = TCP_KEY_AO; + key.rcv_next = aoh->keyid; + tcp_v4_ao_calc_key_rsk(key.ao_key, key.traffic_key, req); +#else + if (0) { +#endif +#ifdef CONFIG_TCP_MD5SIG + } else if (static_branch_unlikely(&tcp_md5_needed.key)) { + const union tcp_md5_addr *addr; + int l3index; + + addr = (union tcp_md5_addr *)&ip_hdr(skb)->saddr; + l3index = tcp_v4_sdif(skb) ? inet_iif(skb) : 0; + key.md5_key = tcp_md5_do_lookup(sk, l3index, addr, AF_INET); + if (key.md5_key) + key.type = TCP_KEY_MD5; +#endif + } + /* RFC 7323 2.3 * The window field (SEG.WND) of every outgoing segment, with the * exception of <SYN> segments, MUST be right-shifted by * Rcv.Wind.Shift bits: */ - addr = (union tcp_md5_addr *)&ip_hdr(skb)->saddr; - l3index = tcp_v4_sdif(skb) ? inet_iif(skb) : 0; tcp_v4_send_ack(sk, skb, seq, tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale, - tcp_time_stamp_raw() + tcp_rsk(req)->ts_off, + tcp_rsk_tsval(tcp_rsk(req)), READ_ONCE(req->ts_recent), - 0, - tcp_md5_do_lookup(sk, l3index, addr, AF_INET), + 0, &key, inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0, ip_hdr(skb)->tos, READ_ONCE(tcp_rsk(req)->txhash)); + if (tcp_key_is_ao(&key)) + kfree(key.traffic_key); } /* @@ -1024,10 +1182,11 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst, if (skb) { __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr); - tos = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ? - (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) | - (inet_sk(sk)->tos & INET_ECN_MASK) : - inet_sk(sk)->tos; + tos = READ_ONCE(inet_sk(sk)->tos); + + if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)) + tos = (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) | + (tos & INET_ECN_MASK); if (!INET_ECN_is_capable(tos) && tcp_bpf_ca_needs_ecn((struct sock *)req)) @@ -1080,7 +1239,7 @@ static bool better_md5_match(struct tcp_md5sig_key *old, struct tcp_md5sig_key * /* Find the Key structure for an address. 
*/ struct tcp_md5sig_key *__tcp_md5_do_lookup(const struct sock *sk, int l3index, const union tcp_md5_addr *addr, - int family) + int family, bool any_l3index) { const struct tcp_sock *tp = tcp_sk(sk); struct tcp_md5sig_key *key; @@ -1099,7 +1258,8 @@ struct tcp_md5sig_key *__tcp_md5_do_lookup(const struct sock *sk, int l3index, lockdep_sock_is_held(sk)) { if (key->family != family) continue; - if (key->flags & TCP_MD5SIG_FLAG_IFINDEX && key->l3index != l3index) + if (!any_l3index && key->flags & TCP_MD5SIG_FLAG_IFINDEX && + key->l3index != l3index) continue; if (family == AF_INET) { mask = inet_make_mask(key->prefixlen); @@ -1219,10 +1379,6 @@ static int __tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, key = sock_kmalloc(sk, sizeof(*key), gfp | __GFP_ZERO); if (!key) return -ENOMEM; - if (!tcp_alloc_md5sig_pool()) { - sock_kfree_s(sk, key, sizeof(*key)); - return -ENOMEM; - } memcpy(key->key, newkey, newkeylen); key->keylen = newkeylen; @@ -1244,15 +1400,21 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, struct tcp_sock *tp = tcp_sk(sk); if (!rcu_dereference_protected(tp->md5sig_info, lockdep_sock_is_held(sk))) { - if (tcp_md5sig_info_add(sk, GFP_KERNEL)) + if (tcp_md5_alloc_sigpool()) return -ENOMEM; + if (tcp_md5sig_info_add(sk, GFP_KERNEL)) { + tcp_md5_release_sigpool(); + return -ENOMEM; + } + if (!static_branch_inc(&tcp_md5_needed.key)) { struct tcp_md5sig_info *md5sig; md5sig = rcu_dereference_protected(tp->md5sig_info, lockdep_sock_is_held(sk)); rcu_assign_pointer(tp->md5sig_info, NULL); kfree_rcu(md5sig, rcu); + tcp_md5_release_sigpool(); return -EUSERS; } } @@ -1269,8 +1431,12 @@ int tcp_md5_key_copy(struct sock *sk, const union tcp_md5_addr *addr, struct tcp_sock *tp = tcp_sk(sk); if (!rcu_dereference_protected(tp->md5sig_info, lockdep_sock_is_held(sk))) { - if (tcp_md5sig_info_add(sk, sk_gfp_mask(sk, GFP_ATOMIC))) + tcp_md5_add_sigpool(); + + if (tcp_md5sig_info_add(sk, sk_gfp_mask(sk, GFP_ATOMIC))) { + tcp_md5_release_sigpool(); return -ENOMEM; + } if (!static_key_fast_inc_not_disabled(&tcp_md5_needed.key.key)) { struct tcp_md5sig_info *md5sig; @@ -1279,6 +1445,7 @@ int tcp_md5_key_copy(struct sock *sk, const union tcp_md5_addr *addr, net_warn_ratelimited("Too many TCP-MD5 keys in the system\n"); rcu_assign_pointer(tp->md5sig_info, NULL); kfree_rcu(md5sig, rcu); + tcp_md5_release_sigpool(); return -EUSERS; } } @@ -1304,7 +1471,7 @@ int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family, } EXPORT_SYMBOL(tcp_md5_do_del); -static void tcp_clear_md5_list(struct sock *sk) +void tcp_clear_md5_list(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); struct tcp_md5sig_key *key; @@ -1328,6 +1495,7 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, int optname, const union tcp_md5_addr *addr; u8 prefixlen = 32; int l3index = 0; + bool l3flag; u8 flags; if (optlen < sizeof(cmd)) @@ -1340,6 +1508,7 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, int optname, return -EINVAL; flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX; + l3flag = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX; if (optname == TCP_MD5SIG_EXT && cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) { @@ -1374,11 +1543,17 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, int optname, if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN) return -EINVAL; + /* Don't allow keys for peers that have a matching TCP-AO key. + * See the comment in tcp_ao_add_cmd() + */ + if (tcp_ao_required(sk, addr, AF_INET, l3flag ? 
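__tcp_md5_do_lookup() grows an any_l3index argument so the new AO/MD5 conflict checks can match keys regardless of their VRF binding. The matching rule itself, modeled in user space (addresses and indices illustrative):

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>

struct md5_key {
        uint32_t addr;          /* network byte order */
        int prefixlen;          /* 0..32 */
        int l3index;
        int ifindex_flag;       /* TCP_MD5SIG_FLAG_IFINDEX set? */
};

static uint32_t inet_make_mask(int plen)
{
        return plen ? htonl(~0u << (32 - plen)) : 0;
}

/* any_l3index mimics the new argument above: skip the L3 check entirely. */
static int key_matches(const struct md5_key *k, uint32_t addr,
                       int l3index, int any_l3index)
{
        if (!any_l3index && k->ifindex_flag && k->l3index != l3index)
                return 0;
        return (addr & inet_make_mask(k->prefixlen)) ==
               (k->addr & inet_make_mask(k->prefixlen));
}

int main(void)
{
        struct md5_key k = { inet_addr("192.0.2.0"), 24, 3, 1 };

        printf("%d %d\n",
               key_matches(&k, inet_addr("192.0.2.99"), 4, 0),   /* wrong VRF */
               key_matches(&k, inet_addr("192.0.2.99"), 4, 1));  /* any VRF */
        return 0;
}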
l3index : -1, false)) + return -EKEYREJECTED; + return tcp_md5_do_add(sk, addr, AF_INET, prefixlen, l3index, flags, cmd.tcpm_key, cmd.tcpm_keylen); } -static int tcp_v4_md5_hash_headers(struct tcp_md5sig_pool *hp, +static int tcp_v4_md5_hash_headers(struct tcp_sigpool *hp, __be32 daddr, __be32 saddr, const struct tcphdr *th, int nbytes) { @@ -1398,38 +1573,35 @@ static int tcp_v4_md5_hash_headers(struct tcp_md5sig_pool *hp, _th->check = 0; sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th)); - ahash_request_set_crypt(hp->md5_req, &sg, NULL, + ahash_request_set_crypt(hp->req, &sg, NULL, sizeof(*bp) + sizeof(*th)); - return crypto_ahash_update(hp->md5_req); + return crypto_ahash_update(hp->req); } static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, __be32 daddr, __be32 saddr, const struct tcphdr *th) { - struct tcp_md5sig_pool *hp; - struct ahash_request *req; + struct tcp_sigpool hp; - hp = tcp_get_md5sig_pool(); - if (!hp) - goto clear_hash_noput; - req = hp->md5_req; + if (tcp_sigpool_start(tcp_md5_sigpool_id, &hp)) + goto clear_hash_nostart; - if (crypto_ahash_init(req)) + if (crypto_ahash_init(hp.req)) goto clear_hash; - if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2)) + if (tcp_v4_md5_hash_headers(&hp, daddr, saddr, th, th->doff << 2)) goto clear_hash; - if (tcp_md5_hash_key(hp, key)) + if (tcp_md5_hash_key(&hp, key)) goto clear_hash; - ahash_request_set_crypt(req, NULL, md5_hash, 0); - if (crypto_ahash_final(req)) + ahash_request_set_crypt(hp.req, NULL, md5_hash, 0); + if (crypto_ahash_final(hp.req)) goto clear_hash; - tcp_put_md5sig_pool(); + tcp_sigpool_end(&hp); return 0; clear_hash: - tcp_put_md5sig_pool(); -clear_hash_noput: + tcp_sigpool_end(&hp); +clear_hash_nostart: memset(md5_hash, 0, 16); return 1; } @@ -1438,9 +1610,8 @@ int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key, const struct sock *sk, const struct sk_buff *skb) { - struct tcp_md5sig_pool *hp; - struct ahash_request *req; const struct tcphdr *th = tcp_hdr(skb); + struct tcp_sigpool hp; __be32 saddr, daddr; if (sk) { /* valid for establish/request sockets */ @@ -1452,30 +1623,28 @@ int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key, daddr = iph->daddr; } - hp = tcp_get_md5sig_pool(); - if (!hp) - goto clear_hash_noput; - req = hp->md5_req; + if (tcp_sigpool_start(tcp_md5_sigpool_id, &hp)) + goto clear_hash_nostart; - if (crypto_ahash_init(req)) + if (crypto_ahash_init(hp.req)) goto clear_hash; - if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, skb->len)) + if (tcp_v4_md5_hash_headers(&hp, daddr, saddr, th, skb->len)) goto clear_hash; - if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2)) + if (tcp_sigpool_hash_skb_data(&hp, skb, th->doff << 2)) goto clear_hash; - if (tcp_md5_hash_key(hp, key)) + if (tcp_md5_hash_key(&hp, key)) goto clear_hash; - ahash_request_set_crypt(req, NULL, md5_hash, 0); - if (crypto_ahash_final(req)) + ahash_request_set_crypt(hp.req, NULL, md5_hash, 0); + if (crypto_ahash_final(hp.req)) goto clear_hash; - tcp_put_md5sig_pool(); + tcp_sigpool_end(&hp); return 0; clear_hash: - tcp_put_md5sig_pool(); -clear_hash_noput: + tcp_sigpool_end(&hp); +clear_hash_nostart: memset(md5_hash, 0, 16); return 1; } @@ -1524,6 +1693,11 @@ const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { .req_md5_lookup = tcp_v4_md5_lookup, .calc_md5_hash = tcp_v4_md5_hash_skb, #endif +#ifdef CONFIG_TCP_AO + .ao_lookup = tcp_v4_ao_lookup_rsk, + .ao_calc_key = tcp_v4_ao_calc_key_rsk, + .ao_synack_hash = tcp_v4_ao_synack_hash, 
+#endif #ifdef CONFIG_SYN_COOKIES .cookie_init_seq = cookie_v4_init_sequence, #endif @@ -1625,12 +1799,16 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, /* Copy over the MD5 key from the original socket */ addr = (union tcp_md5_addr *)&newinet->inet_daddr; key = tcp_md5_do_lookup(sk, l3index, addr, AF_INET); - if (key) { + if (key && !tcp_rsk_used_ao(req)) { if (tcp_md5_key_copy(newsk, addr, AF_INET, 32, l3index, key)) goto put_and_exit; sk_gso_disable(newsk); } #endif +#ifdef CONFIG_TCP_AO + if (tcp_ao_copy_all_matching(sk, newsk, req, skb, AF_INET)) + goto put_and_exit; /* OOM, release back memory */ +#endif if (__inet_inherit_port(sk, newsk) < 0) goto put_and_exit; @@ -2041,9 +2219,9 @@ process: if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) drop_reason = SKB_DROP_REASON_XFRM_POLICY; else - drop_reason = tcp_inbound_md5_hash(sk, skb, - &iph->saddr, &iph->daddr, - AF_INET, dif, sdif); + drop_reason = tcp_inbound_hash(sk, req, skb, + &iph->saddr, &iph->daddr, + AF_INET, dif, sdif); if (unlikely(drop_reason)) { sk_drops_add(sk, skb); reqsk_put(req); @@ -2120,8 +2298,8 @@ process: goto discard_and_relse; } - drop_reason = tcp_inbound_md5_hash(sk, skb, &iph->saddr, - &iph->daddr, AF_INET, dif, sdif); + drop_reason = tcp_inbound_hash(sk, NULL, skb, &iph->saddr, &iph->daddr, + AF_INET, dif, sdif); if (drop_reason) goto discard_and_relse; @@ -2268,11 +2446,19 @@ const struct inet_connection_sock_af_ops ipv4_specific = { }; EXPORT_SYMBOL(ipv4_specific); -#ifdef CONFIG_TCP_MD5SIG +#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = { +#ifdef CONFIG_TCP_MD5SIG .md5_lookup = tcp_v4_md5_lookup, .calc_md5_hash = tcp_v4_md5_hash_skb, .md5_parse = tcp_v4_parse_md5_keys, +#endif +#ifdef CONFIG_TCP_AO + .ao_lookup = tcp_v4_ao_lookup, + .calc_ao_hash = tcp_v4_ao_hash_skb, + .ao_parse = tcp_v4_parse_ao, + .ao_calc_key_sk = tcp_v4_ao_calc_key_sk, +#endif }; #endif @@ -2287,13 +2473,25 @@ static int tcp_v4_init_sock(struct sock *sk) icsk->icsk_af_ops = &ipv4_specific; -#ifdef CONFIG_TCP_MD5SIG +#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific; #endif return 0; } +#ifdef CONFIG_TCP_MD5SIG +static void tcp_md5sig_info_free_rcu(struct rcu_head *head) +{ + struct tcp_md5sig_info *md5sig; + + md5sig = container_of(head, struct tcp_md5sig_info, rcu); + kfree(md5sig); + static_branch_slow_dec_deferred(&tcp_md5_needed); + tcp_md5_release_sigpool(); +} +#endif + void tcp_v4_destroy_sock(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); @@ -2318,12 +2516,15 @@ void tcp_v4_destroy_sock(struct sock *sk) #ifdef CONFIG_TCP_MD5SIG /* Clean up the MD5 key list, if any */ if (tp->md5sig_info) { + struct tcp_md5sig_info *md5sig; + + md5sig = rcu_dereference_protected(tp->md5sig_info, 1); tcp_clear_md5_list(sk); - kfree_rcu(rcu_dereference_protected(tp->md5sig_info, 1), rcu); - tp->md5sig_info = NULL; - static_branch_slow_dec_deferred(&tcp_md5_needed); + call_rcu(&md5sig->rcu, tcp_md5sig_info_free_rcu); + rcu_assign_pointer(tp->md5sig_info, NULL); } #endif + tcp_ao_destroy_sock(sk, false); /* Clean up a referenced TCP bind bucket. 
*/ if (inet_csk(sk)->icsk_bind_hash) @@ -3264,6 +3465,7 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_comp_sack_delay_ns = NSEC_PER_MSEC; net->ipv4.sysctl_tcp_comp_sack_slack_ns = 100 * NSEC_PER_USEC; net->ipv4.sysctl_tcp_comp_sack_nr = 44; + net->ipv4.sysctl_tcp_backlog_ack_defer = 1; net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE; net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 0; atomic_set(&net->ipv4.tfo_active_disable_times, 0); @@ -3287,6 +3489,8 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_syn_linear_timeouts = 4; net->ipv4.sysctl_tcp_shrink_window = 0; + net->ipv4.sysctl_tcp_pingpong_thresh = 1; + return 0; } diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c index ae36780977..52fe171674 100644 --- a/net/ipv4/tcp_lp.c +++ b/net/ipv4/tcp_lp.c @@ -272,7 +272,7 @@ static void tcp_lp_pkts_acked(struct sock *sk, const struct ack_sample *sample) { struct tcp_sock *tp = tcp_sk(sk); struct lp *lp = inet_csk_ca(sk); - u32 now = tcp_time_stamp(tp); + u32 now = tcp_time_stamp_ts(tp); u32 delta; if (sample->rtt_us > 0) diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 7aca12c59c..c2a9255385 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -898,11 +898,13 @@ static void tcp_metrics_flush_all(struct net *net) unsigned int row; for (row = 0; row < max_rows; row++, hb++) { - struct tcp_metrics_block __rcu **pp; + struct tcp_metrics_block __rcu **pp = &hb->chain; bool match; + if (!rcu_access_pointer(*pp)) + continue; + spin_lock_bh(&tcp_metrics_lock); - pp = &hb->chain; for (tm = deref_locked(*pp); tm; tm = deref_locked(*pp)) { match = net ? net_eq(tm_net(tm), net) : !refcount_read(&tm_net(tm)->ns.count); @@ -914,6 +916,7 @@ static void tcp_metrics_flush_all(struct net *net) } } spin_unlock_bh(&tcp_metrics_lock); + cond_resched(); } } diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index b98d476f15..9e85f2a0bd 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -51,6 +51,18 @@ tcp_timewait_check_oow_rate_limit(struct inet_timewait_sock *tw, return TCP_TW_SUCCESS; } +static void twsk_rcv_nxt_update(struct tcp_timewait_sock *tcptw, u32 seq) +{ +#ifdef CONFIG_TCP_AO + struct tcp_ao_info *ao; + + ao = rcu_dereference(tcptw->ao_info); + if (unlikely(ao && seq < tcptw->tw_rcv_nxt)) + WRITE_ONCE(ao->rcv_sne, ao->rcv_sne + 1); +#endif + tcptw->tw_rcv_nxt = seq; +} + /* * * Main purpose of TIME-WAIT state is to close connection gracefully, * when one of ends sits in LAST-ACK or CLOSING retransmitting FIN @@ -136,7 +148,8 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, /* FIN arrived, enter true time-wait state. 
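twsk_rcv_nxt_update() below keeps the TCP-AO sequence number extension (SNE, RFC 5925) coherent while the socket sits in time-wait: AO mixes a 32-bit extension into the MAC so that 64 bits of sequence space are effectively covered, and the extension must advance whenever rcv_nxt wraps past zero. A compact model of that update:

#include <stdint.h>
#include <stdio.h>

struct twsk { uint32_t rcv_nxt; uint32_t rcv_sne; };

static void rcv_nxt_update(struct twsk *tw, uint32_t seq)
{
        if (seq < tw->rcv_nxt)      /* u32 wrap-around detected */
                tw->rcv_sne++;
        tw->rcv_nxt = seq;
}

int main(void)
{
        struct twsk tw = { .rcv_nxt = 0xfffffff0u, .rcv_sne = 0 };

        rcv_nxt_update(&tw, 0x10);  /* FIN landed just past the wrap */
        printf("rcv_nxt=%#x rcv_sne=%u\n", tw.rcv_nxt, tw.rcv_sne);
        return 0;
}

Without this, a peer's FIN that crosses the wrap boundary would be verified against the wrong SNE and the final ACK could not be signed correctly.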
*/ tw->tw_substate = TCP_TIME_WAIT; - tcptw->tw_rcv_nxt = TCP_SKB_CB(skb)->end_seq; + twsk_rcv_nxt_update(tcptw, TCP_SKB_CB(skb)->end_seq); + if (tmp_opt.saw_tstamp) { tcptw->tw_ts_recent_stamp = ktime_get_seconds(); tcptw->tw_ts_recent = tmp_opt.rcv_tsval; @@ -261,10 +274,9 @@ static void tcp_time_wait_init(struct sock *sk, struct tcp_timewait_sock *tcptw) tcptw->tw_md5_key = kmemdup(key, sizeof(*key), GFP_ATOMIC); if (!tcptw->tw_md5_key) return; - if (!tcp_alloc_md5sig_pool()) - goto out_free; if (!static_key_fast_inc_not_disabled(&tcp_md5_needed.key.key)) goto out_free; + tcp_md5_add_sigpool(); } return; out_free: @@ -280,7 +292,7 @@ out_free: void tcp_time_wait(struct sock *sk, int state, int timeo) { const struct inet_connection_sock *icsk = inet_csk(sk); - const struct tcp_sock *tp = tcp_sk(sk); + struct tcp_sock *tp = tcp_sk(sk); struct net *net = sock_net(sk); struct inet_timewait_sock *tw; @@ -292,7 +304,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) tw->tw_transparent = inet_test_bit(TRANSPARENT, sk); tw->tw_mark = sk->sk_mark; - tw->tw_priority = sk->sk_priority; + tw->tw_priority = READ_ONCE(sk->sk_priority); tw->tw_rcv_wscale = tp->rx_opt.rcv_wscale; tcptw->tw_rcv_nxt = tp->rcv_nxt; tcptw->tw_snd_nxt = tp->snd_nxt; @@ -300,6 +312,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) tcptw->tw_ts_recent = tp->rx_opt.ts_recent; tcptw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp; tcptw->tw_ts_offset = tp->tsoffset; + tw->tw_usec_ts = tp->tcp_usec_ts; tcptw->tw_last_oow_ack_time = 0; tcptw->tw_tx_delay = tp->tcp_tx_delay; tw->tw_txhash = sk->sk_txhash; @@ -316,6 +329,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) #endif tcp_time_wait_init(sk, tcptw); + tcp_ao_time_wait(tcptw, tp); /* Get the TIME_WAIT timeout firing. 
*/ if (timeo < rto) @@ -348,18 +362,29 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) } EXPORT_SYMBOL(tcp_time_wait); +#ifdef CONFIG_TCP_MD5SIG +static void tcp_md5_twsk_free_rcu(struct rcu_head *head) +{ + struct tcp_md5sig_key *key; + + key = container_of(head, struct tcp_md5sig_key, rcu); + kfree(key); + static_branch_slow_dec_deferred(&tcp_md5_needed); + tcp_md5_release_sigpool(); +} +#endif + void tcp_twsk_destructor(struct sock *sk) { #ifdef CONFIG_TCP_MD5SIG if (static_branch_unlikely(&tcp_md5_needed.key)) { struct tcp_timewait_sock *twsk = tcp_twsk(sk); - if (twsk->tw_md5_key) { - kfree_rcu(twsk->tw_md5_key, rcu); - static_branch_slow_dec_deferred(&tcp_md5_needed); - } + if (twsk->tw_md5_key) + call_rcu(&twsk->tw_md5_key->rcu, tcp_md5_twsk_free_rcu); } #endif + tcp_ao_destroy_sock(sk, true); } EXPORT_SYMBOL_GPL(tcp_twsk_destructor); @@ -494,6 +519,9 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, const struct tcp_sock *oldtp; struct tcp_sock *newtp; u32 seq; +#ifdef CONFIG_TCP_AO + struct tcp_ao_key *ao_key; +#endif if (!newsk) return NULL; @@ -554,22 +582,41 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->max_window = newtp->snd_wnd; if (newtp->rx_opt.tstamp_ok) { + newtp->tcp_usec_ts = treq->req_usec_ts; newtp->rx_opt.ts_recent = READ_ONCE(req->ts_recent); newtp->rx_opt.ts_recent_stamp = ktime_get_seconds(); newtp->tcp_header_len = sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED; } else { + newtp->tcp_usec_ts = 0; newtp->rx_opt.ts_recent_stamp = 0; newtp->tcp_header_len = sizeof(struct tcphdr); } if (req->num_timeout) { + newtp->total_rto = req->num_timeout; newtp->undo_marker = treq->snt_isn; - newtp->retrans_stamp = div_u64(treq->snt_synack, - USEC_PER_SEC / TCP_TS_HZ); + if (newtp->tcp_usec_ts) { + newtp->retrans_stamp = treq->snt_synack; + newtp->total_rto_time = (u32)(tcp_clock_us() - + newtp->retrans_stamp) / USEC_PER_MSEC; + } else { + newtp->retrans_stamp = div_u64(treq->snt_synack, + USEC_PER_SEC / TCP_TS_HZ); + newtp->total_rto_time = tcp_clock_ms() - + newtp->retrans_stamp; + } + newtp->total_rto_recoveries = 1; } newtp->tsoffset = treq->ts_off; #ifdef CONFIG_TCP_MD5SIG newtp->md5sig_info = NULL; /*XXX*/ #endif +#ifdef CONFIG_TCP_AO + newtp->ao_info = NULL; + ao_key = treq->af_specific->ao_lookup(sk, req, + tcp_rsk(req)->ao_keyid, -1); + if (ao_key) + newtp->tcp_header_len += tcp_ao_len_aligned(ao_key); + #endif if (skb->len >= TCP_MSS_DEFAULT + newtp->tcp_header_len) newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len; newtp->rx_opt.mss_clamp = req->mss; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index ab3b7b4b44..e3167ad965 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -170,10 +170,10 @@ static void tcp_event_data_sent(struct tcp_sock *tp, tp->lsndtime = now; /* If it is a reply for ato after last received - * packet, enter pingpong mode. + * packet, increase pingpong count. */ if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato) - inet_csk_enter_pingpong_mode(sk); + inet_csk_inc_pingpong_cnt(sk); } /* Account for an ACK we sent. 
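The tcp_event_data_sent() hunk below turns the pingpong flag into a saturating counter; combined with the new net.ipv4.tcp_pingpong_thresh sysctl (default 1, preserving old behavior, see the tcp_sk_init hunk earlier), a connection can be required to see several quick replies before the delayed-ACK heuristics treat it as interactive. A sketch of the counter/threshold interplay (the threshold value here is made up):

#include <stdio.h>

struct icsk_ack { unsigned int pingpong; };

static void inc_pingpong(struct icsk_ack *a)
{
        if (a->pingpong < 255)      /* saturating, as in the kernel */
                a->pingpong++;
}

static int in_pingpong(const struct icsk_ack *a, unsigned int thresh)
{
        return a->pingpong >= thresh;
}

int main(void)
{
        struct icsk_ack a = { 0 };
        unsigned int thresh = 3;    /* tcp_pingpong_thresh > 1 delays the switch */

        for (int n = 1; n <= 4; n++) {
                inc_pingpong(&a);
                printf("quick replies=%d interactive=%d\n", n, in_pingpong(&a, thresh));
        }
        return 0;
}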
*/ @@ -422,6 +422,7 @@ static inline bool tcp_urg_mode(const struct tcp_sock *tp) #define OPTION_FAST_OPEN_COOKIE BIT(8) #define OPTION_SMC BIT(9) #define OPTION_MPTCP BIT(10) +#define OPTION_AO BIT(11) static void smc_options_write(__be32 *ptr, u16 *options) { @@ -600,6 +601,44 @@ static void bpf_skops_write_hdr_opt(struct sock *sk, struct sk_buff *skb, } #endif +static __be32 *process_tcp_ao_options(struct tcp_sock *tp, + const struct tcp_request_sock *tcprsk, + struct tcp_out_options *opts, + struct tcp_key *key, __be32 *ptr) +{ +#ifdef CONFIG_TCP_AO + u8 maclen = tcp_ao_maclen(key->ao_key); + + if (tcprsk) { + u8 aolen = maclen + sizeof(struct tcp_ao_hdr); + + *ptr++ = htonl((TCPOPT_AO << 24) | (aolen << 16) | + (tcprsk->ao_keyid << 8) | + (tcprsk->ao_rcv_next)); + } else { + struct tcp_ao_key *rnext_key; + struct tcp_ao_info *ao_info; + + ao_info = rcu_dereference_check(tp->ao_info, + lockdep_sock_is_held(&tp->inet_conn.icsk_inet.sk)); + rnext_key = READ_ONCE(ao_info->rnext_key); + if (WARN_ON_ONCE(!rnext_key)) + return ptr; + *ptr++ = htonl((TCPOPT_AO << 24) | + (tcp_ao_len(key->ao_key) << 16) | + (key->ao_key->sndid << 8) | + (rnext_key->rcvid)); + } + opts->hash_location = (__u8 *)ptr; + ptr += maclen / sizeof(*ptr); + if (unlikely(maclen % sizeof(*ptr))) { + memset(ptr, TCPOPT_NOP, sizeof(*ptr)); + ptr++; + } +#endif + return ptr; +} + /* Write previously computed TCP options to the packet. * * Beware: Something in the Internet is very sensitive to the ordering of @@ -614,19 +653,22 @@ static void bpf_skops_write_hdr_opt(struct sock *sk, struct sk_buff *skb, * (but it may well be that other scenarios fail similarly). */ static void tcp_options_write(struct tcphdr *th, struct tcp_sock *tp, - struct tcp_out_options *opts) + const struct tcp_request_sock *tcprsk, + struct tcp_out_options *opts, + struct tcp_key *key) { __be32 *ptr = (__be32 *)(th + 1); u16 options = opts->options; /* mungable copy */ - if (unlikely(OPTION_MD5 & options)) { + if (tcp_key_is_md5(key)) { *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG); /* overload cookie hash location */ opts->hash_location = (__u8 *)ptr; ptr += 4; + } else if (tcp_key_is_ao(key)) { + ptr = process_tcp_ao_options(tp, tcprsk, opts, key, ptr); } - if (unlikely(opts->mss)) { *ptr++ = htonl((TCPOPT_MSS << 24) | (TCPOLEN_MSS << 16) | @@ -767,23 +809,25 @@ static void mptcp_set_option_cond(const struct request_sock *req, */ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, struct tcp_out_options *opts, - struct tcp_md5sig_key **md5) + struct tcp_key *key) { struct tcp_sock *tp = tcp_sk(sk); unsigned int remaining = MAX_TCP_OPTION_SPACE; struct tcp_fastopen_request *fastopen = tp->fastopen_req; + bool timestamps; - *md5 = NULL; -#ifdef CONFIG_TCP_MD5SIG - if (static_branch_unlikely(&tcp_md5_needed.key) && - rcu_access_pointer(tp->md5sig_info)) { - *md5 = tp->af_specific->md5_lookup(sk, sk); - if (*md5) { - opts->options |= OPTION_MD5; - remaining -= TCPOLEN_MD5SIG_ALIGNED; + /* Better than switch (key.type) as it has static branches */ + if (tcp_key_is_md5(key)) { + timestamps = false; + opts->options |= OPTION_MD5; + remaining -= TCPOLEN_MD5SIG_ALIGNED; + } else { + timestamps = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_timestamps); + if (tcp_key_is_ao(key)) { + opts->options |= OPTION_AO; + remaining -= tcp_ao_len_aligned(key->ao_key); } } -#endif /* We always get an MSS option. 
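process_tcp_ao_options() above writes the MAC right after the 4-byte AO header and NOP-pads the trailing word when the MAC length is not a multiple of four; that is why space accounting uses an aligned length while the option's own length byte does not. The arithmetic as a sketch, mirroring what tcp_ao_len() vs tcp_ao_len_aligned() are assumed to compute (MAC sizes illustrative):

#include <stdio.h>

#define AO_HDR 4    /* kind, length, keyid, rnext_keyid */

static int ao_len(int maclen)         { return AO_HDR + maclen; }
static int ao_len_aligned(int maclen) { return (ao_len(maclen) + 3) & ~3; }

int main(void)
{
        int macs[] = { 12, 16, 10 };    /* 96-bit MACs are the RFC 5926 default */

        for (unsigned int i = 0; i < sizeof(macs) / sizeof(*macs); i++)
                printf("maclen=%2d option=%2d on-wire=%2d pad=%d\n",
                       macs[i], ao_len(macs[i]), ao_len_aligned(macs[i]),
                       ao_len_aligned(macs[i]) - ao_len(macs[i]));
        return 0;
}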
The option bytes which will be seen in * normal data packets should timestamps be used, must be in the MSS @@ -797,9 +841,9 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, opts->mss = tcp_advertise_mss(sk); remaining -= TCPOLEN_MSS_ALIGNED; - if (likely(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_timestamps) && !*md5)) { + if (likely(timestamps)) { opts->options |= OPTION_TS; - opts->tsval = tcp_skb_timestamp(skb) + tp->tsoffset; + opts->tsval = tcp_skb_timestamp_ts(tp->tcp_usec_ts, skb) + tp->tsoffset; opts->tsecr = tp->rx_opt.ts_recent; remaining -= TCPOLEN_TSTAMP_ALIGNED; } @@ -850,7 +894,7 @@ static unsigned int tcp_synack_options(const struct sock *sk, struct request_sock *req, unsigned int mss, struct sk_buff *skb, struct tcp_out_options *opts, - const struct tcp_md5sig_key *md5, + const struct tcp_key *key, struct tcp_fastopen_cookie *foc, enum tcp_synack_type synack_type, struct sk_buff *syn_skb) @@ -858,8 +902,7 @@ static unsigned int tcp_synack_options(const struct sock *sk, struct inet_request_sock *ireq = inet_rsk(req); unsigned int remaining = MAX_TCP_OPTION_SPACE; -#ifdef CONFIG_TCP_MD5SIG - if (md5) { + if (tcp_key_is_md5(key)) { opts->options |= OPTION_MD5; remaining -= TCPOLEN_MD5SIG_ALIGNED; @@ -870,8 +913,11 @@ static unsigned int tcp_synack_options(const struct sock *sk, */ if (synack_type != TCP_SYNACK_COOKIE) ireq->tstamp_ok &= !ireq->sack_ok; + } else if (tcp_key_is_ao(key)) { + opts->options |= OPTION_AO; + remaining -= tcp_ao_len_aligned(key->ao_key); + ireq->tstamp_ok &= !ireq->sack_ok; } -#endif /* We always send an MSS option. */ opts->mss = mss; @@ -884,7 +930,8 @@ static unsigned int tcp_synack_options(const struct sock *sk, } if (likely(ireq->tstamp_ok)) { opts->options |= OPTION_TS; - opts->tsval = tcp_skb_timestamp(skb) + tcp_rsk(req)->ts_off; + opts->tsval = tcp_skb_timestamp_ts(tcp_rsk(req)->req_usec_ts, skb) + + tcp_rsk(req)->ts_off; opts->tsecr = READ_ONCE(req->ts_recent); remaining -= TCPOLEN_TSTAMP_ALIGNED; } @@ -921,7 +968,7 @@ static unsigned int tcp_synack_options(const struct sock *sk, */ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb, struct tcp_out_options *opts, - struct tcp_md5sig_key **md5) + struct tcp_key *key) { struct tcp_sock *tp = tcp_sk(sk); unsigned int size = 0; @@ -929,21 +976,19 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb opts->options = 0; - *md5 = NULL; -#ifdef CONFIG_TCP_MD5SIG - if (static_branch_unlikely(&tcp_md5_needed.key) && - rcu_access_pointer(tp->md5sig_info)) { - *md5 = tp->af_specific->md5_lookup(sk, sk); - if (*md5) { - opts->options |= OPTION_MD5; - size += TCPOLEN_MD5SIG_ALIGNED; - } + /* Better than switch (key.type) as it has static branches */ + if (tcp_key_is_md5(key)) { + opts->options |= OPTION_MD5; + size += TCPOLEN_MD5SIG_ALIGNED; + } else if (tcp_key_is_ao(key)) { + opts->options |= OPTION_AO; + size += tcp_ao_len_aligned(key->ao_key); } -#endif if (likely(tp->rx_opt.tstamp_ok)) { opts->options |= OPTION_TS; - opts->tsval = skb ? tcp_skb_timestamp(skb) + tp->tsoffset : 0; + opts->tsval = skb ? 
tcp_skb_timestamp_ts(tp->tcp_usec_ts, skb) + + tp->tsoffset : 0; opts->tsecr = tp->rx_opt.ts_recent; size += TCPOLEN_TSTAMP_ALIGNED; } @@ -1076,7 +1121,8 @@ static void tcp_tasklet_func(struct tasklet_struct *t) #define TCP_DEFERRED_ALL (TCPF_TSQ_DEFERRED | \ TCPF_WRITE_TIMER_DEFERRED | \ TCPF_DELACK_TIMER_DEFERRED | \ - TCPF_MTU_REDUCED_DEFERRED) + TCPF_MTU_REDUCED_DEFERRED | \ + TCPF_ACK_DEFERRED) /** * tcp_release_cb - tcp release_sock() callback * @sk: socket @@ -1100,16 +1146,6 @@ void tcp_release_cb(struct sock *sk) tcp_tsq_write(sk); __sock_put(sk); } - /* Here begins the tricky part : - * We are called from release_sock() with : - * 1) BH disabled - * 2) sk_lock.slock spinlock held - * 3) socket owned by us (sk->sk_lock.owned == 1) - * - * But following code is meant to be called from BH handlers, - * so we should keep BH disabled, but early release socket ownership - */ - sock_release_ownership(sk); if (flags & TCPF_WRITE_TIMER_DEFERRED) { tcp_write_timer_handler(sk); @@ -1123,6 +1159,8 @@ void tcp_release_cb(struct sock *sk) inet_csk(sk)->icsk_af_ops->mtu_reduced(sk); __sock_put(sk); } + if ((flags & TCPF_ACK_DEFERRED) && inet_csk_ack_scheduled(sk)) + tcp_send_ack(sk); } EXPORT_SYMBOL(tcp_release_cb); @@ -1207,7 +1245,7 @@ static void tcp_update_skb_after_send(struct sock *sk, struct sk_buff *skb, struct tcp_sock *tp = tcp_sk(sk); if (sk->sk_pacing_status != SK_PACING_NONE) { - unsigned long rate = sk->sk_pacing_rate; + unsigned long rate = READ_ONCE(sk->sk_pacing_rate); /* Original sch_fq does not pace first 10 MSS * Note that tp->data_segs_out overflows after 2^32 packets, @@ -1250,7 +1288,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, struct tcp_out_options opts; unsigned int tcp_options_size, tcp_header_size; struct sk_buff *oskb = NULL; - struct tcp_md5sig_key *md5; + struct tcp_key key; struct tcphdr *th; u64 prior_wstamp; int err; @@ -1282,11 +1320,11 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, tcb = TCP_SKB_CB(skb); memset(&opts, 0, sizeof(opts)); + tcp_get_current_key(sk, &key); if (unlikely(tcb->tcp_flags & TCPHDR_SYN)) { - tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5); + tcp_options_size = tcp_syn_options(sk, skb, &opts, &key); } else { - tcp_options_size = tcp_established_options(sk, skb, &opts, - &md5); + tcp_options_size = tcp_established_options(sk, skb, &opts, &key); /* Force a PSH flag on all (GSO) packets to expedite GRO flush * at receiver : This slightly improve GRO performance. 
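The TCPF_ACK_DEFERRED handling in tcp_release_cb() below pairs with the __tcp_ack_snd_check() hunk in tcp_input.c earlier in this series: an ACK wanted while the socket is owned by user context is parked in sk_tsq_flags and flushed when the lock is released. A toy model of the defer/flush handshake; the real code additionally checks inet_csk_ack_scheduled() and the tcp_backlog_ack_defer sysctl:

#include <stdatomic.h>
#include <stdio.h>

#define TCPF_ACK_DEFERRED (1u << 0)

static atomic_uint tsq_flags;
static int owned_by_user = 1;

static void ack_snd_check(void)
{
        if (owned_by_user) {        /* defer: release_cb will run it */
                atomic_fetch_or(&tsq_flags, TCPF_ACK_DEFERRED);
                return;
        }
        puts("tcp_send_ack()");
}

static void release_cb(void)
{
        unsigned int flags = atomic_exchange(&tsq_flags, 0);

        if (flags & TCPF_ACK_DEFERRED)
                puts("tcp_send_ack() (deferred)");
}

int main(void)
{
        ack_snd_check();            /* backlog processing, lock owned */
        owned_by_user = 0;
        release_cb();               /* release_sock() flushes the ACK */
        return 0;
}

Batching the ACK this way lets a receiver drain a large socket backlog with one ACK instead of one per queued segment.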
* Note that we do not force the PSH flag for non GSO packets, @@ -1367,16 +1405,25 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, th->window = htons(min(tp->rcv_wnd, 65535U)); } - tcp_options_write(th, tp, &opts); + tcp_options_write(th, tp, NULL, &opts, &key); + if (tcp_key_is_md5(&key)) { #ifdef CONFIG_TCP_MD5SIG - /* Calculate the MD5 hash, as we have all we need now */ - if (md5) { + /* Calculate the MD5 hash, as we have all we need now */ sk_gso_disable(sk); tp->af_specific->calc_md5_hash(opts.hash_location, - md5, sk, skb); - } + key.md5_key, sk, skb); #endif + } else if (tcp_key_is_ao(&key)) { + int err; + + err = tcp_ao_transmit_skb(sk, skb, key.ao_key, th, + opts.hash_location); + if (err) { + kfree_skb_reason(skb, SKB_DROP_REASON_NOT_SPECIFIED); + return -ENOMEM; + } + } /* BPF prog is the last one writing header option */ bpf_skops_write_hdr_opt(sk, skb, NULL, NULL, 0, &opts); @@ -1703,14 +1750,6 @@ static inline int __tcp_mtu_to_mss(struct sock *sk, int pmtu) */ mss_now = pmtu - icsk->icsk_af_ops->net_header_len - sizeof(struct tcphdr); - /* IPv6 adds a frag_hdr in case RTAX_FEATURE_ALLFRAG is set */ - if (icsk->icsk_af_ops->net_frag_header_len) { - const struct dst_entry *dst = __sk_dst_get(sk); - - if (dst && dst_allfrag(dst)) - mss_now -= icsk->icsk_af_ops->net_frag_header_len; - } - /* Clamp it (mss_clamp does not include tcp options) */ if (mss_now > tp->rx_opt.mss_clamp) mss_now = tp->rx_opt.mss_clamp; @@ -1738,21 +1777,11 @@ int tcp_mss_to_mtu(struct sock *sk, int mss) { const struct tcp_sock *tp = tcp_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); - int mtu; - mtu = mss + + return mss + tp->tcp_header_len + icsk->icsk_ext_hdr_len + icsk->icsk_af_ops->net_header_len; - - /* IPv6 adds a frag_hdr in case RTAX_FEATURE_ALLFRAG is set */ - if (icsk->icsk_af_ops->net_frag_header_len) { - const struct dst_entry *dst = __sk_dst_get(sk); - - if (dst && dst_allfrag(dst)) - mtu += icsk->icsk_af_ops->net_frag_header_len; - } - return mtu; } EXPORT_SYMBOL(tcp_mss_to_mtu); @@ -1827,7 +1856,7 @@ unsigned int tcp_current_mss(struct sock *sk) u32 mss_now; unsigned int header_len; struct tcp_out_options opts; - struct tcp_md5sig_key *md5; + struct tcp_key key; mss_now = tp->mss_cache; @@ -1836,8 +1865,8 @@ unsigned int tcp_current_mss(struct sock *sk) if (mtu != inet_csk(sk)->icsk_pmtu_cookie) mss_now = tcp_sync_mss(sk, mtu); } - - header_len = tcp_established_options(sk, NULL, &opts, &md5) + + tcp_get_current_key(sk, &key); + header_len = tcp_established_options(sk, NULL, &opts, &key) + sizeof(struct tcphdr); /* The mss_cache is sized based on tp->tcp_header_len, which assumes * some common options. 
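With the RTAX_FEATURE_ALLFRAG frag-header special case deleted below, the MSS/MTU conversion becomes plain header arithmetic. A simplified round-trip; note the real helpers are asymmetric (mtu-to-mss subtracts only the bare 20-byte TCP header and clamps options via mss_clamp, while mss-to-mtu adds the full current tp->tcp_header_len), and the sizes here are illustrative:

#include <stdio.h>

/* IPv4 (20-byte network header), no extension headers assumed. */
static int mtu_to_mss(int pmtu, int net_hdr)
{
        return pmtu - net_hdr - 20;
}

static int mss_to_mtu(int mss, int tcp_hdr_len, int net_hdr)
{
        return mss + tcp_hdr_len + net_hdr;
}

int main(void)
{
        printf("mtu 1500 -> mss %d\n", mtu_to_mss(1500, 20));             /* 1460 */
        printf("mss 1448 + TS opts -> mtu %d\n", mss_to_mtu(1448, 32, 20)); /* 1500 */
        return 0;
}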
If this is an odd packet (because we have SACK @@ -1979,7 +2008,7 @@ static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now, unsigned long bytes; u32 r; - bytes = sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift); + bytes = READ_ONCE(sk->sk_pacing_rate) >> READ_ONCE(sk->sk_pacing_shift); r = tcp_min_rtt(tcp_sk(sk)) >> READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_tso_rtt_log); if (r < BITS_PER_TYPE(sk->sk_gso_max_size)) @@ -2572,7 +2601,7 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb, limit = max_t(unsigned long, 2 * skb->truesize, - sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift)); + READ_ONCE(sk->sk_pacing_rate) >> READ_ONCE(sk->sk_pacing_shift)); if (sk->sk_pacing_status == SK_PACING_NONE) limit = min_t(unsigned long, limit, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes)); @@ -2580,7 +2609,8 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb, if (static_branch_unlikely(&tcp_tx_delay_enabled) && tcp_sk(sk)->tcp_tx_delay) { - u64 extra_bytes = (u64)sk->sk_pacing_rate * tcp_sk(sk)->tcp_tx_delay; + u64 extra_bytes = (u64)READ_ONCE(sk->sk_pacing_rate) * + tcp_sk(sk)->tcp_tx_delay; /* TSQ is based on skb truesize sum (sk_wmem_alloc), so we * approximate our needs assuming an ~100% skb->truesize overhead. @@ -3391,7 +3421,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) /* Save stamp of the first (attempted) retransmit. */ if (!tp->retrans_stamp) - tp->retrans_stamp = tcp_skb_timestamp(skb); + tp->retrans_stamp = tcp_skb_timestamp_ts(tp->tcp_usec_ts, skb); if (tp->undo_retrans < 0) tp->undo_retrans = 0; @@ -3639,8 +3669,8 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, { struct inet_request_sock *ireq = inet_rsk(req); const struct tcp_sock *tp = tcp_sk(sk); - struct tcp_md5sig_key *md5 = NULL; struct tcp_out_options opts; + struct tcp_key key = {}; struct sk_buff *skb; int tcp_header_size; struct tcphdr *th; @@ -3690,16 +3720,45 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, tcp_rsk(req)->snt_synack = tcp_skb_timestamp_us(skb); } -#ifdef CONFIG_TCP_MD5SIG +#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) rcu_read_lock(); - md5 = tcp_rsk(req)->af_specific->req_md5_lookup(sk, req_to_sk(req)); #endif + if (tcp_rsk_used_ao(req)) { +#ifdef CONFIG_TCP_AO + struct tcp_ao_key *ao_key = NULL; + u8 keyid = tcp_rsk(req)->ao_keyid; + + ao_key = tcp_sk(sk)->af_specific->ao_lookup(sk, req_to_sk(req), + keyid, -1); + /* If there is no matching key - avoid sending anything, + * especially usigned segments. It could try harder and lookup + * for another peer-matching key, but the peer has requested + * ao_keyid (RFC5925 RNextKeyID), so let's keep it simple here. 
+ */ + if (unlikely(!ao_key)) { + rcu_read_unlock(); + kfree_skb(skb); + net_warn_ratelimited("TCP-AO: the keyid %u from SYN packet is not present - not sending SYNACK\n", + keyid); + return NULL; + } + key.ao_key = ao_key; + key.type = TCP_KEY_AO; +#endif + } else { +#ifdef CONFIG_TCP_MD5SIG + key.md5_key = tcp_rsk(req)->af_specific->req_md5_lookup(sk, + req_to_sk(req)); + if (key.md5_key) + key.type = TCP_KEY_MD5; +#endif + } skb_set_hash(skb, READ_ONCE(tcp_rsk(req)->txhash), PKT_HASH_TYPE_L4); /* bpf program will be interested in the tcp_flags */ TCP_SKB_CB(skb)->tcp_flags = TCPHDR_SYN | TCPHDR_ACK; - tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, md5, - foc, synack_type, - syn_skb) + sizeof(*th); + tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, + &key, foc, synack_type, syn_skb) + + sizeof(*th); skb_push(skb, tcp_header_size); skb_reset_transport_header(skb); @@ -3719,15 +3778,24 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, /* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */ th->window = htons(min(req->rsk_rcv_wnd, 65535U)); - tcp_options_write(th, NULL, &opts); + tcp_options_write(th, NULL, tcp_rsk(req), &opts, &key); th->doff = (tcp_header_size >> 2); TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS); -#ifdef CONFIG_TCP_MD5SIG /* Okay, we have all we need - do the md5 hash if needed */ - if (md5) + if (tcp_key_is_md5(&key)) { +#ifdef CONFIG_TCP_MD5SIG tcp_rsk(req)->af_specific->calc_md5_hash(opts.hash_location, - md5, req_to_sk(req), skb); + key.md5_key, req_to_sk(req), skb); +#endif + } else if (tcp_key_is_ao(&key)) { +#ifdef CONFIG_TCP_AO + tcp_rsk(req)->af_specific->ao_synack_hash(opts.hash_location, + key.ao_key, req, skb, + opts.hash_location - (u8 *)th, 0); +#endif + } +#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) rcu_read_unlock(); #endif @@ -3775,6 +3843,8 @@ static void tcp_connect_init(struct sock *sk) if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_timestamps)) tp->tcp_header_len += TCPOLEN_TSTAMP_ALIGNED; + tcp_ao_connect_init(sk); + /* If user gave his TCP_MAXSEG, record it to clamp */ if (tp->rx_opt.user_mss) tp->rx_opt.mss_clamp = tp->rx_opt.user_mss; @@ -3957,6 +4027,53 @@ int tcp_connect(struct sock *sk) tcp_call_bpf(sk, BPF_SOCK_OPS_TCP_CONNECT_CB, 0, NULL); +#if defined(CONFIG_TCP_MD5SIG) && defined(CONFIG_TCP_AO) + /* Has to be checked late, after setting daddr/saddr/ops. + * Return error if the peer has both a md5 and a tcp-ao key + * configured as this is ambiguous. + */ + if (unlikely(rcu_dereference_protected(tp->md5sig_info, + lockdep_sock_is_held(sk)))) { + bool needs_ao = !!tp->af_specific->ao_lookup(sk, sk, -1, -1); + bool needs_md5 = !!tp->af_specific->md5_lookup(sk, sk); + struct tcp_ao_info *ao_info; + + ao_info = rcu_dereference_check(tp->ao_info, + lockdep_sock_is_held(sk)); + if (ao_info) { + /* This is an extra check: tcp_ao_required() in + * tcp_v{4,6}_parse_md5_keys() should prevent adding + * md5 keys on ao_required socket. + */ + needs_ao |= ao_info->ao_required; + WARN_ON_ONCE(ao_info->ao_required && needs_md5); + } + if (needs_md5 && needs_ao) + return -EKEYREJECTED; + + /* If we have a matching md5 key and no matching tcp-ao key + * then free up ao_info if allocated. 
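The connect-time check below makes a peer matching both an MD5 key and a TCP-AO key a hard failure rather than guessing which protocol to use. The decision table, stand-alone (EKEYREJECTED as in the hunk; a Linux errno value):

#include <errno.h>
#include <stdio.h>

/* A peer matched by both key types is ambiguous and is rejected;
 * a single match wins and the other subsystem's state is torn down. */
static int check_keys(int needs_md5, int needs_ao)
{
        if (needs_md5 && needs_ao)
                return -EKEYREJECTED;
        return 0;
}

int main(void)
{
        printf("md5 only: %d\n", check_keys(1, 0));
        printf("ao only : %d\n", check_keys(0, 1));
        printf("both    : %d\n", check_keys(1, 1));
        return 0;
}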
+ */ + if (needs_md5) { + tcp_ao_destroy_sock(sk, false); + } else if (needs_ao) { + tcp_clear_md5_list(sk); + kfree(rcu_replace_pointer(tp->md5sig_info, NULL, + lockdep_sock_is_held(sk))); + } + } +#endif +#ifdef CONFIG_TCP_AO + if (unlikely(rcu_dereference_protected(tp->ao_info, + lockdep_sock_is_held(sk)))) { + /* Don't allow connecting if ao is configured but no + * matching key is found. + */ + if (!tp->af_specific->ao_lookup(sk, sk, -1, -1)) + return -EKEYREJECTED; + } +#endif + if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk)) return -EHOSTUNREACH; /* Routing failure or similar. */ @@ -3973,7 +4090,7 @@ int tcp_connect(struct sock *sk) tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN); tcp_mstamp_refresh(tp); - tp->retrans_stamp = tcp_time_stamp(tp); + tp->retrans_stamp = tcp_time_stamp_ts(tp); tcp_connect_queue_skb(sk, buff); tcp_ecn_send_syn(sk, buff); tcp_rbtree_insert(&sk->tcp_rtx_queue, buff); diff --git a/net/ipv4/tcp_sigpool.c b/net/ipv4/tcp_sigpool.c new file mode 100644 index 0000000000..8512cb09eb --- /dev/null +++ b/net/ipv4/tcp_sigpool.c @@ -0,0 +1,357 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include <crypto/hash.h> +#include <linux/cpu.h> +#include <linux/kref.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/percpu.h> +#include <linux/workqueue.h> +#include <net/tcp.h> + +static size_t __scratch_size; +static DEFINE_PER_CPU(void __rcu *, sigpool_scratch); + +struct sigpool_entry { + struct crypto_ahash *hash; + const char *alg; + struct kref kref; + uint16_t needs_key:1, + reserved:15; +}; + +#define CPOOL_SIZE (PAGE_SIZE / sizeof(struct sigpool_entry)) +static struct sigpool_entry cpool[CPOOL_SIZE]; +static unsigned int cpool_populated; +static DEFINE_MUTEX(cpool_mutex); + +/* Slow-path */ +struct scratches_to_free { + struct rcu_head rcu; + unsigned int cnt; + void *scratches[]; +}; + +static void free_old_scratches(struct rcu_head *head) +{ + struct scratches_to_free *stf; + + stf = container_of(head, struct scratches_to_free, rcu); + while (stf->cnt--) + kfree(stf->scratches[stf->cnt]); + kfree(stf); +} + +/** + * sigpool_reserve_scratch - re-allocates scratch buffer, slow-path + * @size: request size for the scratch/temp buffer + */ +static int sigpool_reserve_scratch(size_t size) +{ + struct scratches_to_free *stf; + size_t stf_sz = struct_size(stf, scratches, num_possible_cpus()); + int cpu, err = 0; + + lockdep_assert_held(&cpool_mutex); + if (__scratch_size >= size) + return 0; + + stf = kmalloc(stf_sz, GFP_KERNEL); + if (!stf) + return -ENOMEM; + stf->cnt = 0; + + size = max(size, __scratch_size); + cpus_read_lock(); + for_each_possible_cpu(cpu) { + void *scratch, *old_scratch; + + scratch = kmalloc_node(size, GFP_KERNEL, cpu_to_node(cpu)); + if (!scratch) { + err = -ENOMEM; + break; + } + + old_scratch = rcu_replace_pointer(per_cpu(sigpool_scratch, cpu), + scratch, lockdep_is_held(&cpool_mutex)); + if (!cpu_online(cpu) || !old_scratch) { + kfree(old_scratch); + continue; + } + stf->scratches[stf->cnt++] = old_scratch; + } + cpus_read_unlock(); + if (!err) + __scratch_size = size; + + call_rcu(&stf->rcu, free_old_scratches); + return err; +} + +static void sigpool_scratch_free(void) +{ + int cpu; + + for_each_possible_cpu(cpu) + kfree(rcu_replace_pointer(per_cpu(sigpool_scratch, cpu), + NULL, lockdep_is_held(&cpool_mutex))); + __scratch_size = 0; +} + +static int __cpool_try_clone(struct crypto_ahash *hash) +{ + struct crypto_ahash *tmp; + + tmp = crypto_clone_ahash(hash); + if (IS_ERR(tmp)) + return PTR_ERR(tmp); + + 
crypto_free_ahash(tmp); + return 0; +} + +static int __cpool_alloc_ahash(struct sigpool_entry *e, const char *alg) +{ + struct crypto_ahash *cpu0_hash; + int ret; + + e->alg = kstrdup(alg, GFP_KERNEL); + if (!e->alg) + return -ENOMEM; + + cpu0_hash = crypto_alloc_ahash(alg, 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(cpu0_hash)) { + ret = PTR_ERR(cpu0_hash); + goto out_free_alg; + } + + e->needs_key = crypto_ahash_get_flags(cpu0_hash) & CRYPTO_TFM_NEED_KEY; + + ret = __cpool_try_clone(cpu0_hash); + if (ret) + goto out_free_cpu0_hash; + e->hash = cpu0_hash; + kref_init(&e->kref); + return 0; + +out_free_cpu0_hash: + crypto_free_ahash(cpu0_hash); +out_free_alg: + kfree(e->alg); + e->alg = NULL; + return ret; +} + +/** + * tcp_sigpool_alloc_ahash - allocates pool for ahash requests + * @alg: name of async hash algorithm + * @scratch_size: reserve a tcp_sigpool::scratch buffer of this size + */ +int tcp_sigpool_alloc_ahash(const char *alg, size_t scratch_size) +{ + int i, ret; + + /* slow-path */ + mutex_lock(&cpool_mutex); + ret = sigpool_reserve_scratch(scratch_size); + if (ret) + goto out; + for (i = 0; i < cpool_populated; i++) { + if (!cpool[i].alg) + continue; + if (strcmp(cpool[i].alg, alg)) + continue; + + /* pairs with tcp_sigpool_release() */ + if (!kref_get_unless_zero(&cpool[i].kref)) + kref_init(&cpool[i].kref); + ret = i; + goto out; + } + + for (i = 0; i < cpool_populated; i++) { + if (!cpool[i].alg) + break; + } + if (i >= CPOOL_SIZE) { + ret = -ENOSPC; + goto out; + } + + ret = __cpool_alloc_ahash(&cpool[i], alg); + if (!ret) { + ret = i; + if (i == cpool_populated) + cpool_populated++; + } +out: + mutex_unlock(&cpool_mutex); + return ret; +} +EXPORT_SYMBOL_GPL(tcp_sigpool_alloc_ahash); + +static void __cpool_free_entry(struct sigpool_entry *e) +{ + crypto_free_ahash(e->hash); + kfree(e->alg); + memset(e, 0, sizeof(*e)); +} + +static void cpool_cleanup_work_cb(struct work_struct *work) +{ + bool free_scratch = true; + unsigned int i; + + mutex_lock(&cpool_mutex); + for (i = 0; i < cpool_populated; i++) { + if (kref_read(&cpool[i].kref) > 0) { + free_scratch = false; + continue; + } + if (!cpool[i].alg) + continue; + __cpool_free_entry(&cpool[i]); + } + if (free_scratch) + sigpool_scratch_free(); + mutex_unlock(&cpool_mutex); +} + +static DECLARE_WORK(cpool_cleanup_work, cpool_cleanup_work_cb); +static void cpool_schedule_cleanup(struct kref *kref) +{ + schedule_work(&cpool_cleanup_work); +} + +/** + * tcp_sigpool_release - decreases number of users for a pool. If it was + * the last user of the pool, releases any memory that was consumed. 
+ * @id: tcp_sigpool that was previously allocated by tcp_sigpool_alloc_ahash() + */ +void tcp_sigpool_release(unsigned int id) +{ + if (WARN_ON_ONCE(id >= cpool_populated || !cpool[id].alg)) + return; + + /* slow-path */ + kref_put(&cpool[id].kref, cpool_schedule_cleanup); +} +EXPORT_SYMBOL_GPL(tcp_sigpool_release); + +/** + * tcp_sigpool_get - increases number of users (refcounter) for a pool + * @id: tcp_sigpool that was previously allocated by tcp_sigpool_alloc_ahash() + */ +void tcp_sigpool_get(unsigned int id) +{ + if (WARN_ON_ONCE(id >= cpool_populated || !cpool[id].alg)) + return; + kref_get(&cpool[id].kref); +} +EXPORT_SYMBOL_GPL(tcp_sigpool_get); + +int tcp_sigpool_start(unsigned int id, struct tcp_sigpool *c) __cond_acquires(RCU_BH) +{ + struct crypto_ahash *hash; + + rcu_read_lock_bh(); + if (WARN_ON_ONCE(id >= cpool_populated || !cpool[id].alg)) { + rcu_read_unlock_bh(); + return -EINVAL; + } + + hash = crypto_clone_ahash(cpool[id].hash); + if (IS_ERR(hash)) { + rcu_read_unlock_bh(); + return PTR_ERR(hash); + } + + c->req = ahash_request_alloc(hash, GFP_ATOMIC); + if (!c->req) { + crypto_free_ahash(hash); + rcu_read_unlock_bh(); + return -ENOMEM; + } + ahash_request_set_callback(c->req, 0, NULL, NULL); + + /* Pairs with tcp_sigpool_reserve_scratch(), scratch area is + * valid (allocated) until tcp_sigpool_end(). + */ + c->scratch = rcu_dereference_bh(*this_cpu_ptr(&sigpool_scratch)); + return 0; +} +EXPORT_SYMBOL_GPL(tcp_sigpool_start); + +void tcp_sigpool_end(struct tcp_sigpool *c) __releases(RCU_BH) +{ + struct crypto_ahash *hash = crypto_ahash_reqtfm(c->req); + + rcu_read_unlock_bh(); + ahash_request_free(c->req); + crypto_free_ahash(hash); +} +EXPORT_SYMBOL_GPL(tcp_sigpool_end); + +/** + * tcp_sigpool_algo - return algorithm of tcp_sigpool + * @id: tcp_sigpool that was previously allocated by tcp_sigpool_alloc_ahash() + * @buf: buffer to return name of algorithm + * @buf_len: size of @buf + */ +size_t tcp_sigpool_algo(unsigned int id, char *buf, size_t buf_len) +{ + if (WARN_ON_ONCE(id >= cpool_populated || !cpool[id].alg)) + return -EINVAL; + + return strscpy(buf, cpool[id].alg, buf_len); +} +EXPORT_SYMBOL_GPL(tcp_sigpool_algo); + +/** + * tcp_sigpool_hash_skb_data - hash data in skb with initialized tcp_sigpool + * @hp: tcp_sigpool pointer + * @skb: buffer to add sign for + * @header_len: TCP header length for this segment + */ +int tcp_sigpool_hash_skb_data(struct tcp_sigpool *hp, + const struct sk_buff *skb, + unsigned int header_len) +{ + const unsigned int head_data_len = skb_headlen(skb) > header_len ? 
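tcp_sigpool keeps one crypto_ahash per algorithm in a small static table refcounted with kref: allocation reuses (or revives) an entry by algorithm name, and the last release only schedules a cleanup pass rather than freeing inline. A toy user-space model of that lifecycle; the table size and the immediate cleanup are simplifications of the real CPOOL_SIZE table and workqueue:

#include <stdio.h>
#include <string.h>

struct entry { const char *alg; int refs; };
static struct entry pool[4];

static int pool_alloc(const char *alg)
{
        int free_slot = -1;

        for (int i = 0; i < 4; i++) {
                if (pool[i].alg && !strcmp(pool[i].alg, alg)) {
                        pool[i].refs++;   /* kref_get_unless_zero / kref_init */
                        return i;
                }
                if (!pool[i].alg && free_slot < 0)
                        free_slot = i;
        }
        if (free_slot < 0)
                return -1;                /* -ENOSPC in the kernel */
        pool[free_slot] = (struct entry){ alg, 1 };
        return free_slot;
}

static void pool_release(int id)
{
        if (--pool[id].refs == 0)         /* kref_put -> cleanup work */
                pool[id].alg = NULL;
}

int main(void)
{
        int a = pool_alloc("hmac(sha1)");
        int b = pool_alloc("hmac(sha1)"); /* same entry, refcount 2 */

        printf("a=%d b=%d refs=%d\n", a, b, pool[a].refs);
        pool_release(b);
        pool_release(a);
        printf("freed=%d\n", pool[a].alg == NULL);
        return 0;
}

Sharing one entry per algorithm is the point: MD5 and every AO key using the same hash all pin a single tfm, cloned per use in tcp_sigpool_start().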
+ skb_headlen(skb) - header_len : 0; + const struct skb_shared_info *shi = skb_shinfo(skb); + const struct tcphdr *tp = tcp_hdr(skb); + struct ahash_request *req = hp->req; + struct sk_buff *frag_iter; + struct scatterlist sg; + unsigned int i; + + sg_init_table(&sg, 1); + + sg_set_buf(&sg, ((u8 *)tp) + header_len, head_data_len); + ahash_request_set_crypt(req, &sg, NULL, head_data_len); + if (crypto_ahash_update(req)) + return 1; + + for (i = 0; i < shi->nr_frags; ++i) { + const skb_frag_t *f = &shi->frags[i]; + unsigned int offset = skb_frag_off(f); + struct page *page; + + page = skb_frag_page(f) + (offset >> PAGE_SHIFT); + sg_set_page(&sg, page, skb_frag_size(f), offset_in_page(offset)); + ahash_request_set_crypt(req, &sg, NULL, skb_frag_size(f)); + if (crypto_ahash_update(req)) + return 1; + } + + skb_walk_frags(skb, frag_iter) + if (tcp_sigpool_hash_skb_data(hp, frag_iter, 0)) + return 1; + + return 0; +} +EXPORT_SYMBOL(tcp_sigpool_hash_skb_data); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Per-CPU pool of crypto requests"); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 984ab4a042..1f9f6c1c19 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -26,14 +26,18 @@ static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); - u32 elapsed, start_ts, user_timeout; + const struct tcp_sock *tp = tcp_sk(sk); + u32 elapsed, user_timeout; s32 remaining; - start_ts = tcp_sk(sk)->retrans_stamp; user_timeout = READ_ONCE(icsk->icsk_user_timeout); if (!user_timeout) return icsk->icsk_rto; - elapsed = tcp_time_stamp(tcp_sk(sk)) - start_ts; + + elapsed = tcp_time_stamp_ts(tp) - tp->retrans_stamp; + if (tp->tcp_usec_ts) + elapsed /= USEC_PER_MSEC; + remaining = user_timeout - elapsed; if (remaining <= 0) return 1; /* user timeout has passed; fire ASAP */ @@ -212,12 +216,13 @@ static bool retransmits_timed_out(struct sock *sk, unsigned int boundary, unsigned int timeout) { - unsigned int start_ts; + struct tcp_sock *tp = tcp_sk(sk); + unsigned int start_ts, delta; if (!inet_csk(sk)->icsk_retransmits) return false; - start_ts = tcp_sk(sk)->retrans_stamp; + start_ts = tp->retrans_stamp; if (likely(timeout == 0)) { unsigned int rto_base = TCP_RTO_MIN; @@ -226,7 +231,12 @@ static bool retransmits_timed_out(struct sock *sk, timeout = tcp_model_timeout(sk, boundary, rto_base); } - return (s32)(tcp_time_stamp(tcp_sk(sk)) - start_ts - timeout) >= 0; + if (tp->tcp_usec_ts) { + /* delta may be off by up to a jiffy due to timer granularity. */ + delta = tp->tcp_mstamp - start_ts + jiffies_to_usecs(1); + return (s32)(delta - timeout * USEC_PER_MSEC) >= 0; + } + return (s32)(tcp_time_stamp_ts(tp) - start_ts - timeout) >= 0; } /* A write timeout has occurred. Process the after effects. */ @@ -322,7 +332,7 @@ void tcp_delack_timer_handler(struct sock *sk) if (inet_csk_ack_scheduled(sk)) { if (!inet_csk_in_pingpong_mode(sk)) { /* Delayed ACK missed: inflate ATO. */ - icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1, icsk->icsk_rto); + icsk->icsk_ack.ato = min_t(u32, icsk->icsk_ack.ato << 1, icsk->icsk_rto); } else { /* Delayed ACK missed: leave pingpong mode and * deflate ATO.
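For readers unfamiliar with the tcp_sigpool API added in the net/ipv4/tcp_sigpool.c hunks above, the expected calling sequence is: allocate once, wrap each use in start/end, release when done. The sketch below is illustrative only; the "hmac(sha1)" algorithm name, the zero scratch size and the helper itself are assumptions, not part of the patch:

	/* Hypothetical caller of the tcp_sigpool API shown above. */
	#include <crypto/hash.h>
	#include <linux/scatterlist.h>
	#include <net/tcp.h>

	static int example_sign(const u8 *data, unsigned int len, u8 *out)
	{
		struct tcp_sigpool hp;
		struct scatterlist sg;
		int id, err;

		id = tcp_sigpool_alloc_ahash("hmac(sha1)", 0);
		if (id < 0)
			return id;

		err = tcp_sigpool_start(id, &hp);	/* takes rcu_read_lock_bh() */
		if (err)
			goto out_release;

		sg_init_one(&sg, data, len);
		ahash_request_set_crypt(hp.req, &sg, out, len);
		err = crypto_ahash_digest(hp.req);

		tcp_sigpool_end(&hp);			/* drops rcu_read_lock_bh() */
	out_release:
		tcp_sigpool_release(id);		/* kref_put(); may schedule cleanup */
		return err;
	}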
@@ -394,7 +404,7 @@ static void tcp_probe_timer(struct sock *sk) if (user_timeout && (s32)(tcp_jiffies32 - icsk->icsk_probes_tstamp) >= msecs_to_jiffies(user_timeout)) - goto abort; + goto abort; } max_probes = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_retries2); if (sock_flag(sk, SOCK_DEAD)) { @@ -415,6 +425,19 @@ abort: tcp_write_err(sk); } } +static void tcp_update_rto_stats(struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + struct tcp_sock *tp = tcp_sk(sk); + + if (!icsk->icsk_retransmits) { + tp->total_rto_recoveries++; + tp->rto_stamp = tcp_time_stamp_ms(tp); + } + icsk->icsk_retransmits++; + tp->total_rto++; +} + /* * Timer for Fast Open socket to retransmit SYNACK. Note that the * sk here is the child socket, not the parent (listener) socket. @@ -447,28 +470,26 @@ static void tcp_fastopen_synack_timer(struct sock *sk, struct request_sock *req) */ inet_rtx_syn_ack(sk, req); req->num_timeout++; - icsk->icsk_retransmits++; + tcp_update_rto_stats(sk); if (!tp->retrans_stamp) - tp->retrans_stamp = tcp_time_stamp(tp); + tp->retrans_stamp = tcp_time_stamp_ts(tp); inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, req->timeout << req->num_timeout, TCP_RTO_MAX); } static bool tcp_rtx_probe0_timed_out(const struct sock *sk, - const struct sk_buff *skb) + const struct sk_buff *skb, + u32 rtx_delta) { const struct tcp_sock *tp = tcp_sk(sk); const int timeout = TCP_RTO_MAX * 2; - u32 rcv_delta, rtx_delta; + u32 rcv_delta; rcv_delta = inet_csk(sk)->icsk_timeout - tp->rcv_tstamp; if (rcv_delta <= timeout) return false; - rtx_delta = (u32)msecs_to_jiffies(tcp_time_stamp(tp) - - (tp->retrans_stamp ?: tcp_skb_timestamp(skb))); - - return rtx_delta > timeout; + return msecs_to_jiffies(rtx_delta) > timeout; } /** @@ -521,7 +542,11 @@ void tcp_retransmit_timer(struct sock *sk) struct inet_sock *inet = inet_sk(sk); u32 rtx_delta; - rtx_delta = tcp_time_stamp(tp) - (tp->retrans_stamp ?: tcp_skb_timestamp(skb)); + rtx_delta = tcp_time_stamp_ts(tp) - (tp->retrans_stamp ?: + tcp_skb_timestamp_ts(tp->tcp_usec_ts, skb)); + if (tp->tcp_usec_ts) + rtx_delta /= USEC_PER_MSEC; + if (sk->sk_family == AF_INET) { net_dbg_ratelimited("Probing zero-window on %pI4:%u/%u, seq=%u:%u, recv %ums ago, lasting %ums\n", &inet->inet_daddr, ntohs(inet->inet_dport), @@ -538,7 +563,7 @@ void tcp_retransmit_timer(struct sock *sk) rtx_delta); } #endif - if (tcp_rtx_probe0_timed_out(sk, skb)) { + if (tcp_rtx_probe0_timed_out(sk, skb, rtx_delta)) { tcp_write_err(sk); goto out; } @@ -575,7 +600,7 @@ void tcp_retransmit_timer(struct sock *sk) tcp_enter_loss(sk); - icsk->icsk_retransmits++; + tcp_update_rto_stats(sk); if (tcp_retransmit_skb(sk, tcp_rtx_queue_head(sk), 1) > 0) { /* Retransmission failed because of local congestion, * Let senders fight for local resources conservatively. 
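The tcp_timer.c changes above all stem from one series-wide switch: when tp->tcp_usec_ts is set, TCP timestamps such as retrans_stamp are kept in microseconds, so elapsed time must be scaled back to milliseconds before being compared against millisecond deadlines like TCP_USER_TIMEOUT. A standalone sketch of that arithmetic, with illustrative names and plain userspace types:

	#include <stdint.h>

	#define USEC_PER_MSEC	1000u

	static uint32_t clamp_rto_to_user_timeout(uint32_t now, uint32_t retrans_stamp,
						  uint32_t user_timeout_ms,
						  uint32_t rto, int usec_ts)
	{
		uint32_t elapsed = now - retrans_stamp;	/* u32 subtraction is wrap-safe */
		int32_t remaining;

		if (!user_timeout_ms)
			return rto;
		if (usec_ts)
			elapsed /= USEC_PER_MSEC;	/* usec -> msec, as in the hunk above */
		remaining = (int32_t)(user_timeout_ms - elapsed);
		if (remaining <= 0)
			return 1;			/* deadline passed: fire ASAP */
		return rto < (uint32_t)remaining ? rto : (uint32_t)remaining;
	}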
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 7be4ddc80d..e474b20190 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -750,7 +750,7 @@ int __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) case ICMP_DEST_UNREACH: if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */ ipv4_sk_update_pmtu(skb, sk, info); - if (inet->pmtudisc != IP_PMTUDISC_DONT) { + if (READ_ONCE(inet->pmtudisc) != IP_PMTUDISC_DONT) { err = EMSGSIZE; harderr = 1; break; @@ -1055,6 +1055,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); struct sk_buff *skb; struct ip_options_data opt_copy; + int uc_index; if (len > 0xFFFF) return -EMSGSIZE; @@ -1175,25 +1176,26 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (scope == RT_SCOPE_LINK) connected = 0; + uc_index = READ_ONCE(inet->uc_index); if (ipv4_is_multicast(daddr)) { if (!ipc.oif || netif_index_is_l3_master(sock_net(sk), ipc.oif)) - ipc.oif = inet->mc_index; + ipc.oif = READ_ONCE(inet->mc_index); if (!saddr) - saddr = inet->mc_addr; + saddr = READ_ONCE(inet->mc_addr); connected = 0; } else if (!ipc.oif) { - ipc.oif = inet->uc_index; - } else if (ipv4_is_lbcast(daddr) && inet->uc_index) { + ipc.oif = uc_index; + } else if (ipv4_is_lbcast(daddr) && uc_index) { /* oif is set, packet is to local broadcast and * uc_index is set. oif is most likely set * by sk_bound_dev_if. If uc_index != oif check if the * oif is an L3 master and uc_index is an L3 slave. * If so, we want to allow the send using the uc_index. */ - if (ipc.oif != inet->uc_index && + if (ipc.oif != uc_index && ipc.oif == l3mdev_master_ifindex_by_index(sock_net(sk), - inet->uc_index)) { - ipc.oif = inet->uc_index; + uc_index)) { + ipc.oif = uc_index; } } @@ -1587,12 +1589,7 @@ int udp_init_sock(struct sock *sk) void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len) { - if (unlikely(READ_ONCE(sk->sk_peek_off) >= 0)) { - bool slow = lock_sock_fast(sk); - - sk_peek_offset_bwd(sk, len); - unlock_sock_fast(sk, slow); - } + sk_peek_offset_bwd(sk, len); if (!skb_unref(skb)) return; @@ -2167,7 +2164,7 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) udp_csum_pull_header(skb); - ipv4_pktinfo_prepare(sk, skb); + ipv4_pktinfo_prepare(sk, skb, true); return __udp_queue_rcv_skb(sk, skb); csum_error: @@ -2628,6 +2625,19 @@ void udp_destroy_sock(struct sock *sk) } } +static void set_xfrm_gro_udp_encap_rcv(__u16 encap_type, unsigned short family, + struct sock *sk) +{ +#ifdef CONFIG_XFRM + if (udp_test_bit(GRO_ENABLED, sk) && encap_type == UDP_ENCAP_ESPINUDP) { + if (family == AF_INET) + WRITE_ONCE(udp_sk(sk)->gro_receive, xfrm4_gro_udp_encap_rcv); + else if (IS_ENABLED(CONFIG_IPV6) && family == AF_INET6) + WRITE_ONCE(udp_sk(sk)->gro_receive, ipv6_stub->xfrm6_gro_udp_encap_rcv); + } +#endif +} + /* * Socket option code for UDP */ @@ -2677,6 +2687,8 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, case 0: #ifdef CONFIG_XFRM case UDP_ENCAP_ESPINUDP: + set_xfrm_gro_udp_encap_rcv(val, sk->sk_family, sk); + fallthrough; case UDP_ENCAP_ESPINUDP_NON_IKE: #if IS_ENABLED(CONFIG_IPV6) if (sk->sk_family == AF_INET6) @@ -2719,6 +2731,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, udp_tunnel_encap_enable(sk); udp_assign_bit(GRO_ENABLED, sk, valbool); udp_assign_bit(ACCEPT_L4, sk, valbool); + set_xfrm_gro_udp_encap_rcv(up->encap_type, sk->sk_family, sk); break; /* diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c index 
de3f2d31f5..dc41a22ee8 100644 --- a/net/ipv4/udp_diag.c +++ b/net/ipv4/udp_diag.c @@ -296,5 +296,6 @@ static void __exit udp_diag_exit(void) module_init(udp_diag_init); module_exit(udp_diag_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("UDP socket monitoring via SOCK_DIAG"); MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-17 /* AF_INET - IPPROTO_UDP */); MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-136 /* AF_INET - IPPROTO_UDPLITE */); diff --git a/net/ipv4/udp_tunnel_core.c b/net/ipv4/udp_tunnel_core.c index 1e7e4aecdc..a87defb2b1 100644 --- a/net/ipv4/udp_tunnel_core.c +++ b/net/ipv4/udp_tunnel_core.c @@ -204,4 +204,53 @@ struct metadata_dst *udp_tun_rx_dst(struct sk_buff *skb, unsigned short family, } EXPORT_SYMBOL_GPL(udp_tun_rx_dst); +struct rtable *udp_tunnel_dst_lookup(struct sk_buff *skb, + struct net_device *dev, + struct net *net, int oif, + __be32 *saddr, + const struct ip_tunnel_key *key, + __be16 sport, __be16 dport, u8 tos, + struct dst_cache *dst_cache) +{ + struct rtable *rt = NULL; + struct flowi4 fl4; + +#ifdef CONFIG_DST_CACHE + if (dst_cache) { + rt = dst_cache_get_ip4(dst_cache, saddr); + if (rt) + return rt; + } +#endif + + memset(&fl4, 0, sizeof(fl4)); + fl4.flowi4_mark = skb->mark; + fl4.flowi4_proto = IPPROTO_UDP; + fl4.flowi4_oif = oif; + fl4.daddr = key->u.ipv4.dst; + fl4.saddr = key->u.ipv4.src; + fl4.fl4_dport = dport; + fl4.fl4_sport = sport; + fl4.flowi4_tos = RT_TOS(tos); + fl4.flowi4_flags = key->flow_flags; + + rt = ip_route_output_key(net, &fl4); + if (IS_ERR(rt)) { + netdev_dbg(dev, "no route to %pI4\n", &fl4.daddr); + return ERR_PTR(-ENETUNREACH); + } + if (rt->dst.dev == dev) { /* is this necessary? */ + netdev_dbg(dev, "circular route to %pI4\n", &fl4.daddr); + ip_rt_put(rt); + return ERR_PTR(-ELOOP); + } +#ifdef CONFIG_DST_CACHE + if (dst_cache) + dst_cache_set_ip4(dst_cache, &rt->dst, fl4.saddr); +#endif + *saddr = fl4.saddr; + return rt; +} +EXPORT_SYMBOL_GPL(udp_tunnel_dst_lookup); + MODULE_LICENSE("GPL"); diff --git a/net/ipv4/udp_tunnel_nic.c b/net/ipv4/udp_tunnel_nic.c index 0292197497..b6d2d16189 100644 --- a/net/ipv4/udp_tunnel_nic.c +++ b/net/ipv4/udp_tunnel_nic.c @@ -47,7 +47,7 @@ struct udp_tunnel_nic { unsigned int n_tables; unsigned long missed; - struct udp_tunnel_nic_table_entry **entries; + struct udp_tunnel_nic_table_entry *entries[] __counted_by(n_tables); }; /* We ensure all work structs are done using driver state, but not the code. 
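The udp_tunnel_nic hunk above replaces the out-of-line entries pointer array with a trailing flexible array annotated __counted_by(n_tables); the next hunk reworks the allocator to match. A generic sketch of this pattern follows (the type and names are illustrative, not from the patch):

	#include <linux/overflow.h>
	#include <linux/slab.h>

	struct ptr_table {
		unsigned int n_tables;
		void *entries[] __counted_by(n_tables);
	};

	static struct ptr_table *ptr_table_alloc(unsigned int n_tables)
	{
		struct ptr_table *t;

		/* One allocation sized for header plus array, overflow-checked. */
		t = kzalloc(struct_size(t, entries, n_tables), GFP_KERNEL);
		if (!t)
			return NULL;
		/* Assign the counter before indexing entries[]: FORTIFY_SOURCE
		 * and UBSAN_BOUNDS use it as the array bound.
		 */
		t->n_tables = n_tables;
		return t;
	}

Besides removing one allocation and one error path, the annotation lets the compiler bounds-check every access to entries[].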
@@ -725,16 +725,12 @@ udp_tunnel_nic_alloc(const struct udp_tunnel_nic_info *info, struct udp_tunnel_nic *utn; unsigned int i; - utn = kzalloc(sizeof(*utn), GFP_KERNEL); + utn = kzalloc(struct_size(utn, entries, n_tables), GFP_KERNEL); if (!utn) return NULL; utn->n_tables = n_tables; INIT_WORK(&utn->work, udp_tunnel_nic_device_sync_work); - utn->entries = kmalloc_array(n_tables, sizeof(void *), GFP_KERNEL); - if (!utn->entries) - goto err_free_utn; - for (i = 0; i < n_tables; i++) { utn->entries[i] = kcalloc(info->tables[i].n_entries, sizeof(*utn->entries[i]), GFP_KERNEL); @@ -747,8 +743,6 @@ udp_tunnel_nic_alloc(const struct udp_tunnel_nic_info *info, err_free_prev_entries: while (i--) kfree(utn->entries[i]); - kfree(utn->entries); -err_free_utn: kfree(utn); return NULL; } @@ -759,7 +753,6 @@ static void udp_tunnel_nic_free(struct udp_tunnel_nic *utn) for (i = 0; i < utn->n_tables; i++) kfree(utn->entries[i]); - kfree(utn->entries); kfree(utn); } diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index 183f6dc372..c54676998e 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -17,6 +17,8 @@ #include <linux/netfilter_ipv4.h> #include <net/ip.h> #include <net/xfrm.h> +#include <net/protocol.h> +#include <net/gro.h> static int xfrm4_rcv_encap_finish2(struct net *net, struct sock *sk, struct sk_buff *skb) @@ -72,14 +74,7 @@ int xfrm4_transport_finish(struct sk_buff *skb, int async) return 0; } -/* If it's a keepalive packet, then just eat it. - * If it's an encapsulated packet, then pass it to the - * IPsec xfrm input. - * Returns 0 if skb passed to xfrm or was dropped. - * Returns >0 if skb should be passed to UDP. - * Returns <0 if skb should be resubmitted (-ret is protocol) - */ -int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) +static int __xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb, bool pull) { struct udp_sock *up = udp_sk(sk); struct udphdr *uh; @@ -110,7 +105,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) case UDP_ENCAP_ESPINUDP: /* Check if this is a keepalive packet. If so, eat it. */ if (len == 1 && udpdata[0] == 0xff) { - goto drop; + return -EINVAL; } else if (len > sizeof(struct ip_esp_hdr) && udpdata32[0] != 0) { /* ESP Packet without Non-ESP header */ len = sizeof(struct udphdr); @@ -121,7 +116,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) case UDP_ENCAP_ESPINUDP_NON_IKE: /* Check if this is a keepalive packet. If so, eat it. */ if (len == 1 && udpdata[0] == 0xff) { - goto drop; + return -EINVAL; } else if (len > 2 * sizeof(u32) + sizeof(struct ip_esp_hdr) && udpdata32[0] == 0 && udpdata32[1] == 0) { @@ -139,7 +134,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) * protocol to ESP, and then call into the transform receiver. */ if (skb_unclone(skb, GFP_ATOMIC)) - goto drop; + return -EINVAL; /* Now we can update and verify the packet length... */ iph = ip_hdr(skb); @@ -147,25 +142,89 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) iph->tot_len = htons(ntohs(iph->tot_len) - len); if (skb->len < iphlen + len) { /* packet is too small!?! */ - goto drop; + return -EINVAL; } /* pull the data buffer up to the ESP header and set the * transport header to point to ESP. Keep UDP on the stack * for later. 
*/ - __skb_pull(skb, len); - skb_reset_transport_header(skb); + if (pull) { + __skb_pull(skb, len); + skb_reset_transport_header(skb); + } else { + skb_set_transport_header(skb, len); + } /* process ESP */ - return xfrm4_rcv_encap(skb, IPPROTO_ESP, 0, encap_type); - -drop: - kfree_skb(skb); return 0; } + +/* If it's a keepalive packet, then just eat it. + * If it's an encapsulated packet, then pass it to the + * IPsec xfrm input. + * Returns 0 if skb passed to xfrm or was dropped. + * Returns >0 if skb should be passed to UDP. + * Returns <0 if skb should be resubmitted (-ret is protocol) + */ +int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) +{ + int ret; + + ret = __xfrm4_udp_encap_rcv(sk, skb, true); + if (!ret) + return xfrm4_rcv_encap(skb, IPPROTO_ESP, 0, + udp_sk(sk)->encap_type); + + if (ret < 0) { + kfree_skb(skb); + return 0; + } + + return ret; +} EXPORT_SYMBOL(xfrm4_udp_encap_rcv); +struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head, + struct sk_buff *skb) +{ + int offset = skb_gro_offset(skb); + const struct net_offload *ops; + struct sk_buff *pp = NULL; + int ret; + + offset = offset - sizeof(struct udphdr); + + if (!pskb_pull(skb, offset)) + return NULL; + + rcu_read_lock(); + ops = rcu_dereference(inet_offloads[IPPROTO_ESP]); + if (!ops || !ops->callbacks.gro_receive) + goto out; + + ret = __xfrm4_udp_encap_rcv(sk, skb, false); + if (ret) + goto out; + + skb_push(skb, offset); + NAPI_GRO_CB(skb)->proto = IPPROTO_UDP; + + pp = call_gro_receive(ops->callbacks.gro_receive, head, skb); + rcu_read_unlock(); + + return pp; + +out: + rcu_read_unlock(); + skb_push(skb, offset); + NAPI_GRO_CB(skb)->same_flow = 0; + NAPI_GRO_CB(skb)->flush = 1; + + return NULL; +} +EXPORT_SYMBOL(xfrm4_gro_udp_encap_rcv); + int xfrm4_rcv(struct sk_buff *skb) { return xfrm4_rcv_spi(skb, ip_hdr(skb)->protocol, 0); diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 3036a45e8a..d283c59df4 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -52,4 +52,5 @@ obj-$(subst m,y,$(CONFIG_IPV6)) += inet6_hashtables.o ifneq ($(CONFIG_IPV6),) obj-$(CONFIG_NET_UDP_TUNNEL) += ip6_udp_tunnel.o obj-y += mcast_snoop.o +obj-$(CONFIG_TCP_AO) += tcp_ao.o endif diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index b007d098ff..5a839c5fb1 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -236,6 +236,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .ioam6_id = IOAM6_DEFAULT_IF_ID, .ioam6_id_wide = IOAM6_DEFAULT_IF_ID_WIDE, .ndisc_evict_nocarrier = 1, + .ra_honor_pio_life = 0, }; static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { @@ -297,6 +298,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .ioam6_id = IOAM6_DEFAULT_IF_ID, .ioam6_id_wide = IOAM6_DEFAULT_IF_ID_WIDE, .ndisc_evict_nocarrier = 1, + .ra_honor_pio_life = 0, }; /* Check if link is ready: is it up and is a valid qdisc available */ @@ -706,6 +708,22 @@ errout: return err; } +/* Combine dev_addr_genid and dev_base_seq to detect changes. + */ +static u32 inet6_base_seq(const struct net *net) +{ + u32 res = atomic_read(&net->ipv6.dev_addr_genid) + + net->dev_base_seq; + + /* Must not return 0 (see nl_dump_check_consistent()). + * Choose a value far away from 0.
+ */ + if (!res) + res = 0x80000000; + return res; +} + + static int inet6_netconf_dump_devconf(struct sk_buff *skb, struct netlink_callback *cb) { @@ -739,8 +757,7 @@ static int inet6_netconf_dump_devconf(struct sk_buff *skb, idx = 0; head = &net->dev_index_head[h]; rcu_read_lock(); - cb->seq = atomic_read(&net->ipv6.dev_addr_genid) ^ - net->dev_base_seq; + cb->seq = inet6_base_seq(net); hlist_for_each_entry_rcu(dev, head, index_hlist) { if (idx < s_idx) goto cont; @@ -1397,6 +1414,7 @@ retry: idev->cnf.temp_valid_lft + age); cfg.preferred_lft = cnf_temp_preferred_lft + age - idev->desync_factor; cfg.preferred_lft = min_t(__u32, ifp->prefered_lft, cfg.preferred_lft); + cfg.preferred_lft = min_t(__u32, cfg.valid_lft, cfg.preferred_lft); cfg.plen = ifp->prefix_len; tmp_tstamp = ifp->tstamp; @@ -2657,22 +2675,23 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev, stored_lft = ifp->valid_lft - (now - ifp->tstamp) / HZ; else stored_lft = 0; - if (!create && stored_lft) { + + /* RFC4862 Section 5.5.3e: + * "Note that the preferred lifetime of the + * corresponding address is always reset to + * the Preferred Lifetime in the received + * Prefix Information option, regardless of + * whether the valid lifetime is also reset or + * ignored." + * + * So we should always update prefered_lft here. + */ + update_lft = !create && stored_lft; + + if (update_lft && !in6_dev->cnf.ra_honor_pio_life) { const u32 minimum_lft = min_t(u32, stored_lft, MIN_VALID_LIFETIME); valid_lft = max(valid_lft, minimum_lft); - - /* RFC4862 Section 5.5.3e: - * "Note that the preferred lifetime of the - * corresponding address is always reset to - * the Preferred Lifetime in the received - * Prefix Information option, regardless of - * whether the valid lifetime is also reset or - * ignored." - * - * So we should always update prefered_lft here. 
- */ - update_lft = 1; } if (update_lft) { @@ -5358,7 +5377,7 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, } rcu_read_lock(); - cb->seq = atomic_read(&tgt_net->ipv6.dev_addr_genid) ^ tgt_net->dev_base_seq; + cb->seq = inet6_base_seq(tgt_net); for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { idx = 0; head = &tgt_net->dev_index_head[h]; @@ -6842,6 +6861,15 @@ static const struct ctl_table addrconf_sysctl[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "ra_honor_pio_life", + .data = &ipv6_devconf.ra_honor_pio_life, + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, #ifdef CONFIG_IPV6_ROUTER_PREF { .procname = "accept_ra_rtr_pref", diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c index 507a8353a6..c008d21925 100644 --- a/net/ipv6/addrconf_core.c +++ b/net/ipv6/addrconf_core.c @@ -220,19 +220,26 @@ const struct ipv6_stub *ipv6_stub __read_mostly = &(struct ipv6_stub) { EXPORT_SYMBOL_GPL(ipv6_stub); /* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */ -const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT; +const struct in6_addr in6addr_loopback __aligned(BITS_PER_LONG/8) + = IN6ADDR_LOOPBACK_INIT; EXPORT_SYMBOL(in6addr_loopback); -const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT; +const struct in6_addr in6addr_any __aligned(BITS_PER_LONG/8) + = IN6ADDR_ANY_INIT; EXPORT_SYMBOL(in6addr_any); -const struct in6_addr in6addr_linklocal_allnodes = IN6ADDR_LINKLOCAL_ALLNODES_INIT; +const struct in6_addr in6addr_linklocal_allnodes __aligned(BITS_PER_LONG/8) + = IN6ADDR_LINKLOCAL_ALLNODES_INIT; EXPORT_SYMBOL(in6addr_linklocal_allnodes); -const struct in6_addr in6addr_linklocal_allrouters = IN6ADDR_LINKLOCAL_ALLROUTERS_INIT; +const struct in6_addr in6addr_linklocal_allrouters __aligned(BITS_PER_LONG/8) + = IN6ADDR_LINKLOCAL_ALLROUTERS_INIT; EXPORT_SYMBOL(in6addr_linklocal_allrouters); -const struct in6_addr in6addr_interfacelocal_allnodes = IN6ADDR_INTERFACELOCAL_ALLNODES_INIT; +const struct in6_addr in6addr_interfacelocal_allnodes __aligned(BITS_PER_LONG/8) + = IN6ADDR_INTERFACELOCAL_ALLNODES_INIT; EXPORT_SYMBOL(in6addr_interfacelocal_allnodes); -const struct in6_addr in6addr_interfacelocal_allrouters = IN6ADDR_INTERFACELOCAL_ALLROUTERS_INIT; +const struct in6_addr in6addr_interfacelocal_allrouters __aligned(BITS_PER_LONG/8) + = IN6ADDR_INTERFACELOCAL_ALLROUTERS_INIT; EXPORT_SYMBOL(in6addr_interfacelocal_allrouters); -const struct in6_addr in6addr_sitelocal_allrouters = IN6ADDR_SITELOCAL_ALLROUTERS_INIT; +const struct in6_addr in6addr_sitelocal_allrouters __aligned(BITS_PER_LONG/8) + = IN6ADDR_SITELOCAL_ALLROUTERS_INIT; EXPORT_SYMBOL(in6addr_sitelocal_allrouters); static void snmp6_free_dev(struct inet6_dev *idev) diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 4375bfa4f6..959bfd9f63 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -220,10 +220,11 @@ lookup_protocol: inet_sk(sk)->pinet6 = np = inet6_sk_generic(sk); np->hop_limit = -1; np->mcast_hops = IPV6_DEFAULT_MCASTHOPS; - np->mc_loop = 1; - np->mc_all = 1; + inet6_set_bit(MC6_LOOP, sk); + inet6_set_bit(MC6_ALL, sk); np->pmtudisc = IPV6_PMTUDISC_WANT; - np->repflow = net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_ESTABLISHED; + inet6_assign_bit(REPFLOW, sk, net->ipv6.sysctl.flowlabel_reflect & + FLOWLABEL_REFLECT_ESTABLISHED); sk->sk_ipv6only = net->ipv6.sysctl.bindv6only; sk->sk_txrehash = READ_ONCE(net->core.sysctl_txrehash); 
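The af_inet6.c hunk above is one instance of a conversion that recurs through the rest of this section: byte-sized ipv6_pinfo fields (mc_loop, mc_all, repflow, sndflow, recverr, ...) become atomic flag bits accessed through inet6_set_bit()/inet6_clear_bit()/inet6_test_bit()/inet6_assign_bit(). A simplified, hypothetical rendering of what such helpers boil down to:

	#include <linux/bitops.h>
	#include <linux/types.h>

	static inline void inet6_assign_bit_sketch(unsigned long *flags, int nr,
						   bool val)
	{
		if (val)
			set_bit(nr, flags);	/* atomic set */
		else
			clear_bit(nr, flags);	/* atomic clear */
	}

	static inline bool inet6_test_bit_sketch(const unsigned long *flags, int nr)
	{
		return test_bit(nr, flags);	/* safe lockless read */
	}

Because reads and writes are atomic, later hunks in ipv6_sockglue.c can move options such as IPV6_MULTICAST_LOOP and IPV6_RECVERR out of the lock_sock() section of do_ipv6_setsockopt().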
@@ -540,7 +541,7 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr, } sin->sin6_port = inet->inet_dport; sin->sin6_addr = sk->sk_v6_daddr; - if (np->sndflow) + if (inet6_test_bit(SNDFLOW, sk)) sin->sin6_flowinfo = np->flow_label; BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, &sin_addr_len, CGROUP_INET6_GETPEERNAME); @@ -1052,6 +1053,7 @@ static const struct ipv6_stub ipv6_stub_impl = { #if IS_ENABLED(CONFIG_XFRM) .xfrm6_local_rxpmtu = xfrm6_local_rxpmtu, .xfrm6_udp_encap_rcv = xfrm6_udp_encap_rcv, + .xfrm6_gro_udp_encap_rcv = xfrm6_gro_udp_encap_rcv, .xfrm6_rcv_encap = xfrm6_rcv_encap, #endif .nd_tbl = &nd_tbl, @@ -1064,6 +1066,7 @@ static const struct ipv6_bpf_stub ipv6_bpf_stub_impl = { .udp6_lib_lookup = __udp6_lib_lookup, .ipv6_setsockopt = do_ipv6_setsockopt, .ipv6_getsockopt = do_ipv6_getsockopt, + .ipv6_dev_get_saddr = ipv6_dev_get_saddr, }; static int __init inet6_init(void) diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 01005035ad..2016e90e6e 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -13,8 +13,8 @@ #define pr_fmt(fmt) "IPv6: " fmt -#include <crypto/algapi.h> #include <crypto/hash.h> +#include <crypto/utils.h> #include <linux/module.h> #include <linux/slab.h> #include <net/ip.h> @@ -51,9 +51,7 @@ static void *ah_alloc_tmp(struct crypto_ahash *ahash, int nfrags, { unsigned int len; - len = size + crypto_ahash_digestsize(ahash) + - (crypto_ahash_alignmask(ahash) & - ~(crypto_tfm_ctx_alignment() - 1)); + len = size + crypto_ahash_digestsize(ahash); len = ALIGN(len, crypto_tfm_ctx_alignment()); @@ -75,10 +73,9 @@ static inline u8 *ah_tmp_auth(u8 *tmp, unsigned int offset) return tmp + offset; } -static inline u8 *ah_tmp_icv(struct crypto_ahash *ahash, void *tmp, - unsigned int offset) +static inline u8 *ah_tmp_icv(void *tmp, unsigned int offset) { - return PTR_ALIGN((u8 *)tmp + offset, crypto_ahash_alignmask(ahash) + 1); + return tmp + offset; } static inline struct ahash_request *ah_tmp_req(struct crypto_ahash *ahash, @@ -299,7 +296,7 @@ static void ah6_output_done(void *data, int err) iph_base = AH_SKB_CB(skb)->tmp; iph_ext = ah_tmp_ext(iph_base); - icv = ah_tmp_icv(ahp->ahash, iph_ext, extlen); + icv = ah_tmp_icv(iph_ext, extlen); memcpy(ah->auth_data, icv, ahp->icv_trunc_len); memcpy(top_iph, iph_base, IPV6HDR_BASELEN); @@ -362,7 +359,7 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb) iph_ext = ah_tmp_ext(iph_base); seqhi = (__be32 *)((char *)iph_ext + extlen); - icv = ah_tmp_icv(ahash, seqhi, seqhi_len); + icv = ah_tmp_icv(seqhi, seqhi_len); req = ah_tmp_req(ahash, icv); sg = ah_req_sg(ahash, req); seqhisg = sg + nfrags; @@ -468,7 +465,7 @@ static void ah6_input_done(void *data, int err) work_iph = AH_SKB_CB(skb)->tmp; auth_data = ah_tmp_auth(work_iph, hdr_len); - icv = ah_tmp_icv(ahp->ahash, auth_data, ahp->icv_trunc_len); + icv = ah_tmp_icv(auth_data, ahp->icv_trunc_len); err = crypto_memneq(icv, auth_data, ahp->icv_trunc_len) ? 
-EBADMSG : 0; if (err) @@ -576,7 +573,7 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) auth_data = ah_tmp_auth((u8 *)work_iph, hdr_len); seqhi = (__be32 *)(auth_data + ahp->icv_trunc_len); - icv = ah_tmp_icv(ahash, seqhi, seqhi_len); + icv = ah_tmp_icv(seqhi, seqhi_len); req = ah_tmp_req(ahash, icv); sg = ah_req_sg(ahash, req); seqhisg = sg + nfrags; diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 41ebc4e574..cc6a502db3 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -80,7 +80,8 @@ int ip6_datagram_dst_update(struct sock *sk, bool fix_sk_saddr) struct flowi6 fl6; int err = 0; - if (np->sndflow && (np->flow_label & IPV6_FLOWLABEL_MASK)) { + if (inet6_test_bit(SNDFLOW, sk) && + (np->flow_label & IPV6_FLOWLABEL_MASK)) { flowlabel = fl6_sock_lookup(sk, np->flow_label); if (IS_ERR(flowlabel)) return -EINVAL; @@ -163,7 +164,7 @@ int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, if (usin->sin6_family != AF_INET6) return -EAFNOSUPPORT; - if (np->sndflow) + if (inet6_test_bit(SNDFLOW, sk)) fl6_flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK; if (ipv6_addr_any(&usin->sin6_addr)) { @@ -305,11 +306,10 @@ static void ipv6_icmp_error_rfc4884(const struct sk_buff *skb, void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err, __be16 port, u32 info, u8 *payload) { - struct ipv6_pinfo *np = inet6_sk(sk); struct icmp6hdr *icmph = icmp6_hdr(skb); struct sock_exterr_skb *serr; - if (!np->recverr) + if (!inet6_test_bit(RECVERR6, sk)) return; skb = skb_clone(skb, GFP_ATOMIC); @@ -332,7 +332,7 @@ void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err, __skb_pull(skb, payload - skb->data); - if (inet6_sk(sk)->recverr_rfc4884) + if (inet6_test_bit(RECVERR6_RFC4884, sk)) ipv6_icmp_error_rfc4884(skb, &serr->ee.ee_rfc4884); skb_reset_transport_header(skb); @@ -344,12 +344,11 @@ EXPORT_SYMBOL_GPL(ipv6_icmp_error); void ipv6_local_error(struct sock *sk, int err, struct flowi6 *fl6, u32 info) { - const struct ipv6_pinfo *np = inet6_sk(sk); struct sock_exterr_skb *serr; struct ipv6hdr *iph; struct sk_buff *skb; - if (!np->recverr) + if (!inet6_test_bit(RECVERR6, sk)) return; skb = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC); @@ -493,7 +492,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) const struct ipv6hdr *ip6h = container_of((struct in6_addr *)(nh + serr->addr_offset), struct ipv6hdr, daddr); sin->sin6_addr = ip6h->daddr; - if (np->sndflow) + if (inet6_test_bit(SNDFLOW, sk)) sin->sin6_flowinfo = ip6_flowinfo(ip6h); sin->sin6_scope_id = ipv6_iface_scope_id(&sin->sin6_addr, diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c index a189e08370..527b7caddb 100644 --- a/net/ipv6/esp6_offload.c +++ b/net/ipv6/esp6_offload.c @@ -34,7 +34,9 @@ static __u16 esp6_nexthdr_esp_offset(struct ipv6hdr *ipv6_hdr, int nhlen) int off = sizeof(struct ipv6hdr); struct ipv6_opt_hdr *exthdr; - if (likely(ipv6_hdr->nexthdr == NEXTHDR_ESP)) + /* ESP or ESPINUDP */ + if (likely(ipv6_hdr->nexthdr == NEXTHDR_ESP || + ipv6_hdr->nexthdr == NEXTHDR_UDP)) return offsetof(struct ipv6hdr, nexthdr); while (off < nhlen) { @@ -54,10 +56,14 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head, int offset = skb_gro_offset(skb); struct xfrm_offload *xo; struct xfrm_state *x; + int encap_type = 0; __be32 seq; __be32 spi; int nhoff; + if (NAPI_GRO_CB(skb)->proto == IPPROTO_UDP) + encap_type = UDP_ENCAP_ESPINUDP; + if (!pskb_pull(skb, offset)) return NULL; @@ -104,7 +110,7 @@ static struct sk_buff 
*esp6_gro_receive(struct list_head *head, /* We don't need to handle errors from xfrm_input, it does all * the error handling and frees the resources on error. */ - xfrm_input(skb, IPPROTO_ESP, spi, -2); + xfrm_input(skb, IPPROTO_ESP, spi, encap_type); return ERR_PTR(-EINPROGRESS); out_reset: diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 4952ae7924..02e9ffb63a 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -177,6 +177,8 @@ static bool ip6_parse_tlv(bool hopbyhop, case IPV6_TLV_IOAM: if (!ipv6_hop_ioam(skb, off)) return false; + + nh = skb_network_header(skb); break; case IPV6_TLV_JUMBO: if (!ipv6_hop_jumbo(skb, off)) @@ -943,6 +945,14 @@ static bool ipv6_hop_ioam(struct sk_buff *skb, int optoff) if (!skb_valid_dst(skb)) ip6_route_input(skb); + /* About to mangle packet header */ + if (skb_ensure_writable(skb, optoff + 2 + hdr->opt_len)) + goto drop; + + /* Trace pointer may have changed */ + trace = (struct ioam6_trace_hdr *)(skb_network_header(skb) + + optoff + sizeof(*hdr)); + ioam6_fill_trace_data(skb, ns, trace, true); break; default: diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 93a594a901..f624270971 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -385,7 +385,7 @@ static struct dst_entry *icmpv6_route_lookup(struct net *net, return dst; } - err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6); + err = xfrm_decode_session_reverse(net, skb, flowi6_to_flowi(&fl2), AF_INET6); if (err) goto relookup_failed; @@ -588,7 +588,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, else if (!fl6.flowi6_oif) fl6.flowi6_oif = np->ucast_oif; - ipcm6_init_sk(&ipc6, np); + ipcm6_init_sk(&ipc6, sk); ipc6.sockc.mark = mark; fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel); @@ -791,7 +791,7 @@ static enum skb_drop_reason icmpv6_echo_reply(struct sk_buff *skb) msg.offset = 0; msg.type = type; - ipcm6_init_sk(&ipc6, np); + ipcm6_init_sk(&ipc6, sk); ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb)); ipc6.sockc.mark = mark; diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 0c50dcd35f..80043e4611 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -133,7 +133,7 @@ int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl_unused fl6.daddr = sk->sk_v6_daddr; res = ip6_xmit(sk, skb, &fl6, sk->sk_mark, rcu_dereference(np->opt), - np->tclass, sk->sk_priority); + np->tclass, READ_ONCE(sk->sk_priority)); rcu_read_unlock(); return res; } diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c index f6f5b83dd9..7563f8c6aa 100644 --- a/net/ipv6/ioam6_iptunnel.c +++ b/net/ipv6/ioam6_iptunnel.c @@ -46,7 +46,7 @@ struct ioam6_lwt { struct ioam6_lwt_encap tuninfo; }; -static struct netlink_range_validation freq_range = { +static const struct netlink_range_validation freq_range = { .min = IOAM6_IPTUNNEL_FREQ_MIN, .max = IOAM6_IPTUNNEL_FREQ_MAX, }; diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index b3ca4beb44..eca07e10e2 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -513,7 +513,7 @@ int ipv6_flowlabel_opt_get(struct sock *sk, struct in6_flowlabel_req *freq, return 0; } - if (np->repflow) { + if (inet6_test_bit(REPFLOW, sk)) { freq->flr_label = np->flow_label; return 0; } @@ -551,10 +551,10 @@ static int ipv6_flowlabel_put(struct sock *sk, struct in6_flowlabel_req *freq) if (freq->flr_flags & IPV6_FL_F_REFLECT) { if (sk->sk_protocol != IPPROTO_TCP) 
return -ENOPROTOOPT; - if (!np->repflow) + if (!inet6_test_bit(REPFLOW, sk)) return -ESRCH; np->flow_label = 0; - np->repflow = 0; + inet6_clear_bit(REPFLOW, sk); return 0; } @@ -626,7 +626,7 @@ static int ipv6_flowlabel_get(struct sock *sk, struct in6_flowlabel_req *freq, if (sk->sk_protocol != IPPROTO_TCP) return -ENOPROTOOPT; - np->repflow = 1; + inet6_set_bit(REPFLOW, sk); return 0; } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 1121082901..a722a43dd6 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -117,6 +117,8 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * return res; } + IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); + rcu_read_lock(); nexthop = rt6_nexthop((struct rt6_info *)dst, daddr); neigh = __ipv6_neigh_lookup_noref(dev, nexthop); @@ -176,6 +178,16 @@ ip6_finish_output_gso_slowpath_drop(struct net *net, struct sock *sk, return ret; } +static int ip6_finish_output_gso(struct net *net, struct sock *sk, + struct sk_buff *skb, unsigned int mtu) +{ + if (!(IP6CB(skb)->flags & IP6SKB_FAKEJUMBO) && + !skb_gso_validate_network_len(skb, mtu)) + return ip6_finish_output_gso_slowpath_drop(net, sk, skb, mtu); + + return ip6_finish_output2(net, sk, skb); +} + static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb) { unsigned int mtu; @@ -189,17 +201,14 @@ static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff #endif mtu = ip6_skb_dst_mtu(skb); - if (skb_is_gso(skb) && - !(IP6CB(skb)->flags & IP6SKB_FAKEJUMBO) && - !skb_gso_validate_network_len(skb, mtu)) - return ip6_finish_output_gso_slowpath_drop(net, sk, skb, mtu); + if (skb_is_gso(skb)) + return ip6_finish_output_gso(net, sk, skb, mtu); - if ((skb->len > mtu && !skb_is_gso(skb)) || - dst_allfrag(skb_dst(skb)) || + if (skb->len > mtu || (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size)) return ip6_fragment(net, sk, skb, ip6_finish_output2); - else - return ip6_finish_output2(net, sk, skb); + + return ip6_finish_output2(net, sk, skb); } static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb) @@ -238,12 +247,11 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb) } EXPORT_SYMBOL(ip6_output); -bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np) +bool ip6_autoflowlabel(struct net *net, const struct sock *sk) { - if (!np->autoflowlabel_set) + if (!inet6_test_bit(AUTOFLOWLABEL_SET, sk)) return ip6_default_np_autolabel(net); - else - return np->autoflowlabel; + return inet6_test_bit(AUTOFLOWLABEL, sk); } /* @@ -315,12 +323,12 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, * Fill in the IPv6 header */ if (np) - hlimit = np->hop_limit; + hlimit = READ_ONCE(np->hop_limit); if (hlimit < 0) hlimit = ip6_dst_hoplimit(dst); ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel, - ip6_autoflowlabel(net, np), fl6)); + ip6_autoflowlabel(net, sk), fl6)); hdr->payload_len = htons(seg_len); hdr->nexthdr = proto; @@ -335,7 +343,7 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, mtu = dst_mtu(dst); if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) { - IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); + IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS); /* if egress device is enslaved to an L3 master device pass the * skb to its handler for processing @@ -375,9 +383,8 @@ static int ip6_call_ra_chain(struct sk_buff *skb, int sel) if (sk && ra->sel 
== sel && (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == skb->dev->ifindex)) { - struct ipv6_pinfo *np = inet6_sk(sk); - if (np && np->rtalert_isolate && + if (inet6_test_bit(RTALERT_ISOLATE, sk) && !net_eq(sock_net(sk), dev_net(skb->dev))) { continue; } @@ -454,10 +461,6 @@ static int ip6_forward_proxy_check(struct sk_buff *skb) static inline int ip6_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { - struct dst_entry *dst = skb_dst(skb); - - __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS); - #ifdef CONFIG_NET_SWITCHDEV if (skb->offload_l3_fwd_mark) { consume_skb(skb); @@ -625,6 +628,8 @@ int ip6_forward(struct sk_buff *skb) } } + __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS); + mtu = ip6_dst_mtu_maybe_forward(dst, true); if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; @@ -887,9 +892,11 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, mtu = IPV6_MIN_MTU; } - if (np && np->frag_size < mtu) { - if (np->frag_size) - mtu = np->frag_size; + if (np) { + u32 frag_size = READ_ONCE(np->frag_size); + + if (frag_size && frag_size < mtu) + mtu = frag_size; } if (mtu < hlen + sizeof(struct frag_hdr) + 8) goto fail_toobig; @@ -1023,9 +1030,6 @@ slow_path: return err; fail_toobig: - if (skb->sk && dst_allfrag(skb_dst(skb))) - sk_gso_disable(skb->sk); - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); err = -EMSGSIZE; @@ -1119,7 +1123,7 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk, rcu_read_lock(); from = rt ? rcu_dereference(rt->from) : NULL; err = ip6_route_get_saddr(net, from, &fl6->daddr, - sk ? inet6_sk(sk)->srcprefs : 0, + sk ? READ_ONCE(inet6_sk(sk)->srcprefs) : 0, &fl6->saddr); rcu_read_unlock(); @@ -1289,74 +1293,6 @@ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, } EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow); -/** - * ip6_dst_lookup_tunnel - perform route lookup on tunnel - * @skb: Packet for which lookup is done - * @dev: Tunnel device - * @net: Network namespace of tunnel device - * @sock: Socket which provides route info - * @saddr: Memory to store the src ip address - * @info: Tunnel information - * @protocol: IP protocol - * @use_cache: Flag to enable cache usage - * This function performs a route lookup on a tunnel - * - * It returns a valid dst pointer and stores src address to be used in - * tunnel in param saddr on success, else a pointer encoded error code. - */ - -struct dst_entry *ip6_dst_lookup_tunnel(struct sk_buff *skb, - struct net_device *dev, - struct net *net, - struct socket *sock, - struct in6_addr *saddr, - const struct ip_tunnel_info *info, - u8 protocol, - bool use_cache) -{ - struct dst_entry *dst = NULL; -#ifdef CONFIG_DST_CACHE - struct dst_cache *dst_cache; -#endif - struct flowi6 fl6; - __u8 prio; - -#ifdef CONFIG_DST_CACHE - dst_cache = (struct dst_cache *)&info->dst_cache; - if (use_cache) { - dst = dst_cache_get_ip6(dst_cache, saddr); - if (dst) - return dst; - } -#endif - memset(&fl6, 0, sizeof(fl6)); - fl6.flowi6_mark = skb->mark; - fl6.flowi6_proto = protocol; - fl6.daddr = info->key.u.ipv6.dst; - fl6.saddr = info->key.u.ipv6.src; - prio = info->key.tos; - fl6.flowlabel = ip6_make_flowinfo(prio, info->key.label); - - dst = ipv6_stub->ipv6_dst_lookup_flow(net, sock->sk, &fl6, - NULL); - if (IS_ERR(dst)) { - netdev_dbg(dev, "no route to %pI6\n", &fl6.daddr); - return ERR_PTR(-ENETUNREACH); - } - if (dst->dev == dev) { /* is this necessary? 
*/ - netdev_dbg(dev, "circular route to %pI6\n", &fl6.daddr); - dst_release(dst); - return ERR_PTR(-ELOOP); - } -#ifdef CONFIG_DST_CACHE - if (use_cache) - dst_cache_set_ip6(dst_cache, dst, &fl6.saddr); -#endif - *saddr = fl6.saddr; - return dst; -} -EXPORT_SYMBOL_GPL(ip6_dst_lookup_tunnel); - static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src, gfp_t gfp) { @@ -1398,7 +1334,7 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, struct rt6_info *rt) { struct ipv6_pinfo *np = inet6_sk(sk); - unsigned int mtu; + unsigned int mtu, frag_size; struct ipv6_txoptions *nopt, *opt = ipc6->opt; /* callers pass dst together with a reference, set it first so @@ -1442,25 +1378,23 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, v6_cork->hop_limit = ipc6->hlimit; v6_cork->tclass = ipc6->tclass; if (rt->dst.flags & DST_XFRM_TUNNEL) - mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? + mtu = READ_ONCE(np->pmtudisc) >= IPV6_PMTUDISC_PROBE ? READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst); else - mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? + mtu = READ_ONCE(np->pmtudisc) >= IPV6_PMTUDISC_PROBE ? READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst)); - if (np->frag_size < mtu) { - if (np->frag_size) - mtu = np->frag_size; - } + + frag_size = READ_ONCE(np->frag_size); + if (frag_size && frag_size < mtu) + mtu = frag_size; + cork->base.fragsize = mtu; cork->base.gso_size = ipc6->gso_size; cork->base.tx_flags = 0; cork->base.mark = ipc6->sockc.mark; sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags); - if (dst_allfrag(xfrm_dst_path(&rt->dst))) - cork->base.flags |= IPCORK_ALLFRAG; cork->base.length = 0; - cork->base.transmit_time = ipc6->sockc.transmit_time; return 0; @@ -1517,8 +1451,6 @@ static int __ip6_append_data(struct sock *sk, headersize = sizeof(struct ipv6hdr) + (opt ? opt->opt_flen + opt->opt_nflen : 0) + - (dst_allfrag(&rt->dst) ? - sizeof(struct frag_hdr) : 0) + rt->rt6i_nfheader_len; if (mtu <= fragheaderlen || @@ -1628,7 +1560,7 @@ emsgsize: while (length > 0) { /* Check if the remaining data fits into current packet. */ - copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len; + copy = (cork->length <= mtu ? mtu : maxfraglen) - skb->len; if (copy < length) copy = maxfraglen - skb->len; @@ -1659,7 +1591,7 @@ alloc_new_skb: */ datalen = length + fraggap; - if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen) + if (datalen > (cork->length <= mtu ? 
mtu : maxfraglen) - fragheaderlen) datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len; fraglen = datalen + fragheaderlen; pagedlen = 0; @@ -1908,7 +1840,6 @@ static void ip6_cork_steal_dst(struct sk_buff *skb, struct inet_cork_full *cork) struct dst_entry *dst = cork->base.dst; cork->base.dst = NULL; - cork->base.flags &= ~IPCORK_ALLFRAG; skb_dst_set(skb, dst); } @@ -1929,7 +1860,6 @@ static void ip6_cork_release(struct inet_cork_full *cork, if (cork->base.dst) { dst_release(cork->base.dst); cork->base.dst = NULL; - cork->base.flags &= ~IPCORK_ALLFRAG; } } @@ -1941,7 +1871,6 @@ struct sk_buff *__ip6_make_skb(struct sock *sk, struct sk_buff *skb, *tmp_skb; struct sk_buff **tail_skb; struct in6_addr *final_dst; - struct ipv6_pinfo *np = inet6_sk(sk); struct net *net = sock_net(sk); struct ipv6hdr *hdr; struct ipv6_txoptions *opt = v6_cork->opt; @@ -1984,18 +1913,18 @@ struct sk_buff *__ip6_make_skb(struct sock *sk, ip6_flow_hdr(hdr, v6_cork->tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel, - ip6_autoflowlabel(net, np), fl6)); + ip6_autoflowlabel(net, sk), fl6)); hdr->hop_limit = v6_cork->hop_limit; hdr->nexthdr = proto; hdr->saddr = fl6->saddr; hdr->daddr = *final_dst; - skb->priority = sk->sk_priority; + skb->priority = READ_ONCE(sk->sk_priority); skb->mark = cork->base.mark; skb->tstamp = cork->base.transmit_time; ip6_cork_steal_dst(skb, cork); - IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); + IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS); if (proto == IPPROTO_ICMPV6) { struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); u8 icmp6_type; @@ -2097,7 +2026,7 @@ struct sk_buff *ip6_make_skb(struct sock *sk, return ERR_PTR(err); } if (ipc6->dontfrag < 0) - ipc6->dontfrag = inet6_sk(sk)->dontfrag; + ipc6->dontfrag = inet6_test_bit(DONTFRAG, sk); err = __ip6_append_data(sk, &queue, cork, &v6_cork, ¤t->task_frag, getfrag, from, diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 46c19bd489..9bbabf750a 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -796,8 +796,8 @@ static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb, struct sk_buff *skb), bool log_ecn_err) { - const struct ipv6hdr *ipv6h = ipv6_hdr(skb); - int err; + const struct ipv6hdr *ipv6h; + int nh, err; if ((!(tpi->flags & TUNNEL_CSUM) && (tunnel->parms.i_flags & TUNNEL_CSUM)) || @@ -829,7 +829,6 @@ static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb, goto drop; } - ipv6h = ipv6_hdr(skb); skb->protocol = eth_type_trans(skb, tunnel->dev); skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); } else { @@ -837,7 +836,23 @@ static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb, skb_reset_mac_header(skb); } + /* Save offset of outer header relative to skb->head, + * because we are going to reset the network header to the inner header + * and might change skb->head. + */ + nh = skb_network_header(skb) - skb->head; + skb_reset_network_header(skb); + + if (!pskb_inet_may_pull(skb)) { + DEV_STATS_INC(tunnel->dev, rx_length_errors); + DEV_STATS_INC(tunnel->dev, rx_errors); + goto drop; + } + + /* Get the outer header. 
*/ + ipv6h = (struct ipv6hdr *)(skb->head + nh); + memset(skb->cb, 0, sizeof(struct inet6_skb_parm)); __skb_tunnel_rx(skb, tunnel->dev, tunnel->net); diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c index cdc4d4ee24..a7bf0327b3 100644 --- a/net/ipv6/ip6_udp_tunnel.c +++ b/net/ipv6/ip6_udp_tunnel.c @@ -1,3 +1,4 @@ + // SPDX-License-Identifier: GPL-2.0-only #include <linux/module.h> #include <linux/errno.h> @@ -75,8 +76,9 @@ EXPORT_SYMBOL_GPL(udp_sock_create6); int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, - struct net_device *dev, struct in6_addr *saddr, - struct in6_addr *daddr, + struct net_device *dev, + const struct in6_addr *saddr, + const struct in6_addr *daddr, __u8 prio, __u8 ttl, __be32 label, __be16 src_port, __be16 dst_port, bool nocheck) { @@ -111,4 +113,73 @@ int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk, } EXPORT_SYMBOL_GPL(udp_tunnel6_xmit_skb); +/** + * udp_tunnel6_dst_lookup - perform route lookup on UDP tunnel + * @skb: Packet for which lookup is done + * @dev: Tunnel device + * @net: Network namespace of tunnel device + * @sock: Socket which provides route info + * @oif: Index of the output interface + * @saddr: Memory to store the src ip address + * @key: Tunnel information + * @sport: UDP source port + * @dport: UDP destination port + * @dsfield: The traffic class field + * @dst_cache: The dst cache to use for lookup + * This function performs a route lookup on a UDP tunnel + * + * It returns a valid dst pointer and stores src address to be used in + * tunnel in param saddr on success, else a pointer encoded error code. + */ + +struct dst_entry *udp_tunnel6_dst_lookup(struct sk_buff *skb, + struct net_device *dev, + struct net *net, + struct socket *sock, + int oif, + struct in6_addr *saddr, + const struct ip_tunnel_key *key, + __be16 sport, __be16 dport, u8 dsfield, + struct dst_cache *dst_cache) +{ + struct dst_entry *dst = NULL; + struct flowi6 fl6; + +#ifdef CONFIG_DST_CACHE + if (dst_cache) { + dst = dst_cache_get_ip6(dst_cache, saddr); + if (dst) + return dst; + } +#endif + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_mark = skb->mark; + fl6.flowi6_proto = IPPROTO_UDP; + fl6.flowi6_oif = oif; + fl6.daddr = key->u.ipv6.dst; + fl6.saddr = key->u.ipv6.src; + fl6.fl6_sport = sport; + fl6.fl6_dport = dport; + fl6.flowlabel = ip6_make_flowinfo(dsfield, key->label); + + dst = ipv6_stub->ipv6_dst_lookup_flow(net, sock->sk, &fl6, + NULL); + if (IS_ERR(dst)) { + netdev_dbg(dev, "no route to %pI6\n", &fl6.daddr); + return ERR_PTR(-ENETUNREACH); + } + if (dst->dev == dev) { /* is this necessary? 
*/ + netdev_dbg(dev, "circular route to %pI6\n", &fl6.daddr); + dst_release(dst); + return ERR_PTR(-ELOOP); + } +#ifdef CONFIG_DST_CACHE + if (dst_cache) + dst_cache_set_ip6(dst_cache, dst, &fl6.saddr); +#endif + *saddr = fl6.saddr; + return dst; +} +EXPORT_SYMBOL_GPL(udp_tunnel6_dst_lookup); + MODULE_LICENSE("GPL"); diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 73c85d4e0e..e550240c85 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -569,11 +569,11 @@ vti6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) goto tx_err; memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); - xfrm_decode_session(skb, &fl, AF_INET6); + xfrm_decode_session(dev_net(dev), skb, &fl, AF_INET6); break; case htons(ETH_P_IP): memset(IPCB(skb), 0, sizeof(*IPCB(skb))); - xfrm_decode_session(skb, &fl, AF_INET); + xfrm_decode_session(dev_net(dev), skb, &fl, AF_INET); break; default: goto tx_err; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 0e2a0847b3..7d661735cb 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -415,6 +415,101 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname, if (ip6_mroute_opt(optname)) return ip6_mroute_setsockopt(sk, optname, optval, optlen); + /* Handle options that can be set without locking the socket. */ + switch (optname) { + case IPV6_UNICAST_HOPS: + if (optlen < sizeof(int)) + return -EINVAL; + if (val > 255 || val < -1) + return -EINVAL; + WRITE_ONCE(np->hop_limit, val); + return 0; + case IPV6_MULTICAST_LOOP: + if (optlen < sizeof(int)) + return -EINVAL; + if (val != valbool) + return -EINVAL; + inet6_assign_bit(MC6_LOOP, sk, valbool); + return 0; + case IPV6_MULTICAST_HOPS: + if (sk->sk_type == SOCK_STREAM) + return retv; + if (optlen < sizeof(int)) + return -EINVAL; + if (val > 255 || val < -1) + return -EINVAL; + WRITE_ONCE(np->mcast_hops, + val == -1 ? IPV6_DEFAULT_MCASTHOPS : val); + return 0; + case IPV6_MTU: + if (optlen < sizeof(int)) + return -EINVAL; + if (val && val < IPV6_MIN_MTU) + return -EINVAL; + WRITE_ONCE(np->frag_size, val); + return 0; + case IPV6_MINHOPCOUNT: + if (optlen < sizeof(int)) + return -EINVAL; + if (val < 0 || val > 255) + return -EINVAL; + + if (val) + static_branch_enable(&ip6_min_hopcount); + + /* tcp_v6_err() and tcp_v6_rcv() might read min_hopcount + * while we are changing it. 
+ */ + WRITE_ONCE(np->min_hopcount, val); + return 0; + case IPV6_RECVERR_RFC4884: + if (optlen < sizeof(int)) + return -EINVAL; + if (val < 0 || val > 1) + return -EINVAL; + inet6_assign_bit(RECVERR6_RFC4884, sk, valbool); + return 0; + case IPV6_MULTICAST_ALL: + if (optlen < sizeof(int)) + return -EINVAL; + inet6_assign_bit(MC6_ALL, sk, valbool); + return 0; + case IPV6_AUTOFLOWLABEL: + inet6_assign_bit(AUTOFLOWLABEL, sk, valbool); + inet6_set_bit(AUTOFLOWLABEL_SET, sk); + return 0; + case IPV6_DONTFRAG: + inet6_assign_bit(DONTFRAG, sk, valbool); + return 0; + case IPV6_RECVERR: + if (optlen < sizeof(int)) + return -EINVAL; + inet6_assign_bit(RECVERR6, sk, valbool); + if (!val) + skb_errqueue_purge(&sk->sk_error_queue); + return 0; + case IPV6_ROUTER_ALERT_ISOLATE: + if (optlen < sizeof(int)) + return -EINVAL; + inet6_assign_bit(RTALERT_ISOLATE, sk, valbool); + return 0; + case IPV6_MTU_DISCOVER: + if (optlen < sizeof(int)) + return -EINVAL; + if (val < IPV6_PMTUDISC_DONT || val > IPV6_PMTUDISC_OMIT) + return -EINVAL; + WRITE_ONCE(np->pmtudisc, val); + return 0; + case IPV6_FLOWINFO_SEND: + if (optlen < sizeof(int)) + return -EINVAL; + inet6_assign_bit(SNDFLOW, sk, valbool); + return 0; + case IPV6_ADDR_PREFERENCES: + if (optlen < sizeof(int)) + return -EINVAL; + return ip6_sock_set_addr_preferences(sk, val); + } if (needs_rtnl) rtnl_lock(); sockopt_lock_sock(sk); @@ -733,34 +828,7 @@ done: } break; } - case IPV6_UNICAST_HOPS: - if (optlen < sizeof(int)) - goto e_inval; - if (val > 255 || val < -1) - goto e_inval; - np->hop_limit = val; - retv = 0; - break; - case IPV6_MULTICAST_HOPS: - if (sk->sk_type == SOCK_STREAM) - break; - if (optlen < sizeof(int)) - goto e_inval; - if (val > 255 || val < -1) - goto e_inval; - np->mcast_hops = (val == -1 ? IPV6_DEFAULT_MCASTHOPS : val); - retv = 0; - break; - - case IPV6_MULTICAST_LOOP: - if (optlen < sizeof(int)) - goto e_inval; - if (val != valbool) - goto e_inval; - np->mc_loop = valbool; - retv = 0; - break; case IPV6_UNICAST_IF: { @@ -862,13 +930,6 @@ done: retv = ipv6_sock_ac_drop(sk, mreq.ipv6mr_ifindex, &mreq.ipv6mr_acaddr); break; } - case IPV6_MULTICAST_ALL: - if (optlen < sizeof(int)) - goto e_inval; - np->mc_all = valbool; - retv = 0; - break; - case MCAST_JOIN_GROUP: case MCAST_LEAVE_GROUP: if (in_compat_syscall()) @@ -896,42 +957,6 @@ done: goto e_inval; retv = ip6_ra_control(sk, val); break; - case IPV6_ROUTER_ALERT_ISOLATE: - if (optlen < sizeof(int)) - goto e_inval; - np->rtalert_isolate = valbool; - retv = 0; - break; - case IPV6_MTU_DISCOVER: - if (optlen < sizeof(int)) - goto e_inval; - if (val < IPV6_PMTUDISC_DONT || val > IPV6_PMTUDISC_OMIT) - goto e_inval; - np->pmtudisc = val; - retv = 0; - break; - case IPV6_MTU: - if (optlen < sizeof(int)) - goto e_inval; - if (val && val < IPV6_MIN_MTU) - goto e_inval; - np->frag_size = val; - retv = 0; - break; - case IPV6_RECVERR: - if (optlen < sizeof(int)) - goto e_inval; - np->recverr = valbool; - if (!val) - skb_errqueue_purge(&sk->sk_error_queue); - retv = 0; - break; - case IPV6_FLOWINFO_SEND: - if (optlen < sizeof(int)) - goto e_inval; - np->sndflow = valbool; - retv = 0; - break; case IPV6_FLOWLABEL_MGR: retv = ipv6_flowlabel_opt(sk, optval, optlen); break; @@ -943,47 +968,10 @@ done: retv = xfrm_user_policy(sk, optname, optval, optlen); break; - case IPV6_ADDR_PREFERENCES: - if (optlen < sizeof(int)) - goto e_inval; - retv = __ip6_sock_set_addr_preferences(sk, val); - break; - case IPV6_MINHOPCOUNT: - if (optlen < sizeof(int)) - goto e_inval; - if (val < 0 || val > 255) - goto 
e_inval; - - if (val) - static_branch_enable(&ip6_min_hopcount); - - /* tcp_v6_err() and tcp_v6_rcv() might read min_hopcount - * while we are changing it. - */ - WRITE_ONCE(np->min_hopcount, val); - retv = 0; - break; - case IPV6_DONTFRAG: - np->dontfrag = valbool; - retv = 0; - break; - case IPV6_AUTOFLOWLABEL: - np->autoflowlabel = valbool; - np->autoflowlabel_set = 1; - retv = 0; - break; case IPV6_RECVFRAGSIZE: np->rxopt.bits.recvfragsize = valbool; retv = 0; break; - case IPV6_RECVERR_RFC4884: - if (optlen < sizeof(int)) - goto e_inval; - if (val < 0 || val > 1) - goto e_inval; - np->recverr_rfc4884 = valbool; - retv = 0; - break; } unlock: @@ -1180,7 +1168,8 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname, put_cmsg(&msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info); } if (np->rxopt.bits.rxhlim) { - int hlim = np->mcast_hops; + int hlim = READ_ONCE(np->mcast_hops); + put_cmsg(&msg, SOL_IPV6, IPV6_HOPLIMIT, sizeof(hlim), &hlim); } if (np->rxopt.bits.rxtclass) { @@ -1197,7 +1186,8 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname, put_cmsg(&msg, SOL_IPV6, IPV6_2292PKTINFO, sizeof(src_info), &src_info); } if (np->rxopt.bits.rxohlim) { - int hlim = np->mcast_hops; + int hlim = READ_ONCE(np->mcast_hops); + put_cmsg(&msg, SOL_IPV6, IPV6_2292HOPLIMIT, sizeof(hlim), &hlim); } if (np->rxopt.bits.rxflow) { @@ -1347,9 +1337,9 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname, struct dst_entry *dst; if (optname == IPV6_UNICAST_HOPS) - val = np->hop_limit; + val = READ_ONCE(np->hop_limit); else - val = np->mcast_hops; + val = READ_ONCE(np->mcast_hops); if (val < 0) { rcu_read_lock(); @@ -1365,7 +1355,7 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname, } case IPV6_MULTICAST_LOOP: - val = np->mc_loop; + val = inet6_test_bit(MC6_LOOP, sk); break; case IPV6_MULTICAST_IF: @@ -1373,7 +1363,7 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname, break; case IPV6_MULTICAST_ALL: - val = np->mc_all; + val = inet6_test_bit(MC6_ALL, sk); break; case IPV6_UNICAST_IF: @@ -1381,15 +1371,15 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname, break; case IPV6_MTU_DISCOVER: - val = np->pmtudisc; + val = READ_ONCE(np->pmtudisc); break; case IPV6_RECVERR: - val = np->recverr; + val = inet6_test_bit(RECVERR6, sk); break; case IPV6_FLOWINFO_SEND: - val = np->sndflow; + val = inet6_test_bit(SNDFLOW, sk); break; case IPV6_FLOWLABEL_MGR: @@ -1424,33 +1414,35 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname, } case IPV6_ADDR_PREFERENCES: + { + u8 srcprefs = READ_ONCE(np->srcprefs); val = 0; - if (np->srcprefs & IPV6_PREFER_SRC_TMP) + if (srcprefs & IPV6_PREFER_SRC_TMP) val |= IPV6_PREFER_SRC_TMP; - else if (np->srcprefs & IPV6_PREFER_SRC_PUBLIC) + else if (srcprefs & IPV6_PREFER_SRC_PUBLIC) val |= IPV6_PREFER_SRC_PUBLIC; else { /* XXX: should we return system default? 
*/ val |= IPV6_PREFER_SRC_PUBTMP_DEFAULT; } - if (np->srcprefs & IPV6_PREFER_SRC_COA) + if (srcprefs & IPV6_PREFER_SRC_COA) val |= IPV6_PREFER_SRC_COA; else val |= IPV6_PREFER_SRC_HOME; break; - + } case IPV6_MINHOPCOUNT: - val = np->min_hopcount; + val = READ_ONCE(np->min_hopcount); break; case IPV6_DONTFRAG: - val = np->dontfrag; + val = inet6_test_bit(DONTFRAG, sk); break; case IPV6_AUTOFLOWLABEL: - val = ip6_autoflowlabel(sock_net(sk), np); + val = ip6_autoflowlabel(sock_net(sk), sk); break; case IPV6_RECVFRAGSIZE: @@ -1458,11 +1450,11 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname, break; case IPV6_ROUTER_ALERT_ISOLATE: - val = np->rtalert_isolate; + val = inet6_test_bit(RTALERT_ISOLATE, sk); break; case IPV6_RECVERR_RFC4884: - val = np->recverr_rfc4884; + val = inet6_test_bit(RECVERR6_RFC4884, sk); break; default: diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index f948cf7bfc..bc6e0a0bad 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -642,7 +642,7 @@ bool inet6_mc_check(const struct sock *sk, const struct in6_addr *mc_addr, } if (!mc) { rcu_read_unlock(); - return np->mc_all; + return inet6_test_bit(MC6_ALL, sk); } psl = rcu_dereference(mc->sflist); if (!psl) { @@ -1716,7 +1716,7 @@ static void ip6_mc_hdr(const struct sock *sk, struct sk_buff *skb, hdr->payload_len = htons(len); hdr->nexthdr = proto; - hdr->hop_limit = inet6_sk(sk)->hop_limit; + hdr->hop_limit = READ_ONCE(inet6_sk(sk)->hop_limit); hdr->saddr = *saddr; hdr->daddr = *daddr; @@ -1789,7 +1789,7 @@ static void mld_sendpack(struct sk_buff *skb) rcu_read_lock(); idev = __in6_dev_get(skb->dev); - IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); + IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS); payload_len = (skb_tail_pointer(skb) - skb_network_header(skb)) - sizeof(*pip6); @@ -2147,8 +2147,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) full_len = sizeof(struct ipv6hdr) + payload_len; rcu_read_lock(); - IP6_UPD_PO_STATS(net, __in6_dev_get(dev), - IPSTATS_MIB_OUT, full_len); + IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_OUTREQUESTS); rcu_read_unlock(); skb = sock_alloc_send_skb(sk, hlen + tlen + full_len, 1, &err); @@ -3015,8 +3014,6 @@ static struct ip6_sf_list *igmp6_mcf_get_next(struct seq_file *seq, struct ip6_s continue; state->im = rcu_dereference(state->idev->mc_list); } - if (!state->im) - break; psf = rcu_dereference(state->im->mca_sources); } out: diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 553c8664e0..a19999b30b 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -500,11 +500,11 @@ void ndisc_send_skb(struct sk_buff *skb, const struct in6_addr *daddr, csum_partial(icmp6h, skb->len, 0)); - ip6_nd_hdr(skb, saddr, daddr, inet6_sk(sk)->hop_limit, skb->len); + ip6_nd_hdr(skb, saddr, daddr, READ_ONCE(inet6_sk(sk)->hop_limit), skb->len); rcu_read_lock(); idev = __in6_dev_get(dst->dev); - IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); + IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS); err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, NULL, dst->dev, @@ -1996,7 +1996,7 @@ static int __net_init ndisc_net_init(struct net *net) np = inet6_sk(sk); np->hop_limit = 255; /* Do not loopback ndisc messages */ - np->mc_loop = 0; + inet6_clear_bit(MC6_LOOP, sk); return 0; } diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 857713d7a3..53d255838e 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -61,7 +61,7 @@ int ip6_route_me_harder(struct net *net, struct sock *sk_partial, struct sk_buff 
#ifdef CONFIG_XFRM if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && - xfrm_decode_session(skb, flowi6_to_flowi(&fl6), AF_INET6) == 0) { + xfrm_decode_session(net, skb, flowi6_to_flowi(&fl6), AF_INET6) == 0) { skb_dst_set(skb, NULL); dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0); if (IS_ERR(dst)) diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index a88b2ce4a3..8dd4cd0c47 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -31,10 +31,10 @@ static const struct xt_table packet_mangler = { static unsigned int ip6t_mangle_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - unsigned int ret; struct in6_addr saddr, daddr; - u_int8_t hop_limit; - u_int32_t flowlabel, mark; + unsigned int ret, verdict; + u32 flowlabel, mark; + u8 hop_limit; int err; /* save source/dest address, mark, hoplimit, flowlabel, priority, */ @@ -47,8 +47,9 @@ ip6t_mangle_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *sta flowlabel = *((u_int32_t *)ipv6_hdr(skb)); ret = ip6t_do_table(priv, skb, state); + verdict = ret & NF_VERDICT_MASK; - if (ret != NF_DROP && ret != NF_STOLEN && + if (verdict != NF_DROP && verdict != NF_STOLEN && (!ipv6_addr_equal(&ipv6_hdr(skb)->saddr, &saddr) || !ipv6_addr_equal(&ipv6_hdr(skb)->daddr, &daddr) || skb->mark != mark || diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c index bf3cb3a136..52cf104e34 100644 --- a/net/ipv6/netfilter/ip6table_nat.c +++ b/net/ipv6/netfilter/ip6table_nat.c @@ -170,3 +170,4 @@ module_init(ip6table_nat_init); module_exit(ip6table_nat_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Ip6tables legacy nat table"); diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index 08861d5d1f..fc9f675402 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -106,3 +106,4 @@ static void __exit ip6table_raw_fini(void) module_init(ip6table_raw_init); module_exit(ip6table_raw_fini); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Ip6tables legacy raw table"); diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c index d59b296b4f..be7817fbc0 100644 --- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c +++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c @@ -182,3 +182,4 @@ module_init(nf_defrag_init); module_exit(nf_defrag_fini); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("IPv6 defragmentation support"); diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c index 71d6927282..196dd4ecb5 100644 --- a/net/ipv6/netfilter/nf_reject_ipv6.c +++ b/net/ipv6/netfilter/nf_reject_ipv6.c @@ -418,3 +418,4 @@ void nf_send_unreach6(struct net *net, struct sk_buff *skb_in, EXPORT_SYMBOL_GPL(nf_send_unreach6); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("IPv6 packet rejection core"); diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 25243737fb..d2098dd4ce 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -89,7 +89,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) return -EAFNOSUPPORT; } daddr = &(u->sin6_addr); - if (np->sndflow) + if (inet6_test_bit(SNDFLOW, sk)) fl6.flowlabel = u->sin6_flowinfo & IPV6_FLOWINFO_MASK; if (__ipv6_addr_needs_scope_id(ipv6_addr_type(daddr))) oif = u->sin6_scope_id; @@ -118,7 +118,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) l3mdev_master_ifindex_by_index(sock_net(sk), oif) != sk->sk_bound_dev_if)) 
return -EINVAL; - ipcm6_init_sk(&ipc6, np); + ipcm6_init_sk(&ipc6, sk); ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags); ipc6.sockc.mark = READ_ONCE(sk->sk_mark); diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index e20b3705c2..6d1d922164 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -61,7 +61,7 @@ static const struct snmp_mib snmp6_ipstats_list[] = { SNMP_MIB_ITEM("Ip6InDiscards", IPSTATS_MIB_INDISCARDS), SNMP_MIB_ITEM("Ip6InDelivers", IPSTATS_MIB_INDELIVERS), SNMP_MIB_ITEM("Ip6OutForwDatagrams", IPSTATS_MIB_OUTFORWDATAGRAMS), - SNMP_MIB_ITEM("Ip6OutRequests", IPSTATS_MIB_OUTPKTS), + SNMP_MIB_ITEM("Ip6OutRequests", IPSTATS_MIB_OUTREQUESTS), SNMP_MIB_ITEM("Ip6OutDiscards", IPSTATS_MIB_OUTDISCARDS), SNMP_MIB_ITEM("Ip6OutNoRoutes", IPSTATS_MIB_OUTNOROUTES), SNMP_MIB_ITEM("Ip6ReasmTimeout", IPSTATS_MIB_REASMTIMEOUT), @@ -84,6 +84,7 @@ static const struct snmp_mib snmp6_ipstats_list[] = { SNMP_MIB_ITEM("Ip6InECT1Pkts", IPSTATS_MIB_ECT1PKTS), SNMP_MIB_ITEM("Ip6InECT0Pkts", IPSTATS_MIB_ECT0PKTS), SNMP_MIB_ITEM("Ip6InCEPkts", IPSTATS_MIB_CEPKTS), + SNMP_MIB_ITEM("Ip6OutTransmits", IPSTATS_MIB_OUTPKTS), SNMP_MIB_SENTINEL }; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 42fcec3ecf..dd0a4e73e6 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -291,6 +291,7 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { + bool recverr = inet6_test_bit(RECVERR6, sk); struct ipv6_pinfo *np = inet6_sk(sk); int err; int harderr; @@ -300,26 +301,26 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb, 2. Socket is connected (otherwise the error indication is useless without recverr and error is hard. */ - if (!np->recverr && sk->sk_state != TCP_ESTABLISHED) + if (!recverr && sk->sk_state != TCP_ESTABLISHED) return; harderr = icmpv6_err_convert(type, code, &err); if (type == ICMPV6_PKT_TOOBIG) { ip6_sk_update_pmtu(skb, sk, info); - harderr = (np->pmtudisc == IPV6_PMTUDISC_DO); + harderr = (READ_ONCE(np->pmtudisc) == IPV6_PMTUDISC_DO); } if (type == NDISC_REDIRECT) { ip6_sk_redirect(skb, sk); return; } - if (np->recverr) { + if (recverr) { u8 *payload = skb->data; if (!inet_test_bit(HDRINCL, sk)) payload += offset; ipv6_icmp_error(sk, skb, err, 0, ntohl(info), payload); } - if (np->recverr || harderr) { + if (recverr || harderr) { sk->sk_err = err; sk_error_report(sk); } @@ -587,7 +588,6 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, struct flowi6 *fl6, struct dst_entry **dstp, unsigned int flags, const struct sockcm_cookie *sockc) { - struct ipv6_pinfo *np = inet6_sk(sk); struct net *net = sock_net(sk); struct ipv6hdr *iph; struct sk_buff *skb; @@ -651,7 +651,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, * have been queued for deletion. 
*/ rcu_read_lock(); - IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); + IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS); err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, NULL, rt->dst.dev, dst_output); if (err > 0) @@ -668,7 +668,7 @@ out: error: IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); error_check: - if (err == -ENOBUFS && !np->recverr) + if (err == -ENOBUFS && !inet6_test_bit(RECVERR6, sk)) err = 0; return err; } @@ -795,7 +795,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) return -EINVAL; daddr = &sin6->sin6_addr; - if (np->sndflow) { + if (inet6_test_bit(SNDFLOW, sk)) { fl6.flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK; if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) { flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); @@ -898,7 +898,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); if (ipc6.dontfrag < 0) - ipc6.dontfrag = np->dontfrag; + ipc6.dontfrag = inet6_test_bit(DONTFRAG, sk); if (msg->msg_flags&MSG_CONFIRM) goto do_confirm; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 56525b5b95..ea1dec8448 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -341,7 +341,7 @@ struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev, int flags) { struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev, - 1, DST_OBSOLETE_FORCE_CHK, flags); + DST_OBSOLETE_FORCE_CHK, flags); if (rt) { rt6_info_init(rt); @@ -2622,7 +2622,7 @@ static struct dst_entry *ip6_route_output_flags_noref(struct net *net, if (!any_src) flags |= RT6_LOOKUP_F_HAS_SADDR; else if (sk) - flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs); + flags |= rt6_srcprefs2flags(READ_ONCE(inet6_sk(sk)->srcprefs)); return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output); } @@ -2655,7 +2655,7 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori struct net_device *loopback_dev = net->loopback_dev; struct dst_entry *new = NULL; - rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1, + rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, DST_OBSOLETE_DEAD, 0); if (rt) { rt6_info_init(rt); diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c index 29346a6eec..35508abd76 100644 --- a/net/ipv6/seg6.c +++ b/net/ipv6/seg6.c @@ -512,22 +512,24 @@ int __init seg6_init(void) { int err; - err = genl_register_family(&seg6_genl_family); + err = register_pernet_subsys(&ip6_segments_ops); if (err) goto out; - err = register_pernet_subsys(&ip6_segments_ops); + err = genl_register_family(&seg6_genl_family); if (err) - goto out_unregister_genl; + goto out_unregister_pernet; #ifdef CONFIG_IPV6_SEG6_LWTUNNEL err = seg6_iptunnel_init(); if (err) - goto out_unregister_pernet; + goto out_unregister_genl; err = seg6_local_init(); - if (err) - goto out_unregister_pernet; + if (err) { + seg6_iptunnel_exit(); + goto out_unregister_genl; + } #endif #ifdef CONFIG_IPV6_SEG6_HMAC @@ -548,11 +550,11 @@ out_unregister_iptun: #endif #endif #ifdef CONFIG_IPV6_SEG6_LWTUNNEL -out_unregister_pernet: - unregister_pernet_subsys(&ip6_segments_ops); -#endif out_unregister_genl: genl_unregister_family(&seg6_genl_family); +#endif +out_unregister_pernet: + unregister_pernet_subsys(&ip6_segments_ops); goto out; } diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 8698b49dfc..12eedc6ca2 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -140,6 +140,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) struct 
dst_entry *dst; __u8 rcv_wscale; u32 tsoff = 0; + int l3index; if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies) || !th->ack || th->rst) @@ -215,6 +216,10 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) treq->snt_isn = cookie; treq->ts_off = 0; treq->txhash = net_tx_rndhash(); + + l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif); + tcp_ao_syncookie(sk, skb, treq, AF_INET6, l3index); + if (IS_ENABLED(CONFIG_SMC)) ireq->smc_ok = 0; diff --git a/net/ipv6/tcp_ao.c b/net/ipv6/tcp_ao.c new file mode 100644 index 0000000000..3c09ac2620 --- /dev/null +++ b/net/ipv6/tcp_ao.c @@ -0,0 +1,168 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * INET An implementation of the TCP Authentication Option (TCP-AO). + * See RFC5925. + * + * Authors: Dmitry Safonov <dima@arista.com> + * Francesco Ruggeri <fruggeri@arista.com> + * Salam Noureddine <noureddine@arista.com> + */ +#include <crypto/hash.h> +#include <linux/tcp.h> + +#include <net/tcp.h> +#include <net/ipv6.h> + +static int tcp_v6_ao_calc_key(struct tcp_ao_key *mkt, u8 *key, + const struct in6_addr *saddr, + const struct in6_addr *daddr, + __be16 sport, __be16 dport, + __be32 sisn, __be32 disn) +{ + struct kdf_input_block { + u8 counter; + u8 label[6]; + struct tcp6_ao_context ctx; + __be16 outlen; + } __packed * tmp; + struct tcp_sigpool hp; + int err; + + err = tcp_sigpool_start(mkt->tcp_sigpool_id, &hp); + if (err) + return err; + + tmp = hp.scratch; + tmp->counter = 1; + memcpy(tmp->label, "TCP-AO", 6); + tmp->ctx.saddr = *saddr; + tmp->ctx.daddr = *daddr; + tmp->ctx.sport = sport; + tmp->ctx.dport = dport; + tmp->ctx.sisn = sisn; + tmp->ctx.disn = disn; + tmp->outlen = htons(tcp_ao_digest_size(mkt) * 8); /* in bits */ + + err = tcp_ao_calc_traffic_key(mkt, key, tmp, sizeof(*tmp), &hp); + tcp_sigpool_end(&hp); + + return err; +} + +int tcp_v6_ao_calc_key_skb(struct tcp_ao_key *mkt, u8 *key, + const struct sk_buff *skb, + __be32 sisn, __be32 disn) +{ + const struct ipv6hdr *iph = ipv6_hdr(skb); + const struct tcphdr *th = tcp_hdr(skb); + + return tcp_v6_ao_calc_key(mkt, key, &iph->saddr, + &iph->daddr, th->source, + th->dest, sisn, disn); +} + +int tcp_v6_ao_calc_key_sk(struct tcp_ao_key *mkt, u8 *key, + const struct sock *sk, __be32 sisn, + __be32 disn, bool send) +{ + if (send) + return tcp_v6_ao_calc_key(mkt, key, &sk->sk_v6_rcv_saddr, + &sk->sk_v6_daddr, htons(sk->sk_num), + sk->sk_dport, sisn, disn); + else + return tcp_v6_ao_calc_key(mkt, key, &sk->sk_v6_daddr, + &sk->sk_v6_rcv_saddr, sk->sk_dport, + htons(sk->sk_num), disn, sisn); +} + +int tcp_v6_ao_calc_key_rsk(struct tcp_ao_key *mkt, u8 *key, + struct request_sock *req) +{ + struct inet_request_sock *ireq = inet_rsk(req); + + return tcp_v6_ao_calc_key(mkt, key, + &ireq->ir_v6_loc_addr, &ireq->ir_v6_rmt_addr, + htons(ireq->ir_num), ireq->ir_rmt_port, + htonl(tcp_rsk(req)->snt_isn), + htonl(tcp_rsk(req)->rcv_isn)); +} + +struct tcp_ao_key *tcp_v6_ao_lookup(const struct sock *sk, + struct sock *addr_sk, + int sndid, int rcvid) +{ + int l3index = l3mdev_master_ifindex_by_index(sock_net(sk), + addr_sk->sk_bound_dev_if); + struct in6_addr *addr = &addr_sk->sk_v6_daddr; + + return tcp_ao_do_lookup(sk, l3index, (union tcp_ao_addr *)addr, + AF_INET6, sndid, rcvid); +} + +struct tcp_ao_key *tcp_v6_ao_lookup_rsk(const struct sock *sk, + struct request_sock *req, + int sndid, int rcvid) +{ + struct inet_request_sock *ireq = inet_rsk(req); + struct in6_addr *addr = &ireq->ir_v6_rmt_addr; + int l3index; + + l3index = 
l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif); + return tcp_ao_do_lookup(sk, l3index, (union tcp_ao_addr *)addr, + AF_INET6, sndid, rcvid); +} + +int tcp_v6_ao_hash_pseudoheader(struct tcp_sigpool *hp, + const struct in6_addr *daddr, + const struct in6_addr *saddr, int nbytes) +{ + struct tcp6_pseudohdr *bp; + struct scatterlist sg; + + bp = hp->scratch; + /* 1. TCP pseudo-header (RFC2460) */ + bp->saddr = *saddr; + bp->daddr = *daddr; + bp->len = cpu_to_be32(nbytes); + bp->protocol = cpu_to_be32(IPPROTO_TCP); + + sg_init_one(&sg, bp, sizeof(*bp)); + ahash_request_set_crypt(hp->req, &sg, NULL, sizeof(*bp)); + return crypto_ahash_update(hp->req); +} + +int tcp_v6_ao_hash_skb(char *ao_hash, struct tcp_ao_key *key, + const struct sock *sk, const struct sk_buff *skb, + const u8 *tkey, int hash_offset, u32 sne) +{ + return tcp_ao_hash_skb(AF_INET6, ao_hash, key, sk, skb, tkey, + hash_offset, sne); +} + +int tcp_v6_parse_ao(struct sock *sk, int cmd, + sockptr_t optval, int optlen) +{ + return tcp_parse_ao(sk, cmd, AF_INET6, optval, optlen); +} + +int tcp_v6_ao_synack_hash(char *ao_hash, struct tcp_ao_key *ao_key, + struct request_sock *req, const struct sk_buff *skb, + int hash_offset, u32 sne) +{ + void *hash_buf = NULL; + int err; + + hash_buf = kmalloc(tcp_ao_digest_size(ao_key), GFP_ATOMIC); + if (!hash_buf) + return -ENOMEM; + + err = tcp_v6_ao_calc_key_rsk(ao_key, hash_buf, req); + if (err) + goto out; + + err = tcp_ao_hash_skb(AF_INET6, ao_hash, ao_key, req_to_sk(req), skb, + hash_buf, hash_offset, sne); +out: + kfree(hash_buf); + return err; +} diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 3783334ef2..8c6623496d 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -76,16 +76,9 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb); static const struct inet_connection_sock_af_ops ipv6_mapped; const struct inet_connection_sock_af_ops ipv6_specific; -#ifdef CONFIG_TCP_MD5SIG +#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) static const struct tcp_sock_af_ops tcp_sock_ipv6_specific; static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific; -#else -static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk, - const struct in6_addr *addr, - int l3index) -{ - return NULL; -} #endif /* Helper returning the inet6 address from a given tcp socket. 
@@ -163,7 +156,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, memset(&fl6, 0, sizeof(fl6)); - if (np->sndflow) { + if (inet6_test_bit(SNDFLOW, sk)) { fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK; IP6_ECN_flow_init(fl6.flowlabel); if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) { @@ -239,7 +232,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (sk_is_mptcp(sk)) mptcpv6_handle_mapped(sk, true); sk->sk_backlog_rcv = tcp_v4_do_rcv; -#ifdef CONFIG_TCP_MD5SIG +#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) tp->af_specific = &tcp_sock_ipv6_mapped_specific; #endif @@ -252,7 +245,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (sk_is_mptcp(sk)) mptcpv6_handle_mapped(sk, false); sk->sk_backlog_rcv = tcp_v6_do_rcv; -#ifdef CONFIG_TCP_MD5SIG +#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) tp->af_specific = &tcp_sock_ipv6_specific; #endif goto failure; @@ -286,6 +279,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, goto failure; } + tp->tcp_usec_ts = dst_tcp_usec_ts(dst); tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row; if (!saddr) { @@ -402,6 +396,8 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, } if (sk->sk_state == TCP_TIME_WAIT) { + /* To increase the counter of ignored icmps for TCP-AO */ + tcp_ao_ignore_icmp(sk, AF_INET6, type, code); inet_twsk_put(inet_twsk(sk)); return 0; } @@ -412,6 +408,11 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return 0; } + if (tcp_ao_ignore_icmp(sk, AF_INET6, type, code)) { + sock_put(sk); + return 0; + } + bh_lock_sock(sk); if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG) __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS); @@ -508,7 +509,7 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, tcp_ld_RTO_revert(sk, seq); } - if (!sock_owned_by_user(sk) && np->recverr) { + if (!sock_owned_by_user(sk) && inet6_test_bit(RECVERR6, sk)) { WRITE_ONCE(sk->sk_err, err); sk_error_report(sk); } else { @@ -548,7 +549,7 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst, &ireq->ir_v6_rmt_addr); fl6->daddr = ireq->ir_v6_rmt_addr; - if (np->repflow && ireq->pktopts) + if (inet6_test_bit(REPFLOW, sk) && ireq->pktopts) fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts)); tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ? @@ -565,7 +566,7 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst, if (!opt) opt = rcu_dereference(np->opt); err = ip6_xmit(sk, skb, fl6, skb->mark ? 
: READ_ONCE(sk->sk_mark), - opt, tclass, sk->sk_priority); + opt, tclass, READ_ONCE(sk->sk_priority)); rcu_read_unlock(); err = net_xmit_eval(err); } @@ -606,8 +607,10 @@ static int tcp_v6_parse_md5_keys(struct sock *sk, int optname, { struct tcp_md5sig cmd; struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr; + union tcp_ao_addr *addr; int l3index = 0; u8 prefixlen; + bool l3flag; u8 flags; if (optlen < sizeof(cmd)) @@ -620,6 +623,7 @@ static int tcp_v6_parse_md5_keys(struct sock *sk, int optname, return -EINVAL; flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX; + l3flag = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX; if (optname == TCP_MD5SIG_EXT && cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) { @@ -660,17 +664,33 @@ static int tcp_v6_parse_md5_keys(struct sock *sk, int optname, if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN) return -EINVAL; - if (ipv6_addr_v4mapped(&sin6->sin6_addr)) - return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3], + if (ipv6_addr_v4mapped(&sin6->sin6_addr)) { + addr = (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3]; + + /* Don't allow keys for peers that have a matching TCP-AO key. + * See the comment in tcp_ao_add_cmd() + */ + if (tcp_ao_required(sk, addr, AF_INET, + l3flag ? l3index : -1, false)) + return -EKEYREJECTED; + return tcp_md5_do_add(sk, addr, AF_INET, prefixlen, l3index, flags, cmd.tcpm_key, cmd.tcpm_keylen); + } + + addr = (union tcp_md5_addr *)&sin6->sin6_addr; - return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr, - AF_INET6, prefixlen, l3index, flags, + /* Don't allow keys for peers that have a matching TCP-AO key. + * See the comment in tcp_ao_add_cmd() + */ + if (tcp_ao_required(sk, addr, AF_INET6, l3flag ? l3index : -1, false)) + return -EKEYREJECTED; + + return tcp_md5_do_add(sk, addr, AF_INET6, prefixlen, l3index, flags, cmd.tcpm_key, cmd.tcpm_keylen); } -static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp, +static int tcp_v6_md5_hash_headers(struct tcp_sigpool *hp, const struct in6_addr *daddr, const struct in6_addr *saddr, const struct tcphdr *th, int nbytes) @@ -691,39 +711,36 @@ static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp, _th->check = 0; sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th)); - ahash_request_set_crypt(hp->md5_req, &sg, NULL, + ahash_request_set_crypt(hp->req, &sg, NULL, sizeof(*bp) + sizeof(*th)); - return crypto_ahash_update(hp->md5_req); + return crypto_ahash_update(hp->req); } static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, const struct in6_addr *daddr, struct in6_addr *saddr, const struct tcphdr *th) { - struct tcp_md5sig_pool *hp; - struct ahash_request *req; + struct tcp_sigpool hp; - hp = tcp_get_md5sig_pool(); - if (!hp) - goto clear_hash_noput; - req = hp->md5_req; + if (tcp_sigpool_start(tcp_md5_sigpool_id, &hp)) + goto clear_hash_nostart; - if (crypto_ahash_init(req)) + if (crypto_ahash_init(hp.req)) goto clear_hash; - if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2)) + if (tcp_v6_md5_hash_headers(&hp, daddr, saddr, th, th->doff << 2)) goto clear_hash; - if (tcp_md5_hash_key(hp, key)) + if (tcp_md5_hash_key(&hp, key)) goto clear_hash; - ahash_request_set_crypt(req, NULL, md5_hash, 0); - if (crypto_ahash_final(req)) + ahash_request_set_crypt(hp.req, NULL, md5_hash, 0); + if (crypto_ahash_final(hp.req)) goto clear_hash; - tcp_put_md5sig_pool(); + tcp_sigpool_end(&hp); return 0; clear_hash: - tcp_put_md5sig_pool(); -clear_hash_noput: + tcp_sigpool_end(&hp); +clear_hash_nostart: memset(md5_hash, 0, 
16); return 1; } @@ -733,10 +750,9 @@ static int tcp_v6_md5_hash_skb(char *md5_hash, const struct sock *sk, const struct sk_buff *skb) { - const struct in6_addr *saddr, *daddr; - struct tcp_md5sig_pool *hp; - struct ahash_request *req; const struct tcphdr *th = tcp_hdr(skb); + const struct in6_addr *saddr, *daddr; + struct tcp_sigpool hp; if (sk) { /* valid for establish/request sockets */ saddr = &sk->sk_v6_rcv_saddr; @@ -747,34 +763,31 @@ static int tcp_v6_md5_hash_skb(char *md5_hash, daddr = &ip6h->daddr; } - hp = tcp_get_md5sig_pool(); - if (!hp) - goto clear_hash_noput; - req = hp->md5_req; + if (tcp_sigpool_start(tcp_md5_sigpool_id, &hp)) + goto clear_hash_nostart; - if (crypto_ahash_init(req)) + if (crypto_ahash_init(hp.req)) goto clear_hash; - if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len)) + if (tcp_v6_md5_hash_headers(&hp, daddr, saddr, th, skb->len)) goto clear_hash; - if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2)) + if (tcp_sigpool_hash_skb_data(&hp, skb, th->doff << 2)) goto clear_hash; - if (tcp_md5_hash_key(hp, key)) + if (tcp_md5_hash_key(&hp, key)) goto clear_hash; - ahash_request_set_crypt(req, NULL, md5_hash, 0); - if (crypto_ahash_final(req)) + ahash_request_set_crypt(hp.req, NULL, md5_hash, 0); + if (crypto_ahash_final(hp.req)) goto clear_hash; - tcp_put_md5sig_pool(); + tcp_sigpool_end(&hp); return 0; clear_hash: - tcp_put_md5sig_pool(); -clear_hash_noput: + tcp_sigpool_end(&hp); +clear_hash_nostart: memset(md5_hash, 0, 16); return 1; } - #endif static void tcp_v6_init_req(struct request_sock *req, @@ -797,7 +810,7 @@ static void tcp_v6_init_req(struct request_sock *req, (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) || np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim || - np->rxopt.bits.rxohlim || np->repflow)) { + np->rxopt.bits.rxohlim || inet6_test_bit(REPFLOW, sk_listener))) { refcount_inc(&skb->users); ireq->pktopts = skb; } @@ -833,6 +846,11 @@ const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { .req_md5_lookup = tcp_v6_md5_lookup, .calc_md5_hash = tcp_v6_md5_hash_skb, #endif +#ifdef CONFIG_TCP_AO + .ao_lookup = tcp_v6_ao_lookup_rsk, + .ao_calc_key = tcp_v6_ao_calc_key_rsk, + .ao_synack_hash = tcp_v6_ao_synack_hash, +#endif #ifdef CONFIG_SYN_COOKIES .cookie_init_seq = cookie_v6_init_sequence, #endif @@ -844,8 +862,8 @@ const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 tsval, u32 tsecr, - int oif, struct tcp_md5sig_key *key, int rst, - u8 tclass, __be32 label, u32 priority, u32 txhash) + int oif, int rst, u8 tclass, __be32 label, + u32 priority, u32 txhash, struct tcp_key *key) { const struct tcphdr *th = tcp_hdr(skb); struct tcphdr *t1; @@ -860,13 +878,13 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 if (tsecr) tot_len += TCPOLEN_TSTAMP_ALIGNED; -#ifdef CONFIG_TCP_MD5SIG - if (key) + if (tcp_key_is_md5(key)) tot_len += TCPOLEN_MD5SIG_ALIGNED; -#endif + if (tcp_key_is_ao(key)) + tot_len += tcp_ao_len_aligned(key->ao_key); #ifdef CONFIG_MPTCP - if (rst && !key) { + if (rst && !tcp_key_is_md5(key)) { mrst = mptcp_reset_option(skb); if (mrst) @@ -907,14 +925,28 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 *topt++ = mrst; #ifdef CONFIG_TCP_MD5SIG - if (key) { + if (tcp_key_is_md5(key)) { *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG); - 
tcp_v6_md5_hash_hdr((__u8 *)topt, key, + tcp_v6_md5_hash_hdr((__u8 *)topt, key->md5_key, &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, t1); } #endif +#ifdef CONFIG_TCP_AO + if (tcp_key_is_ao(key)) { + *topt++ = htonl((TCPOPT_AO << 24) | + (tcp_ao_len(key->ao_key) << 16) | + (key->ao_key->sndid << 8) | + (key->rcv_next)); + + tcp_ao_hash_hdr(AF_INET6, (char *)topt, key->ao_key, + key->traffic_key, + (union tcp_ao_addr *)&ipv6_hdr(skb)->saddr, + (union tcp_ao_addr *)&ipv6_hdr(skb)->daddr, + t1, key->sne); + } +#endif memset(&fl6, 0, sizeof(fl6)); fl6.daddr = ipv6_hdr(skb)->saddr; @@ -977,19 +1009,23 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) { const struct tcphdr *th = tcp_hdr(skb); struct ipv6hdr *ipv6h = ipv6_hdr(skb); - u32 seq = 0, ack_seq = 0; - struct tcp_md5sig_key *key = NULL; -#ifdef CONFIG_TCP_MD5SIG - const __u8 *hash_location = NULL; - unsigned char newhash[16]; - int genhash; - struct sock *sk1 = NULL; + const __u8 *md5_hash_location = NULL; +#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) + bool allocated_traffic_key = false; #endif + const struct tcp_ao_hdr *aoh; + struct tcp_key key = {}; + u32 seq = 0, ack_seq = 0; __be32 label = 0; u32 priority = 0; struct net *net; u32 txhash = 0; int oif = 0; +#ifdef CONFIG_TCP_MD5SIG + unsigned char newhash[16]; + int genhash; + struct sock *sk1 = NULL; +#endif if (th->rst) return; @@ -1001,9 +1037,13 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) return; net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev); -#ifdef CONFIG_TCP_MD5SIG + /* Invalid TCP option size or twice included auth */ + if (tcp_parse_auth_options(th, &md5_hash_location, &aoh)) + return; +#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) rcu_read_lock(); - hash_location = tcp_parse_md5sig_option(th); +#endif +#ifdef CONFIG_TCP_MD5SIG if (sk && sk_fullsock(sk)) { int l3index; @@ -1011,8 +1051,10 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) * in an L3 domain and inet_iif is set to it. */ l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0; - key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index); - } else if (hash_location) { + key.md5_key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index); + if (key.md5_key) + key.type = TCP_KEY_MD5; + } else if (md5_hash_location) { int dif = tcp_v6_iif_l3_slave(skb); int sdif = tcp_v6_sdif(skb); int l3index; @@ -1036,12 +1078,13 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) */ l3index = tcp_v6_sdif(skb) ? dif : 0; - key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index); - if (!key) + key.md5_key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index); + if (!key.md5_key) goto out; + key.type = TCP_KEY_MD5; - genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb); - if (genhash || memcmp(hash_location, newhash, 16) != 0) + genhash = tcp_v6_md5_hash_skb(newhash, key.md5_key, NULL, skb); + if (genhash || memcmp(md5_hash_location, newhash, 16) != 0) goto out; } #endif @@ -1052,15 +1095,27 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len - (th->doff << 2); +#ifdef CONFIG_TCP_AO + if (aoh) { + int l3index; + + l3index = tcp_v6_sdif(skb) ? 
tcp_v6_iif_l3_slave(skb) : 0; + if (tcp_ao_prepare_reset(sk, skb, aoh, l3index, seq, + &key.ao_key, &key.traffic_key, + &allocated_traffic_key, + &key.rcv_next, &key.sne)) + goto out; + key.type = TCP_KEY_AO; + } +#endif + if (sk) { oif = sk->sk_bound_dev_if; if (sk_fullsock(sk)) { - const struct ipv6_pinfo *np = tcp_inet6_sk(sk); - trace_tcp_send_reset(sk, skb); - if (np->repflow) + if (inet6_test_bit(REPFLOW, sk)) label = ip6_flowlabel(ipv6h); - priority = sk->sk_priority; + priority = READ_ONCE(sk->sk_priority); txhash = sk->sk_txhash; } if (sk->sk_state == TCP_TIME_WAIT) { @@ -1073,45 +1128,141 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) label = ip6_flowlabel(ipv6h); } - tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, - ipv6_get_dsfield(ipv6h), label, priority, txhash); + tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, 1, + ipv6_get_dsfield(ipv6h), label, priority, txhash, + &key); -#ifdef CONFIG_TCP_MD5SIG +#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) out: + if (allocated_traffic_key) + kfree(key.traffic_key); rcu_read_unlock(); #endif } static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 tsval, u32 tsecr, int oif, - struct tcp_md5sig_key *key, u8 tclass, + struct tcp_key *key, u8 tclass, __be32 label, u32 priority, u32 txhash) { - tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0, - tclass, label, priority, txhash); + tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, 0, + tclass, label, priority, txhash, key); } static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) { struct inet_timewait_sock *tw = inet_twsk(sk); struct tcp_timewait_sock *tcptw = tcp_twsk(sk); + struct tcp_key key = {}; +#ifdef CONFIG_TCP_AO + struct tcp_ao_info *ao_info; + + if (static_branch_unlikely(&tcp_ao_needed.key)) { + + /* FIXME: the segment to-be-acked is not verified yet */ + ao_info = rcu_dereference(tcptw->ao_info); + if (ao_info) { + const struct tcp_ao_hdr *aoh; + + /* Invalid TCP option size or twice included auth */ + if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh)) + goto out; + if (aoh) + key.ao_key = tcp_ao_established_key(ao_info, + aoh->rnext_keyid, -1); + } + } + if (key.ao_key) { + struct tcp_ao_key *rnext_key; + + key.traffic_key = snd_other_key(key.ao_key); + /* rcv_next switches to our rcv_next */ + rnext_key = READ_ONCE(ao_info->rnext_key); + key.rcv_next = rnext_key->rcvid; + key.sne = READ_ONCE(ao_info->snd_sne); + key.type = TCP_KEY_AO; +#else + if (0) { +#endif +#ifdef CONFIG_TCP_MD5SIG + } else if (static_branch_unlikely(&tcp_md5_needed.key)) { + key.md5_key = tcp_twsk_md5_key(tcptw); + if (key.md5_key) + key.type = TCP_KEY_MD5; +#endif + } tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, - tcp_time_stamp_raw() + tcptw->tw_ts_offset, - tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw), + tcp_tw_tsval(tcptw), + tcptw->tw_ts_recent, tw->tw_bound_dev_if, &key, tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority, tw->tw_txhash); +#ifdef CONFIG_TCP_AO +out: +#endif inet_twsk_put(tw); } static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, struct request_sock *req) { - int l3index; + struct tcp_key key = {}; + +#ifdef CONFIG_TCP_AO + if (static_branch_unlikely(&tcp_ao_needed.key) && + tcp_rsk_used_ao(req)) { + const struct in6_addr *addr = &ipv6_hdr(skb)->saddr; + const struct tcp_ao_hdr *aoh; + int l3index; + + 
l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0; + /* Invalid TCP option size or twice included auth */ + if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh)) + return; + if (!aoh) + return; + key.ao_key = tcp_ao_do_lookup(sk, l3index, + (union tcp_ao_addr *)addr, + AF_INET6, aoh->rnext_keyid, -1); + if (unlikely(!key.ao_key)) { + /* Send ACK with any matching MKT for the peer */ + key.ao_key = tcp_ao_do_lookup(sk, l3index, + (union tcp_ao_addr *)addr, + AF_INET6, -1, -1); + /* Matching key disappeared (user removed the key?) + * let the handshake timeout. + */ + if (!key.ao_key) { + net_info_ratelimited("TCP-AO key for (%pI6, %d)->(%pI6, %d) suddenly disappeared, won't ACK new connection\n", + addr, + ntohs(tcp_hdr(skb)->source), + &ipv6_hdr(skb)->daddr, + ntohs(tcp_hdr(skb)->dest)); + return; + } + } + key.traffic_key = kmalloc(tcp_ao_digest_size(key.ao_key), GFP_ATOMIC); + if (!key.traffic_key) + return; - l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0; + key.type = TCP_KEY_AO; + key.rcv_next = aoh->keyid; + tcp_v6_ao_calc_key_rsk(key.ao_key, key.traffic_key, req); +#else + if (0) { +#endif +#ifdef CONFIG_TCP_MD5SIG + } else if (static_branch_unlikely(&tcp_md5_needed.key)) { + int l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0; + + key.md5_key = tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, + l3index); + if (key.md5_key) + key.type = TCP_KEY_MD5; +#endif + } /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV * sk->sk_state == TCP_SYN_RECV -> for Fast Open. @@ -1125,12 +1276,13 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt, tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale, - tcp_time_stamp_raw() + tcp_rsk(req)->ts_off, + tcp_rsk_tsval(tcp_rsk(req)), READ_ONCE(req->ts_recent), sk->sk_bound_dev_if, - tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index), - ipv6_get_dsfield(ipv6_hdr(skb)), 0, + &key, ipv6_get_dsfield(ipv6_hdr(skb)), 0, READ_ONCE(sk->sk_priority), READ_ONCE(tcp_rsk(req)->txhash)); + if (tcp_key_is_ao(&key)) + kfree(key.traffic_key); } @@ -1235,7 +1387,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * if (sk_is_mptcp(newsk)) mptcpv6_handle_mapped(newsk, true); newsk->sk_backlog_rcv = tcp_v4_do_rcv; -#ifdef CONFIG_TCP_MD5SIG +#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) newtp->af_specific = &tcp_sock_ipv6_mapped_specific; #endif @@ -1247,7 +1399,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * newnp->mcast_oif = inet_iif(skb); newnp->mcast_hops = ip_hdr(skb)->ttl; newnp->rcv_flowinfo = 0; - if (np->repflow) + if (inet6_test_bit(REPFLOW, sk)) newnp->flow_label = 0; /* @@ -1320,7 +1472,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * newnp->mcast_oif = tcp_v6_iif(skb); newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb)); - if (np->repflow) + if (inet6_test_bit(REPFLOW, sk)) newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb)); /* Set ToS of the new socket based upon the value of incoming SYN. 
@@ -1360,19 +1512,26 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * #ifdef CONFIG_TCP_MD5SIG l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif); - /* Copy over the MD5 key from the original socket */ - key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index); - if (key) { - const union tcp_md5_addr *addr; - - addr = (union tcp_md5_addr *)&newsk->sk_v6_daddr; - if (tcp_md5_key_copy(newsk, addr, AF_INET6, 128, l3index, key)) { - inet_csk_prepare_forced_close(newsk); - tcp_done(newsk); - goto out; + if (!tcp_rsk_used_ao(req)) { + /* Copy over the MD5 key from the original socket */ + key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index); + if (key) { + const union tcp_md5_addr *addr; + + addr = (union tcp_md5_addr *)&newsk->sk_v6_daddr; + if (tcp_md5_key_copy(newsk, addr, AF_INET6, 128, l3index, key)) { + inet_csk_prepare_forced_close(newsk); + tcp_done(newsk); + goto out; + } } } #endif +#ifdef CONFIG_TCP_AO + /* Copy over tcp_ao_info if any */ + if (tcp_ao_copy_all_matching(sk, newsk, req, skb, AF_INET6)) + goto out; /* OOM */ +#endif if (__inet_inherit_port(sk, newsk) < 0) { inet_csk_prepare_forced_close(newsk); @@ -1542,10 +1701,11 @@ ipv6_pktoptions: if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo) np->mcast_oif = tcp_v6_iif(opt_skb); if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) - np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit; + WRITE_ONCE(np->mcast_hops, + ipv6_hdr(opt_skb)->hop_limit); if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass) np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb)); - if (np->repflow) + if (inet6_test_bit(REPFLOW, sk)) np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb)); if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) { tcp_v6_restore_cb(opt_skb); @@ -1643,9 +1803,9 @@ process: if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) drop_reason = SKB_DROP_REASON_XFRM_POLICY; else - drop_reason = tcp_inbound_md5_hash(sk, skb, - &hdr->saddr, &hdr->daddr, - AF_INET6, dif, sdif); + drop_reason = tcp_inbound_hash(sk, req, skb, + &hdr->saddr, &hdr->daddr, + AF_INET6, dif, sdif); if (drop_reason) { sk_drops_add(sk, skb); reqsk_put(req); @@ -1719,8 +1879,8 @@ process: goto discard_and_relse; } - drop_reason = tcp_inbound_md5_hash(sk, skb, &hdr->saddr, &hdr->daddr, - AF_INET6, dif, sdif); + drop_reason = tcp_inbound_hash(sk, NULL, skb, &hdr->saddr, &hdr->daddr, + AF_INET6, dif, sdif); if (drop_reason) goto discard_and_relse; @@ -1895,7 +2055,6 @@ const struct inet_connection_sock_af_ops ipv6_specific = { .conn_request = tcp_v6_conn_request, .syn_recv_sock = tcp_v6_syn_recv_sock, .net_header_len = sizeof(struct ipv6hdr), - .net_frag_header_len = sizeof(struct frag_hdr), .setsockopt = ipv6_setsockopt, .getsockopt = ipv6_getsockopt, .addr2sockaddr = inet6_csk_addr2sockaddr, @@ -1903,11 +2062,19 @@ const struct inet_connection_sock_af_ops ipv6_specific = { .mtu_reduced = tcp_v6_mtu_reduced, }; -#ifdef CONFIG_TCP_MD5SIG +#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = { +#ifdef CONFIG_TCP_MD5SIG .md5_lookup = tcp_v6_md5_lookup, .calc_md5_hash = tcp_v6_md5_hash_skb, .md5_parse = tcp_v6_parse_md5_keys, +#endif +#ifdef CONFIG_TCP_AO + .ao_lookup = tcp_v6_ao_lookup, + .calc_ao_hash = tcp_v6_ao_hash_skb, + .ao_parse = tcp_v6_parse_ao, + .ao_calc_key_sk = tcp_v6_ao_calc_key_sk, +#endif }; #endif @@ -1929,11 +2096,19 @@ static const struct inet_connection_sock_af_ops ipv6_mapped = { .mtu_reduced = tcp_v4_mtu_reduced, }; 
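
The tcp_ipv6.c hunks above and below widen every "#ifdef CONFIG_TCP_MD5SIG" guard to "#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)", so a single af_specific ops table can carry both the MD5 and the new TCP-AO callbacks while each slot stays gated by its own config symbol. A minimal userspace sketch of that pattern (demo_af_ops and the demo_* functions are hypothetical names, not kernel API), assuming both features are enabled:

#include <stdio.h>

#define CONFIG_TCP_MD5SIG 1
#define CONFIG_TCP_AO 1

#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
/* the table exists if either feature is built in */
struct demo_af_ops {
#ifdef CONFIG_TCP_MD5SIG
	void (*md5_parse)(void);	/* slot exists only with MD5 */
#endif
#ifdef CONFIG_TCP_AO
	void (*ao_parse)(void);		/* slot exists only with TCP-AO */
#endif
};

static void demo_md5_parse(void) { puts("parse TCP_MD5SIG option"); }
static void demo_ao_parse(void)  { puts("parse TCP_AO option"); }

static const struct demo_af_ops demo_specific = {
#ifdef CONFIG_TCP_MD5SIG
	.md5_parse = demo_md5_parse,
#endif
#ifdef CONFIG_TCP_AO
	.ao_parse = demo_ao_parse,
#endif
};
#endif /* CONFIG_TCP_MD5SIG || CONFIG_TCP_AO */

int main(void)
{
#ifdef CONFIG_TCP_MD5SIG
	demo_specific.md5_parse();
#endif
#ifdef CONFIG_TCP_AO
	demo_specific.ao_parse();
#endif
	return 0;
}
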
-#ifdef CONFIG_TCP_MD5SIG +#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = { +#ifdef CONFIG_TCP_MD5SIG .md5_lookup = tcp_v4_md5_lookup, .calc_md5_hash = tcp_v4_md5_hash_skb, .md5_parse = tcp_v6_parse_md5_keys, +#endif +#ifdef CONFIG_TCP_AO + .ao_lookup = tcp_v6_ao_lookup, + .calc_ao_hash = tcp_v4_ao_hash_skb, + .ao_parse = tcp_v6_parse_ao, + .ao_calc_key_sk = tcp_v4_ao_calc_key_sk, +#endif }; #endif @@ -1948,7 +2123,7 @@ static int tcp_v6_init_sock(struct sock *sk) icsk->icsk_af_ops = &ipv6_specific; -#ifdef CONFIG_TCP_MD5SIG +#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific; #endif diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 438476a313..a1a79ff46f 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -599,7 +599,7 @@ int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (!ip6_sk_accept_pmtu(sk)) goto out; ip6_sk_update_pmtu(skb, sk, info); - if (np->pmtudisc != IPV6_PMTUDISC_DONT) + if (READ_ONCE(np->pmtudisc) != IPV6_PMTUDISC_DONT) harderr = 1; } if (type == NDISC_REDIRECT) { @@ -620,7 +620,7 @@ int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, goto out; } - if (!np->recverr) { + if (!inet6_test_bit(RECVERR6, sk)) { if (!harderr || sk->sk_state != TCP_ESTABLISHED) goto out; } else { @@ -1284,7 +1284,7 @@ csum_partial: send: err = ip6_send_skb(skb); if (err) { - if (err == -ENOBUFS && !inet6_sk(sk)->recverr) { + if (err == -ENOBUFS && !inet6_test_bit(RECVERR6, sk)) { UDP6_INC_STATS(sock_net(sk), UDP_MIB_SNDBUFERRORS, is_udplite); err = 0; @@ -1430,7 +1430,7 @@ do_udp_sendmsg: fl6->fl6_dport = sin6->sin6_port; daddr = &sin6->sin6_addr; - if (np->sndflow) { + if (inet6_test_bit(SNDFLOW, sk)) { fl6->flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK; if (fl6->flowlabel & IPV6_FLOWLABEL_MASK) { flowlabel = fl6_sock_lookup(sk, fl6->flowlabel); @@ -1597,7 +1597,7 @@ back_from_confirm: do_append_data: if (ipc6.dontfrag < 0) - ipc6.dontfrag = np->dontfrag; + ipc6.dontfrag = inet6_test_bit(DONTFRAG, sk); up->len += ulen; err = ip6_append_data(sk, getfrag, msg, ulen, sizeof(struct udphdr), &ipc6, fl6, (struct rt6_info *)dst, @@ -1610,7 +1610,7 @@ do_append_data: WRITE_ONCE(up->pending, 0); if (err > 0) - err = np->recverr ? net_xmit_errno(err) : 0; + err = inet6_test_bit(RECVERR6, sk) ? net_xmit_errno(err) : 0; release_sock(sk); out: diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index 4156387248..6e36e5047f 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -16,6 +16,8 @@ #include <linux/netfilter_ipv6.h> #include <net/ipv6.h> #include <net/xfrm.h> +#include <net/protocol.h> +#include <net/gro.h> int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi, struct ip6_tnl *t) @@ -67,14 +69,7 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async) return 0; } -/* If it's a keepalive packet, then just eat it. - * If it's an encapsulated packet, then pass it to the - * IPsec xfrm input. - * Returns 0 if skb passed to xfrm or was dropped. - * Returns >0 if skb should be passed to UDP. 
- * Returns <0 if skb should be resubmitted (-ret is protocol) - */ -int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) +static int __xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb, bool pull) { struct udp_sock *up = udp_sk(sk); struct udphdr *uh; @@ -85,9 +80,6 @@ int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) __be32 *udpdata32; u16 encap_type; - if (skb->protocol == htons(ETH_P_IP)) - return xfrm4_udp_encap_rcv(sk, skb); - encap_type = READ_ONCE(up->encap_type); /* if this is not encapsulated socket, then just return now */ if (!encap_type) @@ -109,7 +101,7 @@ int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) case UDP_ENCAP_ESPINUDP: /* Check if this is a keepalive packet. If so, eat it. */ if (len == 1 && udpdata[0] == 0xff) { - goto drop; + return -EINVAL; } else if (len > sizeof(struct ip_esp_hdr) && udpdata32[0] != 0) { /* ESP Packet without Non-ESP header */ len = sizeof(struct udphdr); @@ -120,7 +112,7 @@ int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) case UDP_ENCAP_ESPINUDP_NON_IKE: /* Check if this is a keepalive packet. If so, eat it. */ if (len == 1 && udpdata[0] == 0xff) { - goto drop; + return -EINVAL; } else if (len > 2 * sizeof(u32) + sizeof(struct ip_esp_hdr) && udpdata32[0] == 0 && udpdata32[1] == 0) { @@ -138,31 +130,100 @@ int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) * protocol to ESP, and then call into the transform receiver. */ if (skb_unclone(skb, GFP_ATOMIC)) - goto drop; + return -EINVAL; /* Now we can update and verify the packet length... */ ip6h = ipv6_hdr(skb); ip6h->payload_len = htons(ntohs(ip6h->payload_len) - len); if (skb->len < ip6hlen + len) { /* packet is too small!?! */ - goto drop; + return -EINVAL; } /* pull the data buffer up to the ESP header and set the * transport header to point to ESP. Keep UDP on the stack * for later. */ - __skb_pull(skb, len); - skb_reset_transport_header(skb); + if (pull) { + __skb_pull(skb, len); + skb_reset_transport_header(skb); + } else { + skb_set_transport_header(skb, len); + } /* process ESP */ - return xfrm6_rcv_encap(skb, IPPROTO_ESP, 0, encap_type); - -drop: - kfree_skb(skb); return 0; } +/* If it's a keepalive packet, then just eat it. + * If it's an encapsulated packet, then pass it to the + * IPsec xfrm input. + * Returns 0 if skb passed to xfrm or was dropped. + * Returns >0 if skb should be passed to UDP. 
+ * Returns <0 if skb should be resubmitted (-ret is protocol) + */ +int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) +{ + int ret; + + if (skb->protocol == htons(ETH_P_IP)) + return xfrm4_udp_encap_rcv(sk, skb); + + ret = __xfrm6_udp_encap_rcv(sk, skb, true); + if (!ret) + return xfrm6_rcv_encap(skb, IPPROTO_ESP, 0, + udp_sk(sk)->encap_type); + + if (ret < 0) { + kfree_skb(skb); + return 0; + } + + return ret; +} + +struct sk_buff *xfrm6_gro_udp_encap_rcv(struct sock *sk, struct list_head *head, + struct sk_buff *skb) +{ + int offset = skb_gro_offset(skb); + const struct net_offload *ops; + struct sk_buff *pp = NULL; + int ret; + + if (skb->protocol == htons(ETH_P_IP)) + return xfrm4_gro_udp_encap_rcv(sk, head, skb); + + offset = offset - sizeof(struct udphdr); + + if (!pskb_pull(skb, offset)) + return NULL; + + rcu_read_lock(); + ops = rcu_dereference(inet6_offloads[IPPROTO_ESP]); + if (!ops || !ops->callbacks.gro_receive) + goto out; + + ret = __xfrm6_udp_encap_rcv(sk, skb, false); + if (ret) + goto out; + + skb_push(skb, offset); + NAPI_GRO_CB(skb)->proto = IPPROTO_UDP; + + pp = call_gro_receive(ops->callbacks.gro_receive, head, skb); + rcu_read_unlock(); + + return pp; + +out: + rcu_read_unlock(); + skb_push(skb, offset); + NAPI_GRO_CB(skb)->same_flow = 0; + NAPI_GRO_CB(skb)->flush = 1; + + return NULL; +} + int xfrm6_rcv_tnl(struct sk_buff *skb, struct ip6_tnl *t) { return xfrm6_rcv_spi(skb, skb_network_header(skb)[IP6CB(skb)->nhoff], diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index ad07904642..5f7b1fdbff 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -95,7 +95,7 @@ static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb) return -EMSGSIZE; } - if (toobig || dst_allfrag(skb_dst(skb))) + if (toobig) return ip6_fragment(net, sk, skb, __xfrm6_output_finish); diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index fc3fddeb6f..0ed6e34d6e 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -1823,7 +1823,7 @@ static int __init iucv_init(void) rc = -EPROTONOSUPPORT; goto out; } - ctl_set_bit(0, 1); + system_ctl_set_bit(0, CR0_IUCV_BIT); rc = iucv_query_maxconn(); if (rc) goto out_ctl; @@ -1871,7 +1871,7 @@ out_dev: out_int: unregister_external_irq(EXT_IRQ_IUCV, iucv_external_interrupt); out_ctl: - ctl_clear_bit(0, 1); + system_ctl_clear_bit(0, 1); out: return rc; } diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index dd1d8ffd5f..1184d40167 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -634,7 +634,7 @@ retry: msize = 0; for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) - msize += skb_shinfo(skb)->frags[i].bv_len; + msize += skb_frag_size(&skb_shinfo(skb)->frags[i]); iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, skb_shinfo(skb)->frags, skb_shinfo(skb)->nr_frags, @@ -1946,4 +1946,5 @@ module_init(kcm_init); module_exit(kcm_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("KCM (Kernel Connection Multiplexor) sockets"); MODULE_ALIAS_NETPROTO(PF_KCM); diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index f2ae03c404..25ca89f804 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -37,12 +37,6 @@ /* via netdev_priv() */ struct l2tp_eth { struct l2tp_session *session; - atomic_long_t tx_bytes; - atomic_long_t tx_packets; - atomic_long_t tx_dropped; - atomic_long_t rx_bytes; - atomic_long_t rx_packets; - atomic_long_t rx_errors; }; /* via l2tp_session_priv() */ @@ -79,10 +73,10 @@ static netdev_tx_t l2tp_eth_dev_xmit(struct sk_buff *skb, struct net_device *dev int ret = l2tp_xmit_skb(session, skb); 
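
The l2tp_eth hunk continuing below drops the driver-private atomic_long_t counters (removed above) and accounts into the net_device core stats through DEV_STATS_INC()/DEV_STATS_ADD(), reading them back with DEV_STATS_READ(). A rough userspace model of that counter pattern; demo_dev and the DEMO_STATS_* macros are made-up stand-ins, with C11 atomics playing the role of the kernel's per-field atomic counters:

#include <stdatomic.h>
#include <stdio.h>

struct demo_dev {
	struct {			/* owned by the "core", not the driver */
		atomic_ulong tx_packets;
		atomic_ulong tx_bytes;
		atomic_ulong tx_dropped;
	} stats;
};

#define DEMO_STATS_INC(dev, f)    atomic_fetch_add(&(dev)->stats.f, 1)
#define DEMO_STATS_ADD(dev, f, n) atomic_fetch_add(&(dev)->stats.f, (n))
#define DEMO_STATS_READ(dev, f)   atomic_load(&(dev)->stats.f)

static void demo_xmit(struct demo_dev *dev, unsigned long len, int ok)
{
	if (ok) {
		DEMO_STATS_ADD(dev, tx_bytes, len);
		DEMO_STATS_INC(dev, tx_packets);
	} else {
		DEMO_STATS_INC(dev, tx_dropped);
	}
}

int main(void)
{
	struct demo_dev dev = { 0 };

	demo_xmit(&dev, 1500, 1);
	demo_xmit(&dev, 60, 0);
	printf("tx_packets=%lu tx_bytes=%lu tx_dropped=%lu\n",
	       DEMO_STATS_READ(&dev, tx_packets),
	       DEMO_STATS_READ(&dev, tx_bytes),
	       DEMO_STATS_READ(&dev, tx_dropped));
	return 0;
}
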
if (likely(ret == NET_XMIT_SUCCESS)) { - atomic_long_add(len, &priv->tx_bytes); - atomic_long_inc(&priv->tx_packets); + DEV_STATS_ADD(dev, tx_bytes, len); + DEV_STATS_INC(dev, tx_packets); } else { - atomic_long_inc(&priv->tx_dropped); + DEV_STATS_INC(dev, tx_dropped); } return NETDEV_TX_OK; } @@ -90,14 +84,12 @@ static netdev_tx_t l2tp_eth_dev_xmit(struct sk_buff *skb, struct net_device *dev static void l2tp_eth_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) { - struct l2tp_eth *priv = netdev_priv(dev); - - stats->tx_bytes = (unsigned long)atomic_long_read(&priv->tx_bytes); - stats->tx_packets = (unsigned long)atomic_long_read(&priv->tx_packets); - stats->tx_dropped = (unsigned long)atomic_long_read(&priv->tx_dropped); - stats->rx_bytes = (unsigned long)atomic_long_read(&priv->rx_bytes); - stats->rx_packets = (unsigned long)atomic_long_read(&priv->rx_packets); - stats->rx_errors = (unsigned long)atomic_long_read(&priv->rx_errors); + stats->tx_bytes = DEV_STATS_READ(dev, tx_bytes); + stats->tx_packets = DEV_STATS_READ(dev, tx_packets); + stats->tx_dropped = DEV_STATS_READ(dev, tx_dropped); + stats->rx_bytes = DEV_STATS_READ(dev, rx_bytes); + stats->rx_packets = DEV_STATS_READ(dev, rx_packets); + stats->rx_errors = DEV_STATS_READ(dev, rx_errors); } static const struct net_device_ops l2tp_eth_netdev_ops = { @@ -126,7 +118,6 @@ static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb, { struct l2tp_eth_sess *spriv = l2tp_session_priv(session); struct net_device *dev; - struct l2tp_eth *priv; if (!pskb_may_pull(skb, ETH_HLEN)) goto error; @@ -144,12 +135,11 @@ static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb, if (!dev) goto error_rcu; - priv = netdev_priv(dev); if (dev_forward_skb(dev, skb) == NET_RX_SUCCESS) { - atomic_long_inc(&priv->rx_packets); - atomic_long_add(data_len, &priv->rx_bytes); + DEV_STATS_INC(dev, rx_packets); + DEV_STATS_ADD(dev, rx_bytes, data_len); } else { - atomic_long_inc(&priv->rx_errors); + DEV_STATS_INC(dev, rx_errors); } rcu_read_unlock(); diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 11f3d375ce..763a59414b 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -431,7 +431,7 @@ static int l2tp_ip6_getname(struct socket *sock, struct sockaddr *uaddr, return -ENOTCONN; lsa->l2tp_conn_id = lsk->peer_conn_id; lsa->l2tp_addr = sk->sk_v6_daddr; - if (np->sndflow) + if (inet6_test_bit(SNDFLOW, sk)) lsa->l2tp_flowinfo = np->flow_label; } else { if (ipv6_addr_any(&sk->sk_v6_rcv_saddr)) @@ -528,7 +528,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) return -EAFNOSUPPORT; daddr = &lsa->l2tp_addr; - if (np->sndflow) { + if (inet6_test_bit(SNDFLOW, sk)) { fl6.flowlabel = lsa->l2tp_flowinfo & IPV6_FLOWINFO_MASK; if (fl6.flowlabel & IPV6_FLOWLABEL_MASK) { flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); @@ -620,14 +620,14 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); if (ipc6.dontfrag < 0) - ipc6.dontfrag = np->dontfrag; + ipc6.dontfrag = inet6_test_bit(DONTFRAG, sk); if (msg->msg_flags & MSG_CONFIRM) goto do_confirm; back_from_confirm: lock_sock(sk); - ulen = len + skb_queue_empty(&sk->sk_write_queue) ? transhdrlen : 0; + ulen = len + (skb_queue_empty(&sk->sk_write_queue) ? 
transhdrlen : 0); err = ip6_append_data(sk, ip_generic_getfrag, msg, ulen, transhdrlen, &ipc6, &fl6, (struct rt6_info *)dst, diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 20551cfb7d..fde1140d89 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -226,6 +226,8 @@ static int llc_ui_release(struct socket *sock) } netdev_put(llc->dev, &llc->dev_tracker); sock_put(sk); + sock_orphan(sk); + sock->sk = NULL; llc_sk_free(sk); out: return 0; diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig index 51ec8256b7..cb0291decf 100644 --- a/net/mac80211/Kconfig +++ b/net/mac80211/Kconfig @@ -57,6 +57,17 @@ endif comment "Some wireless drivers require a rate control algorithm" depends on MAC80211 && MAC80211_HAS_RC=n +config MAC80211_KUNIT_TEST + tristate "KUnit tests for mac80211" if !KUNIT_ALL_TESTS + depends on KUNIT + depends on MAC80211 + default KUNIT_ALL_TESTS + depends on !KERNEL_6_2 + help + Enable this option to test mac80211 internals with kunit. + + If unsure, say N. + config MAC80211_MESH bool "Enable mac80211 mesh networking support" depends on MAC80211 @@ -77,7 +88,7 @@ config MAC80211_LEDS config MAC80211_DEBUGFS bool "Export mac80211 internals in DebugFS" - depends on MAC80211 && DEBUG_FS + depends on MAC80211 && CFG80211_DEBUGFS help Select this to see extensive information about the internal state of mac80211 in debugfs. diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile index b8de44da1f..c9eb527681 100644 --- a/net/mac80211/Makefile +++ b/net/mac80211/Makefile @@ -65,4 +65,6 @@ rc80211_minstrel-$(CONFIG_MAC80211_DEBUGFS) += \ mac80211-$(CONFIG_MAC80211_RC_MINSTREL) += $(rc80211_minstrel-y) +obj-y += tests/ + ccflags-y += -DDEBUG diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index c6fa532304..9bffac7a49 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -9,7 +9,7 @@ * Copyright 2007, Michael Wu <flamingice@sourmilk.net> * Copyright 2007-2010, Intel Corporation * Copyright(c) 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018-2022 Intel Corporation + * Copyright (C) 2018-2023 Intel Corporation */ /** @@ -55,8 +55,8 @@ static void ieee80211_free_tid_rx(struct rcu_head *h) kfree(tid_rx); } -void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, - u16 initiator, u16 reason, bool tx) +void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, + u16 initiator, u16 reason, bool tx) { struct ieee80211_local *local = sta->local; struct tid_ampdu_rx *tid_rx; @@ -69,10 +69,10 @@ void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, .ssn = 0, }; - lockdep_assert_held(&sta->ampdu_mlme.mtx); + lockdep_assert_wiphy(sta->local->hw.wiphy); tid_rx = rcu_dereference_protected(sta->ampdu_mlme.tid_rx[tid], - lockdep_is_held(&sta->ampdu_mlme.mtx)); + lockdep_is_held(&sta->local->hw.wiphy->mtx)); if (!test_bit(tid, sta->ampdu_mlme.agg_session_valid)) return; @@ -114,14 +114,6 @@ void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, call_rcu(&tid_rx->rcu_head, ieee80211_free_tid_rx); } -void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, - u16 initiator, u16 reason, bool tx) -{ - mutex_lock(&sta->ampdu_mlme.mtx); - ___ieee80211_stop_rx_ba_session(sta, tid, initiator, reason, tx); - mutex_unlock(&sta->ampdu_mlme.mtx); -} - void ieee80211_stop_rx_ba_session(struct ieee80211_vif *vif, u16 ba_rx_bitmap, const u8 *addr) { @@ -140,7 +132,7 @@ void ieee80211_stop_rx_ba_session(struct ieee80211_vif *vif, u16 ba_rx_bitmap, if (ba_rx_bitmap & BIT(i)) set_bit(i, sta->ampdu_mlme.tid_rx_stop_requested); 
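
The l2tp_ip6_sendmsg() hunk a little above is a pure operator-precedence fix: '+' binds tighter than '?:', so the old "ulen = len + skb_queue_empty(...) ? transhdrlen : 0" evaluated the whole sum as the ternary condition and threw the length away. A tiny standalone demonstration of the difference (queue_empty() is a hypothetical stand-in for skb_queue_empty()):

#include <stdio.h>

static int queue_empty(void)
{
	return 1;	/* pretend the write queue is empty */
}

int main(void)
{
	int len = 100, transhdrlen = 8;

	/* buggy form: parsed as (len + queue_empty()) ? transhdrlen : 0,
	 * i.e. any nonzero sum collapses to just transhdrlen */
	int before = len + queue_empty() ? transhdrlen : 0;

	/* fixed form from the hunk above: conditionally add the
	 * transport header length to len */
	int after = len + (queue_empty() ? transhdrlen : 0);

	printf("before=%d after=%d\n", before, after);	/* before=8 after=108 */
	return 0;
}
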
- ieee80211_queue_work(&sta->local->hw, &sta->ampdu_mlme.work); + wiphy_work_queue(sta->local->hw.wiphy, &sta->ampdu_mlme.work); rcu_read_unlock(); } EXPORT_SYMBOL(ieee80211_stop_rx_ba_session); @@ -166,7 +158,7 @@ static void sta_rx_agg_session_timer_expired(struct timer_list *t) sta->sta.addr, tid); set_bit(tid, sta->ampdu_mlme.tid_rx_timer_expired); - ieee80211_queue_work(&sta->local->hw, &sta->ampdu_mlme.work); + wiphy_work_queue(sta->local->hw.wiphy, &sta->ampdu_mlme.work); } static void sta_rx_agg_reorder_timer_expired(struct timer_list *t) @@ -250,11 +242,11 @@ static void ieee80211_send_addba_resp(struct sta_info *sta, u8 *da, u16 tid, ieee80211_tx_skb(sdata, skb); } -void ___ieee80211_start_rx_ba_session(struct sta_info *sta, - u8 dialog_token, u16 timeout, - u16 start_seq_num, u16 ba_policy, u16 tid, - u16 buf_size, bool tx, bool auto_seq, - const struct ieee80211_addba_ext_ie *addbaext) +void __ieee80211_start_rx_ba_session(struct sta_info *sta, + u8 dialog_token, u16 timeout, + u16 start_seq_num, u16 ba_policy, u16 tid, + u16 buf_size, bool tx, bool auto_seq, + const struct ieee80211_addba_ext_ie *addbaext) { struct ieee80211_local *local = sta->sdata->local; struct tid_ampdu_rx *tid_agg_rx; @@ -270,6 +262,8 @@ void ___ieee80211_start_rx_ba_session(struct sta_info *sta, u16 status = WLAN_STATUS_REQUEST_DECLINED; u16 max_buf_size; + lockdep_assert_wiphy(sta->local->hw.wiphy); + if (tid >= IEEE80211_FIRST_TSPEC_TSID) { ht_dbg(sta->sdata, "STA %pM requests BA session on unsupported tid %d\n", @@ -325,9 +319,6 @@ void ___ieee80211_start_rx_ba_session(struct sta_info *sta, ht_dbg(sta->sdata, "AddBA Req buf_size=%d for %pM\n", buf_size, sta->sta.addr); - /* examine state machine */ - lockdep_assert_held(&sta->ampdu_mlme.mtx); - if (test_bit(tid, sta->ampdu_mlme.agg_session_valid)) { if (sta->ampdu_mlme.tid_rx_token[tid] == dialog_token) { struct tid_ampdu_rx *tid_rx; @@ -355,9 +346,9 @@ void ___ieee80211_start_rx_ba_session(struct sta_info *sta, sta->sta.addr, tid); /* delete existing Rx BA session on the same tid */ - ___ieee80211_stop_rx_ba_session(sta, tid, WLAN_BACK_RECIPIENT, - WLAN_STATUS_UNSPECIFIED_QOS, - false); + __ieee80211_stop_rx_ba_session(sta, tid, WLAN_BACK_RECIPIENT, + WLAN_STATUS_UNSPECIFIED_QOS, + false); } if (ieee80211_hw_check(&local->hw, SUPPORTS_REORDERING_BUFFER)) { @@ -444,20 +435,6 @@ end: timeout, addbaext); } -static void __ieee80211_start_rx_ba_session(struct sta_info *sta, - u8 dialog_token, u16 timeout, - u16 start_seq_num, u16 ba_policy, - u16 tid, u16 buf_size, bool tx, - bool auto_seq, - const struct ieee80211_addba_ext_ie *addbaext) -{ - mutex_lock(&sta->ampdu_mlme.mtx); - ___ieee80211_start_rx_ba_session(sta, dialog_token, timeout, - start_seq_num, ba_policy, tid, - buf_size, tx, auto_seq, addbaext); - mutex_unlock(&sta->ampdu_mlme.mtx); -} - void ieee80211_process_addba_request(struct ieee80211_local *local, struct sta_info *sta, struct ieee80211_mgmt *mgmt, @@ -507,7 +484,6 @@ void ieee80211_manage_rx_ba_offl(struct ieee80211_vif *vif, const u8 *addr, unsigned int tid) { struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); - struct ieee80211_local *local = sdata->local; struct sta_info *sta; rcu_read_lock(); @@ -516,7 +492,7 @@ void ieee80211_manage_rx_ba_offl(struct ieee80211_vif *vif, goto unlock; set_bit(tid, sta->ampdu_mlme.tid_rx_manage_offl); - ieee80211_queue_work(&local->hw, &sta->ampdu_mlme.work); + wiphy_work_queue(sta->local->hw.wiphy, &sta->ampdu_mlme.work); unlock: rcu_read_unlock(); } @@ -526,7 +502,6 @@ void 
ieee80211_rx_ba_timer_expired(struct ieee80211_vif *vif, const u8 *addr, unsigned int tid) { struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); - struct ieee80211_local *local = sdata->local; struct sta_info *sta; rcu_read_lock(); @@ -535,7 +510,7 @@ void ieee80211_rx_ba_timer_expired(struct ieee80211_vif *vif, goto unlock; set_bit(tid, sta->ampdu_mlme.tid_rx_timer_expired); - ieee80211_queue_work(&local->hw, &sta->ampdu_mlme.work); + wiphy_work_queue(sta->local->hw.wiphy, &sta->ampdu_mlme.work); unlock: rcu_read_unlock(); diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index b6b7726858..b8a278355e 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -142,7 +142,7 @@ EXPORT_SYMBOL(ieee80211_send_bar); void ieee80211_assign_tid_tx(struct sta_info *sta, int tid, struct tid_ampdu_tx *tid_tx) { - lockdep_assert_held(&sta->ampdu_mlme.mtx); + lockdep_assert_wiphy(sta->local->hw.wiphy); lockdep_assert_held(&sta->lock); rcu_assign_pointer(sta->ampdu_mlme.tid_tx[tid], tid_tx); } @@ -213,7 +213,7 @@ ieee80211_agg_start_txq(struct sta_info *sta, int tid, bool enable) struct ieee80211_txq *txq = sta->sta.txq[tid]; struct txq_info *txqi; - lockdep_assert_held(&sta->ampdu_mlme.mtx); + lockdep_assert_wiphy(sta->local->hw.wiphy); if (!txq) return; @@ -271,7 +271,7 @@ static void ieee80211_remove_tid_tx(struct sta_info *sta, int tid) { struct tid_ampdu_tx *tid_tx; - lockdep_assert_held(&sta->ampdu_mlme.mtx); + lockdep_assert_wiphy(sta->local->hw.wiphy); lockdep_assert_held(&sta->lock); tid_tx = rcu_dereference_protected_tid_tx(sta, tid); @@ -296,8 +296,8 @@ static void ieee80211_remove_tid_tx(struct sta_info *sta, int tid) kfree_rcu(tid_tx, rcu_head); } -int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, - enum ieee80211_agg_stop_reason reason) +int __ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, + enum ieee80211_agg_stop_reason reason) { struct ieee80211_local *local = sta->local; struct tid_ampdu_tx *tid_tx; @@ -311,7 +311,7 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, }; int ret; - lockdep_assert_held(&sta->ampdu_mlme.mtx); + lockdep_assert_wiphy(sta->local->hw.wiphy); switch (reason) { case AGG_STOP_DECLINED: @@ -461,7 +461,7 @@ static void ieee80211_send_addba_with_timeout(struct sta_info *sta, test_bit(HT_AGG_STATE_WANT_STOP, &tid_tx->state))) return; - lockdep_assert_held(&sta->ampdu_mlme.mtx); + lockdep_assert_wiphy(sta->local->hw.wiphy); /* activate the timer for the recipient's addBA response */ mod_timer(&tid_tx->addba_resp_timer, jiffies + ADDBA_RESP_INTERVAL); @@ -497,7 +497,7 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) { struct tid_ampdu_tx *tid_tx; struct ieee80211_local *local = sta->local; - struct ieee80211_sub_if_data *sdata; + struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_ampdu_params params = { .sta = &sta->sta, .action = IEEE80211_AMPDU_TX_START, @@ -525,7 +525,6 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) */ synchronize_net(); - sdata = sta->sdata; params.ssn = sta->tid_seq[tid] >> 4; ret = drv_ampdu_action(local, sdata, &params); tid_tx->ssn = params.ssn; @@ -539,9 +538,6 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) */ set_bit(HT_AGG_STATE_DRV_READY, &tid_tx->state); } else if (ret) { - if (!sdata) - return; - ht_dbg(sdata, "BA request denied - HW unavailable for %pM tid %d\n", sta->sta.addr, tid); @@ -743,7 +739,7 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid,
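/*
 * [Editor's sketch, not part of the patch series: the TX BA start path
 * above hands the session to the driver via drv_ampdu_action(). The
 * driver-side contract is unchanged by this series; a minimal,
 * hypothetical ops->ampdu_action could look like this.]
 */
static int foo_ampdu_action(struct ieee80211_hw *hw,
			    struct ieee80211_vif *vif,
			    struct ieee80211_ampdu_params *params)
{
	switch (params->action) {
	case IEEE80211_AMPDU_TX_START:
		/* hardware is ready immediately, skip the start callback */
		return IEEE80211_AMPDU_TX_START_IMMEDIATE;
	case IEEE80211_AMPDU_TX_OPERATIONAL:
		return 0;
	case IEEE80211_AMPDU_TX_STOP_CONT:
		/* once drained, report the teardown back to mac80211 */
		ieee80211_stop_tx_ba_cb_irqsafe(vif, params->sta->addr,
						params->tid);
		return 0;
	case IEEE80211_AMPDU_TX_STOP_FLUSH:
	case IEEE80211_AMPDU_TX_STOP_FLUSH_CONT:
		/* station is being removed, no callback expected here */
		return 0;
	default:
		return -EOPNOTSUPP;
	}
}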
*/ sta->ampdu_mlme.tid_start_tx[tid] = tid_tx; - ieee80211_queue_work(&local->hw, &sta->ampdu_mlme.work); + wiphy_work_queue(local->hw.wiphy, &sta->ampdu_mlme.work); /* this flow continues off the work */ err_unlock_sta: @@ -764,7 +760,7 @@ static void ieee80211_agg_tx_operational(struct ieee80211_local *local, .ssn = 0, }; - lockdep_assert_held(&sta->ampdu_mlme.mtx); + lockdep_assert_wiphy(sta->local->hw.wiphy); tid_tx = rcu_dereference_protected_tid_tx(sta, tid); params.buf_size = tid_tx->buf_size; @@ -801,7 +797,7 @@ void ieee80211_start_tx_ba_cb(struct sta_info *sta, int tid, struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_local *local = sdata->local; - lockdep_assert_held(&sta->ampdu_mlme.mtx); + lockdep_assert_wiphy(sta->local->hw.wiphy); if (WARN_ON(test_and_set_bit(HT_AGG_STATE_DRV_READY, &tid_tx->state))) return; @@ -862,26 +858,12 @@ void ieee80211_start_tx_ba_cb_irqsafe(struct ieee80211_vif *vif, goto out; set_bit(HT_AGG_STATE_START_CB, &tid_tx->state); - ieee80211_queue_work(&local->hw, &sta->ampdu_mlme.work); + wiphy_work_queue(local->hw.wiphy, &sta->ampdu_mlme.work); out: rcu_read_unlock(); } EXPORT_SYMBOL(ieee80211_start_tx_ba_cb_irqsafe); -int __ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, - enum ieee80211_agg_stop_reason reason) -{ - int ret; - - mutex_lock(&sta->ampdu_mlme.mtx); - - ret = ___ieee80211_stop_tx_ba_session(sta, tid, reason); - - mutex_unlock(&sta->ampdu_mlme.mtx); - - return ret; -} - int ieee80211_stop_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid) { struct sta_info *sta = container_of(pubsta, struct sta_info, sta); @@ -916,7 +898,7 @@ int ieee80211_stop_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid) } set_bit(HT_AGG_STATE_WANT_STOP, &tid_tx->state); - ieee80211_queue_work(&local->hw, &sta->ampdu_mlme.work); + wiphy_work_queue(local->hw.wiphy, &sta->ampdu_mlme.work); unlock: spin_unlock_bh(&sta->lock); @@ -976,7 +958,7 @@ void ieee80211_stop_tx_ba_cb_irqsafe(struct ieee80211_vif *vif, goto out; set_bit(HT_AGG_STATE_STOP_CB, &tid_tx->state); - ieee80211_queue_work(&local->hw, &sta->ampdu_mlme.work); + wiphy_work_queue(local->hw.wiphy, &sta->ampdu_mlme.work); out: rcu_read_unlock(); } @@ -993,6 +975,8 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, u16 capab, tid, buf_size; bool amsdu; + lockdep_assert_wiphy(sta->local->hw.wiphy); + capab = le16_to_cpu(mgmt->u.action.u.addba_resp.capab); amsdu = capab & IEEE80211_ADDBA_PARAM_AMSDU_MASK; tid = u16_get_bits(capab, IEEE80211_ADDBA_PARAM_TID_MASK); @@ -1003,16 +987,14 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, if (!amsdu && txq) set_bit(IEEE80211_TXQ_NO_AMSDU, &to_txq_info(txq)->flags); - mutex_lock(&sta->ampdu_mlme.mtx); - tid_tx = rcu_dereference_protected_tid_tx(sta, tid); if (!tid_tx) - goto out; + return; if (mgmt->u.action.u.addba_resp.dialog_token != tid_tx->dialog_token) { ht_dbg(sta->sdata, "wrong addBA response token, %pM tid %d\n", sta->sta.addr, tid); - goto out; + return; } del_timer_sync(&tid_tx->addba_resp_timer); @@ -1030,7 +1012,7 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, ht_dbg(sta->sdata, "got addBA resp for %pM tid %d but we already gave up\n", sta->sta.addr, tid); - goto out; + return; } /* @@ -1044,7 +1026,7 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, if (test_and_set_bit(HT_AGG_STATE_RESPONSE_RECEIVED, &tid_tx->state)) { /* ignore duplicate response */ - goto out; + return; } tid_tx->buf_size = buf_size; @@ -1065,9 +1047,6 @@ void 
ieee80211_process_addba_resp(struct ieee80211_local *local, } } else { - ___ieee80211_stop_tx_ba_session(sta, tid, AGG_STOP_DECLINED); + __ieee80211_stop_tx_ba_session(sta, tid, AGG_STOP_DECLINED); } - - out: - mutex_unlock(&sta->ampdu_mlme.mtx); } diff --git a/net/mac80211/airtime.c b/net/mac80211/airtime.c index e8ebd343e2..fdf8b658fe 100644 --- a/net/mac80211/airtime.c +++ b/net/mac80211/airtime.c @@ -557,7 +557,7 @@ static int ieee80211_fill_rx_status(struct ieee80211_rx_status *stat, if (ieee80211_fill_rate_info(hw, stat, band, ri)) return 0; - if (rate->idx < 0 || !rate->count) + if (!ieee80211_rate_valid(rate)) return -1; if (rate->flags & IEEE80211_TX_RC_160_MHZ_WIDTH) @@ -632,7 +632,7 @@ u32 ieee80211_calc_expected_tx_airtime(struct ieee80211_hw *hw, { struct ieee80211_supported_band *sband; struct ieee80211_chanctx_conf *conf; - int rateidx, shift = 0; + int rateidx; bool cck, short_pream; u32 basic_rates; u8 band = 0; @@ -641,10 +641,8 @@ u32 ieee80211_calc_expected_tx_airtime(struct ieee80211_hw *hw, len += 38; /* Ethernet header length */ conf = rcu_dereference(vif->bss_conf.chanctx_conf); - if (conf) { + if (conf) band = conf->def.chan->band; - shift = ieee80211_chandef_get_shift(&conf->def); - } if (pubsta) { struct sta_info *sta = container_of(pubsta, struct sta_info, @@ -704,7 +702,7 @@ u32 ieee80211_calc_expected_tx_airtime(struct ieee80211_hw *hw, short_pream = vif->bss_conf.use_short_preamble; rateidx = basic_rates ? ffs(basic_rates) - 1 : 0; - rate = sband->bitrates[rateidx].bitrate << shift; + rate = sband->bitrates[rateidx].bitrate; cck = sband->bitrates[rateidx].flags & IEEE80211_RATE_MANDATORY_B; return ieee80211_calc_legacy_rate_duration(rate, short_pream, cck, len); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index f7cb50b0dd..b382c2e0a3 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -5,7 +5,7 @@ * Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2015 Intel Mobile Communications GmbH * Copyright (C) 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018-2022 Intel Corporation + * Copyright (C) 2018-2024 Intel Corporation */ #include <linux/ieee80211.h> @@ -214,6 +214,8 @@ static int ieee80211_change_iface(struct wiphy *wiphy, struct sta_info *sta; int ret; + lockdep_assert_wiphy(local->hw.wiphy); + ret = ieee80211_if_change_type(sdata, type); if (ret) return ret; @@ -235,12 +237,10 @@ static int ieee80211_change_iface(struct wiphy *wiphy, if (!ifmgd->associated) return 0; - mutex_lock(&local->sta_mtx); sta = sta_info_get(sdata, sdata->deflink.u.mgd.bssid); if (sta) drv_sta_set_4addr(local, sdata, &sta->sta, params->use_4addr); - mutex_unlock(&local->sta_mtx); if (params->use_4addr) ieee80211_send_4addr_nullfunc(local, sdata); @@ -261,9 +261,9 @@ static int ieee80211_start_p2p_device(struct wiphy *wiphy, struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); int ret; - mutex_lock(&sdata->local->chanctx_mtx); + lockdep_assert_wiphy(sdata->local->hw.wiphy); + ret = ieee80211_check_combinations(sdata, NULL, 0, 0); - mutex_unlock(&sdata->local->chanctx_mtx); if (ret < 0) return ret; @@ -283,9 +283,9 @@ static int ieee80211_start_nan(struct wiphy *wiphy, struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); int ret; - mutex_lock(&sdata->local->chanctx_mtx); + lockdep_assert_wiphy(sdata->local->hw.wiphy); + ret = ieee80211_check_combinations(sdata, NULL, 0, 0); - mutex_unlock(&sdata->local->chanctx_mtx); if (ret < 0) return ret; @@ -452,13 +452,11 @@ static int ieee80211_set_tx(struct 
ieee80211_sub_if_data *sdata, if (sta->ptk_idx == key_idx) return 0; - mutex_lock(&local->key_mtx); - key = key_mtx_dereference(local, sta->ptk[key_idx]); + key = wiphy_dereference(local->hw.wiphy, sta->ptk[key_idx]); if (key && key->conf.flags & IEEE80211_KEY_FLAG_NO_AUTO_TX) ret = ieee80211_set_tx_key(key); - mutex_unlock(&local->key_mtx); return ret; } @@ -474,6 +472,8 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_key *key; int err; + lockdep_assert_wiphy(local->hw.wiphy); + if (!ieee80211_sdata_running(sdata)) return -ENETDOWN; @@ -510,8 +510,6 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, if (params->mode == NL80211_KEY_NO_TX) key->conf.flags |= IEEE80211_KEY_FLAG_NO_AUTO_TX; - mutex_lock(&local->sta_mtx); - if (mac_addr) { sta = sta_info_get_bss(sdata, mac_addr); /* @@ -526,8 +524,7 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, */ if (!sta || !test_sta_flag(sta, WLAN_STA_ASSOC)) { ieee80211_key_free_unused(key); - err = -ENOENT; - goto out_unlock; + return -ENOENT; } } @@ -570,9 +567,6 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, if (err == -EALREADY) err = 0; - out_unlock: - mutex_unlock(&local->sta_mtx); - return err; } @@ -585,8 +579,7 @@ ieee80211_lookup_key(struct ieee80211_sub_if_data *sdata, int link_id, struct ieee80211_key *key; if (link_id >= 0) { - link = rcu_dereference_check(sdata->link[link_id], - lockdep_is_held(&sdata->wdev.mtx)); + link = sdata_dereference(sdata->link[link_id], sdata); if (!link) return NULL; } @@ -601,7 +594,7 @@ ieee80211_lookup_key(struct ieee80211_sub_if_data *sdata, int link_id, if (link_id >= 0) { link_sta = rcu_dereference_check(sta->link[link_id], - lockdep_is_held(&local->sta_mtx)); + lockdep_is_held(&local->hw.wiphy->mtx)); if (!link_sta) return NULL; } else { @@ -609,30 +602,29 @@ ieee80211_lookup_key(struct ieee80211_sub_if_data *sdata, int link_id, } if (pairwise && key_idx < NUM_DEFAULT_KEYS) - return rcu_dereference_check_key_mtx(local, - sta->ptk[key_idx]); + return wiphy_dereference(local->hw.wiphy, + sta->ptk[key_idx]); if (!pairwise && key_idx < NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS + NUM_DEFAULT_BEACON_KEYS) - return rcu_dereference_check_key_mtx(local, - link_sta->gtk[key_idx]); + return wiphy_dereference(local->hw.wiphy, + link_sta->gtk[key_idx]); return NULL; } if (pairwise && key_idx < NUM_DEFAULT_KEYS) - return rcu_dereference_check_key_mtx(local, - sdata->keys[key_idx]); + return wiphy_dereference(local->hw.wiphy, sdata->keys[key_idx]); - key = rcu_dereference_check_key_mtx(local, link->gtk[key_idx]); + key = wiphy_dereference(local->hw.wiphy, link->gtk[key_idx]); if (key) return key; /* or maybe it was a WEP key */ if (key_idx < NUM_DEFAULT_KEYS) - return rcu_dereference_check_key_mtx(local, sdata->keys[key_idx]); + return wiphy_dereference(local->hw.wiphy, sdata->keys[key_idx]); return NULL; } @@ -644,25 +636,16 @@ static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; struct ieee80211_key *key; - int ret; - mutex_lock(&local->sta_mtx); - mutex_lock(&local->key_mtx); + lockdep_assert_wiphy(local->hw.wiphy); key = ieee80211_lookup_key(sdata, link_id, key_idx, pairwise, mac_addr); - if (!key) { - ret = -ENOENT; - goto out_unlock; - } + if (!key) + return -ENOENT; ieee80211_key_free(key, sdata->vif.type == NL80211_IFTYPE_STATION); - ret = 0; - 
out_unlock: - mutex_unlock(&local->key_mtx); - mutex_unlock(&local->sta_mtx); - - return ret; + return 0; } static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, @@ -833,15 +816,11 @@ void sta_set_rate_info_tx(struct sta_info *sta, rinfo->nss = ieee80211_rate_get_vht_nss(rate); } else { struct ieee80211_supported_band *sband; - int shift = ieee80211_vif_get_shift(&sta->sdata->vif); - u16 brate; sband = ieee80211_get_sband(sta->sdata); WARN_ON_ONCE(sband && !sband->bitrates); - if (sband && sband->bitrates) { - brate = sband->bitrates[rate->idx].bitrate; - rinfo->legacy = DIV_ROUND_UP(brate, 1 << shift); - } + if (sband && sband->bitrates) + rinfo->legacy = sband->bitrates[rate->idx].bitrate; } if (rate->flags & IEEE80211_TX_RC_40_MHZ_WIDTH) rinfo->bw = RATE_INFO_BW_40; @@ -863,7 +842,7 @@ static int ieee80211_dump_station(struct wiphy *wiphy, struct net_device *dev, struct sta_info *sta; int ret = -ENOENT; - mutex_lock(&local->sta_mtx); + lockdep_assert_wiphy(local->hw.wiphy); sta = sta_info_get_by_idx(sdata, idx); if (sta) { @@ -872,8 +851,6 @@ static int ieee80211_dump_station(struct wiphy *wiphy, struct net_device *dev, sta_set_sinfo(sta, sinfo, true); } - mutex_unlock(&local->sta_mtx); - return ret; } @@ -893,7 +870,7 @@ static int ieee80211_get_station(struct wiphy *wiphy, struct net_device *dev, struct sta_info *sta; int ret = -ENOENT; - mutex_lock(&local->sta_mtx); + lockdep_assert_wiphy(local->hw.wiphy); sta = sta_info_get_bss(sdata, mac); if (sta) { @@ -901,8 +878,6 @@ static int ieee80211_get_station(struct wiphy *wiphy, struct net_device *dev, sta_set_sinfo(sta, sinfo, true); } - mutex_unlock(&local->sta_mtx); - return ret; } @@ -913,6 +888,8 @@ static int ieee80211_set_monitor_channel(struct wiphy *wiphy, struct ieee80211_sub_if_data *sdata; int ret = 0; + lockdep_assert_wiphy(local->hw.wiphy); + if (cfg80211_chandef_identical(&local->monitor_chandef, chandef)) return 0; @@ -920,22 +897,16 @@ static int ieee80211_set_monitor_channel(struct wiphy *wiphy, sdata = wiphy_dereference(local->hw.wiphy, local->monitor_sdata); if (sdata) { - sdata_lock(sdata); - mutex_lock(&local->mtx); ieee80211_link_release_channel(&sdata->deflink); ret = ieee80211_link_use_channel(&sdata->deflink, chandef, IEEE80211_CHANCTX_EXCLUSIVE); - mutex_unlock(&local->mtx); - sdata_unlock(sdata); } } else { - mutex_lock(&local->mtx); if (local->open_count == local->monitors) { local->_oper_chandef = *chandef; ieee80211_hw_config(local, 0); } - mutex_unlock(&local->mtx); } if (ret == 0) @@ -987,51 +958,61 @@ static int ieee80211_set_fils_discovery(struct ieee80211_sub_if_data *sdata, struct fils_discovery_data *new, *old = NULL; struct ieee80211_fils_discovery *fd; - if (!params->tmpl || !params->tmpl_len) - return -EINVAL; + if (!params->update) + return 0; fd = &link_conf->fils_discovery; fd->min_interval = params->min_interval; fd->max_interval = params->max_interval; old = sdata_dereference(link->u.ap.fils_discovery, sdata); - new = kzalloc(sizeof(*new) + params->tmpl_len, GFP_KERNEL); - if (!new) - return -ENOMEM; - new->len = params->tmpl_len; - memcpy(new->data, params->tmpl, params->tmpl_len); - rcu_assign_pointer(link->u.ap.fils_discovery, new); - if (old) kfree_rcu(old, rcu_head); - return 0; + if (params->tmpl && params->tmpl_len) { + new = kzalloc(sizeof(*new) + params->tmpl_len, GFP_KERNEL); + if (!new) + return -ENOMEM; + new->len = params->tmpl_len; + memcpy(new->data, params->tmpl, params->tmpl_len); + rcu_assign_pointer(link->u.ap.fils_discovery, new); + } else { + 
RCU_INIT_POINTER(link->u.ap.fils_discovery, NULL); + } + + return BSS_CHANGED_FILS_DISCOVERY; } static int ieee80211_set_unsol_bcast_probe_resp(struct ieee80211_sub_if_data *sdata, struct cfg80211_unsol_bcast_probe_resp *params, struct ieee80211_link_data *link, - struct ieee80211_bss_conf *link_conf) + struct ieee80211_bss_conf *link_conf, + u64 *changed) { struct unsol_bcast_probe_resp_data *new, *old = NULL; - if (!params->tmpl || !params->tmpl_len) - return -EINVAL; + if (!params->update) + return 0; - old = sdata_dereference(link->u.ap.unsol_bcast_probe_resp, sdata); - new = kzalloc(sizeof(*new) + params->tmpl_len, GFP_KERNEL); - if (!new) - return -ENOMEM; - new->len = params->tmpl_len; - memcpy(new->data, params->tmpl, params->tmpl_len); - rcu_assign_pointer(link->u.ap.unsol_bcast_probe_resp, new); + link_conf->unsol_bcast_probe_resp_interval = params->interval; + old = sdata_dereference(link->u.ap.unsol_bcast_probe_resp, sdata); if (old) kfree_rcu(old, rcu_head); - link_conf->unsol_bcast_probe_resp_interval = params->interval; + if (params->tmpl && params->tmpl_len) { + new = kzalloc(sizeof(*new) + params->tmpl_len, GFP_KERNEL); + if (!new) + return -ENOMEM; + new->len = params->tmpl_len; + memcpy(new->data, params->tmpl, params->tmpl_len); + rcu_assign_pointer(link->u.ap.unsol_bcast_probe_resp, new); + } else { + RCU_INIT_POINTER(link->u.ap.unsol_bcast_probe_resp, NULL); + } + *changed |= BSS_CHANGED_UNSOL_BCAST_PROBE_RESP; return 0; } @@ -1278,6 +1259,8 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_link_data *link; struct ieee80211_bss_conf *link_conf; + lockdep_assert_wiphy(local->hw.wiphy); + link = sdata_dereference(sdata->link[link_id], sdata); if (!link) return -ENOLINK; @@ -1387,12 +1370,10 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, return err; } - mutex_lock(&local->mtx); err = ieee80211_link_use_channel(link, &params->chandef, IEEE80211_CHANCTX_SHARED); if (!err) ieee80211_link_copy_chanctx_to_vlans(link, false); - mutex_unlock(&local->mtx); if (err) { link_conf->beacon_int = prev_beacon_int; return err; } @@ -1463,23 +1444,17 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, if (err < 0) goto error; - if (params->fils_discovery.max_interval) { - err = ieee80211_set_fils_discovery(sdata, - &params->fils_discovery, - link, link_conf); - if (err < 0) - goto error; - changed |= BSS_CHANGED_FILS_DISCOVERY; - } + err = ieee80211_set_fils_discovery(sdata, &params->fils_discovery, + link, link_conf); + if (err < 0) + goto error; + changed |= err; - if (params->unsol_bcast_probe_resp.interval) { - err = ieee80211_set_unsol_bcast_probe_resp(sdata, - &params->unsol_bcast_probe_resp, - link, link_conf); - if (err < 0) - goto error; - changed |= BSS_CHANGED_UNSOL_BCAST_PROBE_RESP; - } + err = ieee80211_set_unsol_bcast_probe_resp(sdata, + &params->unsol_bcast_probe_resp, + link, link_conf, &changed); + if (err < 0) + goto error; err = drv_start_ap(sdata->local, sdata, link_conf); if (err) { @@ -1503,26 +1478,26 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, return 0; error: - mutex_lock(&local->mtx); ieee80211_link_release_channel(link); - mutex_unlock(&local->mtx); return err; } static int ieee80211_change_beacon(struct wiphy *wiphy, struct net_device *dev, - struct cfg80211_beacon_data *params) + struct cfg80211_ap_update *params) + { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_link_data *link; + struct cfg80211_beacon_data 
*beacon = &params->beacon; struct beacon_data *old; int err; struct ieee80211_bss_conf *link_conf; u64 changed = 0; - sdata_assert_lock(sdata); + lockdep_assert_wiphy(wiphy); - link = sdata_dereference(sdata->link[params->link_id], sdata); + link = sdata_dereference(sdata->link[beacon->link_id], sdata); if (!link) return -ENOLINK; @@ -1538,14 +1513,26 @@ static int ieee80211_change_beacon(struct wiphy *wiphy, struct net_device *dev, if (!old) return -ENOENT; - err = ieee80211_assign_beacon(sdata, link, params, NULL, NULL, + err = ieee80211_assign_beacon(sdata, link, beacon, NULL, NULL, &changed); if (err < 0) return err; - if (params->he_bss_color_valid && - params->he_bss_color.enabled != link_conf->he_bss_color.enabled) { - link_conf->he_bss_color.enabled = params->he_bss_color.enabled; + err = ieee80211_set_fils_discovery(sdata, &params->fils_discovery, + link, link_conf); + if (err < 0) + return err; + changed |= err; + + err = ieee80211_set_unsol_bcast_probe_resp(sdata, + &params->unsol_bcast_probe_resp, + link, link_conf, &changed); + if (err < 0) + return err; + + if (beacon->he_bss_color_valid && + beacon->he_bss_color.enabled != link_conf->he_bss_color.enabled) { + link_conf->he_bss_color.enabled = beacon->he_bss_color.enabled; changed |= BSS_CHANGED_HE_BSS_COLOR; } @@ -1579,7 +1566,7 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev, sdata_dereference(sdata->link[link_id], sdata); struct ieee80211_bss_conf *link_conf = link->conf; - sdata_assert_lock(sdata); + lockdep_assert_wiphy(local->hw.wiphy); old_beacon = sdata_dereference(link->u.ap.beacon, sdata); if (!old_beacon) @@ -1593,7 +1580,6 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev, sdata); /* abort any running channel switch or color change */ - mutex_lock(&local->mtx); link_conf->csa_active = false; link_conf->color_change_active = false; if (link->csa_block_tx) { @@ -1602,8 +1588,6 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev, link->csa_block_tx = false; } - mutex_unlock(&local->mtx); - ieee80211_free_next_beacon(link); /* turn off carrier for this interface and dependent VLANs */ @@ -1646,7 +1630,7 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev, if (sdata->wdev.cac_started) { chandef = link_conf->chandef; - cancel_delayed_work_sync(&link->dfs_cac_timer_work); + wiphy_delayed_work_cancel(wiphy, &link->dfs_cac_timer_work); cfg80211_cac_event(sdata->dev, &chandef, NL80211_RADAR_CAC_ABORTED, GFP_KERNEL); @@ -1658,10 +1642,8 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev, local->total_ps_buffered -= skb_queue_len(&sdata->u.ap.ps.bc_buf); ieee80211_purge_tx_queue(&local->hw, &sdata->u.ap.ps.bc_buf); - mutex_lock(&local->mtx); ieee80211_link_copy_chanctx_to_vlans(link, true); ieee80211_link_release_channel(link); - mutex_unlock(&local->mtx); return 0; } @@ -1803,7 +1785,7 @@ static int sta_link_apply_parameters(struct ieee80211_local *local, sdata_dereference(sdata->link[link_id], sdata); struct link_sta_info *link_sta = rcu_dereference_protected(sta->link[link_id], - lockdep_is_held(&local->sta_mtx)); + lockdep_is_held(&local->hw.wiphy->mtx)); /* * If there are no changes, then accept a link that exist, @@ -1887,6 +1869,8 @@ static int sta_link_apply_parameters(struct ieee80211_local *local, sband->band); } + ieee80211_sta_set_rx_nss(link_sta); + return ret; } @@ -2038,6 +2022,8 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_sub_if_data *sdata; 
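/*
 * [Editor's sketch, not part of the patch series: after this rework,
 * ieee80211_set_fils_discovery() and ieee80211_set_unsol_bcast_probe_resp()
 * share one idiom: do nothing unless params->update is set, publish a new
 * template with rcu_assign_pointer(), clear it with RCU_INIT_POINTER(),
 * and free the old copy only after an RCU grace period. Generic shape of
 * that idiom, with a hypothetical tmpl_data standing in for the two
 * mac80211-private template structures:]
 */
struct tmpl_data {
	struct rcu_head rcu_head;
	size_t len;
	u8 data[];
};

static int tmpl_publish(struct tmpl_data __rcu **slot,
			const u8 *tmpl, size_t len)
{
	/* writers are serialized by the wiphy mutex in the callers above */
	struct tmpl_data *old = rcu_dereference_protected(*slot, 1);
	struct tmpl_data *new = NULL;

	if (tmpl && len) {
		new = kzalloc(struct_size(new, data, len), GFP_KERNEL);
		if (!new)
			return -ENOMEM;
		new->len = len;
		memcpy(new->data, tmpl, len);
	}
	rcu_assign_pointer(*slot, new);		/* NULL simply clears the slot */
	if (old)
		kfree_rcu(old, rcu_head);	/* readers may still hold the old copy */
	return 0;
}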
int err; + lockdep_assert_wiphy(local->hw.wiphy); + if (params->vlan) { sdata = IEEE80211_DEV_TO_SUB_IF(params->vlan); @@ -2081,9 +2067,7 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, * visible yet), sta_apply_parameters (and inner functions) require * the mutex due to other paths. */ - mutex_lock(&local->sta_mtx); err = sta_apply_parameters(local, sta, params); - mutex_unlock(&local->sta_mtx); if (err) { sta_info_free(local, sta); return err; @@ -2126,13 +2110,11 @@ static int ieee80211_change_station(struct wiphy *wiphy, enum cfg80211_station_type statype; int err; - mutex_lock(&local->sta_mtx); + lockdep_assert_wiphy(local->hw.wiphy); sta = sta_info_get_bss(sdata, mac); - if (!sta) { - err = -ENOENT; - goto out_err; - } + if (!sta) + return -ENOENT; switch (sdata->vif.type) { case NL80211_IFTYPE_MESH_POINT: @@ -2162,22 +2144,19 @@ static int ieee80211_change_station(struct wiphy *wiphy, statype = CFG80211_STA_AP_CLIENT_UNASSOC; break; default: - err = -EOPNOTSUPP; - goto out_err; + return -EOPNOTSUPP; } err = cfg80211_check_station_change(wiphy, params, statype); if (err) - goto out_err; + return err; if (params->vlan && params->vlan != sta->sdata->dev) { vlansdata = IEEE80211_DEV_TO_SUB_IF(params->vlan); if (params->vlan->ieee80211_ptr->use_4addr) { - if (vlansdata->u.vlan.sta) { - err = -EBUSY; - goto out_err; - } + if (vlansdata->u.vlan.sta) + return -EBUSY; rcu_assign_pointer(vlansdata->u.vlan.sta, sta); __ieee80211_check_fast_rx_iface(vlansdata); @@ -2203,18 +2182,9 @@ static int ieee80211_change_station(struct wiphy *wiphy, } } - /* we use sta_info_get_bss() so this might be different */ - if (sdata != sta->sdata) { - mutex_lock_nested(&sta->sdata->wdev.mtx, 1); - err = sta_apply_parameters(local, sta, params); - mutex_unlock(&sta->sdata->wdev.mtx); - } else { - err = sta_apply_parameters(local, sta, params); - } + err = sta_apply_parameters(local, sta, params); if (err) - goto out_err; - - mutex_unlock(&local->sta_mtx); + return err; if (sdata->vif.type == NL80211_IFTYPE_STATION && params->sta_flags_mask & BIT(NL80211_STA_FLAG_AUTHORIZED)) { @@ -2223,9 +2193,6 @@ static int ieee80211_change_station(struct wiphy *wiphy, } return 0; -out_err: - mutex_unlock(&local->sta_mtx); - return err; } #ifdef CONFIG_MAC80211_MESH @@ -2638,6 +2605,8 @@ static int ieee80211_join_mesh(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; int err; + lockdep_assert_wiphy(sdata->local->hw.wiphy); + memcpy(&ifmsh->mshcfg, conf, sizeof(struct mesh_config)); err = copy_mesh_setup(ifmsh, setup); if (err) @@ -2649,10 +2618,8 @@ static int ieee80211_join_mesh(struct wiphy *wiphy, struct net_device *dev, sdata->deflink.smps_mode = IEEE80211_SMPS_OFF; sdata->deflink.needed_rx_chains = sdata->local->rx_chains; - mutex_lock(&sdata->local->mtx); err = ieee80211_link_use_channel(&sdata->deflink, &setup->chandef, IEEE80211_CHANCTX_SHARED); - mutex_unlock(&sdata->local->mtx); if (err) return err; @@ -2663,11 +2630,11 @@ static int ieee80211_leave_mesh(struct wiphy *wiphy, struct net_device *dev) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + lockdep_assert_wiphy(sdata->local->hw.wiphy); + ieee80211_stop_mesh(sdata); - mutex_lock(&sdata->local->mtx); ieee80211_link_release_channel(&sdata->deflink); kfree(sdata->u.mesh.ie); - mutex_unlock(&sdata->local->mtx); return 0; } @@ -3025,6 +2992,8 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy, bool update_txp_type = false; bool has_monitor = false; + 
lockdep_assert_wiphy(local->hw.wiphy); + if (wdev) { sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); @@ -3072,7 +3041,6 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy, break; } - mutex_lock(&local->iflist_mtx); list_for_each_entry(sdata, &local->interfaces, list) { if (sdata->vif.type == NL80211_IFTYPE_MONITOR) { has_monitor = true; @@ -3088,7 +3056,6 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy, continue; ieee80211_recalc_txpower(sdata, update_txp_type); } - mutex_unlock(&local->iflist_mtx); if (has_monitor) { sdata = wiphy_dereference(local->hw.wiphy, @@ -3181,14 +3148,24 @@ int __ieee80211_request_smps_mgd(struct ieee80211_sub_if_data *sdata, struct sta_info *sta; bool tdls_peer_found = false; - lockdep_assert_held(&sdata->wdev.mtx); + lockdep_assert_wiphy(sdata->local->hw.wiphy); if (WARN_ON_ONCE(sdata->vif.type != NL80211_IFTYPE_STATION)) return -EINVAL; + if (ieee80211_vif_is_mld(&sdata->vif) && + !(sdata->vif.active_links & BIT(link->link_id))) + return 0; + old_req = link->u.mgd.req_smps; link->u.mgd.req_smps = smps_mode; + /* The driver indicated that EML is enabled for the interface, which + * implies that SMPS flows towards the AP should be stopped. + */ + if (sdata->vif.driver_flags & IEEE80211_VIF_EML_ACTIVE) + return 0; + if (old_req == smps_mode && smps_mode != IEEE80211_SMPS_AUTOMATIC) return 0; @@ -3202,7 +3179,7 @@ int __ieee80211_request_smps_mgd(struct ieee80211_sub_if_data *sdata, link->conf->chandef.width == NL80211_CHAN_WIDTH_20_NOHT) return 0; - ap = link->u.mgd.bssid; + ap = sdata->vif.cfg.ap_addr; rcu_read_lock(); list_for_each_entry_rcu(sta, &sdata->local->sta_list, list) { @@ -3224,7 +3201,9 @@ int __ieee80211_request_smps_mgd(struct ieee80211_sub_if_data *sdata, /* send SM PS frame to AP */ err = ieee80211_send_smps_action(sdata, smps_mode, - ap, ap); + ap, ap, + ieee80211_vif_is_mld(&sdata->vif) ? 
+ link->link_id : -1); if (err) link->u.mgd.req_smps = old_req; else if (smps_mode != IEEE80211_SMPS_OFF && tdls_peer_found) @@ -3254,7 +3233,6 @@ static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev, local->dynamic_ps_forced_timeout = timeout; /* no change, but if automatic follow powersave */ - sdata_lock(sdata); for (link_id = 0; link_id < ARRAY_SIZE(sdata->link); link_id++) { struct ieee80211_link_data *link; @@ -3265,7 +3243,6 @@ static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev, __ieee80211_request_smps_mgd(sdata, link, link->u.mgd.req_smps); } - sdata_unlock(sdata); if (ieee80211_hw_check(&local->hw, SUPPORTS_DYNAMIC_PS)) ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); @@ -3411,7 +3388,8 @@ static int ieee80211_start_radar_detection(struct wiphy *wiphy, struct ieee80211_local *local = sdata->local; int err; - mutex_lock(&local->mtx); + lockdep_assert_wiphy(local->hw.wiphy); + if (!list_empty(&local->roc_list) || local->scanning) { err = -EBUSY; goto out_unlock; @@ -3426,12 +3404,10 @@ static int ieee80211_start_radar_detection(struct wiphy *wiphy, if (err) goto out_unlock; - ieee80211_queue_delayed_work(&sdata->local->hw, - &sdata->deflink.dfs_cac_timer_work, - msecs_to_jiffies(cac_time_ms)); + wiphy_delayed_work_queue(wiphy, &sdata->deflink.dfs_cac_timer_work, + msecs_to_jiffies(cac_time_ms)); out_unlock: - mutex_unlock(&local->mtx); return err; } @@ -3441,20 +3417,21 @@ static void ieee80211_end_cac(struct wiphy *wiphy, struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; - mutex_lock(&local->mtx); + lockdep_assert_wiphy(local->hw.wiphy); + list_for_each_entry(sdata, &local->interfaces, list) { /* it might be waiting for the local->mtx, but then * by the time it gets it, sdata->wdev.cac_started * will no longer be true */ - cancel_delayed_work(&sdata->deflink.dfs_cac_timer_work); + wiphy_delayed_work_cancel(wiphy, + &sdata->deflink.dfs_cac_timer_work); if (sdata->wdev.cac_started) { ieee80211_link_release_channel(&sdata->deflink); sdata->wdev.cac_started = false; } } - mutex_unlock(&local->mtx); } static struct cfg80211_beacon_data * @@ -3586,11 +3563,11 @@ void ieee80211_csa_finish(struct ieee80211_vif *vif) if (iter == sdata || iter->vif.mbssid_tx_vif != vif) continue; - ieee80211_queue_work(&iter->local->hw, - &iter->deflink.csa_finalize_work); + wiphy_work_queue(iter->local->hw.wiphy, + &iter->deflink.csa_finalize_work); } } - ieee80211_queue_work(&local->hw, &sdata->deflink.csa_finalize_work); + wiphy_work_queue(local->hw.wiphy, &sdata->deflink.csa_finalize_work); rcu_read_unlock(); } @@ -3646,15 +3623,14 @@ static int ieee80211_set_after_csa_beacon(struct ieee80211_sub_if_data *sdata, return 0; } -static int __ieee80211_csa_finalize(struct ieee80211_sub_if_data *sdata) +static int __ieee80211_csa_finalize(struct ieee80211_link_data *link_data) { + struct ieee80211_sub_if_data *sdata = link_data->sdata; struct ieee80211_local *local = sdata->local; u64 changed = 0; int err; - sdata_assert_lock(sdata); - lockdep_assert_held(&local->mtx); - lockdep_assert_held(&local->chanctx_mtx); + lockdep_assert_wiphy(local->hw.wiphy); /* * using reservation isn't immediate as it may be deferred until later @@ -3663,20 +3639,20 @@ static int __ieee80211_csa_finalize(struct ieee80211_sub_if_data *sdata) * completed successfully */ - if (sdata->deflink.reserved_chanctx) { + if (link_data->reserved_chanctx) { /* * with multi-vif csa driver may call ieee80211_csa_finish() * 
many times while waiting for other interfaces to use their * reservations */ - if (sdata->deflink.reserved_ready) + if (link_data->reserved_ready) return 0; return ieee80211_link_use_reserved_context(&sdata->deflink); } - if (!cfg80211_chandef_identical(&sdata->vif.bss_conf.chandef, - &sdata->deflink.csa_chandef)) + if (!cfg80211_chandef_identical(&link_data->conf->chandef, + &link_data->csa_chandef)) return -EINVAL; sdata->vif.bss_conf.csa_active = false; @@ -3691,57 +3667,53 @@ static int __ieee80211_csa_finalize(struct ieee80211_sub_if_data *sdata) changed |= BSS_CHANGED_EHT_PUNCTURING; } - ieee80211_link_info_change_notify(sdata, &sdata->deflink, changed); + ieee80211_link_info_change_notify(sdata, link_data, changed); - if (sdata->deflink.csa_block_tx) { + if (link_data->csa_block_tx) { ieee80211_wake_vif_queues(local, sdata, IEEE80211_QUEUE_STOP_REASON_CSA); - sdata->deflink.csa_block_tx = false; + link_data->csa_block_tx = false; } - err = drv_post_channel_switch(sdata); + err = drv_post_channel_switch(link_data); if (err) return err; - cfg80211_ch_switch_notify(sdata->dev, &sdata->deflink.csa_chandef, 0, - sdata->vif.bss_conf.eht_puncturing); + cfg80211_ch_switch_notify(sdata->dev, &link_data->csa_chandef, + link_data->link_id, + link_data->conf->eht_puncturing); return 0; } -static void ieee80211_csa_finalize(struct ieee80211_sub_if_data *sdata) +static void ieee80211_csa_finalize(struct ieee80211_link_data *link_data) { - if (__ieee80211_csa_finalize(sdata)) { + struct ieee80211_sub_if_data *sdata = link_data->sdata; + + if (__ieee80211_csa_finalize(link_data)) { sdata_info(sdata, "failed to finalize CSA, disconnecting\n"); cfg80211_stop_iface(sdata->local->hw.wiphy, &sdata->wdev, GFP_KERNEL); } } -void ieee80211_csa_finalize_work(struct work_struct *work) +void ieee80211_csa_finalize_work(struct wiphy *wiphy, struct wiphy_work *work) { - struct ieee80211_sub_if_data *sdata = - container_of(work, struct ieee80211_sub_if_data, - deflink.csa_finalize_work); + struct ieee80211_link_data *link = + container_of(work, struct ieee80211_link_data, csa_finalize_work); + struct ieee80211_sub_if_data *sdata = link->sdata; struct ieee80211_local *local = sdata->local; - sdata_lock(sdata); - mutex_lock(&local->mtx); - mutex_lock(&local->chanctx_mtx); + lockdep_assert_wiphy(local->hw.wiphy); /* AP might have been stopped while waiting for the lock. 
*/ - if (!sdata->vif.bss_conf.csa_active) - goto unlock; + if (!link->conf->csa_active) + return; if (!ieee80211_sdata_running(sdata)) - goto unlock; - - ieee80211_csa_finalize(sdata); + return; -unlock: - mutex_unlock(&local->chanctx_mtx); - mutex_unlock(&local->mtx); - sdata_unlock(sdata); + ieee80211_csa_finalize(link); } static int ieee80211_set_csa_beacon(struct ieee80211_sub_if_data *sdata, @@ -3897,8 +3869,7 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, u64 changed = 0; int err; - sdata_assert_lock(sdata); - lockdep_assert_held(&local->mtx); + lockdep_assert_wiphy(local->hw.wiphy); if (!list_empty(&local->roc_list) || local->scanning) return -EBUSY; @@ -3914,9 +3885,8 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, if (sdata->vif.bss_conf.csa_active) return -EBUSY; - mutex_lock(&local->chanctx_mtx); conf = rcu_dereference_protected(sdata->vif.bss_conf.chanctx_conf, - lockdep_is_held(&local->chanctx_mtx)); + lockdep_is_held(&local->hw.wiphy->mtx)); if (!conf) { err = -EBUSY; goto out; } @@ -3986,11 +3956,10 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, drv_channel_switch_beacon(sdata, &params->chandef); } else { /* if the beacon didn't change, we can finalize immediately */ - ieee80211_csa_finalize(sdata); + ieee80211_csa_finalize(&sdata->deflink); } out: - mutex_unlock(&local->chanctx_mtx); return err; } @@ -3999,18 +3968,15 @@ int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; - int err; - mutex_lock(&local->mtx); - err = __ieee80211_channel_switch(wiphy, dev, params); - mutex_unlock(&local->mtx); + lockdep_assert_wiphy(local->hw.wiphy); - return err; + return __ieee80211_channel_switch(wiphy, dev, params); } u64 ieee80211_mgmt_tx_cookie(struct ieee80211_local *local) { - lockdep_assert_held(&local->mtx); + lockdep_assert_wiphy(local->hw.wiphy); local->roc_cookie_counter++; @@ -4042,7 +4008,8 @@ int ieee80211_attach_ack_skb(struct ieee80211_local *local, struct sk_buff *skb, return -ENOMEM; } - IEEE80211_SKB_CB(skb)->ack_frame_id = id; + IEEE80211_SKB_CB(skb)->status_data_idr = 1; + IEEE80211_SKB_CB(skb)->status_data = id; *cookie = ieee80211_mgmt_tx_cookie(local); IEEE80211_SKB_CB(ack_skb)->ack.cookie = *cookie; @@ -4092,11 +4059,17 @@ ieee80211_update_mgmt_frame_registrations(struct wiphy *wiphy, static int ieee80211_set_antenna(struct wiphy *wiphy, u32 tx_ant, u32 rx_ant) { struct ieee80211_local *local = wiphy_priv(wiphy); + int ret; if (local->started) return -EOPNOTSUPP; - return drv_set_antenna(local, tx_ant, rx_ant); + ret = drv_set_antenna(local, tx_ant, rx_ant); + if (ret) + return ret; + + local->rx_chains = hweight8(rx_ant); + return 0; } static int ieee80211_get_antenna(struct wiphy *wiphy, u32 *tx_ant, u32 *rx_ant) @@ -4138,7 +4111,7 @@ static int ieee80211_probe_client(struct wiphy *wiphy, struct net_device *dev, int ret; /* the lock is needed to assign the cookie later */ - mutex_lock(&local->mtx); + lockdep_assert_wiphy(local->hw.wiphy); rcu_read_lock(); sta = sta_info_get_bss(sdata, peer); @@ -4209,7 +4182,6 @@ static int ieee80211_probe_client(struct wiphy *wiphy, struct net_device *dev, ret = 0; unlock: rcu_read_unlock(); - mutex_unlock(&local->mtx); return ret; } @@ -4567,7 +4539,8 @@ static int ieee80211_set_tid_config(struct wiphy *wiphy, { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct sta_info *sta; - int ret; + + 
lockdep_assert_wiphy(sdata->local->hw.wiphy); if (!sdata->local->ops->set_tid_config) return -EOPNOTSUPP; @@ -4575,17 +4548,11 @@ static int ieee80211_set_tid_config(struct wiphy *wiphy, if (!tid_conf->peer) return drv_set_tid_config(sdata->local, sdata, NULL, tid_conf); - mutex_lock(&sdata->local->sta_mtx); sta = sta_info_get_bss(sdata, tid_conf->peer); - if (!sta) { - mutex_unlock(&sdata->local->sta_mtx); + if (!sta) return -ENOENT; - } - - ret = drv_set_tid_config(sdata->local, sdata, &sta->sta, tid_conf); - mutex_unlock(&sdata->local->sta_mtx); - return ret; + return drv_set_tid_config(sdata->local, sdata, &sta->sta, tid_conf); } static int ieee80211_reset_tid_config(struct wiphy *wiphy, @@ -4594,7 +4561,8 @@ static int ieee80211_reset_tid_config(struct wiphy *wiphy, { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct sta_info *sta; - int ret; + + lockdep_assert_wiphy(sdata->local->hw.wiphy); if (!sdata->local->ops->reset_tid_config) return -EOPNOTSUPP; @@ -4602,17 +4570,11 @@ static int ieee80211_reset_tid_config(struct wiphy *wiphy, if (!peer) return drv_reset_tid_config(sdata->local, sdata, NULL, tids); - mutex_lock(&sdata->local->sta_mtx); sta = sta_info_get_bss(sdata, peer); - if (!sta) { - mutex_unlock(&sdata->local->sta_mtx); + if (!sta) return -ENOENT; - } - ret = drv_reset_tid_config(sdata->local, sdata, &sta->sta, tids); - mutex_unlock(&sdata->local->sta_mtx); - - return ret; + return drv_reset_tid_config(sdata->local, sdata, &sta->sta, tids); } static int ieee80211_set_sar_specs(struct wiphy *wiphy, @@ -4698,6 +4660,8 @@ static void ieee80211_color_change_bss_config_notify(struct ieee80211_sub_if_data *sdata, u8 color, int enable, u64 changed) { + lockdep_assert_wiphy(sdata->local->hw.wiphy); + sdata->vif.bss_conf.he_bss_color.color = color; sdata->vif.bss_conf.he_bss_color.enabled = enable; changed |= BSS_CHANGED_HE_BSS_COLOR; @@ -4707,7 +4671,6 @@ ieee80211_color_change_bss_config_notify(struct ieee80211_sub_if_data *sdata, if (!sdata->vif.bss_conf.nontransmitted && sdata->vif.mbssid_tx_vif) { struct ieee80211_sub_if_data *child; - mutex_lock(&sdata->local->iflist_mtx); list_for_each_entry(child, &sdata->local->interfaces, list) { if (child != sdata && child->vif.mbssid_tx_vif == &sdata->vif) { child->vif.bss_conf.he_bss_color.color = color; @@ -4717,7 +4680,6 @@ ieee80211_color_change_bss_config_notify(struct ieee80211_sub_if_data *sdata, BSS_CHANGED_HE_BSS_COLOR); } } - mutex_unlock(&sdata->local->iflist_mtx); } } @@ -4727,8 +4689,7 @@ static int ieee80211_color_change_finalize(struct ieee80211_sub_if_data *sdata) u64 changed = 0; int err; - sdata_assert_lock(sdata); - lockdep_assert_held(&local->mtx); + lockdep_assert_wiphy(local->hw.wiphy); sdata->vif.bss_conf.color_change_active = false; @@ -4746,28 +4707,24 @@ static int ieee80211_color_change_finalize(struct ieee80211_sub_if_data *sdata) return 0; } -void ieee80211_color_change_finalize_work(struct work_struct *work) +void ieee80211_color_change_finalize_work(struct wiphy *wiphy, + struct wiphy_work *work) { struct ieee80211_sub_if_data *sdata = container_of(work, struct ieee80211_sub_if_data, deflink.color_change_finalize_work); struct ieee80211_local *local = sdata->local; - sdata_lock(sdata); - mutex_lock(&local->mtx); + lockdep_assert_wiphy(local->hw.wiphy); /* AP might have been stopped while waiting for the lock. 
*/ if (!sdata->vif.bss_conf.color_change_active) - goto unlock; + return; if (!ieee80211_sdata_running(sdata)) - goto unlock; + return; ieee80211_color_change_finalize(sdata); - -unlock: - mutex_unlock(&local->mtx); - sdata_unlock(sdata); } void ieee80211_color_collision_detection_work(struct work_struct *work) @@ -4778,17 +4735,15 @@ void ieee80211_color_collision_detection_work(struct work_struct *work) color_collision_detect_work); struct ieee80211_sub_if_data *sdata = link->sdata; - sdata_lock(sdata); cfg80211_obss_color_collision_notify(sdata->dev, link->color_bitmap); - sdata_unlock(sdata); } void ieee80211_color_change_finish(struct ieee80211_vif *vif) { struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); - ieee80211_queue_work(&sdata->local->hw, - &sdata->deflink.color_change_finalize_work); + wiphy_work_queue(sdata->local->hw.wiphy, + &sdata->deflink.color_change_finalize_work); } EXPORT_SYMBOL_GPL(ieee80211_color_change_finish); @@ -4824,13 +4779,11 @@ ieee80211_color_change(struct wiphy *wiphy, struct net_device *dev, u64 changed = 0; int err; - sdata_assert_lock(sdata); + lockdep_assert_wiphy(local->hw.wiphy); if (sdata->vif.bss_conf.nontransmitted) return -EINVAL; - mutex_lock(&local->mtx); - /* don't allow another color change if one is already active or if csa * is active */ @@ -4855,7 +4808,6 @@ ieee80211_color_change(struct wiphy *wiphy, struct net_device *dev, ieee80211_color_change_finalize(sdata); out: - mutex_unlock(&local->mtx); return err; } @@ -4877,16 +4829,13 @@ static int ieee80211_add_intf_link(struct wiphy *wiphy, unsigned int link_id) { struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); - int res; + + lockdep_assert_wiphy(sdata->local->hw.wiphy); if (wdev->use_4addr) return -EOPNOTSUPP; - mutex_lock(&sdata->local->mtx); - res = ieee80211_vif_set_links(sdata, wdev->valid_links, 0); - mutex_unlock(&sdata->local->mtx); - - return res; + return ieee80211_vif_set_links(sdata, wdev->valid_links, 0); } static void ieee80211_del_intf_link(struct wiphy *wiphy, @@ -4895,9 +4844,9 @@ static void ieee80211_del_intf_link(struct wiphy *wiphy, { struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); - mutex_lock(&sdata->local->mtx); + lockdep_assert_wiphy(sdata->local->hw.wiphy); + ieee80211_vif_set_links(sdata, wdev->valid_links, 0); - mutex_unlock(&sdata->local->mtx); } static int sta_add_link_station(struct ieee80211_local *local, @@ -4937,13 +4886,10 @@ ieee80211_add_link_station(struct wiphy *wiphy, struct net_device *dev, { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = wiphy_priv(wiphy); - int ret; - mutex_lock(&sdata->local->sta_mtx); - ret = sta_add_link_station(local, sdata, params); - mutex_unlock(&sdata->local->sta_mtx); + lockdep_assert_wiphy(sdata->local->hw.wiphy); - return ret; + return sta_add_link_station(local, sdata, params); } static int sta_mod_link_station(struct ieee80211_local *local, @@ -4968,13 +4914,10 @@ ieee80211_mod_link_station(struct wiphy *wiphy, struct net_device *dev, { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = wiphy_priv(wiphy); - int ret; - mutex_lock(&sdata->local->sta_mtx); - ret = sta_mod_link_station(local, sdata, params); - mutex_unlock(&sdata->local->sta_mtx); + lockdep_assert_wiphy(sdata->local->hw.wiphy); - return ret; + return sta_mod_link_station(local, sdata, params); } static int sta_del_link_station(struct ieee80211_sub_if_data *sdata, @@ -5003,13 +4946,10 @@ 
ieee80211_del_link_station(struct wiphy *wiphy, struct net_device *dev, struct link_station_del_parameters *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - int ret; - mutex_lock(&sdata->local->sta_mtx); - ret = sta_del_link_station(sdata, params); - mutex_unlock(&sdata->local->sta_mtx); + lockdep_assert_wiphy(sdata->local->hw.wiphy); - return ret; + return sta_del_link_station(sdata, params); } static int ieee80211_set_hw_timestamp(struct wiphy *wiphy, diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index 68952752b5..1d928f29ad 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -18,7 +18,7 @@ static int ieee80211_chanctx_num_assigned(struct ieee80211_local *local, struct ieee80211_link_data *link; int num = 0; - lockdep_assert_held(&local->chanctx_mtx); + lockdep_assert_wiphy(local->hw.wiphy); list_for_each_entry(link, &ctx->assigned_links, assigned_chanctx_list) num++; @@ -32,7 +32,7 @@ static int ieee80211_chanctx_num_reserved(struct ieee80211_local *local, struct ieee80211_link_data *link; int num = 0; - lockdep_assert_held(&local->chanctx_mtx); + lockdep_assert_wiphy(local->hw.wiphy); list_for_each_entry(link, &ctx->reserved_links, reserved_chanctx_list) num++; @@ -52,7 +52,7 @@ static int ieee80211_num_chanctx(struct ieee80211_local *local) struct ieee80211_chanctx *ctx; int num = 0; - lockdep_assert_held(&local->chanctx_mtx); + lockdep_assert_wiphy(local->hw.wiphy); list_for_each_entry(ctx, &local->chanctx_list, list) num++; @@ -62,7 +62,8 @@ static int ieee80211_num_chanctx(struct ieee80211_local *local) static bool ieee80211_can_create_new_chanctx(struct ieee80211_local *local) { - lockdep_assert_held(&local->chanctx_mtx); + lockdep_assert_wiphy(local->hw.wiphy); + return ieee80211_num_chanctx(local) < ieee80211_max_num_channels(local); } @@ -73,7 +74,7 @@ ieee80211_link_get_chanctx(struct ieee80211_link_data *link) struct ieee80211_chanctx_conf *conf; conf = rcu_dereference_protected(link->conf->chanctx_conf, - lockdep_is_held(&local->chanctx_mtx)); + lockdep_is_held(&local->hw.wiphy->mtx)); if (!conf) return NULL; @@ -87,7 +88,7 @@ ieee80211_chanctx_reserved_chandef(struct ieee80211_local *local, { struct ieee80211_link_data *link; - lockdep_assert_held(&local->chanctx_mtx); + lockdep_assert_wiphy(local->hw.wiphy); list_for_each_entry(link, &ctx->reserved_links, reserved_chanctx_list) { @@ -110,7 +111,7 @@ ieee80211_chanctx_non_reserved_chandef(struct ieee80211_local *local, { struct ieee80211_link_data *link; - lockdep_assert_held(&local->chanctx_mtx); + lockdep_assert_wiphy(local->hw.wiphy); list_for_each_entry(link, &ctx->assigned_links, assigned_chanctx_list) { @@ -136,7 +137,7 @@ ieee80211_chanctx_combined_chandef(struct ieee80211_local *local, struct ieee80211_chanctx *ctx, const struct cfg80211_chan_def *compat) { - lockdep_assert_held(&local->chanctx_mtx); + lockdep_assert_wiphy(local->hw.wiphy); compat = ieee80211_chanctx_reserved_chandef(local, ctx, compat); if (!compat) @@ -154,7 +155,7 @@ ieee80211_chanctx_can_reserve_chandef(struct ieee80211_local *local, struct ieee80211_chanctx *ctx, const struct cfg80211_chan_def *def) { - lockdep_assert_held(&local->chanctx_mtx); + lockdep_assert_wiphy(local->hw.wiphy); if (ieee80211_chanctx_combined_chandef(local, ctx, def)) return true; @@ -173,7 +174,7 @@ ieee80211_find_reservation_chanctx(struct ieee80211_local *local, { struct ieee80211_chanctx *ctx; - lockdep_assert_held(&local->chanctx_mtx); + lockdep_assert_wiphy(local->hw.wiphy); if (mode == IEEE80211_CHANCTX_EXCLUSIVE) 
return NULL; @@ -361,7 +362,7 @@ _ieee80211_recalc_chanctx_min_def(struct ieee80211_local *local, enum nl80211_chan_width max_bw; struct cfg80211_chan_def min_def; - lockdep_assert_held(&local->chanctx_mtx); + lockdep_assert_wiphy(local->hw.wiphy); /* don't optimize non-20MHz based and radar_enabled confs */ if (ctx->conf.def.width == NL80211_CHAN_WIDTH_5 || @@ -537,7 +538,7 @@ ieee80211_find_chanctx(struct ieee80211_local *local, { struct ieee80211_chanctx *ctx; - lockdep_assert_held(&local->chanctx_mtx); + lockdep_assert_wiphy(local->hw.wiphy); if (mode == IEEE80211_CHANCTX_EXCLUSIVE) return NULL; @@ -572,7 +573,7 @@ bool ieee80211_is_radar_required(struct ieee80211_local *local) { struct ieee80211_sub_if_data *sdata; - lockdep_assert_held(&local->mtx); + lockdep_assert_wiphy(local->hw.wiphy); rcu_read_lock(); list_for_each_entry_rcu(sdata, &local->interfaces, list) { @@ -602,8 +603,7 @@ ieee80211_chanctx_radar_required(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata; bool required = false; - lockdep_assert_held(&local->chanctx_mtx); - lockdep_assert_held(&local->mtx); + lockdep_assert_wiphy(local->hw.wiphy); rcu_read_lock(); list_for_each_entry_rcu(sdata, &local->interfaces, list) { @@ -641,7 +641,7 @@ ieee80211_alloc_chanctx(struct ieee80211_local *local, { struct ieee80211_chanctx *ctx; - lockdep_assert_held(&local->chanctx_mtx); + lockdep_assert_wiphy(local->hw.wiphy); ctx = kzalloc(sizeof(*ctx) + local->hw.chanctx_data_size, GFP_KERNEL); if (!ctx) @@ -665,8 +665,7 @@ static int ieee80211_add_chanctx(struct ieee80211_local *local, u32 changed; int err; - lockdep_assert_held(&local->mtx); - lockdep_assert_held(&local->chanctx_mtx); + lockdep_assert_wiphy(local->hw.wiphy); if (!local->use_chanctx) local->hw.conf.radar_enabled = ctx->conf.radar_enabled; @@ -698,8 +697,7 @@ ieee80211_new_chanctx(struct ieee80211_local *local, struct ieee80211_chanctx *ctx; int err; - lockdep_assert_held(&local->mtx); - lockdep_assert_held(&local->chanctx_mtx); + lockdep_assert_wiphy(local->hw.wiphy); ctx = ieee80211_alloc_chanctx(local, chandef, mode); if (!ctx) @@ -718,7 +716,7 @@ ieee80211_new_chanctx(struct ieee80211_local *local, static void ieee80211_del_chanctx(struct ieee80211_local *local, struct ieee80211_chanctx *ctx) { - lockdep_assert_held(&local->chanctx_mtx); + lockdep_assert_wiphy(local->hw.wiphy); if (!local->use_chanctx) { struct cfg80211_chan_def *chandef = &local->_oper_chandef; @@ -753,7 +751,7 @@ static void ieee80211_del_chanctx(struct ieee80211_local *local, static void ieee80211_free_chanctx(struct ieee80211_local *local, struct ieee80211_chanctx *ctx) { - lockdep_assert_held(&local->chanctx_mtx); + lockdep_assert_wiphy(local->hw.wiphy); WARN_ON_ONCE(ieee80211_chanctx_refcount(local, ctx) != 0); @@ -770,7 +768,7 @@ void ieee80211_recalc_chanctx_chantype(struct ieee80211_local *local, const struct cfg80211_chan_def *compat = NULL; struct sta_info *sta; - lockdep_assert_held(&local->chanctx_mtx); + lockdep_assert_wiphy(local->hw.wiphy); rcu_read_lock(); list_for_each_entry_rcu(sdata, &local->interfaces, list) { @@ -833,9 +831,7 @@ static void ieee80211_recalc_radar_chanctx(struct ieee80211_local *local, { bool radar_enabled; - lockdep_assert_held(&local->chanctx_mtx); - /* for ieee80211_is_radar_required */ - lockdep_assert_held(&local->mtx); + lockdep_assert_wiphy(local->hw.wiphy); radar_enabled = ieee80211_chanctx_radar_required(local, chanctx); @@ -865,7 +861,7 @@ static int ieee80211_assign_link_chanctx(struct ieee80211_link_data *link, return -ENOTSUPP; conf 
= rcu_dereference_protected(link->conf->chanctx_conf, - lockdep_is_held(&local->chanctx_mtx)); + lockdep_is_held(&local->hw.wiphy->mtx)); if (conf) { curr_ctx = container_of(conf, struct ieee80211_chanctx, conf); @@ -920,7 +916,7 @@ void ieee80211_recalc_smps_chanctx(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata; u8 rx_chains_static, rx_chains_dynamic; - lockdep_assert_held(&local->chanctx_mtx); + lockdep_assert_wiphy(local->hw.wiphy); rx_chains_static = 1; rx_chains_dynamic = 1; @@ -1023,7 +1019,7 @@ __ieee80211_link_copy_chanctx_to_vlans(struct ieee80211_link_data *link, if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_AP)) return; - lockdep_assert_held(&local->mtx); + lockdep_assert_wiphy(local->hw.wiphy); /* Check that conf exists, even when clearing this function * must be called with the AP's channel context still there @@ -1032,7 +1028,7 @@ __ieee80211_link_copy_chanctx_to_vlans(struct ieee80211_link_data *link, * to a channel context that has already been freed. */ conf = rcu_dereference_protected(link_conf->chanctx_conf, - lockdep_is_held(&local->chanctx_mtx)); + lockdep_is_held(&local->hw.wiphy->mtx)); WARN_ON(!conf); if (clear) @@ -1056,11 +1052,9 @@ void ieee80211_link_copy_chanctx_to_vlans(struct ieee80211_link_data *link, { struct ieee80211_local *local = link->sdata->local; - mutex_lock(&local->chanctx_mtx); + lockdep_assert_wiphy(local->hw.wiphy); __ieee80211_link_copy_chanctx_to_vlans(link, clear); - - mutex_unlock(&local->chanctx_mtx); } int ieee80211_link_unreserve_chanctx(struct ieee80211_link_data *link) @@ -1068,7 +1062,7 @@ int ieee80211_link_unreserve_chanctx(struct ieee80211_link_data *link) struct ieee80211_sub_if_data *sdata = link->sdata; struct ieee80211_chanctx *ctx = link->reserved_chanctx; - lockdep_assert_held(&sdata->local->chanctx_mtx); + lockdep_assert_wiphy(sdata->local->hw.wiphy); if (WARN_ON(!ctx)) return -EINVAL; @@ -1108,7 +1102,7 @@ int ieee80211_link_reserve_chanctx(struct ieee80211_link_data *link, struct ieee80211_local *local = sdata->local; struct ieee80211_chanctx *new_ctx, *curr_ctx, *ctx; - lockdep_assert_held(&local->chanctx_mtx); + lockdep_assert_wiphy(local->hw.wiphy); curr_ctx = ieee80211_link_get_chanctx(link); if (curr_ctx && local->use_chanctx && !local->ops->switch_vif_chanctx) @@ -1206,8 +1200,8 @@ ieee80211_link_chanctx_reservation_complete(struct ieee80211_link_data *link) case NL80211_IFTYPE_AP: case NL80211_IFTYPE_MESH_POINT: case NL80211_IFTYPE_OCB: - ieee80211_queue_work(&sdata->local->hw, - &link->csa_finalize_work); + wiphy_work_queue(sdata->local->hw.wiphy, + &link->csa_finalize_work); break; case NL80211_IFTYPE_STATION: wiphy_delayed_work_queue(sdata->local->hw.wiphy, @@ -1265,8 +1259,7 @@ ieee80211_link_use_reserved_reassign(struct ieee80211_link_data *link) u64 changed = 0; int err; - lockdep_assert_held(&local->mtx); - lockdep_assert_held(&local->chanctx_mtx); + lockdep_assert_wiphy(local->hw.wiphy); new_ctx = link->reserved_chanctx; old_ctx = ieee80211_link_get_chanctx(link); @@ -1390,7 +1383,7 @@ ieee80211_link_has_in_place_reservation(struct ieee80211_link_data *link) struct ieee80211_sub_if_data *sdata = link->sdata; struct ieee80211_chanctx *old_ctx, *new_ctx; - lockdep_assert_held(&sdata->local->chanctx_mtx); + lockdep_assert_wiphy(sdata->local->hw.wiphy); new_ctx = link->reserved_chanctx; old_ctx = ieee80211_link_get_chanctx(link); @@ -1415,8 +1408,7 @@ static int ieee80211_chsw_switch_hwconf(struct ieee80211_local *local, { const struct cfg80211_chan_def *chandef; - 
lockdep_assert_held(&local->mtx); - lockdep_assert_held(&local->chanctx_mtx); + lockdep_assert_wiphy(local->hw.wiphy); chandef = ieee80211_chanctx_reserved_chandef(local, new_ctx, NULL); if (WARN_ON(!chandef)) @@ -1437,8 +1429,7 @@ static int ieee80211_chsw_switch_vifs(struct ieee80211_local *local, struct ieee80211_chanctx *ctx, *old_ctx; int i, err; - lockdep_assert_held(&local->mtx); - lockdep_assert_held(&local->chanctx_mtx); + lockdep_assert_wiphy(local->hw.wiphy); vif_chsw = kcalloc(n_vifs, sizeof(vif_chsw[0]), GFP_KERNEL); if (!vif_chsw) @@ -1482,8 +1473,7 @@ static int ieee80211_chsw_switch_ctxs(struct ieee80211_local *local) struct ieee80211_chanctx *ctx; int err; - lockdep_assert_held(&local->mtx); - lockdep_assert_held(&local->chanctx_mtx); + lockdep_assert_wiphy(local->hw.wiphy); list_for_each_entry(ctx, &local->chanctx_list, list) { if (ctx->replace_state != IEEE80211_CHANCTX_REPLACES_OTHER) @@ -1523,8 +1513,7 @@ static int ieee80211_vif_use_reserved_switch(struct ieee80211_local *local) int err, n_assigned, n_reserved, n_ready; int n_ctx = 0, n_vifs_switch = 0, n_vifs_assign = 0, n_vifs_ctxless = 0; - lockdep_assert_held(&local->mtx); - lockdep_assert_held(&local->chanctx_mtx); + lockdep_assert_wiphy(local->hw.wiphy); /* * If there are 2 independent pairs of channel contexts performing @@ -1783,10 +1772,10 @@ static void __ieee80211_link_release_channel(struct ieee80211_link_data *link) struct ieee80211_chanctx *ctx; bool use_reserved_switch = false; - lockdep_assert_held(&local->chanctx_mtx); + lockdep_assert_wiphy(local->hw.wiphy); conf = rcu_dereference_protected(link_conf->chanctx_conf, - lockdep_is_held(&local->chanctx_mtx)); + lockdep_is_held(&local->hw.wiphy->mtx)); if (!conf) return; @@ -1821,7 +1810,7 @@ int ieee80211_link_use_channel(struct ieee80211_link_data *link, u8 radar_detect_width = 0; int ret; - lockdep_assert_held(&local->mtx); + lockdep_assert_wiphy(local->hw.wiphy); if (sdata->vif.active_links && !(sdata->vif.active_links & BIT(link->link_id))) { @@ -1829,8 +1818,6 @@ int ieee80211_link_use_channel(struct ieee80211_link_data *link, return 0; } - mutex_lock(&local->chanctx_mtx); - ret = cfg80211_chandef_dfs_required(local->hw.wiphy, chandef, sdata->wdev.iftype); @@ -1872,7 +1859,6 @@ int ieee80211_link_use_channel(struct ieee80211_link_data *link, if (ret) link->radar_required = false; - mutex_unlock(&local->chanctx_mtx); return ret; } @@ -1884,8 +1870,7 @@ int ieee80211_link_use_reserved_context(struct ieee80211_link_data *link) struct ieee80211_chanctx *old_ctx; int err; - lockdep_assert_held(&local->mtx); - lockdep_assert_held(&local->chanctx_mtx); + lockdep_assert_wiphy(local->hw.wiphy); new_ctx = link->reserved_chanctx; old_ctx = ieee80211_link_get_chanctx(link); @@ -1948,51 +1933,40 @@ int ieee80211_link_change_bandwidth(struct ieee80211_link_data *link, struct ieee80211_chanctx_conf *conf; struct ieee80211_chanctx *ctx; const struct cfg80211_chan_def *compat; - int ret; + + lockdep_assert_wiphy(local->hw.wiphy); if (!cfg80211_chandef_usable(sdata->local->hw.wiphy, chandef, IEEE80211_CHAN_DISABLED)) return -EINVAL; - mutex_lock(&local->chanctx_mtx); - if (cfg80211_chandef_identical(chandef, &link_conf->chandef)) { - ret = 0; - goto out; - } + if (cfg80211_chandef_identical(chandef, &link_conf->chandef)) + return 0; if (chandef->width == NL80211_CHAN_WIDTH_20_NOHT || - link_conf->chandef.width == NL80211_CHAN_WIDTH_20_NOHT) { - ret = -EINVAL; - goto out; - } + link_conf->chandef.width == NL80211_CHAN_WIDTH_20_NOHT) + return -EINVAL; conf = 
rcu_dereference_protected(link_conf->chanctx_conf, - lockdep_is_held(&local->chanctx_mtx)); - if (!conf) { - ret = -EINVAL; - goto out; - } + lockdep_is_held(&local->hw.wiphy->mtx)); + if (!conf) + return -EINVAL; ctx = container_of(conf, struct ieee80211_chanctx, conf); compat = cfg80211_chandef_compatible(&conf->def, chandef); - if (!compat) { - ret = -EINVAL; - goto out; - } + if (!compat) + return -EINVAL; switch (ctx->replace_state) { case IEEE80211_CHANCTX_REPLACE_NONE: - if (!ieee80211_chanctx_reserved_chandef(local, ctx, compat)) { - ret = -EBUSY; - goto out; - } + if (!ieee80211_chanctx_reserved_chandef(local, ctx, compat)) + return -EBUSY; break; case IEEE80211_CHANCTX_WILL_BE_REPLACED: /* TODO: Perhaps the bandwidth change could be treated as a * reservation itself? */ - ret = -EBUSY; - goto out; + return -EBUSY; case IEEE80211_CHANCTX_REPLACES_OTHER: /* channel context that is going to replace another channel * context doesn't really exist and shouldn't be assigned @@ -2006,22 +1980,17 @@ int ieee80211_link_change_bandwidth(struct ieee80211_link_data *link, ieee80211_recalc_chanctx_chantype(local, ctx); *changed |= BSS_CHANGED_BANDWIDTH; - ret = 0; - out: - mutex_unlock(&local->chanctx_mtx); - return ret; + return 0; } void ieee80211_link_release_channel(struct ieee80211_link_data *link) { struct ieee80211_sub_if_data *sdata = link->sdata; - mutex_lock(&sdata->local->chanctx_mtx); - if (rcu_access_pointer(link->conf->chanctx_conf)) { - lockdep_assert_held(&sdata->local->mtx); + lockdep_assert_wiphy(sdata->local->hw.wiphy); + + if (rcu_access_pointer(link->conf->chanctx_conf)) __ieee80211_link_release_channel(link); - } - mutex_unlock(&sdata->local->chanctx_mtx); } void ieee80211_link_vlan_copy_chanctx(struct ieee80211_link_data *link) @@ -2034,20 +2003,19 @@ void ieee80211_link_vlan_copy_chanctx(struct ieee80211_link_data *link) struct ieee80211_sub_if_data *ap; struct ieee80211_chanctx_conf *conf; + lockdep_assert_wiphy(local->hw.wiphy); + if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_AP_VLAN || !sdata->bss)) return; ap = container_of(sdata->bss, struct ieee80211_sub_if_data, u.ap); - mutex_lock(&local->chanctx_mtx); - rcu_read_lock(); ap_conf = rcu_dereference(ap->vif.link_conf[link_id]); conf = rcu_dereference_protected(ap_conf->chanctx_conf, - lockdep_is_held(&local->chanctx_mtx)); + lockdep_is_held(&local->hw.wiphy->mtx)); rcu_assign_pointer(link_conf->chanctx_conf, conf); rcu_read_unlock(); - mutex_unlock(&local->chanctx_mtx); } void ieee80211_iter_chan_contexts_atomic( diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 207f772bd8..b575ae90e5 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -4,7 +4,7 @@ * * Copyright 2007 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH - * Copyright (C) 2018 - 2019, 2021-2022 Intel Corporation + * Copyright (C) 2018 - 2019, 2021-2023 Intel Corporation */ #include <linux/debugfs.h> @@ -288,10 +288,10 @@ static ssize_t aql_txq_limit_write(struct file *file, q_limit_low_old = local->aql_txq_limit_low[ac]; q_limit_high_old = local->aql_txq_limit_high[ac]; + wiphy_lock(local->hw.wiphy); local->aql_txq_limit_low[ac] = q_limit_low; local->aql_txq_limit_high[ac] = q_limit_high; - mutex_lock(&local->sta_mtx); list_for_each_entry(sta, &local->sta_list, list) { /* If a sta has customized queue limits, keep it */ if (sta->airtime[ac].aql_limit_low == q_limit_low_old && @@ -300,7 +300,8 @@ static ssize_t aql_txq_limit_write(struct file *file, 
sta->airtime[ac].aql_limit_high = q_limit_high; } } - mutex_unlock(&local->sta_mtx); + wiphy_unlock(local->hw.wiphy); + return count; } @@ -594,9 +595,9 @@ static ssize_t format_devstat_counter(struct ieee80211_local *local, char buf[20]; int res; - rtnl_lock(); + wiphy_lock(local->hw.wiphy); res = drv_get_stats(local, &stats); - rtnl_unlock(); + wiphy_unlock(local->hw.wiphy); if (res) return res; res = printvalue(&stats, buf, sizeof(buf)); diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c index 16a04330e7..7e54da5087 100644 --- a/net/mac80211/debugfs_key.c +++ b/net/mac80211/debugfs_key.c @@ -4,7 +4,7 @@ * Copyright (c) 2006 Jiri Benc <jbenc@suse.cz> * Copyright 2007 Johannes Berg <johannes@sipsolutions.net> * Copyright (C) 2015 Intel Deutschland GmbH - * Copyright (C) 2021-2022 Intel Corporation + * Copyright (C) 2021-2023 Intel Corporation */ #include <linux/kobject.h> @@ -378,14 +378,14 @@ void ieee80211_debugfs_key_update_default(struct ieee80211_sub_if_data *sdata) if (!sdata->vif.debugfs_dir) return; - lockdep_assert_held(&sdata->local->key_mtx); + lockdep_assert_wiphy(sdata->local->hw.wiphy); debugfs_remove(sdata->debugfs.default_unicast_key); sdata->debugfs.default_unicast_key = NULL; if (sdata->default_unicast_key) { - key = key_mtx_dereference(sdata->local, - sdata->default_unicast_key); + key = wiphy_dereference(sdata->local->hw.wiphy, + sdata->default_unicast_key); sprintf(buf, "../keys/%d", key->debugfs.cnt); sdata->debugfs.default_unicast_key = debugfs_create_symlink("default_unicast_key", @@ -396,8 +396,8 @@ void ieee80211_debugfs_key_update_default(struct ieee80211_sub_if_data *sdata) sdata->debugfs.default_multicast_key = NULL; if (sdata->deflink.default_multicast_key) { - key = key_mtx_dereference(sdata->local, - sdata->deflink.default_multicast_key); + key = wiphy_dereference(sdata->local->hw.wiphy, + sdata->deflink.default_multicast_key); sprintf(buf, "../keys/%d", key->debugfs.cnt); sdata->debugfs.default_multicast_key = debugfs_create_symlink("default_multicast_key", @@ -413,8 +413,8 @@ void ieee80211_debugfs_key_add_mgmt_default(struct ieee80211_sub_if_data *sdata) if (!sdata->vif.debugfs_dir) return; - key = key_mtx_dereference(sdata->local, - sdata->deflink.default_mgmt_key); + key = wiphy_dereference(sdata->local->hw.wiphy, + sdata->deflink.default_mgmt_key); if (key) { sprintf(buf, "../keys/%d", key->debugfs.cnt); sdata->debugfs.default_mgmt_key = @@ -442,8 +442,8 @@ ieee80211_debugfs_key_add_beacon_default(struct ieee80211_sub_if_data *sdata) if (!sdata->vif.debugfs_dir) return; - key = key_mtx_dereference(sdata->local, - sdata->deflink.default_beacon_key); + key = wiphy_dereference(sdata->local->hw.wiphy, + sdata->deflink.default_beacon_key); if (key) { sprintf(buf, "../keys/%d", key->debugfs.cnt); sdata->debugfs.default_beacon_key = diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 63250286dc..68596ef78b 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -22,46 +22,148 @@ #include "debugfs_netdev.h" #include "driver-ops.h" -static ssize_t ieee80211_if_read( - void *data, +struct ieee80211_if_read_sdata_data { + ssize_t (*format)(const struct ieee80211_sub_if_data *, char *, int); + struct ieee80211_sub_if_data *sdata; +}; + +static ssize_t ieee80211_if_read_sdata_handler(struct wiphy *wiphy, + struct file *file, + char *buf, + size_t bufsize, + void *data) +{ + struct ieee80211_if_read_sdata_data *d = data; + + return d->format(d->sdata, buf, bufsize); +} + +static ssize_t 
ieee80211_if_read_sdata( + struct file *file, char __user *userbuf, size_t count, loff_t *ppos, - ssize_t (*format)(const void *, char *, int)) + ssize_t (*format)(const struct ieee80211_sub_if_data *sdata, char *, int)) { + struct ieee80211_sub_if_data *sdata = file->private_data; + struct ieee80211_if_read_sdata_data data = { + .format = format, + .sdata = sdata, + }; char buf[200]; - ssize_t ret = -EINVAL; - read_lock(&dev_base_lock); - ret = (*format)(data, buf, sizeof(buf)); - read_unlock(&dev_base_lock); + return wiphy_locked_debugfs_read(sdata->local->hw.wiphy, + file, buf, sizeof(buf), + userbuf, count, ppos, + ieee80211_if_read_sdata_handler, + &data); +} + +struct ieee80211_if_write_sdata_data { + ssize_t (*write)(struct ieee80211_sub_if_data *, const char *, int); + struct ieee80211_sub_if_data *sdata; +}; - if (ret >= 0) - ret = simple_read_from_buffer(userbuf, count, ppos, buf, ret); +static ssize_t ieee80211_if_write_sdata_handler(struct wiphy *wiphy, + struct file *file, + char *buf, + size_t count, + void *data) +{ + struct ieee80211_if_write_sdata_data *d = data; - return ret; + return d->write(d->sdata, buf, count); } -static ssize_t ieee80211_if_write( - void *data, +static ssize_t ieee80211_if_write_sdata( + struct file *file, const char __user *userbuf, size_t count, loff_t *ppos, - ssize_t (*write)(void *, const char *, int)) + ssize_t (*write)(struct ieee80211_sub_if_data *sdata, const char *, int)) { + struct ieee80211_sub_if_data *sdata = file->private_data; + struct ieee80211_if_write_sdata_data data = { + .write = write, + .sdata = sdata, + }; char buf[64]; - ssize_t ret; - if (count >= sizeof(buf)) - return -E2BIG; + return wiphy_locked_debugfs_write(sdata->local->hw.wiphy, + file, buf, sizeof(buf), + userbuf, count, + ieee80211_if_write_sdata_handler, + &data); +} + +struct ieee80211_if_read_link_data { + ssize_t (*format)(const struct ieee80211_link_data *, char *, int); + struct ieee80211_link_data *link; +}; + +static ssize_t ieee80211_if_read_link_handler(struct wiphy *wiphy, + struct file *file, + char *buf, + size_t bufsize, + void *data) +{ + struct ieee80211_if_read_link_data *d = data; + + return d->format(d->link, buf, bufsize); +} + +static ssize_t ieee80211_if_read_link( + struct file *file, + char __user *userbuf, + size_t count, loff_t *ppos, + ssize_t (*format)(const struct ieee80211_link_data *link, char *, int)) +{ + struct ieee80211_link_data *link = file->private_data; + struct ieee80211_if_read_link_data data = { + .format = format, + .link = link, + }; + char buf[200]; + + return wiphy_locked_debugfs_read(link->sdata->local->hw.wiphy, + file, buf, sizeof(buf), + userbuf, count, ppos, + ieee80211_if_read_link_handler, + &data); +} + +struct ieee80211_if_write_link_data { + ssize_t (*write)(struct ieee80211_link_data *, const char *, int); + struct ieee80211_link_data *link; +}; + +static ssize_t ieee80211_if_write_link_handler(struct wiphy *wiphy, + struct file *file, + char *buf, + size_t count, + void *data) +{ + struct ieee80211_if_write_link_data *d = data; - - if (copy_from_user(buf, userbuf, count)) - return -EFAULT; - buf[count] = '\0'; + return d->write(d->link, buf, count); +} - rtnl_lock(); - ret = (*write)(data, buf, count); - rtnl_unlock(); +static ssize_t ieee80211_if_write_link( + struct file *file, + const char __user *userbuf, + size_t count, loff_t *ppos, + ssize_t (*write)(struct ieee80211_link_data *link, const char *, int)) +{ + struct ieee80211_link_data *link = file->private_data; + struct ieee80211_if_write_link_data 
data = { + .write = write, + .link = link, + }; + char buf[64]; - return ret; + return wiphy_locked_debugfs_write(link->sdata->local->hw.wiphy, + file, buf, sizeof(buf), + userbuf, count, + ieee80211_if_write_link_handler, + &data); } #define IEEE80211_IF_FMT(name, type, field, format_string) \ @@ -126,41 +228,37 @@ static const struct file_operations name##_ops = { \ .llseek = generic_file_llseek, \ } -#define _IEEE80211_IF_FILE_R_FN(name, type) \ +#define _IEEE80211_IF_FILE_R_FN(name) \ static ssize_t ieee80211_if_read_##name(struct file *file, \ char __user *userbuf, \ size_t count, loff_t *ppos) \ { \ - ssize_t (*fn)(const void *, char *, int) = (void *) \ - ((ssize_t (*)(const type, char *, int)) \ - ieee80211_if_fmt_##name); \ - return ieee80211_if_read(file->private_data, \ - userbuf, count, ppos, fn); \ + return ieee80211_if_read_sdata(file, \ + userbuf, count, ppos, \ + ieee80211_if_fmt_##name); \ } -#define _IEEE80211_IF_FILE_W_FN(name, type) \ +#define _IEEE80211_IF_FILE_W_FN(name) \ static ssize_t ieee80211_if_write_##name(struct file *file, \ const char __user *userbuf, \ size_t count, loff_t *ppos) \ { \ - ssize_t (*fn)(void *, const char *, int) = (void *) \ - ((ssize_t (*)(type, const char *, int)) \ - ieee80211_if_parse_##name); \ - return ieee80211_if_write(file->private_data, userbuf, count, \ - ppos, fn); \ + return ieee80211_if_write_sdata(file, userbuf, \ + count, ppos, \ + ieee80211_if_parse_##name); \ } #define IEEE80211_IF_FILE_R(name) \ - _IEEE80211_IF_FILE_R_FN(name, struct ieee80211_sub_if_data *) \ + _IEEE80211_IF_FILE_R_FN(name) \ _IEEE80211_IF_FILE_OPS(name, ieee80211_if_read_##name, NULL) #define IEEE80211_IF_FILE_W(name) \ - _IEEE80211_IF_FILE_W_FN(name, struct ieee80211_sub_if_data *) \ + _IEEE80211_IF_FILE_W_FN(name) \ _IEEE80211_IF_FILE_OPS(name, NULL, ieee80211_if_write_##name) #define IEEE80211_IF_FILE_RW(name) \ - _IEEE80211_IF_FILE_R_FN(name, struct ieee80211_sub_if_data *) \ - _IEEE80211_IF_FILE_W_FN(name, struct ieee80211_sub_if_data *) \ + _IEEE80211_IF_FILE_R_FN(name) \ + _IEEE80211_IF_FILE_W_FN(name) \ _IEEE80211_IF_FILE_OPS(name, ieee80211_if_read_##name, \ ieee80211_if_write_##name) @@ -168,18 +266,37 @@ static ssize_t ieee80211_if_write_##name(struct file *file, \ IEEE80211_IF_FMT_##format(name, struct ieee80211_sub_if_data, field) \ IEEE80211_IF_FILE_R(name) -/* Same but with a link_ prefix in the ops variable name and different type */ +#define _IEEE80211_IF_LINK_R_FN(name) \ +static ssize_t ieee80211_if_read_##name(struct file *file, \ + char __user *userbuf, \ + size_t count, loff_t *ppos) \ +{ \ + return ieee80211_if_read_link(file, \ + userbuf, count, ppos, \ + ieee80211_if_fmt_##name); \ +} + +#define _IEEE80211_IF_LINK_W_FN(name) \ +static ssize_t ieee80211_if_write_##name(struct file *file, \ + const char __user *userbuf, \ + size_t count, loff_t *ppos) \ +{ \ + return ieee80211_if_write_link(file, userbuf, \ + count, ppos, \ + ieee80211_if_parse_##name); \ +} + #define IEEE80211_IF_LINK_FILE_R(name) \ - _IEEE80211_IF_FILE_R_FN(name, struct ieee80211_link_data *) \ + _IEEE80211_IF_LINK_R_FN(name) \ _IEEE80211_IF_FILE_OPS(link_##name, ieee80211_if_read_##name, NULL) #define IEEE80211_IF_LINK_FILE_W(name) \ - _IEEE80211_IF_FILE_W_FN(name) \ + _IEEE80211_IF_LINK_W_FN(name) \ _IEEE80211_IF_FILE_OPS(link_##name, NULL, ieee80211_if_write_##name) #define IEEE80211_IF_LINK_FILE_RW(name) \ - _IEEE80211_IF_FILE_R_FN(name, struct ieee80211_link_data *) \ - _IEEE80211_IF_FILE_W_FN(name, struct ieee80211_link_data *) \ + 
_IEEE80211_IF_LINK_R_FN(name) \ + _IEEE80211_IF_LINK_W_FN(name) \ _IEEE80211_IF_FILE_OPS(link_##name, ieee80211_if_read_##name, \ ieee80211_if_write_##name) @@ -265,9 +382,11 @@ static int ieee80211_set_smps(struct ieee80211_link_data *link, { struct ieee80211_sub_if_data *sdata = link->sdata; struct ieee80211_local *local = sdata->local; - int err; - if (sdata->vif.driver_flags & IEEE80211_VIF_DISABLE_SMPS_OVERRIDE) + /* The driver indicated that EML is enabled for the interface, thus do + * not allow to override the SMPS state. + */ + if (sdata->vif.driver_flags & IEEE80211_VIF_EML_ACTIVE) return -EOPNOTSUPP; if (!(local->hw.wiphy->features & NL80211_FEATURE_STATIC_SMPS) && @@ -283,11 +402,7 @@ static int ieee80211_set_smps(struct ieee80211_link_data *link, if (sdata->vif.type != NL80211_IFTYPE_STATION) return -EOPNOTSUPP; - sdata_lock(sdata); - err = __ieee80211_request_smps_mgd(link->sdata, link, smps_mode); - sdata_unlock(sdata); - - return err; + return __ieee80211_request_smps_mgd(link->sdata, link, smps_mode); } static const char *smps_modes[IEEE80211_SMPS_NUM_MODES] = { @@ -359,16 +474,13 @@ static ssize_t ieee80211_if_parse_tkip_mic_test( case NL80211_IFTYPE_STATION: fc |= cpu_to_le16(IEEE80211_FCTL_TODS); /* BSSID SA DA */ - sdata_lock(sdata); if (!sdata->u.mgd.associated) { - sdata_unlock(sdata); dev_kfree_skb(skb); return -ENOTCONN; } memcpy(hdr->addr1, sdata->deflink.u.mgd.bssid, ETH_ALEN); memcpy(hdr->addr2, sdata->vif.addr, ETH_ALEN); memcpy(hdr->addr3, addr, ETH_ALEN); - sdata_unlock(sdata); break; default: dev_kfree_skb(skb); @@ -885,18 +997,20 @@ static void add_link_files(struct ieee80211_link_data *link, } } -void ieee80211_debugfs_add_netdev(struct ieee80211_sub_if_data *sdata) +static void ieee80211_debugfs_add_netdev(struct ieee80211_sub_if_data *sdata, + bool mld_vif) { char buf[10+IFNAMSIZ]; sprintf(buf, "netdev:%s", sdata->name); sdata->vif.debugfs_dir = debugfs_create_dir(buf, sdata->local->hw.wiphy->debugfsdir); + /* deflink also has this */ + sdata->deflink.debugfs_dir = sdata->vif.debugfs_dir; sdata->debugfs.subdir_stations = debugfs_create_dir("stations", sdata->vif.debugfs_dir); add_files(sdata); - - if (!(sdata->local->hw.wiphy->flags & WIPHY_FLAG_SUPPORTS_MLO)) + if (!mld_vif) add_link_files(&sdata->deflink, sdata->vif.debugfs_dir); } @@ -924,11 +1038,24 @@ void ieee80211_debugfs_rename_netdev(struct ieee80211_sub_if_data *sdata) debugfs_rename(dir->d_parent, dir, dir->d_parent, buf); } +void ieee80211_debugfs_recreate_netdev(struct ieee80211_sub_if_data *sdata, + bool mld_vif) +{ + ieee80211_debugfs_remove_netdev(sdata); + ieee80211_debugfs_add_netdev(sdata, mld_vif); + + if (sdata->flags & IEEE80211_SDATA_IN_DRIVER) { + drv_vif_add_debugfs(sdata->local, sdata); + if (!mld_vif) + ieee80211_link_debugfs_drv_add(&sdata->deflink); + } +} + void ieee80211_link_debugfs_add(struct ieee80211_link_data *link) { char link_dir_name[10]; - if (WARN_ON(!link->sdata->vif.debugfs_dir)) + if (WARN_ON(!link->sdata->vif.debugfs_dir || link->debugfs_dir)) return; /* For now, this should not be called for non-MLO capable drivers */ @@ -965,7 +1092,8 @@ void ieee80211_link_debugfs_remove(struct ieee80211_link_data *link) void ieee80211_link_debugfs_drv_add(struct ieee80211_link_data *link) { - if (WARN_ON(!link->debugfs_dir)) + if (link->sdata->vif.type == NL80211_IFTYPE_MONITOR || + WARN_ON(!link->debugfs_dir)) return; drv_link_add_debugfs(link->sdata->local, link->sdata, diff --git a/net/mac80211/debugfs_netdev.h b/net/mac80211/debugfs_netdev.h index 
99e688dcab..a02ec0a413 100644 --- a/net/mac80211/debugfs_netdev.h +++ b/net/mac80211/debugfs_netdev.h @@ -1,4 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0 */ +/* + * Portions: + * Copyright (C) 2023 Intel Corporation + */ /* routines exported for debugfs handling */ #ifndef __IEEE80211_DEBUGFS_NETDEV_H @@ -7,9 +11,10 @@ #include "ieee80211_i.h" #ifdef CONFIG_MAC80211_DEBUGFS -void ieee80211_debugfs_add_netdev(struct ieee80211_sub_if_data *sdata); void ieee80211_debugfs_remove_netdev(struct ieee80211_sub_if_data *sdata); void ieee80211_debugfs_rename_netdev(struct ieee80211_sub_if_data *sdata); +void ieee80211_debugfs_recreate_netdev(struct ieee80211_sub_if_data *sdata, + bool mld_vif); void ieee80211_link_debugfs_add(struct ieee80211_link_data *link); void ieee80211_link_debugfs_remove(struct ieee80211_link_data *link); @@ -17,16 +22,15 @@ void ieee80211_link_debugfs_remove(struct ieee80211_link_data *link); void ieee80211_link_debugfs_drv_add(struct ieee80211_link_data *link); void ieee80211_link_debugfs_drv_remove(struct ieee80211_link_data *link); #else -static inline void ieee80211_debugfs_add_netdev( - struct ieee80211_sub_if_data *sdata) -{} static inline void ieee80211_debugfs_remove_netdev( struct ieee80211_sub_if_data *sdata) {} static inline void ieee80211_debugfs_rename_netdev( struct ieee80211_sub_if_data *sdata) {} - +static inline void ieee80211_debugfs_recreate_netdev( + struct ieee80211_sub_if_data *sdata, bool mld_vif) +{} static inline void ieee80211_link_debugfs_add(struct ieee80211_link_data *link) {} static inline void ieee80211_link_debugfs_remove(struct ieee80211_link_data *link) diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index 5a97fb248c..5bf507ebb0 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -5,7 +5,7 @@ * Copyright 2007 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright(c) 2016 Intel Deutschland GmbH - * Copyright (C) 2018 - 2022 Intel Corporation + * Copyright (C) 2018 - 2023 Intel Corporation */ #include <linux/debugfs.h> @@ -312,23 +312,14 @@ static ssize_t sta_aql_write(struct file *file, const char __user *userbuf, STA_OPS_RW(aql); -static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf, - size_t count, loff_t *ppos) +static ssize_t sta_agg_status_do_read(struct wiphy *wiphy, struct file *file, + char *buf, size_t bufsz, void *data) { - char *buf, *p; - ssize_t bufsz = 71 + IEEE80211_NUM_TIDS * 40; + struct sta_info *sta = data; + char *p = buf; int i; - struct sta_info *sta = file->private_data; struct tid_ampdu_rx *tid_rx; struct tid_ampdu_tx *tid_tx; - ssize_t ret; - - buf = kzalloc(bufsz, GFP_KERNEL); - if (!buf) - return -ENOMEM; - p = buf; - - rcu_read_lock(); p += scnprintf(p, bufsz + buf - p, "next dialog_token: %#02x\n", sta->ampdu_mlme.dialog_token_allocator + 1); @@ -338,8 +329,8 @@ static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf, for (i = 0; i < IEEE80211_NUM_TIDS; i++) { bool tid_rx_valid; - tid_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[i]); - tid_tx = rcu_dereference(sta->ampdu_mlme.tid_tx[i]); + tid_rx = wiphy_dereference(wiphy, sta->ampdu_mlme.tid_rx[i]); + tid_tx = wiphy_dereference(wiphy, sta->ampdu_mlme.tid_tx[i]); tid_rx_valid = test_bit(i, sta->ampdu_mlme.agg_session_valid); p += scnprintf(p, bufsz + buf - p, "%02d", i); @@ -358,31 +349,39 @@ static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf, tid_tx ? 
skb_queue_len(&tid_tx->pending) : 0); p += scnprintf(p, bufsz + buf - p, "\n"); } - rcu_read_unlock(); - ret = simple_read_from_buffer(userbuf, count, ppos, buf, p - buf); + return p - buf; +} + +static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf, + size_t count, loff_t *ppos) +{ + struct sta_info *sta = file->private_data; + struct wiphy *wiphy = sta->local->hw.wiphy; + size_t bufsz = 71 + IEEE80211_NUM_TIDS * 40; + char *buf = kmalloc(bufsz, GFP_KERNEL); + ssize_t ret; + + if (!buf) + return -ENOMEM; + + ret = wiphy_locked_debugfs_read(wiphy, file, buf, bufsz, + userbuf, count, ppos, + sta_agg_status_do_read, sta); kfree(buf); + return ret; } -static ssize_t sta_agg_status_write(struct file *file, const char __user *userbuf, - size_t count, loff_t *ppos) +static ssize_t sta_agg_status_do_write(struct wiphy *wiphy, struct file *file, + char *buf, size_t count, void *data) { - char _buf[25] = {}, *buf = _buf; - struct sta_info *sta = file->private_data; + struct sta_info *sta = data; bool start, tx; unsigned long tid; - char *pos; + char *pos = buf; int ret, timeout = 5000; - if (count > sizeof(_buf)) - return -EINVAL; - - if (copy_from_user(buf, userbuf, count)) - return -EFAULT; - - buf[sizeof(_buf) - 1] = '\0'; - pos = buf; buf = strsep(&pos, " "); if (!buf) return -EINVAL; @@ -434,6 +433,19 @@ static ssize_t sta_agg_status_write(struct file *file, const char __user *userbu return ret ?: count; } + +static ssize_t sta_agg_status_write(struct file *file, + const char __user *userbuf, + size_t count, loff_t *ppos) +{ + struct sta_info *sta = file->private_data; + struct wiphy *wiphy = sta->local->hw.wiphy; + char _buf[26]; + + return wiphy_locked_debugfs_write(wiphy, file, _buf, sizeof(_buf), + userbuf, count, + sta_agg_status_do_write, sta); +} STA_OPS_RW(agg_status); /* link sta attributes */ diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c index f8af0c3d40..3b7f70073f 100644 --- a/net/mac80211/driver-ops.c +++ b/net/mac80211/driver-ops.c @@ -15,6 +15,7 @@ int drv_start(struct ieee80211_local *local) int ret; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (WARN_ON(local->started)) return -EALREADY; @@ -35,6 +36,7 @@ int drv_start(struct ieee80211_local *local) void drv_stop(struct ieee80211_local *local) { might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (WARN_ON(!local->started)) return; @@ -58,6 +60,7 @@ int drv_add_interface(struct ieee80211_local *local, int ret; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (WARN_ON(sdata->vif.type == NL80211_IFTYPE_AP_VLAN || (sdata->vif.type == NL80211_IFTYPE_MONITOR && @@ -69,10 +72,18 @@ int drv_add_interface(struct ieee80211_local *local, ret = local->ops->add_interface(&local->hw, &sdata->vif); trace_drv_return_int(local, ret); - if (ret == 0) + if (ret) + return ret; + + if (!(sdata->flags & IEEE80211_SDATA_IN_DRIVER)) { sdata->flags |= IEEE80211_SDATA_IN_DRIVER; - return ret; + drv_vif_add_debugfs(local, sdata); + /* initially vif is not MLD */ + ieee80211_link_debugfs_drv_add(&sdata->deflink); + } + + return 0; } int drv_change_interface(struct ieee80211_local *local, @@ -82,6 +93,7 @@ int drv_change_interface(struct ieee80211_local *local, int ret; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (!check_sdata_in_driver(sdata)) return -EIO; @@ -96,13 +108,18 @@ void drv_remove_interface(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata) { might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (!check_sdata_in_driver(sdata)) return; 
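/*
 * [Editor's sketch, not part of the patch: the two conversion patterns
 *  this series applies. Function names prefixed "example_" are
 *  hypothetical; the APIs used (wiphy_lock(), lockdep_assert_wiphy(),
 *  wiphy_locked_debugfs_read(), drv_get_stats()) are the ones the
 *  hunks around this point rely on.]
 *
 * Pattern 1: driver-op wrappers now assert the wiphy mutex instead of
 * local->mtx / local->chanctx_mtx / local->sta_mtx, so a caller that is
 * not already in a wiphy-locked context takes the lock explicitly:
 */
static int example_get_stats(struct ieee80211_local *local,
			     struct ieee80211_low_level_stats *stats)
{
	int ret;

	/* drv_get_stats() now does lockdep_assert_wiphy(local->hw.wiphy) */
	wiphy_lock(local->hw.wiphy);
	ret = drv_get_stats(local, stats);
	wiphy_unlock(local->hw.wiphy);

	return ret;
}

/*
 * Pattern 2: debugfs reads that used the RTNL or dev_base_lock funnel
 * through wiphy_locked_debugfs_read(), which copies through a bounce
 * buffer and runs the handler with the wiphy mutex held:
 */
static ssize_t example_read_handler(struct wiphy *wiphy, struct file *file,
				    char *buf, size_t bufsize, void *data)
{
	/* wiphy mutex is held here, so wiphy_dereference() etc. are safe */
	return scnprintf(buf, bufsize, "example\n");
}

static ssize_t example_read(struct file *file, char __user *userbuf,
			    size_t count, loff_t *ppos)
{
	struct ieee80211_sub_if_data *sdata = file->private_data;
	char buf[32];

	return wiphy_locked_debugfs_read(sdata->local->hw.wiphy,
					 file, buf, sizeof(buf),
					 userbuf, count, ppos,
					 example_read_handler, sdata);
}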
+ sdata->flags &= ~IEEE80211_SDATA_IN_DRIVER; + + /* Remove driver debugfs entries */ + ieee80211_debugfs_recreate_netdev(sdata, sdata->vif.valid_links); + trace_drv_remove_interface(local, sdata); local->ops->remove_interface(&local->hw, &sdata->vif); - sdata->flags &= ~IEEE80211_SDATA_IN_DRIVER; trace_drv_return_void(local); } @@ -116,6 +133,7 @@ int drv_sta_state(struct ieee80211_local *local, int ret = 0; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); sdata = get_bss_sdata(sdata); if (!check_sdata_in_driver(sdata)) @@ -149,6 +167,7 @@ int drv_sta_set_txpwr(struct ieee80211_local *local, int ret = -EOPNOTSUPP; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); sdata = get_bss_sdata(sdata); if (!check_sdata_in_driver(sdata)) @@ -190,6 +209,7 @@ int drv_conf_tx(struct ieee80211_local *local, int ret = -EOPNOTSUPP; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (!check_sdata_in_driver(sdata)) return -EIO; @@ -223,6 +243,7 @@ u64 drv_get_tsf(struct ieee80211_local *local, u64 ret = -1ULL; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (!check_sdata_in_driver(sdata)) return ret; @@ -239,6 +260,7 @@ void drv_set_tsf(struct ieee80211_local *local, u64 tsf) { might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (!check_sdata_in_driver(sdata)) return; @@ -254,6 +276,7 @@ void drv_offset_tsf(struct ieee80211_local *local, s64 offset) { might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (!check_sdata_in_driver(sdata)) return; @@ -268,6 +291,7 @@ void drv_reset_tsf(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata) { might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (!check_sdata_in_driver(sdata)) return; @@ -285,7 +309,9 @@ int drv_assign_vif_chanctx(struct ieee80211_local *local, { int ret = 0; - drv_verify_link_exists(sdata, link_conf); + might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); + if (!check_sdata_in_driver(sdata)) return -EIO; @@ -312,8 +338,8 @@ void drv_unassign_vif_chanctx(struct ieee80211_local *local, struct ieee80211_chanctx *ctx) { might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); - drv_verify_link_exists(sdata, link_conf); if (!check_sdata_in_driver(sdata)) return; @@ -340,6 +366,7 @@ int drv_switch_vif_chanctx(struct ieee80211_local *local, int i; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (!local->ops->switch_vif_chanctx) return -EOPNOTSUPP; @@ -392,9 +419,7 @@ int drv_ampdu_action(struct ieee80211_local *local, int ret = -EOPNOTSUPP; might_sleep(); - - if (!sdata) - return -EIO; + lockdep_assert_wiphy(local->hw.wiphy); sdata = get_bss_sdata(sdata); if (!check_sdata_in_driver(sdata)) @@ -416,6 +441,7 @@ void drv_link_info_changed(struct ieee80211_local *local, int link_id, u64 changed) { might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (WARN_ON_ONCE(changed & (BSS_CHANGED_BEACON | BSS_CHANGED_BEACON_ENABLED) && @@ -458,6 +484,7 @@ int drv_set_key(struct ieee80211_local *local, int ret; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); sdata = get_bss_sdata(sdata); if (!check_sdata_in_driver(sdata)) @@ -485,6 +512,7 @@ int drv_change_vif_links(struct ieee80211_local *local, int ret = -EOPNOTSUPP; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (!check_sdata_in_driver(sdata)) return -EIO; @@ -510,7 +538,7 @@ int drv_change_vif_links(struct ieee80211_local *local, if (ret) return ret; - if (!local->in_reconfig) { + if (!local->in_reconfig && !local->resuming) { for_each_set_bit(link_id, &links_to_add, IEEE80211_MLD_MAX_NUM_LINKS) { link = 
rcu_access_pointer(sdata->link[link_id]); @@ -535,6 +563,7 @@ int drv_change_sta_links(struct ieee80211_local *local, int ret = -EOPNOTSUPP; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (!check_sdata_in_driver(sdata)) return -EIO; @@ -550,7 +579,7 @@ int drv_change_sta_links(struct ieee80211_local *local, for_each_set_bit(link_id, &links_to_rem, IEEE80211_MLD_MAX_NUM_LINKS) { link_sta = rcu_dereference_protected(info->link[link_id], - lockdep_is_held(&local->sta_mtx)); + lockdep_is_held(&local->hw.wiphy->mtx)); ieee80211_link_sta_debugfs_drv_remove(link_sta); } @@ -565,12 +594,12 @@ int drv_change_sta_links(struct ieee80211_local *local, return ret; /* during reconfig don't add it to debugfs again */ - if (local->in_reconfig) + if (local->in_reconfig || local->resuming) return 0; for_each_set_bit(link_id, &links_to_add, IEEE80211_MLD_MAX_NUM_LINKS) { link_sta = rcu_dereference_protected(info->link[link_id], - lockdep_is_held(&local->sta_mtx)); + lockdep_is_held(&local->hw.wiphy->mtx)); ieee80211_link_sta_debugfs_drv_add(link_sta); } diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 2bc2fbe58f..f690c385a3 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -40,6 +40,9 @@ static inline void drv_tx(struct ieee80211_local *local, static inline void drv_sync_rx_queues(struct ieee80211_local *local, struct sta_info *sta) { + might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); + if (local->ops->sync_rx_queues) { trace_drv_sync_rx_queues(local, sta->sdata, &sta->sta); local->ops->sync_rx_queues(&local->hw); @@ -94,6 +97,7 @@ static inline int drv_suspend(struct ieee80211_local *local, int ret; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); trace_drv_suspend(local); ret = local->ops->suspend(&local->hw, wowlan); @@ -106,6 +110,7 @@ static inline int drv_resume(struct ieee80211_local *local) int ret; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); trace_drv_resume(local); ret = local->ops->resume(&local->hw); @@ -117,6 +122,7 @@ static inline void drv_set_wakeup(struct ieee80211_local *local, bool enabled) { might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (!local->ops->set_wakeup) return; @@ -142,6 +148,7 @@ static inline int drv_config(struct ieee80211_local *local, u32 changed) int ret; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); trace_drv_config(local, changed); ret = local->ops->config(&local->hw, changed); @@ -154,6 +161,7 @@ static inline void drv_vif_cfg_changed(struct ieee80211_local *local, u64 changed) { might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (!check_sdata_in_driver(sdata)) return; @@ -193,6 +201,7 @@ static inline void drv_configure_filter(struct ieee80211_local *local, u64 multicast) { might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); trace_drv_configure_filter(local, changed_flags, total_flags, multicast); @@ -207,6 +216,7 @@ static inline void drv_config_iface_filter(struct ieee80211_local *local, unsigned int changed_flags) { might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); trace_drv_config_iface_filter(local, sdata, filter_flags, changed_flags); @@ -263,6 +273,7 @@ static inline int drv_hw_scan(struct ieee80211_local *local, int ret; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (!check_sdata_in_driver(sdata)) return -EIO; @@ -277,6 +288,7 @@ static inline void drv_cancel_hw_scan(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata) { might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (!check_sdata_in_driver(sdata)) 
return; @@ -295,6 +307,7 @@ drv_sched_scan_start(struct ieee80211_local *local, int ret; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (!check_sdata_in_driver(sdata)) return -EIO; @@ -312,6 +325,7 @@ static inline int drv_sched_scan_stop(struct ieee80211_local *local, int ret; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (!check_sdata_in_driver(sdata)) return -EIO; @@ -328,6 +342,7 @@ static inline void drv_sw_scan_start(struct ieee80211_local *local, const u8 *mac_addr) { might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); trace_drv_sw_scan_start(local, sdata, mac_addr); if (local->ops->sw_scan_start) @@ -339,6 +354,7 @@ static inline void drv_sw_scan_complete(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata) { might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); trace_drv_sw_scan_complete(local, sdata); if (local->ops->sw_scan_complete) @@ -352,6 +368,7 @@ static inline int drv_get_stats(struct ieee80211_local *local, int ret = -EOPNOTSUPP; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (local->ops->get_stats) ret = local->ops->get_stats(&local->hw, stats); @@ -375,6 +392,7 @@ static inline int drv_set_frag_threshold(struct ieee80211_local *local, int ret = 0; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); trace_drv_set_frag_threshold(local, value); if (local->ops->set_frag_threshold) @@ -389,6 +407,7 @@ static inline int drv_set_rts_threshold(struct ieee80211_local *local, int ret = 0; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); trace_drv_set_rts_threshold(local, value); if (local->ops->set_rts_threshold) @@ -402,6 +421,7 @@ static inline int drv_set_coverage_class(struct ieee80211_local *local, { int ret = 0; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); trace_drv_set_coverage_class(local, value); if (local->ops->set_coverage_class) @@ -435,6 +455,7 @@ static inline int drv_sta_add(struct ieee80211_local *local, int ret = 0; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); sdata = get_bss_sdata(sdata); if (!check_sdata_in_driver(sdata)) @@ -454,6 +475,7 @@ static inline void drv_sta_remove(struct ieee80211_local *local, struct ieee80211_sta *sta) { might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); sdata = get_bss_sdata(sdata); if (!check_sdata_in_driver(sdata)) @@ -467,12 +489,30 @@ static inline void drv_sta_remove(struct ieee80211_local *local, } #ifdef CONFIG_MAC80211_DEBUGFS +static inline void drv_vif_add_debugfs(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata) +{ + might_sleep(); + + if (sdata->vif.type == NL80211_IFTYPE_MONITOR || + WARN_ON(!sdata->vif.debugfs_dir)) + return; + + sdata = get_bss_sdata(sdata); + if (!check_sdata_in_driver(sdata)) + return; + + if (local->ops->vif_add_debugfs) + local->ops->vif_add_debugfs(&local->hw, &sdata->vif); +} + static inline void drv_link_add_debugfs(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_bss_conf *link_conf, struct dentry *dir) { might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); sdata = get_bss_sdata(sdata); if (!check_sdata_in_driver(sdata)) @@ -489,6 +529,7 @@ static inline void drv_sta_add_debugfs(struct ieee80211_local *local, struct dentry *dir) { might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); sdata = get_bss_sdata(sdata); if (!check_sdata_in_driver(sdata)) @@ -505,6 +546,7 @@ static inline void drv_link_sta_add_debugfs(struct ieee80211_local *local, struct dentry *dir) { might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); sdata = 
get_bss_sdata(sdata); if (!check_sdata_in_driver(sdata)) @@ -514,6 +556,12 @@ static inline void drv_link_sta_add_debugfs(struct ieee80211_local *local, local->ops->link_sta_add_debugfs(&local->hw, &sdata->vif, link_sta, dir); } +#else +static inline void drv_vif_add_debugfs(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata) +{ + might_sleep(); +} #endif static inline void drv_sta_pre_rcu_remove(struct ieee80211_local *local, @@ -521,6 +569,7 @@ static inline void drv_sta_pre_rcu_remove(struct ieee80211_local *local, struct sta_info *sta) { might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); sdata = get_bss_sdata(sdata); if (!check_sdata_in_driver(sdata)) @@ -569,6 +618,9 @@ static inline void drv_sta_statistics(struct ieee80211_local *local, struct ieee80211_sta *sta, struct station_info *sinfo) { + might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); + sdata = get_bss_sdata(sdata); if (!check_sdata_in_driver(sdata)) return; @@ -599,6 +651,7 @@ static inline int drv_tx_last_beacon(struct ieee80211_local *local) int ret = 0; /* default unsupported op for less congestion */ might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); trace_drv_tx_last_beacon(local); if (local->ops->tx_last_beacon) @@ -616,6 +669,9 @@ static inline int drv_get_survey(struct ieee80211_local *local, int idx, { int ret = -EOPNOTSUPP; + might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); + trace_drv_get_survey(local, idx, survey); if (local->ops->get_survey) @@ -629,6 +685,7 @@ static inline int drv_get_survey(struct ieee80211_local *local, int idx, static inline void drv_rfkill_poll(struct ieee80211_local *local) { might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (local->ops->rfkill_poll) local->ops->rfkill_poll(&local->hw); @@ -641,6 +698,7 @@ static inline void drv_flush(struct ieee80211_local *local, struct ieee80211_vif *vif; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); sdata = get_bss_sdata(sdata); vif = sdata ? 
&sdata->vif : NULL; @@ -659,6 +717,7 @@ static inline void drv_flush_sta(struct ieee80211_local *local, struct sta_info *sta) { might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); sdata = get_bss_sdata(sdata); @@ -676,6 +735,7 @@ static inline void drv_channel_switch(struct ieee80211_local *local, struct ieee80211_channel_switch *ch_switch) { might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); trace_drv_channel_switch(local, sdata, ch_switch); local->ops->channel_switch(&local->hw, &sdata->vif, ch_switch); @@ -688,6 +748,7 @@ static inline int drv_set_antenna(struct ieee80211_local *local, { int ret = -EOPNOTSUPP; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (local->ops->set_antenna) ret = local->ops->set_antenna(&local->hw, tx_ant, rx_ant); trace_drv_set_antenna(local, tx_ant, rx_ant, ret); @@ -699,6 +760,7 @@ static inline int drv_get_antenna(struct ieee80211_local *local, { int ret = -EOPNOTSUPP; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (local->ops->get_antenna) ret = local->ops->get_antenna(&local->hw, tx_ant, rx_ant); trace_drv_get_antenna(local, *tx_ant, *rx_ant, ret); @@ -714,6 +776,7 @@ static inline int drv_remain_on_channel(struct ieee80211_local *local, int ret; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); trace_drv_remain_on_channel(local, sdata, chan, duration, type); ret = local->ops->remain_on_channel(&local->hw, &sdata->vif, @@ -730,6 +793,7 @@ drv_cancel_remain_on_channel(struct ieee80211_local *local, int ret; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); trace_drv_cancel_remain_on_channel(local, sdata); ret = local->ops->cancel_remain_on_channel(&local->hw, &sdata->vif); @@ -744,6 +808,7 @@ static inline int drv_set_ringparam(struct ieee80211_local *local, int ret = -ENOTSUPP; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); trace_drv_set_ringparam(local, tx, rx); if (local->ops->set_ringparam) @@ -757,6 +822,7 @@ static inline void drv_get_ringparam(struct ieee80211_local *local, u32 *tx, u32 *tx_max, u32 *rx, u32 *rx_max) { might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); trace_drv_get_ringparam(local, tx, tx_max, rx, rx_max); if (local->ops->get_ringparam) @@ -769,6 +835,7 @@ static inline bool drv_tx_frames_pending(struct ieee80211_local *local) bool ret = false; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); trace_drv_tx_frames_pending(local); if (local->ops->tx_frames_pending) @@ -785,6 +852,7 @@ static inline int drv_set_bitrate_mask(struct ieee80211_local *local, int ret = -EOPNOTSUPP; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (!check_sdata_in_driver(sdata)) return -EIO; @@ -802,6 +870,9 @@ static inline void drv_set_rekey_data(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct cfg80211_gtk_rekey_data *data) { + might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); + if (!check_sdata_in_driver(sdata)) return; @@ -856,11 +927,13 @@ static inline void drv_mgd_prepare_tx(struct ieee80211_local *local, struct ieee80211_prep_tx_info *info) { might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (!check_sdata_in_driver(sdata)) return; WARN_ON_ONCE(sdata->vif.type != NL80211_IFTYPE_STATION); + info->link_id = info->link_id < 0 ? 
0 : info->link_id; trace_drv_mgd_prepare_tx(local, sdata, info->duration, info->subtype, info->success); if (local->ops->mgd_prepare_tx) @@ -873,6 +946,7 @@ static inline void drv_mgd_complete_tx(struct ieee80211_local *local, struct ieee80211_prep_tx_info *info) { might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (!check_sdata_in_driver(sdata)) return; @@ -887,17 +961,22 @@ static inline void drv_mgd_complete_tx(struct ieee80211_local *local, static inline void drv_mgd_protect_tdls_discover(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata) + struct ieee80211_sub_if_data *sdata, + int link_id) { might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (!check_sdata_in_driver(sdata)) return; WARN_ON_ONCE(sdata->vif.type != NL80211_IFTYPE_STATION); + link_id = link_id > 0 ? link_id : 0; + trace_drv_mgd_protect_tdls_discover(local, sdata); if (local->ops->mgd_protect_tdls_discover) - local->ops->mgd_protect_tdls_discover(&local->hw, &sdata->vif); + local->ops->mgd_protect_tdls_discover(&local->hw, &sdata->vif, + link_id); trace_drv_return_void(local); } @@ -907,6 +986,7 @@ static inline int drv_add_chanctx(struct ieee80211_local *local, int ret = -EOPNOTSUPP; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); trace_drv_add_chanctx(local, ctx); if (local->ops->add_chanctx) @@ -922,6 +1002,7 @@ static inline void drv_remove_chanctx(struct ieee80211_local *local, struct ieee80211_chanctx *ctx) { might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (WARN_ON(!ctx->driver_present)) return; @@ -938,6 +1019,7 @@ static inline void drv_change_chanctx(struct ieee80211_local *local, u32 changed) { might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); trace_drv_change_chanctx(local, ctx, changed); if (local->ops->change_chanctx) { @@ -947,14 +1029,6 @@ static inline void drv_change_chanctx(struct ieee80211_local *local, trace_drv_return_void(local); } -static inline void drv_verify_link_exists(struct ieee80211_sub_if_data *sdata, - struct ieee80211_bss_conf *link_conf) -{ - /* deflink always exists, so need to check only for other links */ - if (sdata->deflink.conf != link_conf) - sdata_assert_lock(sdata); -} - int drv_assign_vif_chanctx(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_bss_conf *link_conf, @@ -973,10 +1047,8 @@ static inline int drv_start_ap(struct ieee80211_local *local, { int ret = 0; - /* make sure link_conf is protected */ - drv_verify_link_exists(sdata, link_conf); - might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (!check_sdata_in_driver(sdata)) return -EIO; @@ -992,8 +1064,8 @@ static inline void drv_stop_ap(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_bss_conf *link_conf) { - /* make sure link_conf is protected */ - drv_verify_link_exists(sdata, link_conf); + might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (!check_sdata_in_driver(sdata)) return; @@ -1009,6 +1081,7 @@ drv_reconfig_complete(struct ieee80211_local *local, enum ieee80211_reconfig_type reconfig_type) { might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); trace_drv_reconfig_complete(local, reconfig_type); if (local->ops->reconfig_complete) @@ -1021,6 +1094,9 @@ drv_set_default_unicast_key(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, int key_idx) { + might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); + if (!check_sdata_in_driver(sdata)) return; @@ -1051,6 +1127,9 @@ drv_channel_switch_beacon(struct ieee80211_sub_if_data *sdata, { struct 
ieee80211_local *local = sdata->local; + might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); + if (local->ops->channel_switch_beacon) { trace_drv_channel_switch_beacon(local, sdata, chandef); local->ops->channel_switch_beacon(&local->hw, &sdata->vif, @@ -1065,6 +1144,9 @@ drv_pre_channel_switch(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; int ret = 0; + might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); + if (!check_sdata_in_driver(sdata)) return -EIO; @@ -1077,17 +1159,22 @@ drv_pre_channel_switch(struct ieee80211_sub_if_data *sdata, } static inline int -drv_post_channel_switch(struct ieee80211_sub_if_data *sdata) +drv_post_channel_switch(struct ieee80211_link_data *link) { + struct ieee80211_sub_if_data *sdata = link->sdata; struct ieee80211_local *local = sdata->local; int ret = 0; + might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); + if (!check_sdata_in_driver(sdata)) return -EIO; trace_drv_post_channel_switch(local, sdata); if (local->ops->post_channel_switch) - ret = local->ops->post_channel_switch(&local->hw, &sdata->vif); + ret = local->ops->post_channel_switch(&local->hw, &sdata->vif, + link->conf); trace_drv_return_int(local, ret); return ret; } @@ -1097,6 +1184,9 @@ drv_abort_channel_switch(struct ieee80211_sub_if_data *sdata) { struct ieee80211_local *local = sdata->local; + might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); + if (!check_sdata_in_driver(sdata)) return; @@ -1112,6 +1202,9 @@ drv_channel_switch_rx_beacon(struct ieee80211_sub_if_data *sdata, { struct ieee80211_local *local = sdata->local; + might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); + if (!check_sdata_in_driver(sdata)) return; @@ -1127,6 +1220,7 @@ static inline int drv_join_ibss(struct ieee80211_local *local, int ret = 0; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (!check_sdata_in_driver(sdata)) return -EIO; @@ -1141,6 +1235,7 @@ static inline void drv_leave_ibss(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata) { might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (!check_sdata_in_driver(sdata)) return; @@ -1168,6 +1263,9 @@ static inline int drv_get_txpower(struct ieee80211_local *local, { int ret; + might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); + if (!local->ops->get_txpower) return -EOPNOTSUPP; @@ -1187,6 +1285,7 @@ drv_tdls_channel_switch(struct ieee80211_local *local, int ret; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (!check_sdata_in_driver(sdata)) return -EIO; @@ -1207,6 +1306,7 @@ drv_tdls_cancel_channel_switch(struct ieee80211_local *local, struct ieee80211_sta *sta) { might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (!check_sdata_in_driver(sdata)) return; @@ -1272,6 +1372,11 @@ drv_get_ftm_responder_stats(struct ieee80211_local *local, { u32 ret = -EOPNOTSUPP; + might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); + if (!check_sdata_in_driver(sdata)) + return -EIO; + if (local->ops->get_ftm_responder_stats) ret = local->ops->get_ftm_responder_stats(&local->hw, &sdata->vif, @@ -1288,6 +1393,7 @@ static inline int drv_start_pmsr(struct ieee80211_local *local, int ret = -EOPNOTSUPP; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (!check_sdata_in_driver(sdata)) return -EIO; @@ -1307,6 +1413,7 @@ static inline void drv_abort_pmsr(struct ieee80211_local *local, trace_drv_abort_pmsr(local, sdata); might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (!check_sdata_in_driver(sdata)) return; @@ -1322,6 +1429,7 @@ static inline int 
drv_start_nan(struct ieee80211_local *local, int ret; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); check_sdata_in_driver(sdata); trace_drv_start_nan(local, sdata, conf); @@ -1334,6 +1442,7 @@ static inline void drv_stop_nan(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata) { might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); check_sdata_in_driver(sdata); trace_drv_stop_nan(local, sdata); @@ -1349,6 +1458,7 @@ static inline int drv_nan_change_conf(struct ieee80211_local *local, int ret; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); check_sdata_in_driver(sdata); if (!local->ops->nan_change_conf) @@ -1369,6 +1479,7 @@ static inline int drv_add_nan_func(struct ieee80211_local *local, int ret; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); check_sdata_in_driver(sdata); if (!local->ops->add_nan_func) @@ -1386,6 +1497,7 @@ static inline void drv_del_nan_func(struct ieee80211_local *local, u8 instance_id) { might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); check_sdata_in_driver(sdata); trace_drv_del_nan_func(local, sdata, instance_id); @@ -1402,6 +1514,7 @@ static inline int drv_set_tid_config(struct ieee80211_local *local, int ret; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); ret = local->ops->set_tid_config(&local->hw, &sdata->vif, sta, tid_conf); trace_drv_return_int(local, ret); @@ -1416,6 +1529,7 @@ static inline int drv_reset_tid_config(struct ieee80211_local *local, int ret; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); ret = local->ops->reset_tid_config(&local->hw, &sdata->vif, sta, tids); trace_drv_return_int(local, ret); @@ -1426,6 +1540,7 @@ static inline void drv_update_vif_offload(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata) { might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); check_sdata_in_driver(sdata); if (!local->ops->update_vif_offload) @@ -1441,6 +1556,9 @@ static inline void drv_sta_set_4addr(struct ieee80211_local *local, struct ieee80211_sta *sta, bool enabled) { sdata = get_bss_sdata(sdata); + + might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (!check_sdata_in_driver(sdata)) return; @@ -1456,6 +1574,9 @@ static inline void drv_sta_set_decap_offload(struct ieee80211_local *local, bool enabled) { sdata = get_bss_sdata(sdata); + + might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (!check_sdata_in_driver(sdata)) return; @@ -1474,6 +1595,7 @@ static inline void drv_add_twt_setup(struct ieee80211_local *local, struct ieee80211_twt_params *twt_agrt; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (!check_sdata_in_driver(sdata)) return; @@ -1491,6 +1613,7 @@ static inline void drv_twt_teardown_request(struct ieee80211_local *local, u8 flowid) { might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); if (!check_sdata_in_driver(sdata)) return; @@ -1531,6 +1654,8 @@ static inline int drv_net_setup_tc(struct ieee80211_local *local, { int ret = -EOPNOTSUPP; + might_sleep(); + sdata = get_bss_sdata(sdata); trace_drv_net_setup_tc(local, sdata, type); if (local->ops->net_setup_tc) diff --git a/net/mac80211/drop.h b/net/mac80211/drop.h index 1570fac841..12a6f0e9ec 100644 --- a/net/mac80211/drop.h +++ b/net/mac80211/drop.h @@ -18,9 +18,54 @@ typedef unsigned int __bitwise ieee80211_rx_result; /* this line for the trailing \ - add before this */ #define MAC80211_DROP_REASONS_UNUSABLE(R) \ + /* 0x00 == ___RX_DROP_UNUSABLE */ \ R(RX_DROP_U_MIC_FAIL) \ R(RX_DROP_U_REPLAY) \ R(RX_DROP_U_BAD_MMIE) \ + R(RX_DROP_U_DUP) \ + R(RX_DROP_U_SPURIOUS) \ + 
R(RX_DROP_U_DECRYPT_FAIL) \ + R(RX_DROP_U_NO_KEY_ID) \ + R(RX_DROP_U_BAD_CIPHER) \ + R(RX_DROP_U_OOM) \ + R(RX_DROP_U_NONSEQ_PN) \ + R(RX_DROP_U_BAD_KEY_COLOR) \ + R(RX_DROP_U_BAD_4ADDR) \ + R(RX_DROP_U_BAD_AMSDU) \ + R(RX_DROP_U_BAD_AMSDU_CIPHER) \ + R(RX_DROP_U_INVALID_8023) \ + /* 0x10 */ \ + R(RX_DROP_U_RUNT_ACTION) \ + R(RX_DROP_U_UNPROT_ACTION) \ + R(RX_DROP_U_UNPROT_DUAL) \ + R(RX_DROP_U_UNPROT_UCAST_MGMT) \ + R(RX_DROP_U_UNPROT_MCAST_MGMT) \ + R(RX_DROP_U_UNPROT_BEACON) \ + R(RX_DROP_U_UNPROT_UNICAST_PUB_ACTION) \ + R(RX_DROP_U_UNPROT_ROBUST_ACTION) \ + R(RX_DROP_U_ACTION_UNKNOWN_SRC) \ + R(RX_DROP_U_REJECTED_ACTION_RESPONSE) \ + R(RX_DROP_U_EXPECT_DEFRAG_PROT) \ + R(RX_DROP_U_WEP_DEC_FAIL) \ + R(RX_DROP_U_NO_IV) \ + R(RX_DROP_U_NO_ICV) \ + R(RX_DROP_U_AP_RX_GROUPCAST) \ + R(RX_DROP_U_SHORT_MMIC) \ + /* 0x20 */ \ + R(RX_DROP_U_MMIC_FAIL) \ + R(RX_DROP_U_SHORT_TKIP) \ + R(RX_DROP_U_TKIP_FAIL) \ + R(RX_DROP_U_SHORT_CCMP) \ + R(RX_DROP_U_SHORT_CCMP_MIC) \ + R(RX_DROP_U_SHORT_GCMP) \ + R(RX_DROP_U_SHORT_GCMP_MIC) \ + R(RX_DROP_U_SHORT_CMAC) \ + R(RX_DROP_U_SHORT_CMAC256) \ + R(RX_DROP_U_SHORT_GMAC) \ + R(RX_DROP_U_UNEXPECTED_VLAN_4ADDR) \ + R(RX_DROP_U_UNEXPECTED_STA_4ADDR) \ + R(RX_DROP_U_UNEXPECTED_VLAN_MCAST) \ + R(RX_DROP_U_NOT_PORT_CONTROL) \ /* this line for the trailing \ - add before this */ /* having two enums allows for checking ieee80211_rx_result use with sparse */ @@ -46,7 +91,6 @@ enum mac80211_drop_reason { RX_CONTINUE = (__force ieee80211_rx_result)___RX_CONTINUE, RX_QUEUED = (__force ieee80211_rx_result)___RX_QUEUED, RX_DROP_MONITOR = (__force ieee80211_rx_result)___RX_DROP_MONITOR, - RX_DROP_UNUSABLE = (__force ieee80211_rx_result)___RX_DROP_UNUSABLE, #define DEF(x) x = (__force ieee80211_rx_result)___ ## x, MAC80211_DROP_REASONS_MONITOR(DEF) MAC80211_DROP_REASONS_UNUSABLE(DEF) diff --git a/net/mac80211/ethtool.c b/net/mac80211/ethtool.c index a3830d925c..99f6174a9d 100644 --- a/net/mac80211/ethtool.c +++ b/net/mac80211/ethtool.c @@ -5,7 +5,7 @@ * Copied from cfg.c - originally * Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net> * Copyright 2014 Intel Corporation (Author: Johannes Berg) - * Copyright (C) 2018, 2022 Intel Corporation + * Copyright (C) 2018, 2022-2023 Intel Corporation */ #include <linux/types.h> #include <net/cfg80211.h> @@ -19,11 +19,16 @@ static int ieee80211_set_ringparam(struct net_device *dev, struct netlink_ext_ack *extack) { struct ieee80211_local *local = wiphy_priv(dev->ieee80211_ptr->wiphy); + int ret; if (rp->rx_mini_pending != 0 || rp->rx_jumbo_pending != 0) return -EINVAL; - return drv_set_ringparam(local, rp->tx_pending, rp->rx_pending); + wiphy_lock(local->hw.wiphy); + ret = drv_set_ringparam(local, rp->tx_pending, rp->rx_pending); + wiphy_unlock(local->hw.wiphy); + + return ret; } static void ieee80211_get_ringparam(struct net_device *dev, @@ -35,8 +40,10 @@ static void ieee80211_get_ringparam(struct net_device *dev, memset(rp, 0, sizeof(*rp)); + wiphy_lock(local->hw.wiphy); drv_get_ringparam(local, &rp->tx_pending, &rp->tx_max_pending, &rp->rx_pending, &rp->rx_max_pending); + wiphy_unlock(local->hw.wiphy); } static const char ieee80211_gstrings_sta_stats[][ETH_GSTRING_LEN] = { @@ -102,7 +109,7 @@ static void ieee80211_get_stats(struct net_device *dev, * network device. 
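The expanded MAC80211_DROP_REASONS_UNUSABLE() table above is an X-macro: each R(name) entry is expanded twice, once into a plain enum that assigns consecutive values from the subsystem base (hence the /* 0x00 */ and /* 0x10 */ markers), and once into a sparse-typed enum so ieee80211_rx_result values cannot silently mix with plain integers. A minimal self-contained sketch of the same pattern; REASONS(), result_t and the base value are illustrative stand-ins, not the mac80211 definitions:

/* sparse keywords; empty for a normal compile */
#ifndef __CHECKER__
#define __bitwise
#define __force
#endif

typedef unsigned int __bitwise result_t;

#define REASONS(R) \
	R(DROP_MIC_FAIL) \
	R(DROP_REPLAY)

/* plain enum: carries the numeric layout, offset from the base */
enum plain_reason {
	___DROP_BASE = 0x100,
#define DEF(x) ___ ## x,
	REASONS(DEF)
#undef DEF
};

/* typed enum: same names, but each value is force-cast to result_t,
 * so sparse warns when a bare integer is used as a result
 */
enum typed_reason {
#define DEF(x) x = (__force result_t)___ ## x,
	REASONS(DEF)
#undef DEF
};

Dropping the catch-all RX_DROP_UNUSABLE from the visible enum, as the hunk above also does, then forces every code path to report a specific drop reason.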
*/ - mutex_lock(&local->sta_mtx); + wiphy_lock(local->hw.wiphy); if (sdata->vif.type == NL80211_IFTYPE_STATION) { sta = sta_info_get_bss(sdata, sdata->deflink.u.mgd.bssid); @@ -198,12 +205,13 @@ do_survey: else data[i++] = -1LL; - mutex_unlock(&local->sta_mtx); - - if (WARN_ON(i != STA_STATS_LEN)) + if (WARN_ON(i != STA_STATS_LEN)) { + wiphy_unlock(local->hw.wiphy); return; + } drv_get_et_stats(sdata, stats, &(data[STA_STATS_LEN])); + wiphy_unlock(local->hw.wiphy); } static void ieee80211_get_strings(struct net_device *dev, u32 sset, u8 *data) diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index b337187289..749f4ecab9 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -317,16 +317,16 @@ void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta, { int i; - mutex_lock(&sta->ampdu_mlme.mtx); + lockdep_assert_wiphy(sta->local->hw.wiphy); + for (i = 0; i < IEEE80211_NUM_TIDS; i++) - ___ieee80211_stop_rx_ba_session(sta, i, WLAN_BACK_RECIPIENT, - WLAN_REASON_QSTA_LEAVE_QBSS, - reason != AGG_STOP_DESTROY_STA && - reason != AGG_STOP_PEER_REQUEST); + __ieee80211_stop_rx_ba_session(sta, i, WLAN_BACK_RECIPIENT, + WLAN_REASON_QSTA_LEAVE_QBSS, + reason != AGG_STOP_DESTROY_STA && + reason != AGG_STOP_PEER_REQUEST); for (i = 0; i < IEEE80211_NUM_TIDS; i++) - ___ieee80211_stop_tx_ba_session(sta, i, reason); - mutex_unlock(&sta->ampdu_mlme.mtx); + __ieee80211_stop_tx_ba_session(sta, i, reason); /* * In case the tear down is part of a reconfigure due to HW restart @@ -334,9 +334,8 @@ void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta, * the BA session, so handle it to properly clean tid_tx data. */ if(reason == AGG_STOP_DESTROY_STA) { - cancel_work_sync(&sta->ampdu_mlme.work); + wiphy_work_cancel(sta->local->hw.wiphy, &sta->ampdu_mlme.work); - mutex_lock(&sta->ampdu_mlme.mtx); for (i = 0; i < IEEE80211_NUM_TIDS; i++) { struct tid_ampdu_tx *tid_tx = rcu_dereference_protected_tid_tx(sta, i); @@ -347,11 +346,10 @@ void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta, if (test_and_clear_bit(HT_AGG_STATE_STOP_CB, &tid_tx->state)) ieee80211_stop_tx_ba_cb(sta, i, tid_tx); } - mutex_unlock(&sta->ampdu_mlme.mtx); } } -void ieee80211_ba_session_work(struct work_struct *work) +void ieee80211_ba_session_work(struct wiphy *wiphy, struct wiphy_work *work) { struct sta_info *sta = container_of(work, struct sta_info, ampdu_mlme.work); @@ -359,32 +357,33 @@ void ieee80211_ba_session_work(struct work_struct *work) bool blocked; int tid; + lockdep_assert_wiphy(sta->local->hw.wiphy); + /* When this flag is set, new sessions should be blocked. 
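The ht.c hunks here replace the private ampdu_mlme.mtx critical sections and the plain workqueue item with a wiphy-bound work that always runs with the wiphy mutex held, which is why the triple-underscore helpers ("caller holds the mtx") collapse into the double-underscore ones. A hedged sketch of the wiphy_work life cycle; the names and handler body are placeholders, not the mac80211 code:

static struct wiphy_work ba_work;

static void example_ba_work(struct wiphy *wiphy, struct wiphy_work *work)
{
	/* the work infrastructure already holds wiphy->mtx here */
	lockdep_assert_wiphy(wiphy);
	/* ... start/stop aggregation sessions ... */
}

static void example_init_and_kick(struct wiphy *wiphy)
{
	wiphy_work_init(&ba_work, example_ba_work);
	wiphy_work_queue(wiphy, &ba_work);
}

static void example_teardown(struct wiphy *wiphy)
{
	/* must be called with wiphy->mtx held; holding it also means
	 * the handler cannot be running concurrently, so this replaces
	 * cancel_work_sync() in the teardown path above
	 */
	wiphy_work_cancel(wiphy, &ba_work);
}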
*/ blocked = test_sta_flag(sta, WLAN_STA_BLOCK_BA); - mutex_lock(&sta->ampdu_mlme.mtx); for (tid = 0; tid < IEEE80211_NUM_TIDS; tid++) { if (test_and_clear_bit(tid, sta->ampdu_mlme.tid_rx_timer_expired)) - ___ieee80211_stop_rx_ba_session( + __ieee80211_stop_rx_ba_session( sta, tid, WLAN_BACK_RECIPIENT, WLAN_REASON_QSTA_TIMEOUT, true); if (test_and_clear_bit(tid, sta->ampdu_mlme.tid_rx_stop_requested)) - ___ieee80211_stop_rx_ba_session( + __ieee80211_stop_rx_ba_session( sta, tid, WLAN_BACK_RECIPIENT, WLAN_REASON_UNSPECIFIED, true); if (!blocked && test_and_clear_bit(tid, sta->ampdu_mlme.tid_rx_manage_offl)) - ___ieee80211_start_rx_ba_session(sta, 0, 0, 0, 1, tid, - IEEE80211_MAX_AMPDU_BUF_HT, - false, true, NULL); + __ieee80211_start_rx_ba_session(sta, 0, 0, 0, 1, tid, + IEEE80211_MAX_AMPDU_BUF_HT, + false, true, NULL); if (test_and_clear_bit(tid + IEEE80211_NUM_TIDS, sta->ampdu_mlme.tid_rx_manage_offl)) - ___ieee80211_stop_rx_ba_session( + __ieee80211_stop_rx_ba_session( sta, tid, WLAN_BACK_RECIPIENT, 0, false); @@ -415,9 +414,7 @@ void ieee80211_ba_session_work(struct work_struct *work) */ synchronize_net(); - mutex_unlock(&sta->ampdu_mlme.mtx); - - ieee80211_queue_work(&sdata->local->hw, work); + wiphy_work_queue(sdata->local->hw.wiphy, work); return; } @@ -449,12 +446,11 @@ void ieee80211_ba_session_work(struct work_struct *work) test_and_clear_bit(HT_AGG_STATE_START_CB, &tid_tx->state)) ieee80211_start_tx_ba_cb(sta, tid, tid_tx); if (test_and_clear_bit(HT_AGG_STATE_WANT_STOP, &tid_tx->state)) - ___ieee80211_stop_tx_ba_session(sta, tid, - AGG_STOP_LOCAL_REQUEST); + __ieee80211_stop_tx_ba_session(sta, tid, + AGG_STOP_LOCAL_REQUEST); if (test_and_clear_bit(HT_AGG_STATE_STOP_CB, &tid_tx->state)) ieee80211_stop_tx_ba_cb(sta, tid, tid_tx); } - mutex_unlock(&sta->ampdu_mlme.mtx); } void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata, @@ -539,11 +535,13 @@ ieee80211_smps_mode_to_smps_mode(enum ieee80211_smps_mode smps) int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata, enum ieee80211_smps_mode smps, const u8 *da, - const u8 *bssid) + const u8 *bssid, int link_id) { struct ieee80211_local *local = sdata->local; struct sk_buff *skb; struct ieee80211_mgmt *action_frame; + struct ieee80211_tx_info *info; + u8 status_link_id = link_id < 0 ? 
0 : link_id; /* 27 = header + category + action + smps mode */ skb = dev_alloc_skb(27 + local->hw.extra_tx_headroom); @@ -563,6 +561,7 @@ int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata, case IEEE80211_SMPS_AUTOMATIC: case IEEE80211_SMPS_NUM_MODES: WARN_ON(1); + smps = IEEE80211_SMPS_OFF; fallthrough; case IEEE80211_SMPS_OFF: action_frame->u.action.u.ht_smps.smps_control = @@ -579,8 +578,13 @@ int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata, } /* we'll do more on status of this frame */ - IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS; - ieee80211_tx_skb(sdata, skb); + info = IEEE80211_SKB_CB(skb); + info->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS; + /* we have 12 bits, and need 6: link_id 4, smps 2 */ + info->status_data = IEEE80211_STATUS_TYPE_SMPS | + u16_encode_bits(status_link_id << 2 | smps, + IEEE80211_STATUS_SUBDATA_MASK); + ieee80211_tx_skb_tid(sdata, skb, 7, link_id); return 0; } diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 5542c93edf..8b1e02f2f9 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -51,7 +51,6 @@ ieee80211_ibss_build_presp(struct ieee80211_sub_if_data *sdata, u32 rate_flags, rates = 0, rates_added = 0; struct beacon_data *presp; int frame_len; - int shift; /* Build IBSS probe response */ frame_len = sizeof(struct ieee80211_hdr_3addr) + @@ -92,7 +91,6 @@ ieee80211_ibss_build_presp(struct ieee80211_sub_if_data *sdata, sband = local->hw.wiphy->bands[chandef->chan->band]; rate_flags = ieee80211_chandef_rate_flags(chandef); - shift = ieee80211_chandef_get_shift(chandef); rates_n = 0; if (have_higher_than_11mbit) *have_higher_than_11mbit = false; @@ -111,8 +109,7 @@ ieee80211_ibss_build_presp(struct ieee80211_sub_if_data *sdata, *pos++ = WLAN_EID_SUPP_RATES; *pos++ = min_t(int, 8, rates_n); for (ri = 0; ri < sband->n_bitrates; ri++) { - int rate = DIV_ROUND_UP(sband->bitrates[ri].bitrate, - 5 * (1 << shift)); + int rate = DIV_ROUND_UP(sband->bitrates[ri].bitrate, 5); u8 basic = 0; if (!(rates & BIT(ri))) continue; @@ -155,8 +152,7 @@ ieee80211_ibss_build_presp(struct ieee80211_sub_if_data *sdata, *pos++ = WLAN_EID_EXT_SUPP_RATES; *pos++ = rates_n - 8; for (; ri < sband->n_bitrates; ri++) { - int rate = DIV_ROUND_UP(sband->bitrates[ri].bitrate, - 5 * (1 << shift)); + int rate = DIV_ROUND_UP(sband->bitrates[ri].bitrate, 5); u8 basic = 0; if (!(rates & BIT(ri))) continue; @@ -235,7 +231,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, bool radar_required; int err; - sdata_assert_lock(sdata); + lockdep_assert_wiphy(local->hw.wiphy); /* Reset own TSF to allow time synchronization work. */ drv_reset_tsf(local, sdata); @@ -299,17 +295,14 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, radar_required = err; - mutex_lock(&local->mtx); if (ieee80211_link_use_channel(&sdata->deflink, &chandef, ifibss->fixed_channel ? 
IEEE80211_CHANCTX_SHARED : IEEE80211_CHANCTX_EXCLUSIVE)) { sdata_info(sdata, "Failed to join IBSS, no channel context\n"); - mutex_unlock(&local->mtx); return; } sdata->deflink.radar_required = radar_required; - mutex_unlock(&local->mtx); memcpy(ifibss->bssid, bssid, ETH_ALEN); @@ -367,9 +360,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, sdata->vif.cfg.ssid_len = 0; RCU_INIT_POINTER(ifibss->presp, NULL); kfree_rcu(presp, rcu_head); - mutex_lock(&local->mtx); ieee80211_link_release_channel(&sdata->deflink); - mutex_unlock(&local->mtx); sdata_info(sdata, "Failed to join IBSS, driver failure: %d\n", err); return; @@ -382,7 +373,6 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, round_jiffies(jiffies + IEEE80211_IBSS_MERGE_INTERVAL)); bss_meta.chan = chan; - bss_meta.scan_width = cfg80211_chandef_to_scan_width(&chandef); bss = cfg80211_inform_bss_frame_data(local->hw.wiphy, &bss_meta, mgmt, presp->head_len, GFP_KERNEL); @@ -405,9 +395,8 @@ static void ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, enum nl80211_channel_type chan_type; u64 tsf; u32 rate_flags; - int shift; - sdata_assert_lock(sdata); + lockdep_assert_wiphy(sdata->local->hw.wiphy); if (beacon_int < 10) beacon_int = 10; @@ -440,7 +429,6 @@ static void ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, sband = sdata->local->hw.wiphy->bands[cbss->channel->band]; rate_flags = ieee80211_chandef_rate_flags(&sdata->u.ibss.chandef); - shift = ieee80211_vif_get_shift(&sdata->vif); basic_rates = 0; @@ -454,8 +442,7 @@ static void ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, != rate_flags) continue; - brate = DIV_ROUND_UP(sband->bitrates[j].bitrate, - 5 * (1 << shift)); + brate = DIV_ROUND_UP(sband->bitrates[j].bitrate, 5); if (brate == rate) { if (is_basic) basic_rates |= BIT(j); @@ -488,7 +475,7 @@ int ieee80211_ibss_csa_beacon(struct ieee80211_sub_if_data *sdata, u16 capability = WLAN_CAPABILITY_IBSS; u64 tsf; - sdata_assert_lock(sdata); + lockdep_assert_wiphy(sdata->local->hw.wiphy); if (ifibss->privacy) capability |= WLAN_CAPABILITY_PRIVACY; @@ -530,7 +517,7 @@ int ieee80211_ibss_finish_csa(struct ieee80211_sub_if_data *sdata, u64 *changed) struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; struct cfg80211_bss *cbss; - sdata_assert_lock(sdata); + lockdep_assert_wiphy(sdata->local->hw.wiphy); /* When not connected/joined, sending CSA doesn't make sense. 
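A note on the DIV_ROUND_UP(bitrate, 5) conversions in the IBSS hunks above: sband->bitrates[].bitrate is in units of 100 kbps, while the (Extended) Supported Rates element encodes rates in 500 kbps steps; the old 5 * (1 << shift) divisor only differed from 5 on 5/10 MHz channels, which this series removes along with the shift helpers. A worked example, assuming the standard 802.11b/g rate table:

/* 11 Mbit/s entry: bitrate == 110 (100 kbps units) */
int rate = DIV_ROUND_UP(110, 5);   /* 22 -> 22 * 500 kbps = 11 Mbit/s */

/* 5.5 Mbit/s entry: bitrate == 55 */
rate = DIV_ROUND_UP(55, 5);        /* 11 -> 11 * 500 kbps = 5.5 Mbit/s */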
*/ if (ifibss->state != IEEE80211_IBSS_MLME_JOINED) @@ -600,7 +587,6 @@ ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, const u8 *bssid, struct sta_info *sta; struct ieee80211_chanctx_conf *chanctx_conf; struct ieee80211_supported_band *sband; - enum nl80211_bss_scan_width scan_width; int band; /* @@ -629,7 +615,6 @@ ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, const u8 *bssid, if (WARN_ON_ONCE(!chanctx_conf)) return NULL; band = chanctx_conf->def.chan->band; - scan_width = cfg80211_chandef_to_scan_width(&chanctx_conf->def); rcu_read_unlock(); sta = sta_info_alloc(sdata, addr, GFP_KERNEL); @@ -641,7 +626,7 @@ ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, const u8 *bssid, /* make sure mandatory rates are always added */ sband = local->hw.wiphy->bands[band]; sta->sta.deflink.supp_rates[band] = supp_rates | - ieee80211_mandatory_rates(sband, scan_width); + ieee80211_mandatory_rates(sband); return ieee80211_ibss_finish_sta(sta); } @@ -652,7 +637,7 @@ static int ieee80211_sta_active_ibss(struct ieee80211_sub_if_data *sdata) int active = 0; struct sta_info *sta; - sdata_assert_lock(sdata); + lockdep_assert_wiphy(sdata->local->hw.wiphy); rcu_read_lock(); @@ -680,6 +665,8 @@ static void ieee80211_ibss_disconnect(struct ieee80211_sub_if_data *sdata) struct beacon_data *presp; struct sta_info *sta; + lockdep_assert_wiphy(local->hw.wiphy); + if (!is_zero_ether_addr(ifibss->bssid)) { cbss = cfg80211_get_bss(local->hw.wiphy, ifibss->chandef.chan, ifibss->bssid, ifibss->ssid, @@ -726,9 +713,7 @@ static void ieee80211_ibss_disconnect(struct ieee80211_sub_if_data *sdata) ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED | BSS_CHANGED_IBSS); drv_leave_ibss(local, sdata); - mutex_lock(&local->mtx); ieee80211_link_release_channel(&sdata->deflink); - mutex_unlock(&local->mtx); } static void ieee80211_csa_connection_drop_work(struct wiphy *wiphy, @@ -738,16 +723,12 @@ static void ieee80211_csa_connection_drop_work(struct wiphy *wiphy, container_of(work, struct ieee80211_sub_if_data, u.ibss.csa_connection_drop_work); - sdata_lock(sdata); - ieee80211_ibss_disconnect(sdata); synchronize_rcu(); skb_queue_purge(&sdata->skb_queue); /* trigger a scan to find another IBSS network to join */ wiphy_work_queue(sdata->local->hw.wiphy, &sdata->work); - - sdata_unlock(sdata); } static void ieee80211_ibss_csa_mark_radar(struct ieee80211_sub_if_data *sdata) @@ -779,7 +760,7 @@ ieee80211_ibss_process_chanswitch(struct ieee80211_sub_if_data *sdata, ieee80211_conn_flags_t conn_flags; u32 vht_cap_info = 0; - sdata_assert_lock(sdata); + lockdep_assert_wiphy(sdata->local->hw.wiphy); conn_flags = IEEE80211_CONN_DISABLE_VHT; @@ -951,7 +932,7 @@ static void ieee80211_rx_mgmt_auth_ibss(struct ieee80211_sub_if_data *sdata, { u16 auth_alg, auth_transaction; - sdata_assert_lock(sdata); + lockdep_assert_wiphy(sdata->local->hw.wiphy); if (len < 24 + 6) return; @@ -984,7 +965,6 @@ static void ieee80211_update_sta_info(struct ieee80211_sub_if_data *sdata, { struct sta_info *sta; enum nl80211_band band = rx_status->band; - enum nl80211_bss_scan_width scan_width; struct ieee80211_local *local = sdata->local; struct ieee80211_supported_band *sband; bool rates_updated = false; @@ -1010,15 +990,9 @@ static void ieee80211_update_sta_info(struct ieee80211_sub_if_data *sdata, u32 prev_rates; prev_rates = sta->sta.deflink.supp_rates[band]; - /* make sure mandatory rates are always added */ - scan_width = NL80211_BSS_CHAN_WIDTH_20; - if (rx_status->bw == RATE_INFO_BW_5) - scan_width = 
NL80211_BSS_CHAN_WIDTH_5; - else if (rx_status->bw == RATE_INFO_BW_10) - scan_width = NL80211_BSS_CHAN_WIDTH_10; sta->sta.deflink.supp_rates[band] = supp_rates | - ieee80211_mandatory_rates(sband, scan_width); + ieee80211_mandatory_rates(sband); if (sta->sta.deflink.supp_rates[band] != prev_rates) { ibss_dbg(sdata, "updated supp_rates set for %pM based on beacon/probe_resp (0x%x -> 0x%x)\n", @@ -1205,7 +1179,6 @@ void ieee80211_ibss_rx_no_sta(struct ieee80211_sub_if_data *sdata, struct sta_info *sta; struct ieee80211_chanctx_conf *chanctx_conf; struct ieee80211_supported_band *sband; - enum nl80211_bss_scan_width scan_width; int band; /* @@ -1231,7 +1204,6 @@ void ieee80211_ibss_rx_no_sta(struct ieee80211_sub_if_data *sdata, return; } band = chanctx_conf->def.chan->band; - scan_width = cfg80211_chandef_to_scan_width(&chanctx_conf->def); rcu_read_unlock(); sta = sta_info_alloc(sdata, addr, GFP_ATOMIC); @@ -1241,7 +1213,7 @@ void ieee80211_ibss_rx_no_sta(struct ieee80211_sub_if_data *sdata, /* make sure mandatory rates are always added */ sband = local->hw.wiphy->bands[band]; sta->sta.deflink.supp_rates[band] = supp_rates | - ieee80211_mandatory_rates(sband, scan_width); + ieee80211_mandatory_rates(sband); spin_lock(&ifibss->incomplete_lock); list_add(&sta->list, &ifibss->incomplete_stations); @@ -1257,7 +1229,7 @@ static void ieee80211_ibss_sta_expire(struct ieee80211_sub_if_data *sdata) unsigned long exp_time = IEEE80211_IBSS_INACTIVITY_LIMIT; unsigned long exp_rsn = IEEE80211_IBSS_RSN_INACTIVITY_LIMIT; - mutex_lock(&local->sta_mtx); + lockdep_assert_wiphy(local->hw.wiphy); list_for_each_entry_safe(sta, tmp, &local->sta_list, list) { unsigned long last_active = ieee80211_sta_last_active(sta); @@ -1282,8 +1254,6 @@ static void ieee80211_ibss_sta_expire(struct ieee80211_sub_if_data *sdata) WARN_ON(__sta_info_destroy(sta)); } } - - mutex_unlock(&local->sta_mtx); } /* @@ -1293,9 +1263,8 @@ static void ieee80211_ibss_sta_expire(struct ieee80211_sub_if_data *sdata) static void ieee80211_sta_merge_ibss(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; - enum nl80211_bss_scan_width scan_width; - sdata_assert_lock(sdata); + lockdep_assert_wiphy(sdata->local->hw.wiphy); mod_timer(&ifibss->timer, round_jiffies(jiffies + IEEE80211_IBSS_MERGE_INTERVAL)); @@ -1315,9 +1284,8 @@ static void ieee80211_sta_merge_ibss(struct ieee80211_sub_if_data *sdata) sdata_info(sdata, "No active IBSS STAs - trying to scan for other IBSS networks with same SSID (merge)\n"); - scan_width = cfg80211_chandef_to_scan_width(&ifibss->chandef); ieee80211_request_ibss_scan(sdata, ifibss->ssid, ifibss->ssid_len, - NULL, 0, scan_width); + NULL, 0); } static void ieee80211_sta_create_ibss(struct ieee80211_sub_if_data *sdata) @@ -1327,7 +1295,7 @@ static void ieee80211_sta_create_ibss(struct ieee80211_sub_if_data *sdata) u16 capability; int i; - sdata_assert_lock(sdata); + lockdep_assert_wiphy(sdata->local->hw.wiphy); if (ifibss->fixed_bssid) { memcpy(bssid, ifibss->bssid, ETH_ALEN); @@ -1435,10 +1403,9 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata) struct cfg80211_bss *cbss; struct ieee80211_channel *chan = NULL; const u8 *bssid = NULL; - enum nl80211_bss_scan_width scan_width; int active_ibss; - sdata_assert_lock(sdata); + lockdep_assert_wiphy(sdata->local->hw.wiphy); active_ibss = ieee80211_sta_active_ibss(sdata); ibss_dbg(sdata, "sta_find_ibss (active_ibss=%d)\n", active_ibss); @@ -1494,8 +1461,6 @@ static void ieee80211_sta_find_ibss(struct 
ieee80211_sub_if_data *sdata) sdata_info(sdata, "Trigger new scan to find an IBSS to join\n"); - scan_width = cfg80211_chandef_to_scan_width(&ifibss->chandef); - if (ifibss->fixed_channel) { num = ieee80211_ibss_setup_scan_channels(local->hw.wiphy, &ifibss->chandef, @@ -1503,11 +1468,10 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata) ARRAY_SIZE(channels)); ieee80211_request_ibss_scan(sdata, ifibss->ssid, ifibss->ssid_len, channels, - num, scan_width); + num); } else { ieee80211_request_ibss_scan(sdata, ifibss->ssid, - ifibss->ssid_len, NULL, - 0, scan_width); + ifibss->ssid_len, NULL, 0); } } else { int interval = IEEE80211_SCAN_INTERVAL; @@ -1532,7 +1496,7 @@ static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata, struct beacon_data *presp; u8 *pos, *end; - sdata_assert_lock(sdata); + lockdep_assert_wiphy(sdata->local->hw.wiphy); presp = sdata_dereference(ifibss->presp, sdata); @@ -1628,10 +1592,8 @@ void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, mgmt = (struct ieee80211_mgmt *) skb->data; fc = le16_to_cpu(mgmt->frame_control); - sdata_lock(sdata); - if (!sdata->u.ibss.ssid_len) - goto mgmt_out; /* not ready to merge yet */ + return; /* not ready to merge yet */ switch (fc & IEEE80211_FCTL_STYPE) { case IEEE80211_STYPE_PROBE_REQ: @@ -1671,9 +1633,6 @@ void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, break; } } - - mgmt_out: - sdata_unlock(sdata); } void ieee80211_ibss_work(struct ieee80211_sub_if_data *sdata) @@ -1681,15 +1640,13 @@ void ieee80211_ibss_work(struct ieee80211_sub_if_data *sdata) struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; struct sta_info *sta; - sdata_lock(sdata); - /* * Work could be scheduled after scan or similar * when we aren't even joined (or trying) with a * network. */ if (!ifibss->ssid_len) - goto out; + return; spin_lock_bh(&ifibss->incomplete_lock); while (!list_empty(&ifibss->incomplete_stations)) { @@ -1715,9 +1672,6 @@ void ieee80211_ibss_work(struct ieee80211_sub_if_data *sdata) WARN_ON(1); break; } - - out: - sdata_unlock(sdata); } static void ieee80211_ibss_timer(struct timer_list *t) @@ -1744,7 +1698,8 @@ void ieee80211_ibss_notify_scan_completed(struct ieee80211_local *local) { struct ieee80211_sub_if_data *sdata; - mutex_lock(&local->iflist_mtx); + lockdep_assert_wiphy(local->hw.wiphy); + list_for_each_entry(sdata, &local->interfaces, list) { if (!ieee80211_sdata_running(sdata)) continue; @@ -1752,7 +1707,6 @@ void ieee80211_ibss_notify_scan_completed(struct ieee80211_local *local) continue; sdata->u.ibss.last_scan_completed = jiffies; } - mutex_unlock(&local->iflist_mtx); } int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, @@ -1767,6 +1721,8 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, int i; int ret; + lockdep_assert_wiphy(local->hw.wiphy); + if (params->chandef.chan->freq_offset) { /* this may work, but is untested */ return -EOPNOTSUPP; @@ -1787,10 +1743,8 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, chanmode = (params->channel_fixed && !ret) ? 
IEEE80211_CHANCTX_SHARED : IEEE80211_CHANCTX_EXCLUSIVE; - mutex_lock(&local->chanctx_mtx); ret = ieee80211_check_combinations(sdata, ¶ms->chandef, chanmode, radar_detect_width); - mutex_unlock(&local->chanctx_mtx); if (ret < 0) return ret; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 07beb72ddd..84df104f27 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -85,6 +85,12 @@ extern const u8 ieee80211_ac_to_qos_mask[IEEE80211_NUM_ACS]; #define IEEE80211_MAX_NAN_INSTANCE_ID 255 +enum ieee80211_status_data { + IEEE80211_STATUS_TYPE_MASK = 0x00f, + IEEE80211_STATUS_TYPE_INVALID = 0, + IEEE80211_STATUS_TYPE_SMPS = 1, + IEEE80211_STATUS_SUBDATA_MASK = 0xff0, +}; /* * Keep a station's queues on the active list for deficit accounting purposes @@ -461,13 +467,24 @@ struct ieee80211_sta_tx_tspec { bool downgraded; }; +/* Advertised TID-to-link mapping info */ +struct ieee80211_adv_ttlm_info { + /* time in TUs at which the new mapping is established, or 0 if there is + * no planned advertised TID-to-link mapping + */ + u16 switch_time; + u32 duration; /* duration of the planned T2L map in TUs */ + u16 map; /* map of usable links for all TIDs */ + bool active; /* whether the advertised mapping is active or not */ +}; + DECLARE_EWMA(beacon_signal, 4, 4) struct ieee80211_if_managed { struct timer_list timer; struct timer_list conn_mon_timer; struct timer_list bcn_mon_timer; - struct work_struct monitor_work; + struct wiphy_work monitor_work; struct wiphy_work beacon_connection_loss_work; struct wiphy_work csa_connection_drop_work; @@ -530,7 +547,7 @@ struct ieee80211_if_managed { /* TDLS support */ u8 tdls_peer[ETH_ALEN] __aligned(2); - struct delayed_work tdls_peer_del_work; + struct wiphy_delayed_work tdls_peer_del_work; struct sk_buff *orig_teardown_skb; /* The original teardown skb */ struct sk_buff *teardown_skb; /* A copy to send through the AP */ spinlock_t teardown_lock; /* To lock changing teardown_skb */ @@ -544,7 +561,7 @@ struct ieee80211_if_managed { * on the BE queue, but there's a lot of VO traffic, we might * get stuck in a downgraded situation and flush takes forever. */ - struct delayed_work tx_tspec_wk; + struct wiphy_delayed_work tx_tspec_wk; /* Information elements from the last transmitted (Re)Association * Request frame. 
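The new enum ieee80211_status_data above carves info->status_data into a 4-bit type field (0x00f) and an 8-bit subdata field (0xff0); for SMPS frames the ht.c hunk packs the link ID and SMPS mode into the low six subdata bits ("link_id 4, smps 2"). A sketch of the matching encode/decode pair using the bitfield.h helpers; the function names are illustrative:

#include <linux/bitfield.h>

static u16 encode_smps_status(int link_id, u8 smps)
{
	u8 status_link_id = link_id < 0 ? 0 : link_id;

	return IEEE80211_STATUS_TYPE_SMPS |
	       u16_encode_bits(status_link_id << 2 | smps,
			       IEEE80211_STATUS_SUBDATA_MASK);
}

static void decode_smps_status(u16 status_data, int *link_id, u8 *smps)
{
	u8 sub = u16_get_bits(status_data, IEEE80211_STATUS_SUBDATA_MASK);

	*smps = sub & 0x3;      /* bits 1:0 - SMPS mode */
	*link_id = sub >> 2;    /* bits 5:2 - link ID */
}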
@@ -554,6 +571,10 @@ struct ieee80211_if_managed { struct wiphy_delayed_work ml_reconf_work; u16 removed_links; + + /* TID-to-link mapping support */ + struct wiphy_delayed_work ttlm_work; + struct ieee80211_adv_ttlm_info ttlm_info; }; struct ieee80211_if_ibss { @@ -618,8 +639,9 @@ struct ieee80211_if_ocb { * these declarations define the interface, which enables * vendor-specific mesh synchronization * + * @rx_bcn_presp: beacon/probe response was received + * @adjust_tsf: TSF adjustment method */ -struct ieee802_11_elems; struct ieee80211_mesh_sync_ops { void (*rx_bcn_presp)(struct ieee80211_sub_if_data *sdata, u16 stype, struct ieee80211_mgmt *mgmt, unsigned int len, @@ -859,12 +881,13 @@ enum txq_info_flags { * struct txq_info - per tid queue * * @tin: contains packets split into multiple flows - * @def_flow: used as a fallback flow when a packet destined to @tin hashes to - * a fq_flow which is already owned by a different tin - * @def_cvars: codel vars for @def_flow + * @def_cvars: codel vars for the @tin's default_flow + * @cstats: code statistics for this queue * @frags: used to keep fragments created after dequeue * @schedule_order: used with ieee80211_local->active_txqs * @schedule_round: counter to prevent infinite loops on TXQ scheduling + * @flags: TXQ flags from &enum txq_info_flags + * @txq: the driver visible part */ struct txq_info { struct fq_tin tin; @@ -893,7 +916,8 @@ struct ieee80211_if_mntr { * struct ieee80211_if_nan - NAN state * * @conf: current NAN configuration - * @func_ids: a bitmap of available instance_id's + * @func_lock: lock for @func_inst_ids + * @function_inst_ids: a bitmap of available instance_id's */ struct ieee80211_if_nan { struct cfg80211_nan_conf conf; @@ -926,6 +950,9 @@ struct ieee80211_link_data_managed { struct wiphy_delayed_work chswitch_work; struct wiphy_work request_smps_work; + /* used to reconfigure hardware SM PS */ + struct wiphy_work recalc_smps; + bool beacon_crc_valid; u32 beacon_crc; struct ewma_beacon_signal ave_beacon_signal; @@ -970,8 +997,8 @@ struct ieee80211_link_data { struct ieee80211_sub_if_data *sdata; unsigned int link_id; - struct list_head assigned_chanctx_list; /* protected by chanctx_mtx */ - struct list_head reserved_chanctx_list; /* protected by chanctx_mtx */ + struct list_head assigned_chanctx_list; /* protected by wiphy mutex */ + struct list_head reserved_chanctx_list; /* protected by wiphy mutex */ /* multicast keys only */ struct ieee80211_key __rcu *gtk[NUM_DEFAULT_KEYS + @@ -981,18 +1008,18 @@ struct ieee80211_link_data { struct ieee80211_key __rcu *default_mgmt_key; struct ieee80211_key __rcu *default_beacon_key; - struct work_struct csa_finalize_work; - bool csa_block_tx; /* write-protected by sdata_lock and local->mtx */ + struct wiphy_work csa_finalize_work; + bool csa_block_tx; bool operating_11g_mode; struct cfg80211_chan_def csa_chandef; - struct work_struct color_change_finalize_work; + struct wiphy_work color_change_finalize_work; struct delayed_work color_collision_detect_work; u64 color_bitmap; - /* context reservation -- protected with chanctx_mtx */ + /* context reservation -- protected with wiphy mutex */ struct ieee80211_chanctx *reserved_chanctx; struct cfg80211_chan_def reserved_chandef; bool reserved_radar_required; @@ -1005,7 +1032,7 @@ struct ieee80211_link_data { int ap_power_level; /* in dBm */ bool radar_required; - struct delayed_work dfs_cac_timer_work; + struct wiphy_delayed_work dfs_cac_timer_work; union { struct ieee80211_link_data_managed mgd; @@ -1032,7 +1059,7 @@ struct 
ieee80211_sub_if_data { /* count for keys needing tailroom space allocation */ int crypto_tx_tailroom_needed_cnt; int crypto_tx_tailroom_pending_dec; - struct delayed_work dec_tailroom_needed_wk; + struct wiphy_delayed_work dec_tailroom_needed_wk; struct net_device *dev; struct ieee80211_local *local; @@ -1064,9 +1091,6 @@ struct ieee80211_sub_if_data { atomic_t num_tx_queued; struct mac80211_qos_map __rcu *qos_map; - /* used to reconfigure hardware SM PS */ - struct work_struct recalc_smps; - struct wiphy_work work; struct sk_buff_head skb_queue; struct sk_buff_head status_queue; @@ -1106,7 +1130,7 @@ struct ieee80211_sub_if_data { struct ieee80211_link_data __rcu *link[IEEE80211_MLD_MAX_NUM_LINKS]; /* for ieee80211_set_active_links_async() */ - struct work_struct activate_links_work; + struct wiphy_work activate_links_work; u16 desired_active_links; #ifdef CONFIG_MAC80211_DEBUGFS @@ -1129,62 +1153,8 @@ struct ieee80211_sub_if_data *vif_to_sdata(struct ieee80211_vif *p) return container_of(p, struct ieee80211_sub_if_data, vif); } -static inline void sdata_lock(struct ieee80211_sub_if_data *sdata) - __acquires(&sdata->wdev.mtx) -{ - mutex_lock(&sdata->wdev.mtx); - __acquire(&sdata->wdev.mtx); -} - -static inline void sdata_unlock(struct ieee80211_sub_if_data *sdata) - __releases(&sdata->wdev.mtx) -{ - mutex_unlock(&sdata->wdev.mtx); - __release(&sdata->wdev.mtx); -} - #define sdata_dereference(p, sdata) \ - rcu_dereference_protected(p, lockdep_is_held(&sdata->wdev.mtx)) - -static inline void -sdata_assert_lock(struct ieee80211_sub_if_data *sdata) -{ - lockdep_assert_held(&sdata->wdev.mtx); -} - -static inline int -ieee80211_chanwidth_get_shift(enum nl80211_chan_width width) -{ - switch (width) { - case NL80211_CHAN_WIDTH_5: - return 2; - case NL80211_CHAN_WIDTH_10: - return 1; - default: - return 0; - } -} - -static inline int -ieee80211_chandef_get_shift(struct cfg80211_chan_def *chandef) -{ - return ieee80211_chanwidth_get_shift(chandef->width); -} - -static inline int -ieee80211_vif_get_shift(struct ieee80211_vif *vif) -{ - struct ieee80211_chanctx_conf *chanctx_conf; - int shift = 0; - - rcu_read_lock(); - chanctx_conf = rcu_dereference(vif->bss_conf.chanctx_conf); - if (chanctx_conf) - shift = ieee80211_chandef_get_shift(&chanctx_conf->def); - rcu_read_unlock(); - - return shift; -} + wiphy_dereference(sdata->local->hw.wiphy, p) static inline int ieee80211_get_mbssid_beacon_len(struct cfg80211_mbssid_elems *elems, @@ -1254,7 +1224,7 @@ struct tpt_led_trigger { #endif /** - * mac80211 scan flags - currently active scan mode + * enum mac80211_scan_flags - currently active scan mode * * @SCAN_SW_SCANNING: We're currently in the process of scanning but may as * well be on the operating channel @@ -1272,7 +1242,7 @@ struct tpt_led_trigger { * and could send a probe request after receiving a beacon. * @SCAN_BEACON_DONE: Beacon received, we can now send a probe request */ -enum { +enum mac80211_scan_flags { SCAN_SW_SCANNING, SCAN_HW_SCANNING, SCAN_ONCHANNEL_SCANNING, @@ -1362,7 +1332,7 @@ struct ieee80211_local { spinlock_t filter_lock; /* used for uploading changed mc list */ - struct work_struct reconfig_filter; + struct wiphy_work reconfig_filter; /* aggregated multicast list */ struct netdev_hw_addr_list mc_list; @@ -1429,10 +1399,9 @@ struct ieee80211_local { /* Station data */ /* - * The mutex only protects the list, hash table and - * counter, reads are done with RCU. + * The list, hash table and counter are protected + * by the wiphy mutex, reads are done with RCU. 
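The comment rewrite above captures the recurring locking scheme of this series: station-list mutations happen under the wiphy mutex (asserted with lockdep_assert_wiphy() instead of taking the now-removed sta_mtx), while readers stay lock-free under RCU. A minimal sketch of the two sides with a hypothetical list, not the mac80211 structures:

struct ex_entry {
	struct list_head list;
	int val;
};

/* write side: caller must hold wiphy->mtx */
static void ex_add(struct wiphy *wiphy, struct list_head *head,
		   struct ex_entry *e)
{
	lockdep_assert_wiphy(wiphy);
	list_add_tail_rcu(&e->list, head);
}

/* read side: plain RCU critical section, no mutex needed */
static int ex_sum(struct list_head *head)
{
	struct ex_entry *e;
	int sum = 0;

	rcu_read_lock();
	list_for_each_entry_rcu(e, head, list)
		sum += e->val;
	rcu_read_unlock();

	return sum;
}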
*/ - struct mutex sta_mtx; spinlock_t tim_lock; unsigned long num_sta; struct list_head sta_list; @@ -1461,15 +1430,6 @@ struct ieee80211_local { struct list_head mon_list; /* only that are IFF_UP && !cooked */ struct mutex iflist_mtx; - /* - * Key mutex, protects sdata's key_list and sta_info's - * key pointers and ptk_idx (write access, they're RCU.) - */ - struct mutex key_mtx; - - /* mutex for scan and work locking */ - struct mutex mtx; - /* Scanning and BSS list */ unsigned long scanning; struct cfg80211_ssid scan_ssid; @@ -1500,7 +1460,6 @@ struct ieee80211_local { /* channel contexts */ struct list_head chanctx_list; - struct mutex chanctx_mtx; #ifdef CONFIG_MAC80211_LEDS struct led_trigger tx_led, rx_led, assoc_led, radio_led; @@ -1554,8 +1513,8 @@ struct ieee80211_local { * interface (and monitors) in PS, this then points there. */ struct ieee80211_sub_if_data *ps_sdata; - struct work_struct dynamic_ps_enable_work; - struct work_struct dynamic_ps_disable_work; + struct wiphy_work dynamic_ps_enable_work; + struct wiphy_work dynamic_ps_disable_work; struct timer_list dynamic_ps_timer; struct notifier_block ifa_notifier; struct notifier_block ifa6_notifier; @@ -1733,6 +1692,8 @@ struct ieee802_11_elems { const struct ieee80211_eht_operation *eht_operation; const struct ieee80211_multi_link_elem *ml_basic; const struct ieee80211_multi_link_elem *ml_reconf; + const struct ieee80211_bandwidth_indication *bandwidth_indication; + const struct ieee80211_ttlm_elem *ttlm[IEEE80211_TTLM_MAX_CNT]; /* length of them, respectively */ u8 ext_capab_len; @@ -1766,6 +1727,8 @@ struct ieee802_11_elems { /* The reconfiguration Multi-Link element in the original IEs */ const struct element *ml_reconf_elem; + u8 ttlm_num; + /* * store the per station profile pointer and length in case that the * parsing also handled Multi-Link element parsing for a specific link @@ -1783,7 +1746,7 @@ struct ieee802_11_elems { */ size_t scratch_len; u8 *scratch_pos; - u8 scratch[]; + u8 scratch[] __counted_by(scratch_len); }; static inline struct ieee80211_local *hw_to_local( @@ -1933,8 +1896,7 @@ void ieee80211_scan_work(struct wiphy *wiphy, struct wiphy_work *work); int ieee80211_request_ibss_scan(struct ieee80211_sub_if_data *sdata, const u8 *ssid, u8 ssid_len, struct ieee80211_channel **channels, - unsigned int n_channels, - enum nl80211_bss_scan_width scan_width); + unsigned int n_channels); int ieee80211_request_scan(struct ieee80211_sub_if_data *sdata, struct cfg80211_scan_request *req); void ieee80211_scan_cancel(struct ieee80211_local *local); @@ -1983,12 +1945,13 @@ int ieee80211_mgmt_tx_cancel_wait(struct wiphy *wiphy, struct wireless_dev *wdev, u64 cookie); /* channel switch handling */ -void ieee80211_csa_finalize_work(struct work_struct *work); +void ieee80211_csa_finalize_work(struct wiphy *wiphy, struct wiphy_work *work); int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_csa_settings *params); /* color change handling */ -void ieee80211_color_change_finalize_work(struct work_struct *work); +void ieee80211_color_change_finalize_work(struct wiphy *wiphy, + struct wiphy_work *work); void ieee80211_color_collision_detection_work(struct work_struct *work); /* interface handling */ @@ -2038,8 +2001,10 @@ void ieee80211_link_init(struct ieee80211_sub_if_data *sdata, void ieee80211_link_stop(struct ieee80211_link_data *link); int ieee80211_vif_set_links(struct ieee80211_sub_if_data *sdata, u16 new_links, u16 dormant_links); -void ieee80211_vif_clear_links(struct 
ieee80211_sub_if_data *sdata); -int __ieee80211_set_active_links(struct ieee80211_vif *vif, u16 active_links); +static inline void ieee80211_vif_clear_links(struct ieee80211_sub_if_data *sdata) +{ + ieee80211_vif_set_links(sdata, 0, 0); +} /* tx handling */ void ieee80211_clear_tx_pending(struct ieee80211_local *local); @@ -2061,7 +2026,7 @@ struct sk_buff * ieee80211_build_data_template(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, u32 info_flags); void ieee80211_tx_monitor(struct ieee80211_local *local, struct sk_buff *skb, - int retry_count, int shift, bool send_to_cooked, + int retry_count, bool send_to_cooked, struct ieee80211_tx_status *status); void ieee80211_check_fast_xmit(struct sta_info *sta); @@ -2094,19 +2059,17 @@ void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata, u16 initiator, u16 reason_code); int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata, enum ieee80211_smps_mode smps, const u8 *da, - const u8 *bssid); + const u8 *bssid, int link_id); bool ieee80211_smps_is_restrictive(enum ieee80211_smps_mode smps_mode_old, enum ieee80211_smps_mode smps_mode_new); -void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, - u16 initiator, u16 reason, bool stop); void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, u16 initiator, u16 reason, bool stop); -void ___ieee80211_start_rx_ba_session(struct sta_info *sta, - u8 dialog_token, u16 timeout, - u16 start_seq_num, u16 ba_policy, u16 tid, - u16 buf_size, bool tx, bool auto_seq, - const struct ieee80211_addba_ext_ie *addbaext); +void __ieee80211_start_rx_ba_session(struct sta_info *sta, + u8 dialog_token, u16 timeout, + u16 start_seq_num, u16 ba_policy, u16 tid, + u16 buf_size, bool tx, bool auto_seq, + const struct ieee80211_addba_ext_ie *addbaext); void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta, enum ieee80211_agg_stop_reason reason); void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata, @@ -2123,13 +2086,11 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, int __ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, enum ieee80211_agg_stop_reason reason); -int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, - enum ieee80211_agg_stop_reason reason); void ieee80211_start_tx_ba_cb(struct sta_info *sta, int tid, struct tid_ampdu_tx *tid_tx); void ieee80211_stop_tx_ba_cb(struct sta_info *sta, int tid, struct tid_ampdu_tx *tid_tx); -void ieee80211_ba_session_work(struct work_struct *work); +void ieee80211_ba_session_work(struct wiphy *wiphy, struct wiphy_work *work); void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid); void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid); @@ -2207,7 +2168,7 @@ void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata, * flags from &enum ieee80211_conn_flags. * @bssid: the currently connected bssid (for reporting) * @csa_ie: parsed 802.11 csa elements on count, mode, chandef and mesh ttl. - All of them will be filled with if success only. + * All of them will be filled with if success only. * Return: 0 on success, <0 on error and >0 if there is nothing to parse. 
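The kernel-doc above gives ieee80211_parse_ch_switch_ie() a three-way return convention, so a caller must separate "error" from "nothing to parse". A purely illustrative caller fragment, with local declarations elided and argument names taken from the kernel-doc rather than checked against the full prototype:

	res = ieee80211_parse_ch_switch_ie(sdata, elems, current_band,
					   vht_cap_info, conn_flags,
					   bssid, &csa_ie);
	if (res < 0)
		return res;	/* malformed elements: hard failure */
	if (res > 0)
		return 0;	/* nothing to parse: no CSA present */
	/* res == 0: csa_ie is fully filled in, act on the switch */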
*/ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata, @@ -2239,8 +2200,7 @@ static inline int __ieee80211_resume(struct ieee80211_hw *hw) /* utility functions/constants */ extern const void *const mac80211_wiphy_privid; /* for wiphy privid */ int ieee80211_frame_duration(enum nl80211_band band, size_t len, - int rate, int erp, int short_preamble, - int shift); + int rate, int erp, int short_preamble); void ieee80211_regulatory_limit_wmm_params(struct ieee80211_sub_if_data *sdata, struct ieee80211_tx_queue_params *qparam, int ac); @@ -2335,8 +2295,6 @@ ieee802_11_parse_elems(const u8 *start, size_t len, bool action, return ieee802_11_parse_elems_crc(start, len, action, 0, 0, bss); } -void ieee80211_fragment_element(struct sk_buff *skb, u8 *len_pos, u8 frag_id); - extern const int ieee802_1d_to_ac[8]; static inline int ieee80211_ac_from_tid(int tid) @@ -2344,8 +2302,10 @@ static inline int ieee80211_ac_from_tid(int tid) return ieee802_1d_to_ac[tid & 7]; } -void ieee80211_dynamic_ps_enable_work(struct work_struct *work); -void ieee80211_dynamic_ps_disable_work(struct work_struct *work); +void ieee80211_dynamic_ps_enable_work(struct wiphy *wiphy, + struct wiphy_work *work); +void ieee80211_dynamic_ps_disable_work(struct wiphy *wiphy, + struct wiphy_work *work); void ieee80211_dynamic_ps_timer(struct timer_list *t); void ieee80211_send_nullfunc(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, @@ -2430,6 +2390,7 @@ void ieee80211_txq_init(struct ieee80211_sub_if_data *sdata, struct txq_info *txq, int tid); void ieee80211_txq_purge(struct ieee80211_local *local, struct txq_info *txqi); +void ieee80211_purge_sta_txqs(struct sta_info *sta); void ieee80211_txq_remove_vlan(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata); void ieee80211_fill_txq_stats(struct cfg80211_txq_stats *txqstats, @@ -2523,7 +2484,7 @@ bool ieee80211_chandef_vht_oper(struct ieee80211_hw *hw, u32 vht_cap_info, const struct ieee80211_vht_operation *oper, const struct ieee80211_ht_operation *htop, struct cfg80211_chan_def *chandef); -void ieee80211_chandef_eht_oper(const struct ieee80211_eht_operation *eht_oper, +void ieee80211_chandef_eht_oper(const struct ieee80211_eht_operation_info *info, bool support_160, bool support_320, struct cfg80211_chan_def *chandef); bool ieee80211_chandef_he_6ghz_oper(struct ieee80211_sub_if_data *sdata, @@ -2565,7 +2526,7 @@ void ieee80211_recalc_chanctx_min_def(struct ieee80211_local *local, struct ieee80211_link_data *rsvd_for); bool ieee80211_is_radar_required(struct ieee80211_local *local); -void ieee80211_dfs_cac_timer_work(struct work_struct *work); +void ieee80211_dfs_cac_timer_work(struct wiphy *wiphy, struct wiphy_work *work); void ieee80211_dfs_cac_cancel(struct ieee80211_local *local); void ieee80211_dfs_radar_detected_work(struct wiphy *wiphy, struct wiphy_work *work); @@ -2590,7 +2551,7 @@ int ieee80211_tdls_mgmt(struct wiphy *wiphy, struct net_device *dev, const u8 *extra_ies, size_t extra_ies_len); int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev, const u8 *peer, enum nl80211_tdls_operation oper); -void ieee80211_tdls_peer_del_work(struct work_struct *wk); +void ieee80211_tdls_peer_del_work(struct wiphy *wiphy, struct wiphy_work *wk); int ieee80211_tdls_channel_switch(struct wiphy *wiphy, struct net_device *dev, const u8 *addr, u8 oper_class, struct cfg80211_chan_def *chandef); diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 6e3bfb46af..11c4caa474 100644 --- a/net/mac80211/iface.c +++ 
b/net/mac80211/iface.c @@ -33,14 +33,13 @@ * The interface list in each struct ieee80211_local is protected * three-fold: * - * (1) modifications may only be done under the RTNL - * (2) modifications and readers are protected against each other by - * the iflist_mtx. - * (3) modifications are done in an RCU manner so atomic readers + * (1) modifications may only be done under the RTNL *and* wiphy mutex + * *and* iflist_mtx + * (2) modifications are done in an RCU manner so atomic readers * can traverse the list in RCU-safe blocks. * * As a consequence, reads (traversals) of the list can be protected - * by either the RTNL, the iflist_mtx or RCU. + * by either the RTNL, the wiphy mutex, the iflist_mtx or RCU. */ static void ieee80211_iface_work(struct wiphy *wiphy, struct wiphy_work *work); @@ -110,7 +109,7 @@ static u32 __ieee80211_recalc_idle(struct ieee80211_local *local, bool working, scanning, active; unsigned int led_trig_start = 0, led_trig_stop = 0; - lockdep_assert_held(&local->mtx); + lockdep_assert_wiphy(local->hw.wiphy); active = force_active || !list_empty(&local->chanctx_list) || @@ -160,6 +159,8 @@ static int ieee80211_verify_mac(struct ieee80211_sub_if_data *sdata, u8 *addr, u8 *m; int ret = 0; + lockdep_assert_wiphy(local->hw.wiphy); + if (is_zero_ether_addr(local->hw.wiphy->addr_mask)) return 0; @@ -176,7 +177,6 @@ static int ieee80211_verify_mac(struct ieee80211_sub_if_data *sdata, u8 *addr, if (!check_dup) return ret; - mutex_lock(&local->iflist_mtx); list_for_each_entry(iter, &local->interfaces, list) { if (iter == sdata) continue; @@ -195,7 +195,6 @@ static int ieee80211_verify_mac(struct ieee80211_sub_if_data *sdata, u8 *addr, break; } } - mutex_unlock(&local->iflist_mtx); return ret; } @@ -207,6 +206,8 @@ static int ieee80211_can_powered_addr_change(struct ieee80211_sub_if_data *sdata struct ieee80211_sub_if_data *scan_sdata; int ret = 0; + lockdep_assert_wiphy(local->hw.wiphy); + /* To be the most flexible here we want to only limit changing the * address if the specific interface is doing offchannel work or * scanning. 
@@ -214,8 +215,6 @@ static int ieee80211_can_powered_addr_change(struct ieee80211_sub_if_data *sdata if (netif_carrier_ok(sdata->dev)) return -EBUSY; - mutex_lock(&local->mtx); - /* First check no ROC work is happening on this iface */ list_for_each_entry(roc, &local->roc_list, list) { if (roc->sdata != sdata) @@ -230,7 +229,7 @@ static int ieee80211_can_powered_addr_change(struct ieee80211_sub_if_data *sdata /* And if this iface is scanning */ if (local->scanning) { scan_sdata = rcu_dereference_protected(local->scan_sdata, - lockdep_is_held(&local->mtx)); + lockdep_is_held(&local->hw.wiphy->mtx)); if (sdata == scan_sdata) ret = -EBUSY; } @@ -247,13 +246,12 @@ static int ieee80211_can_powered_addr_change(struct ieee80211_sub_if_data *sdata } unlock: - mutex_unlock(&local->mtx); return ret; } -static int ieee80211_change_mac(struct net_device *dev, void *addr) +static int _ieee80211_change_mac(struct ieee80211_sub_if_data *sdata, + void *addr) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; struct sockaddr *sa = addr; bool check_dup = true; @@ -278,7 +276,7 @@ static int ieee80211_change_mac(struct net_device *dev, void *addr) if (live) drv_remove_interface(local, sdata); - ret = eth_mac_addr(dev, sa); + ret = eth_mac_addr(sdata->dev, sa); if (ret == 0) { memcpy(sdata->vif.addr, sa->sa_data, ETH_ALEN); @@ -294,6 +292,27 @@ static int ieee80211_change_mac(struct net_device *dev, void *addr) return ret; } +static int ieee80211_change_mac(struct net_device *dev, void *addr) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; + int ret; + + /* + * This happens during unregistration if there's a bond device + * active (maybe other cases?) and we must get removed from it. + * But we really don't care anymore if it's not registered now. 
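ieee80211_change_mac() above shows the template this series applies to externally-driven entry points (the ethtool ring/stats ops earlier in this diff follow the same shape): take wiphy_lock(), call a helper that may now assert the lock, release. A generic sketch with invented names:

static int ex_locked_op(struct ieee80211_local *local)
{
	/* helpers converted in this series assert the lock ... */
	lockdep_assert_wiphy(local->hw.wiphy);
	/* ... */
	return 0;
}

static int ex_entry_point(struct ieee80211_local *local)
{
	int ret;

	/* ... so the outermost caller must be the one to take it */
	wiphy_lock(local->hw.wiphy);
	ret = ex_locked_op(local);
	wiphy_unlock(local->hw.wiphy);

	return ret;
}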
+ */ + if (!dev->ieee80211_ptr->registered) + return 0; + + wiphy_lock(local->hw.wiphy); + ret = _ieee80211_change_mac(sdata, addr); + wiphy_unlock(local->hw.wiphy); + + return ret; +} + static inline int identical_mac_addr_allowed(int type1, int type2) { return type1 == NL80211_IFTYPE_MONITOR || @@ -311,9 +330,9 @@ static int ieee80211_check_concurrent_iface(struct ieee80211_sub_if_data *sdata, { struct ieee80211_local *local = sdata->local; struct ieee80211_sub_if_data *nsdata; - int ret; ASSERT_RTNL(); + lockdep_assert_wiphy(local->hw.wiphy); /* we hold the RTNL here so can safely walk the list */ list_for_each_entry(nsdata, &local->interfaces, list) { @@ -378,10 +397,7 @@ static int ieee80211_check_concurrent_iface(struct ieee80211_sub_if_data *sdata, } } - mutex_lock(&local->chanctx_mtx); - ret = ieee80211_check_combinations(sdata, NULL, 0, 0); - mutex_unlock(&local->chanctx_mtx); - return ret; + return ieee80211_check_combinations(sdata, NULL, 0, 0); } static int ieee80211_check_queues(struct ieee80211_sub_if_data *sdata, @@ -430,12 +446,13 @@ static int ieee80211_open(struct net_device *dev) if (!is_valid_ether_addr(dev->dev_addr)) return -EADDRNOTAVAIL; + wiphy_lock(sdata->local->hw.wiphy); err = ieee80211_check_concurrent_iface(sdata, sdata->vif.type); if (err) - return err; + goto out; - wiphy_lock(sdata->local->hw.wiphy); err = ieee80211_do_open(&sdata->wdev, true); +out: wiphy_unlock(sdata->local->hw.wiphy); return err; @@ -453,6 +470,8 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do bool cancel_scan; struct cfg80211_nan_func *func; + lockdep_assert_wiphy(local->hw.wiphy); + clear_bit(SDATA_STATE_RUNNING, &sdata->state); synchronize_rcu(); /* flush _ieee80211_wake_txqs() */ @@ -516,16 +535,12 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do } del_timer_sync(&local->dynamic_ps_timer); - cancel_work_sync(&local->dynamic_ps_enable_work); + wiphy_work_cancel(local->hw.wiphy, &local->dynamic_ps_enable_work); - cancel_work_sync(&sdata->recalc_smps); - - sdata_lock(sdata); WARN(ieee80211_vif_is_mld(&sdata->vif), "destroying interface with valid links 0x%04x\n", sdata->vif.valid_links); - mutex_lock(&local->mtx); sdata->vif.bss_conf.csa_active = false; if (sdata->vif.type == NL80211_IFTYPE_STATION) sdata->deflink.u.mgd.csa_waiting_bcn = false; @@ -534,20 +549,17 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do IEEE80211_QUEUE_STOP_REASON_CSA); sdata->deflink.csa_block_tx = false; } - mutex_unlock(&local->mtx); - sdata_unlock(sdata); - - cancel_work_sync(&sdata->deflink.csa_finalize_work); - cancel_work_sync(&sdata->deflink.color_change_finalize_work); - cancel_delayed_work_sync(&sdata->deflink.dfs_cac_timer_work); + wiphy_work_cancel(local->hw.wiphy, &sdata->deflink.csa_finalize_work); + wiphy_work_cancel(local->hw.wiphy, + &sdata->deflink.color_change_finalize_work); + wiphy_delayed_work_cancel(local->hw.wiphy, + &sdata->deflink.dfs_cac_timer_work); if (sdata->wdev.cac_started) { chandef = sdata->vif.bss_conf.chandef; WARN_ON(local->suspended); - mutex_lock(&local->mtx); ieee80211_link_release_channel(&sdata->deflink); - mutex_unlock(&local->mtx); cfg80211_cac_event(sdata->dev, &chandef, NL80211_RADAR_CAC_ABORTED, GFP_KERNEL); @@ -575,9 +587,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do switch (sdata->vif.type) { case NL80211_IFTYPE_AP_VLAN: - mutex_lock(&local->mtx); list_del(&sdata->u.vlan.list); - mutex_unlock(&local->mtx); 
RCU_INIT_POINTER(sdata->vif.bss_conf.chanctx_conf, NULL); /* see comment in the default case below */ ieee80211_free_keys(sdata, true); @@ -675,9 +685,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do if (local->monitors == 0) ieee80211_del_virtual_monitor(local); - mutex_lock(&local->mtx); ieee80211_recalc_idle(local); - mutex_unlock(&local->mtx); if (!(sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE)) break; @@ -750,9 +758,9 @@ static int ieee80211_stop(struct net_device *dev) ieee80211_stop_mbssid(sdata); } - cancel_work_sync(&sdata->activate_links_work); - wiphy_lock(sdata->local->hw.wiphy); + wiphy_work_cancel(sdata->local->hw.wiphy, &sdata->activate_links_work); + ieee80211_do_stop(sdata, true); wiphy_unlock(sdata->local->hw.wiphy); @@ -779,7 +787,7 @@ static void ieee80211_set_multicast_list(struct net_device *dev) spin_lock_bh(&local->filter_lock); __hw_addr_sync(&local->mc_list, &dev->mc, dev->addr_len); spin_unlock_bh(&local->filter_lock); - ieee80211_queue_work(&local->hw, &local->reconfig_filter); + wiphy_work_queue(local->hw.wiphy, &local->reconfig_filter); } /* @@ -1046,7 +1054,7 @@ void ieee80211_recalc_offload(struct ieee80211_local *local) if (!ieee80211_hw_check(&local->hw, SUPPORTS_TX_ENCAP_OFFLOAD)) return; - mutex_lock(&local->iflist_mtx); + lockdep_assert_wiphy(local->hw.wiphy); list_for_each_entry(sdata, &local->interfaces, list) { if (!ieee80211_sdata_running(sdata)) @@ -1054,8 +1062,6 @@ void ieee80211_recalc_offload(struct ieee80211_local *local) ieee80211_recalc_sdata_offload(sdata); } - - mutex_unlock(&local->iflist_mtx); } void ieee80211_adjust_monitor_flags(struct ieee80211_sub_if_data *sdata, @@ -1133,7 +1139,7 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local) snprintf(sdata->name, IFNAMSIZ, "%s-monitor", wiphy_name(local->hw.wiphy)); sdata->wdev.iftype = NL80211_IFTYPE_MONITOR; - mutex_init(&sdata->wdev.mtx); + sdata->wdev.wiphy = local->hw.wiphy; ieee80211_sdata_init(local, sdata); @@ -1158,19 +1164,14 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local) rcu_assign_pointer(local->monitor_sdata, sdata); mutex_unlock(&local->iflist_mtx); - sdata_lock(sdata); - mutex_lock(&local->mtx); ret = ieee80211_link_use_channel(&sdata->deflink, &local->monitor_chandef, IEEE80211_CHANCTX_EXCLUSIVE); - mutex_unlock(&local->mtx); - sdata_unlock(sdata); if (ret) { mutex_lock(&local->iflist_mtx); RCU_INIT_POINTER(local->monitor_sdata, NULL); mutex_unlock(&local->iflist_mtx); synchronize_net(); drv_remove_interface(local, sdata); - mutex_destroy(&sdata->wdev.mtx); kfree(sdata); return ret; } @@ -1206,15 +1207,10 @@ void ieee80211_del_virtual_monitor(struct ieee80211_local *local) synchronize_net(); - sdata_lock(sdata); - mutex_lock(&local->mtx); ieee80211_link_release_channel(&sdata->deflink); - mutex_unlock(&local->mtx); - sdata_unlock(sdata); drv_remove_interface(local, sdata); - mutex_destroy(&sdata->wdev.mtx); kfree(sdata); } @@ -1232,6 +1228,8 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) int res; u32 hw_reconf_flags = 0; + lockdep_assert_wiphy(local->hw.wiphy); + switch (sdata->vif.type) { case NL80211_IFTYPE_AP_VLAN: { struct ieee80211_sub_if_data *master; @@ -1239,9 +1237,7 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) if (!sdata->bss) return -ENOLINK; - mutex_lock(&local->mtx); list_add(&sdata->u.vlan.list, &sdata->bss->vlans); - mutex_unlock(&local->mtx); master = container_of(sdata->bss, struct ieee80211_sub_if_data, u.ap); @@ -1258,10 +1254,8 @@ int 
ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) sizeof(sdata->vif.hw_queue)); sdata->vif.bss_conf.chandef = master->vif.bss_conf.chandef; - mutex_lock(&local->key_mtx); sdata->crypto_tx_tailroom_needed_cnt += master->crypto_tx_tailroom_needed_cnt; - mutex_unlock(&local->key_mtx); break; } @@ -1352,9 +1346,7 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) ieee80211_adjust_monitor_flags(sdata, 1); ieee80211_configure_filter(local); ieee80211_recalc_offload(local); - mutex_lock(&local->mtx); ieee80211_recalc_idle(local); - mutex_unlock(&local->mtx); netif_carrier_on(dev); break; @@ -1459,11 +1451,8 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) drv_stop(local); err_del_bss: sdata->bss = NULL; - if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) { - mutex_lock(&local->mtx); + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) list_del(&sdata->u.vlan.list); - mutex_unlock(&local->mtx); - } /* might already be clear but that doesn't matter */ clear_bit(SDATA_STATE_RUNNING, &sdata->state); return res; @@ -1490,12 +1479,13 @@ static void ieee80211_iface_process_skb(struct ieee80211_local *local, { struct ieee80211_mgmt *mgmt = (void *)skb->data; + lockdep_assert_wiphy(local->hw.wiphy); + if (ieee80211_is_action(mgmt->frame_control) && mgmt->u.action.category == WLAN_CATEGORY_BACK) { struct sta_info *sta; int len = skb->len; - mutex_lock(&local->sta_mtx); sta = sta_info_get_bss(sdata, mgmt->sa); if (sta) { switch (mgmt->u.action.u.addba_req.action_code) { @@ -1516,7 +1506,6 @@ static void ieee80211_iface_process_skb(struct ieee80211_local *local, break; } } - mutex_unlock(&local->sta_mtx); } else if (ieee80211_is_action(mgmt->frame_control) && mgmt->u.action.category == WLAN_CATEGORY_VHT) { switch (mgmt->u.action.u.vht_group_notif.action_code) { @@ -1530,7 +1519,6 @@ static void ieee80211_iface_process_skb(struct ieee80211_local *local, band = status->band; opmode = mgmt->u.action.u.vht_opmode_notif.operating_mode; - mutex_lock(&local->sta_mtx); sta = sta_info_get_bss(sdata, mgmt->sa); if (sta) @@ -1538,7 +1526,6 @@ static void ieee80211_iface_process_skb(struct ieee80211_local *local, &sta->deflink, opmode, band); - mutex_unlock(&local->sta_mtx); break; } case WLAN_VHT_ACTION_GROUPID_MGMT: @@ -1585,7 +1572,6 @@ static void ieee80211_iface_process_skb(struct ieee80211_local *local, * a block-ack session was active. That cannot be * right, so terminate the session. 
*/ - mutex_lock(&local->sta_mtx); sta = sta_info_get_bss(sdata, mgmt->sa); if (sta) { u16 tid = ieee80211_get_tid(hdr); @@ -1595,7 +1581,6 @@ static void ieee80211_iface_process_skb(struct ieee80211_local *local, WLAN_REASON_QSTA_REQUIRE_SETUP, true); } - mutex_unlock(&local->sta_mtx); } else switch (sdata->vif.type) { case NL80211_IFTYPE_STATION: ieee80211_sta_rx_queued_mgmt(sdata, skb); @@ -1692,15 +1677,8 @@ static void ieee80211_iface_work(struct wiphy *wiphy, struct wiphy_work *work) } } -static void ieee80211_recalc_smps_work(struct work_struct *work) -{ - struct ieee80211_sub_if_data *sdata = - container_of(work, struct ieee80211_sub_if_data, recalc_smps); - - ieee80211_recalc_smps(sdata, &sdata->deflink); -} - -static void ieee80211_activate_links_work(struct work_struct *work) +static void ieee80211_activate_links_work(struct wiphy *wiphy, + struct wiphy_work *work) { struct ieee80211_sub_if_data *sdata = container_of(work, struct ieee80211_sub_if_data, @@ -1745,8 +1723,8 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, skb_queue_head_init(&sdata->skb_queue); skb_queue_head_init(&sdata->status_queue); wiphy_work_init(&sdata->work, ieee80211_iface_work); - INIT_WORK(&sdata->recalc_smps, ieee80211_recalc_smps_work); - INIT_WORK(&sdata->activate_links_work, ieee80211_activate_links_work); + wiphy_work_init(&sdata->activate_links_work, + ieee80211_activate_links_work); switch (type) { case NL80211_IFTYPE_P2P_GO: @@ -1805,7 +1783,7 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, /* need to do this after the switch so vif.type is correct */ ieee80211_link_setup(&sdata->deflink); - ieee80211_debugfs_add_netdev(sdata); + ieee80211_debugfs_recreate_netdev(sdata, false); } static int ieee80211_runtime_change_iftype(struct ieee80211_sub_if_data *sdata, @@ -1936,6 +1914,8 @@ static void ieee80211_assign_perm_addr(struct ieee80211_local *local, u8 tmp_addr[ETH_ALEN]; int i; + lockdep_assert_wiphy(local->hw.wiphy); + /* default ... 
something at least */ memcpy(perm_addr, local->hw.wiphy->perm_addr, ETH_ALEN); @@ -1943,8 +1923,6 @@ static void ieee80211_assign_perm_addr(struct ieee80211_local *local, local->hw.wiphy->n_addresses <= 1) return; - mutex_lock(&local->iflist_mtx); - switch (type) { case NL80211_IFTYPE_MONITOR: /* doesn't matter */ @@ -1968,7 +1946,7 @@ static void ieee80211_assign_perm_addr(struct ieee80211_local *local, if (!ieee80211_sdata_running(sdata)) continue; memcpy(perm_addr, sdata->vif.addr, ETH_ALEN); - goto out_unlock; + return; } } fallthrough; @@ -2054,9 +2032,6 @@ static void ieee80211_assign_perm_addr(struct ieee80211_local *local, break; } - - out_unlock: - mutex_unlock(&local->iflist_mtx); } int ieee80211_if_add(struct ieee80211_local *local, const char *name, @@ -2070,6 +2045,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, int ret, i; ASSERT_RTNL(); + lockdep_assert_wiphy(local->hw.wiphy); if (type == NL80211_IFTYPE_P2P_DEVICE || type == NL80211_IFTYPE_NAN) { struct wireless_dev *wdev; @@ -2157,8 +2133,8 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, INIT_LIST_HEAD(&sdata->key_list); - INIT_DELAYED_WORK(&sdata->dec_tailroom_needed_wk, - ieee80211_delayed_tailroom_dec); + wiphy_delayed_work_init(&sdata->dec_tailroom_needed_wk, + ieee80211_delayed_tailroom_dec); for (i = 0; i < NUM_NL80211_BANDS; i++) { struct ieee80211_supported_band *sband; @@ -2236,6 +2212,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, void ieee80211_if_remove(struct ieee80211_sub_if_data *sdata) { ASSERT_RTNL(); + lockdep_assert_wiphy(sdata->local->hw.wiphy); mutex_lock(&sdata->local->iflist_mtx); list_del_rcu(&sdata->list); @@ -2281,19 +2258,30 @@ void ieee80211_remove_interfaces(struct ieee80211_local *local) */ cfg80211_shutdown_all_interfaces(local->hw.wiphy); + wiphy_lock(local->hw.wiphy); + WARN(local->open_count, "%s: open count remains %d\n", wiphy_name(local->hw.wiphy), local->open_count); - ieee80211_txq_teardown_flows(local); - mutex_lock(&local->iflist_mtx); list_splice_init(&local->interfaces, &unreg_list); mutex_unlock(&local->iflist_mtx); - wiphy_lock(local->hw.wiphy); list_for_each_entry_safe(sdata, tmp, &unreg_list, list) { bool netdev = sdata->dev; + /* + * Remove IP addresses explicitly, since the notifier will + * skip the callbacks if wdev->registered is false, since + * we can't acquire the wiphy_lock() again there if already + * inside this locked section. 
+ */ + sdata->vif.cfg.arp_addr_cnt = 0; + if (sdata->vif.type == NL80211_IFTYPE_STATION && + sdata->u.mgd.associated) + ieee80211_vif_cfg_change_notify(sdata, + BSS_CHANGED_ARP_FILTER); + list_del(&sdata->list); cfg80211_unregister_wdev(&sdata->wdev); diff --git a/net/mac80211/key.c b/net/mac80211/key.c index a2db0585dc..af74d7f9d9 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -53,11 +53,6 @@ static const u8 bcast_addr[ETH_ALEN] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF }; -static void assert_key_lock(struct ieee80211_local *local) -{ - lockdep_assert_held(&local->key_mtx); -} - static void update_vlan_tailroom_need_count(struct ieee80211_sub_if_data *sdata, int delta) { @@ -67,7 +62,7 @@ update_vlan_tailroom_need_count(struct ieee80211_sub_if_data *sdata, int delta) return; /* crypto_tx_tailroom_needed_cnt is protected by this */ - assert_key_lock(sdata->local); + lockdep_assert_wiphy(sdata->local->hw.wiphy); rcu_read_lock(); @@ -98,7 +93,7 @@ static void increment_tailroom_need_count(struct ieee80211_sub_if_data *sdata) * http://mid.gmane.org/1308590980.4322.19.camel@jlt3.sipsolutions.net */ - assert_key_lock(sdata->local); + lockdep_assert_wiphy(sdata->local->hw.wiphy); update_vlan_tailroom_need_count(sdata, 1); @@ -114,7 +109,7 @@ static void increment_tailroom_need_count(struct ieee80211_sub_if_data *sdata) static void decrease_tailroom_need_count(struct ieee80211_sub_if_data *sdata, int delta) { - assert_key_lock(sdata->local); + lockdep_assert_wiphy(sdata->local->hw.wiphy); WARN_ON_ONCE(sdata->crypto_tx_tailroom_needed_cnt < delta); @@ -129,6 +124,7 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key) int ret = -EOPNOTSUPP; might_sleep(); + lockdep_assert_wiphy(key->local->hw.wiphy); if (key->flags & KEY_FLAG_TAINTED) { /* If we get here, it's during resume and the key is @@ -151,8 +147,6 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key) if (!key->local->ops->set_key) goto out_unsupported; - assert_key_lock(key->local); - sta = key->sta; /* @@ -242,14 +236,14 @@ static void ieee80211_key_disable_hw_accel(struct ieee80211_key *key) if (!key || !key->local->ops->set_key) return; - assert_key_lock(key->local); - if (!(key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)) return; sta = key->sta; sdata = key->sdata; + lockdep_assert_wiphy(key->local->hw.wiphy); + if (key->conf.link_id >= 0 && sdata->vif.active_links && !(sdata->vif.active_links & BIT(key->conf.link_id))) return; @@ -275,7 +269,7 @@ static int _ieee80211_set_tx_key(struct ieee80211_key *key, bool force) struct sta_info *sta = key->sta; struct ieee80211_local *local = key->local; - assert_key_lock(local); + lockdep_assert_wiphy(local->hw.wiphy); set_sta_flag(sta, WLAN_STA_USES_ENCRYPTION); @@ -300,7 +294,7 @@ static void ieee80211_pairwise_rekey(struct ieee80211_key *old, struct sta_info *sta = new->sta; int i; - assert_key_lock(local); + lockdep_assert_wiphy(local->hw.wiphy); if (new->conf.flags & IEEE80211_KEY_FLAG_NO_AUTO_TX) { /* Extended Key ID key install, initial one or rekey */ @@ -317,11 +311,9 @@ static void ieee80211_pairwise_rekey(struct ieee80211_key *old, * job done for the few ms we need it.) */ set_sta_flag(sta, WLAN_STA_BLOCK_BA); - mutex_lock(&sta->ampdu_mlme.mtx); for (i = 0; i < IEEE80211_NUM_TIDS; i++) - ___ieee80211_stop_tx_ba_session(sta, i, - AGG_STOP_LOCAL_REQUEST); - mutex_unlock(&sta->ampdu_mlme.mtx); + __ieee80211_stop_tx_ba_session(sta, i, + AGG_STOP_LOCAL_REQUEST); } } else if (old) { /* Rekey without Extended Key ID. 
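The hunks above all repeat one conversion: a plain work_struct becomes a wiphy_work, whose handler runs with the wiphy mutex already held, which is what lets the patch drop the local->mtx, key_mtx and sta_mtx sections in favor of lockdep_assert_wiphy(). The following is a minimal sketch of that pattern, not part of the patch: example_priv, example_recalc, example_start and example_stop are invented names; only the wiphy_work_*() and lockdep_assert_wiphy() calls are the real cfg80211 API.

#include <net/cfg80211.h>

struct example_priv {
	struct wiphy *wiphy;
	struct wiphy_work recalc_work;
};

/* wiphy works run with wiphy->mtx held, unlike schedule_work() items */
static void example_recalc(struct wiphy *wiphy, struct wiphy_work *work)
{
	struct example_priv *priv =
		container_of(work, struct example_priv, recalc_work);

	lockdep_assert_wiphy(wiphy);	/* always true inside a wiphy work */
	WARN_ON(priv->wiphy != wiphy);	/* both name the same device */
}

static void example_start(struct example_priv *priv)
{
	wiphy_work_init(&priv->recalc_work, example_recalc);
	/* safe from any context; replaces ieee80211_queue_work() */
	wiphy_work_queue(priv->wiphy, &priv->recalc_work);
}

static void example_stop(struct example_priv *priv)
{
	/* must be called with wiphy->mtx held, cf. wiphy_lock() */
	wiphy_work_cancel(priv->wiphy, &priv->recalc_work);
}

The design win this sketch illustrates: since every handler runs under the single wiphy mutex, lock ordering between the old fine-grained mutexes no longer has to be proven case by case.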
@@ -358,12 +350,14 @@ static void __ieee80211_set_default_key(struct ieee80211_link_data *link, struct ieee80211_sub_if_data *sdata = link->sdata; struct ieee80211_key *key = NULL; - assert_key_lock(sdata->local); + lockdep_assert_wiphy(sdata->local->hw.wiphy); if (idx >= 0 && idx < NUM_DEFAULT_KEYS) { - key = key_mtx_dereference(sdata->local, sdata->keys[idx]); + key = wiphy_dereference(sdata->local->hw.wiphy, + sdata->keys[idx]); if (!key) - key = key_mtx_dereference(sdata->local, link->gtk[idx]); + key = wiphy_dereference(sdata->local->hw.wiphy, + link->gtk[idx]); } if (uni) { @@ -382,9 +376,9 @@ static void __ieee80211_set_default_key(struct ieee80211_link_data *link, void ieee80211_set_default_key(struct ieee80211_link_data *link, int idx, bool uni, bool multi) { - mutex_lock(&link->sdata->local->key_mtx); + lockdep_assert_wiphy(link->sdata->local->hw.wiphy); + __ieee80211_set_default_key(link, idx, uni, multi); - mutex_unlock(&link->sdata->local->key_mtx); } static void @@ -393,11 +387,12 @@ __ieee80211_set_default_mgmt_key(struct ieee80211_link_data *link, int idx) struct ieee80211_sub_if_data *sdata = link->sdata; struct ieee80211_key *key = NULL; - assert_key_lock(sdata->local); + lockdep_assert_wiphy(sdata->local->hw.wiphy); if (idx >= NUM_DEFAULT_KEYS && idx < NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS) - key = key_mtx_dereference(sdata->local, link->gtk[idx]); + key = wiphy_dereference(sdata->local->hw.wiphy, + link->gtk[idx]); rcu_assign_pointer(link->default_mgmt_key, key); @@ -407,9 +402,9 @@ __ieee80211_set_default_mgmt_key(struct ieee80211_link_data *link, int idx) void ieee80211_set_default_mgmt_key(struct ieee80211_link_data *link, int idx) { - mutex_lock(&link->sdata->local->key_mtx); + lockdep_assert_wiphy(link->sdata->local->hw.wiphy); + __ieee80211_set_default_mgmt_key(link, idx); - mutex_unlock(&link->sdata->local->key_mtx); } static void @@ -418,12 +413,13 @@ __ieee80211_set_default_beacon_key(struct ieee80211_link_data *link, int idx) struct ieee80211_sub_if_data *sdata = link->sdata; struct ieee80211_key *key = NULL; - assert_key_lock(sdata->local); + lockdep_assert_wiphy(sdata->local->hw.wiphy); if (idx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS && idx < NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS + NUM_DEFAULT_BEACON_KEYS) - key = key_mtx_dereference(sdata->local, link->gtk[idx]); + key = wiphy_dereference(sdata->local->hw.wiphy, + link->gtk[idx]); rcu_assign_pointer(link->default_beacon_key, key); @@ -433,9 +429,9 @@ __ieee80211_set_default_beacon_key(struct ieee80211_link_data *link, int idx) void ieee80211_set_default_beacon_key(struct ieee80211_link_data *link, int idx) { - mutex_lock(&link->sdata->local->key_mtx); + lockdep_assert_wiphy(link->sdata->local->hw.wiphy); + __ieee80211_set_default_beacon_key(link, idx); - mutex_unlock(&link->sdata->local->key_mtx); } static int ieee80211_key_replace(struct ieee80211_sub_if_data *sdata, @@ -452,6 +448,8 @@ static int ieee80211_key_replace(struct ieee80211_sub_if_data *sdata, bool defunikey, defmultikey, defmgmtkey, defbeaconkey; bool is_wep; + lockdep_assert_wiphy(sdata->local->hw.wiphy); + /* caller must provide at least one old/new */ if (WARN_ON(!new && !old)) return 0; @@ -482,7 +480,7 @@ static int ieee80211_key_replace(struct ieee80211_sub_if_data *sdata, if (sta) { link_sta = rcu_dereference_protected(sta->link[link_id], - lockdep_is_held(&sta->local->sta_mtx)); + lockdep_is_held(&sta->local->hw.wiphy->mtx)); if (!link_sta) return -ENOLINK; } @@ -510,12 +508,10 @@ static int ieee80211_key_replace(struct 
ieee80211_sub_if_data *sdata, ret = ieee80211_key_enable_hw_accel(new); } } else { - if (!new->local->wowlan) { + if (!new->local->wowlan) ret = ieee80211_key_enable_hw_accel(new); - } else { - assert_key_lock(new->local); + else new->flags |= KEY_FLAG_UPLOADED_TO_HARDWARE; - } } if (ret) @@ -541,17 +537,17 @@ static int ieee80211_key_replace(struct ieee80211_sub_if_data *sdata, ieee80211_check_fast_rx(sta); } else { defunikey = old && - old == key_mtx_dereference(sdata->local, - sdata->default_unicast_key); + old == wiphy_dereference(sdata->local->hw.wiphy, + sdata->default_unicast_key); defmultikey = old && - old == key_mtx_dereference(sdata->local, - link->default_multicast_key); + old == wiphy_dereference(sdata->local->hw.wiphy, + link->default_multicast_key); defmgmtkey = old && - old == key_mtx_dereference(sdata->local, - link->default_mgmt_key); + old == wiphy_dereference(sdata->local->hw.wiphy, + link->default_mgmt_key); defbeaconkey = old && - old == key_mtx_dereference(sdata->local, - link->default_beacon_key); + old == wiphy_dereference(sdata->local->hw.wiphy, + link->default_beacon_key); if (defunikey && !new) __ieee80211_set_default_key(link, -1, true, false); @@ -775,8 +771,9 @@ static void __ieee80211_key_destroy(struct ieee80211_key *key, if (delay_tailroom) { /* see ieee80211_delayed_tailroom_dec */ sdata->crypto_tx_tailroom_pending_dec++; - schedule_delayed_work(&sdata->dec_tailroom_needed_wk, - HZ/2); + wiphy_delayed_work_queue(sdata->local->hw.wiphy, + &sdata->dec_tailroom_needed_wk, + HZ / 2); } else { decrease_tailroom_need_count(sdata, 1); } @@ -859,13 +856,15 @@ int ieee80211_key_link(struct ieee80211_key *key, bool delay_tailroom = sdata->vif.type == NL80211_IFTYPE_STATION; int ret; - mutex_lock(&sdata->local->key_mtx); + lockdep_assert_wiphy(sdata->local->hw.wiphy); if (sta && pairwise) { struct ieee80211_key *alt_key; - old_key = key_mtx_dereference(sdata->local, sta->ptk[idx]); - alt_key = key_mtx_dereference(sdata->local, sta->ptk[idx ^ 1]); + old_key = wiphy_dereference(sdata->local->hw.wiphy, + sta->ptk[idx]); + alt_key = wiphy_dereference(sdata->local->hw.wiphy, + sta->ptk[idx ^ 1]); /* The rekey code assumes that the old and new key are using * the same cipher. Enforce the assumption for pairwise keys. 
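Where the old code used key_mtx_dereference(), the hunks above switch to cfg80211's wiphy_dereference(), which validates the RCU access against wiphy->mtx instead of the removed key_mtx. A sketch under stated assumptions: example_key/example_keys and the two helpers are invented; wiphy_dereference() is the real macro, expanding to rcu_dereference_protected(p, lockdep_is_held(&wiphy->mtx)).

#include <linux/rcupdate.h>
#include <net/cfg80211.h>

struct example_key {
	int keyidx;
};

struct example_keys {
	struct example_key __rcu *default_key;
};

/* Caller must hold wiphy->mtx; lockdep and sparse both verify this,
 * which is exactly the guarantee key_mtx used to provide. */
static struct example_key *
example_get_default(struct wiphy *wiphy, struct example_keys *keys)
{
	return wiphy_dereference(wiphy, keys->default_key);
}

static void example_set_default(struct wiphy *wiphy,
				struct example_keys *keys,
				struct example_key *new)
{
	lockdep_assert_wiphy(wiphy);
	rcu_assign_pointer(keys->default_key, new);
}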
@@ -881,21 +880,22 @@ int ieee80211_key_link(struct ieee80211_key *key, if (link_id >= 0) { link_sta = rcu_dereference_protected(sta->link[link_id], - lockdep_is_held(&sta->local->sta_mtx)); + lockdep_is_held(&sta->local->hw.wiphy->mtx)); if (!link_sta) { ret = -ENOLINK; goto out; } } - old_key = key_mtx_dereference(sdata->local, link_sta->gtk[idx]); + old_key = wiphy_dereference(sdata->local->hw.wiphy, + link_sta->gtk[idx]); } else { if (idx < NUM_DEFAULT_KEYS) - old_key = key_mtx_dereference(sdata->local, - sdata->keys[idx]); + old_key = wiphy_dereference(sdata->local->hw.wiphy, + sdata->keys[idx]); if (!old_key) - old_key = key_mtx_dereference(sdata->local, - link->gtk[idx]); + old_key = wiphy_dereference(sdata->local->hw.wiphy, + link->gtk[idx]); } /* Non-pairwise keys must also not switch the cipher on rekey */ @@ -940,8 +940,6 @@ int ieee80211_key_link(struct ieee80211_key *key, out: ieee80211_key_free_unused(key); - mutex_unlock(&sdata->local->key_mtx); - return ret; } @@ -967,8 +965,6 @@ void ieee80211_reenable_keys(struct ieee80211_sub_if_data *sdata) lockdep_assert_wiphy(sdata->local->hw.wiphy); - mutex_lock(&sdata->local->key_mtx); - sdata->crypto_tx_tailroom_needed_cnt = 0; sdata->crypto_tx_tailroom_pending_dec = 0; @@ -985,8 +981,6 @@ void ieee80211_reenable_keys(struct ieee80211_sub_if_data *sdata) ieee80211_key_enable_hw_accel(key); } } - - mutex_unlock(&sdata->local->key_mtx); } void ieee80211_iter_keys(struct ieee80211_hw *hw, @@ -1004,7 +998,6 @@ void ieee80211_iter_keys(struct ieee80211_hw *hw, lockdep_assert_wiphy(hw->wiphy); - mutex_lock(&local->key_mtx); if (vif) { sdata = vif_to_sdata(vif); list_for_each_entry_safe(key, tmp, &sdata->key_list, list) @@ -1019,7 +1012,6 @@ void ieee80211_iter_keys(struct ieee80211_hw *hw, key->sta ? 
&key->sta->sta : NULL, &key->conf, iter_data); } - mutex_unlock(&local->key_mtx); } EXPORT_SYMBOL(ieee80211_iter_keys); @@ -1099,7 +1091,8 @@ void ieee80211_remove_link_keys(struct ieee80211_link_data *link, struct ieee80211_local *local = sdata->local; struct ieee80211_key *key, *tmp; - mutex_lock(&local->key_mtx); + lockdep_assert_wiphy(local->hw.wiphy); + list_for_each_entry_safe(key, tmp, &sdata->key_list, list) { if (key->conf.link_id != link->link_id) continue; @@ -1108,7 +1101,6 @@ void ieee80211_remove_link_keys(struct ieee80211_link_data *link, key, NULL); list_add_tail(&key->list, keys); } - mutex_unlock(&local->key_mtx); } void ieee80211_free_key_list(struct ieee80211_local *local, @@ -1116,10 +1108,10 @@ void ieee80211_free_key_list(struct ieee80211_local *local, { struct ieee80211_key *key, *tmp; - mutex_lock(&local->key_mtx); + lockdep_assert_wiphy(local->hw.wiphy); + list_for_each_entry_safe(key, tmp, keys, list) __ieee80211_key_destroy(key, false); - mutex_unlock(&local->key_mtx); } void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata, @@ -1131,9 +1123,10 @@ void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata, struct ieee80211_key *key, *tmp; LIST_HEAD(keys); - cancel_delayed_work_sync(&sdata->dec_tailroom_needed_wk); + wiphy_delayed_work_cancel(local->hw.wiphy, + &sdata->dec_tailroom_needed_wk); - mutex_lock(&local->key_mtx); + lockdep_assert_wiphy(local->hw.wiphy); ieee80211_free_keys_iface(sdata, &keys); @@ -1166,8 +1159,6 @@ void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata, WARN_ON_ONCE(vlan->crypto_tx_tailroom_needed_cnt || vlan->crypto_tx_tailroom_pending_dec); } - - mutex_unlock(&local->key_mtx); } void ieee80211_free_sta_keys(struct ieee80211_local *local, @@ -1176,9 +1167,10 @@ void ieee80211_free_sta_keys(struct ieee80211_local *local, struct ieee80211_key *key; int i; - mutex_lock(&local->key_mtx); + lockdep_assert_wiphy(local->hw.wiphy); + for (i = 0; i < ARRAY_SIZE(sta->deflink.gtk); i++) { - key = key_mtx_dereference(local, sta->deflink.gtk[i]); + key = wiphy_dereference(local->hw.wiphy, sta->deflink.gtk[i]); if (!key) continue; ieee80211_key_replace(key->sdata, NULL, key->sta, @@ -1189,7 +1181,7 @@ void ieee80211_free_sta_keys(struct ieee80211_local *local, } for (i = 0; i < NUM_DEFAULT_KEYS; i++) { - key = key_mtx_dereference(local, sta->ptk[i]); + key = wiphy_dereference(local->hw.wiphy, sta->ptk[i]); if (!key) continue; ieee80211_key_replace(key->sdata, NULL, key->sta, @@ -1198,11 +1190,10 @@ void ieee80211_free_sta_keys(struct ieee80211_local *local, __ieee80211_key_destroy(key, key->sdata->vif.type == NL80211_IFTYPE_STATION); } - - mutex_unlock(&local->key_mtx); } -void ieee80211_delayed_tailroom_dec(struct work_struct *wk) +void ieee80211_delayed_tailroom_dec(struct wiphy *wiphy, + struct wiphy_work *wk) { struct ieee80211_sub_if_data *sdata; @@ -1225,11 +1216,9 @@ void ieee80211_delayed_tailroom_dec(struct work_struct *wk) * within an ESS this usually won't happen. 
*/ - mutex_lock(&sdata->local->key_mtx); decrease_tailroom_need_count(sdata, sdata->crypto_tx_tailroom_pending_dec); sdata->crypto_tx_tailroom_pending_dec = 0; - mutex_unlock(&sdata->local->key_mtx); } void ieee80211_gtk_rekey_notify(struct ieee80211_vif *vif, const u8 *bssid, @@ -1358,7 +1347,7 @@ void ieee80211_remove_key(struct ieee80211_key_conf *keyconf) key = container_of(keyconf, struct ieee80211_key, conf); - assert_key_lock(key->local); + lockdep_assert_wiphy(key->local->hw.wiphy); /* * if key was uploaded, we assume the driver will/has remove(d) diff --git a/net/mac80211/key.h b/net/mac80211/key.h index f3df97df4b..1fa0f4f789 100644 --- a/net/mac80211/key.h +++ b/net/mac80211/key.h @@ -2,7 +2,7 @@ /* * Copyright 2002-2004, Instant802 Networks, Inc. * Copyright 2005, Devicescape Software, Inc. - * Copyright (C) 2019, 2022 Intel Corporation + * Copyright (C) 2019, 2022-2023 Intel Corporation */ #ifndef IEEE80211_KEY_H @@ -168,12 +168,7 @@ void ieee80211_reenable_keys(struct ieee80211_sub_if_data *sdata); int ieee80211_key_switch_links(struct ieee80211_sub_if_data *sdata, unsigned long del_links_mask, unsigned long add_links_mask); - -#define key_mtx_dereference(local, ref) \ - rcu_dereference_protected(ref, lockdep_is_held(&((local)->key_mtx))) -#define rcu_dereference_check_key_mtx(local, ref) \ - rcu_dereference_check(ref, lockdep_is_held(&((local)->key_mtx))) - -void ieee80211_delayed_tailroom_dec(struct work_struct *wk); +void ieee80211_delayed_tailroom_dec(struct wiphy *wiphy, + struct wiphy_work *wk); #endif /* IEEE80211_KEY_H */ diff --git a/net/mac80211/link.c b/net/mac80211/link.c index 16cbaea93f..bf7bd880d0 100644 --- a/net/mac80211/link.c +++ b/net/mac80211/link.c @@ -37,16 +37,16 @@ void ieee80211_link_init(struct ieee80211_sub_if_data *sdata, link_conf->link_id = link_id; link_conf->vif = &sdata->vif; - INIT_WORK(&link->csa_finalize_work, - ieee80211_csa_finalize_work); - INIT_WORK(&link->color_change_finalize_work, - ieee80211_color_change_finalize_work); + wiphy_work_init(&link->csa_finalize_work, + ieee80211_csa_finalize_work); + wiphy_work_init(&link->color_change_finalize_work, + ieee80211_color_change_finalize_work); INIT_DELAYED_WORK(&link->color_collision_detect_work, ieee80211_color_collision_detection_work); INIT_LIST_HEAD(&link->assigned_chanctx_list); INIT_LIST_HEAD(&link->reserved_chanctx_list); - INIT_DELAYED_WORK(&link->dfs_cac_timer_work, - ieee80211_dfs_cac_timer_work); + wiphy_delayed_work_init(&link->dfs_cac_timer_work, + ieee80211_dfs_cac_timer_work); if (!deflink) { switch (sdata->vif.type) { @@ -191,7 +191,7 @@ static int ieee80211_vif_update_links(struct ieee80211_sub_if_data *sdata, struct ieee80211_link_data *old_data[IEEE80211_MLD_MAX_NUM_LINKS]; bool use_deflink = old_links == 0; /* set for error case */ - sdata_assert_lock(sdata); + lockdep_assert_wiphy(sdata->local->hw.wiphy); memset(to_free, 0, sizeof(links)); @@ -235,6 +235,9 @@ static int ieee80211_vif_update_links(struct ieee80211_sub_if_data *sdata, RCU_INIT_POINTER(sdata->vif.link_conf[link_id], NULL); } + if (!old_links) + ieee80211_debugfs_recreate_netdev(sdata, true); + /* link them into data structures */ for_each_set_bit(link_id, &add, IEEE80211_MLD_MAX_NUM_LINKS) { WARN_ON(!use_deflink && @@ -261,6 +264,8 @@ static int ieee80211_vif_update_links(struct ieee80211_sub_if_data *sdata, old_links & old_active, new_links & sdata->vif.active_links, old); + if (!new_links) + ieee80211_debugfs_recreate_netdev(sdata, false); } if (ret) { @@ -303,23 +308,6 @@ int 
ieee80211_vif_set_links(struct ieee80211_sub_if_data *sdata, return ret; } -void ieee80211_vif_clear_links(struct ieee80211_sub_if_data *sdata) -{ - struct link_container *links[IEEE80211_MLD_MAX_NUM_LINKS]; - - /* - * The locking here is different because when we free links - * in the station case we need to be able to cancel_work_sync() - * something that also takes the lock. - */ - - sdata_lock(sdata); - ieee80211_vif_update_links(sdata, links, 0, 0); - sdata_unlock(sdata); - - ieee80211_free_links(sdata, links); -} - static int _ieee80211_set_active_links(struct ieee80211_sub_if_data *sdata, u16 active_links) { @@ -447,17 +435,15 @@ static int _ieee80211_set_active_links(struct ieee80211_sub_if_data *sdata, return 0; } -int __ieee80211_set_active_links(struct ieee80211_vif *vif, u16 active_links) +int ieee80211_set_active_links(struct ieee80211_vif *vif, u16 active_links) { struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); struct ieee80211_local *local = sdata->local; u16 old_active; int ret; - sdata_assert_lock(sdata); - mutex_lock(&local->sta_mtx); - mutex_lock(&local->mtx); - mutex_lock(&local->key_mtx); + lockdep_assert_wiphy(local->hw.wiphy); + old_active = sdata->vif.active_links; if (old_active & active_links) { /* @@ -473,21 +459,6 @@ int __ieee80211_set_active_links(struct ieee80211_vif *vif, u16 active_links) /* otherwise switch directly */ ret = _ieee80211_set_active_links(sdata, active_links); } - mutex_unlock(&local->key_mtx); - mutex_unlock(&local->mtx); - mutex_unlock(&local->sta_mtx); - - return ret; -} - -int ieee80211_set_active_links(struct ieee80211_vif *vif, u16 active_links) -{ - struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); - int ret; - - sdata_lock(sdata); - ret = __ieee80211_set_active_links(vif, active_links); - sdata_unlock(sdata); return ret; } @@ -512,6 +483,6 @@ void ieee80211_set_active_links_async(struct ieee80211_vif *vif, return; sdata->desired_active_links = active_links; - schedule_work(&sdata->activate_links_work); + wiphy_work_queue(sdata->local->hw.wiphy, &sdata->activate_links_work); } EXPORT_SYMBOL_GPL(ieee80211_set_active_links_async); diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 4548f84451..033a5261ac 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -84,7 +84,8 @@ void ieee80211_configure_filter(struct ieee80211_local *local) local->filter_flags = new_flags & ~(1<<31); } -static void ieee80211_reconfig_filter(struct work_struct *work) +static void ieee80211_reconfig_filter(struct wiphy *wiphy, + struct wiphy_work *work) { struct ieee80211_local *local = container_of(work, struct ieee80211_local, reconfig_filter); @@ -206,7 +207,8 @@ int ieee80211_hw_config(struct ieee80211_local *local, u32 changed) BSS_CHANGED_PS |\ BSS_CHANGED_IBSS |\ BSS_CHANGED_ARP_FILTER |\ - BSS_CHANGED_SSID) + BSS_CHANGED_SSID |\ + BSS_CHANGED_MLD_VALID_LINKS) void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata, u64 changed) @@ -317,7 +319,7 @@ static void ieee80211_tasklet_handler(struct tasklet_struct *t) break; case IEEE80211_TX_STATUS_MSG: skb->pkt_type = 0; - ieee80211_tx_status(&local->hw, skb); + ieee80211_tx_status_skb(&local->hw, skb); break; default: WARN(1, "mac80211: Packet is of unknown type %d\n", @@ -340,6 +342,7 @@ static void ieee80211_restart_work(struct work_struct *work) rtnl_lock(); /* we might do interface manipulations, so need both */ wiphy_lock(local->hw.wiphy); + wiphy_work_flush(local->hw.wiphy, NULL); WARN(test_bit(SCAN_HW_SCANNING, &local->scanning), "%s called with 
hardware scan in progress\n", __func__); @@ -363,15 +366,13 @@ static void ieee80211_restart_work(struct work_struct *work) */ wiphy_work_cancel(local->hw.wiphy, &sdata->u.mgd.csa_connection_drop_work); - if (sdata->vif.bss_conf.csa_active) { - sdata_lock(sdata); + if (sdata->vif.bss_conf.csa_active) ieee80211_sta_connection_lost(sdata, WLAN_REASON_UNSPECIFIED, false); - sdata_unlock(sdata); - } } - flush_delayed_work(&sdata->dec_tailroom_needed_wk); + wiphy_delayed_work_flush(local->hw.wiphy, + &sdata->dec_tailroom_needed_wk); } ieee80211_scan_cancel(local); @@ -436,7 +437,7 @@ static int ieee80211_ifa_changed(struct notifier_block *nb, if (!wdev) return NOTIFY_DONE; - if (wdev->wiphy != local->hw.wiphy) + if (wdev->wiphy != local->hw.wiphy || !wdev->registered) return NOTIFY_DONE; sdata = IEEE80211_DEV_TO_SUB_IF(ndev); @@ -451,7 +452,25 @@ static int ieee80211_ifa_changed(struct notifier_block *nb, return NOTIFY_DONE; ifmgd = &sdata->u.mgd; - sdata_lock(sdata); + + /* + * The nested here is needed to convince lockdep that this is + * all OK. Yes, we lock the wiphy mutex here while we already + * hold the notifier rwsem, that's the normal case. And yes, + * we also acquire the notifier rwsem again when unregistering + * a netdev while we already hold the wiphy mutex, so it does + * look like a typical ABBA deadlock. + * + * However, both of these things happen with the RTNL held + * already. Therefore, they can't actually happen, since the + * lock orders really are ABC and ACB, which is fine due to + * the RTNL (A). + * + * We still need to prevent recursion, which is accomplished + * by the !wdev->registered check above. + */ + mutex_lock_nested(&local->hw.wiphy->mtx, 1); + __acquire(&local->hw.wiphy->mtx); /* Copy the addresses to the vif config list */ ifa = rtnl_dereference(idev->ifa_list); @@ -468,7 +487,7 @@ static int ieee80211_ifa_changed(struct notifier_block *nb, if (ifmgd->associated) ieee80211_vif_cfg_change_notify(sdata, BSS_CHANGED_ARP_FILTER); - sdata_unlock(sdata); + wiphy_unlock(local->hw.wiphy); return NOTIFY_OK; } @@ -781,9 +800,6 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len, __hw_addr_init(&local->mc_list); mutex_init(&local->iflist_mtx); - mutex_init(&local->mtx); - - mutex_init(&local->key_mtx); spin_lock_init(&local->filter_lock); spin_lock_init(&local->rx_path_lock); spin_lock_init(&local->queue_stop_reason_lock); @@ -804,7 +820,6 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len, spin_lock_init(&local->handle_wake_tx_queue_lock); INIT_LIST_HEAD(&local->chanctx_list); - mutex_init(&local->chanctx_mtx); wiphy_delayed_work_init(&local->scan_work, ieee80211_scan_work); @@ -813,13 +828,13 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len, wiphy_work_init(&local->radar_detected_work, ieee80211_dfs_radar_detected_work); - INIT_WORK(&local->reconfig_filter, ieee80211_reconfig_filter); + wiphy_work_init(&local->reconfig_filter, ieee80211_reconfig_filter); local->smps_mode = IEEE80211_SMPS_OFF; - INIT_WORK(&local->dynamic_ps_enable_work, - ieee80211_dynamic_ps_enable_work); - INIT_WORK(&local->dynamic_ps_disable_work, - ieee80211_dynamic_ps_disable_work); + wiphy_work_init(&local->dynamic_ps_enable_work, + ieee80211_dynamic_ps_enable_work); + wiphy_work_init(&local->dynamic_ps_disable_work, + ieee80211_dynamic_ps_disable_work); timer_setup(&local->dynamic_ps_timer, ieee80211_dynamic_ps_timer, 0); wiphy_work_init(&local->sched_scan_stopped_work, @@ -1052,6 +1067,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) 
supp_he = false; supp_eht = false; for (band = 0; band < NUM_NL80211_BANDS; band++) { + const struct ieee80211_sband_iftype_data *iftd; struct ieee80211_supported_band *sband; sband = local->hw.wiphy->bands[band]; @@ -1098,11 +1114,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) supp_ht = supp_ht || sband->ht_cap.ht_supported; supp_vht = supp_vht || sband->vht_cap.vht_supported; - for (i = 0; i < sband->n_iftype_data; i++) { - const struct ieee80211_sband_iftype_data *iftd; - - iftd = &sband->iftype_data[i]; - + for_each_sband_iftype_data(sband, i, iftd) { supp_he = supp_he || iftd->he_cap.has_he; supp_eht = supp_eht || iftd->eht_cap.has_eht; } @@ -1443,6 +1455,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) ieee80211_remove_interfaces(local); rtnl_unlock(); fail_rate: + ieee80211_txq_teardown_flows(local); fail_flows: ieee80211_led_exit(local); destroy_workqueue(local->workqueue); @@ -1479,15 +1492,17 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw) */ ieee80211_remove_interfaces(local); + ieee80211_txq_teardown_flows(local); + wiphy_lock(local->hw.wiphy); wiphy_delayed_work_cancel(local->hw.wiphy, &local->roc_work); + wiphy_work_cancel(local->hw.wiphy, &local->reconfig_filter); wiphy_work_cancel(local->hw.wiphy, &local->sched_scan_stopped_work); wiphy_work_cancel(local->hw.wiphy, &local->radar_detected_work); wiphy_unlock(local->hw.wiphy); rtnl_unlock(); cancel_work_sync(&local->restart_work); - cancel_work_sync(&local->reconfig_filter); ieee80211_clear_tx_pending(local); rate_control_deinitialize(local); @@ -1518,7 +1533,6 @@ void ieee80211_free_hw(struct ieee80211_hw *hw) enum nl80211_band band; mutex_destroy(&local->iflist_mtx); - mutex_destroy(&local->mtx); if (local->wiphy_ciphers_allocated) { kfree(local->hw.wiphy->cipher_suites); diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index e31c312c12..fccbcde335 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -56,6 +56,8 @@ static void ieee80211_mesh_housekeeping_timer(struct timer_list *t) * * This function checks if the mesh configuration of a mesh point matches the * local mesh configuration, i.e. if both nodes belong to the same mesh network. + * + * Returns: %true if both nodes belong to the same mesh */ bool mesh_matches_local(struct ieee80211_sub_if_data *sdata, struct ieee802_11_elems *ie) @@ -119,6 +121,8 @@ bool mesh_matches_local(struct ieee80211_sub_if_data *sdata, * mesh_peer_accepts_plinks - check if an mp is willing to establish peer links * * @ie: information elements of a management frame from the mesh peer + * + * Returns: %true if the mesh peer is willing to establish peer links */ bool mesh_peer_accepts_plinks(struct ieee802_11_elems *ie) { @@ -858,7 +862,7 @@ bool ieee80211_mesh_xmit_fast(struct ieee80211_sub_if_data *sdata, * @meshsa: source address in the mesh. Same as TA, as frame is * locally originated. * - * Return the length of the 802.11 (does not include a mesh control header) + * Returns: the length of the 802.11 frame header (excludes mesh control header) */ int ieee80211_fill_mesh_addresses(struct ieee80211_hdr *hdr, __le16 *fc, const u8 *meshda, const u8 *meshsa) @@ -891,7 +895,7 @@ int ieee80211_fill_mesh_addresses(struct ieee80211_hdr *hdr, __le16 *fc, * @addr6: 2nd address in the ae header, which corresponds to addr6 of the * mesh frame * - * Return the header length. 
+ * Returns: the header length */ unsigned int ieee80211_new_mesh_header(struct ieee80211_sub_if_data *sdata, struct ieee80211s_hdr *meshhdr, @@ -1291,7 +1295,7 @@ ieee80211_mesh_process_chnswitch(struct ieee80211_sub_if_data *sdata, ieee80211_conn_flags_t conn_flags = 0; u32 vht_cap_info = 0; - sdata_assert_lock(sdata); + lockdep_assert_wiphy(sdata->local->hw.wiphy); sband = ieee80211_get_sband(sdata); if (!sband) @@ -1559,7 +1563,7 @@ int ieee80211_mesh_csa_beacon(struct ieee80211_sub_if_data *sdata, struct mesh_csa_settings *tmp_csa_settings; int ret = 0; - lockdep_assert_held(&sdata->wdev.mtx); + lockdep_assert_wiphy(sdata->local->hw.wiphy); tmp_csa_settings = kmalloc(sizeof(*tmp_csa_settings), GFP_ATOMIC); @@ -1691,11 +1695,11 @@ void ieee80211_mesh_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt; u16 stype; - sdata_lock(sdata); + lockdep_assert_wiphy(sdata->local->hw.wiphy); /* mesh already went down */ if (!sdata->u.mesh.mesh_id_len) - goto out; + return; rx_status = IEEE80211_SKB_RXCB(skb); mgmt = (struct ieee80211_mgmt *) skb->data; @@ -1714,8 +1718,6 @@ void ieee80211_mesh_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, ieee80211_mesh_rx_mgmt_action(sdata, mgmt, skb->len, rx_status); break; } -out: - sdata_unlock(sdata); } static void mesh_bss_info_changed(struct ieee80211_sub_if_data *sdata) @@ -1745,11 +1747,11 @@ void ieee80211_mesh_work(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; - sdata_lock(sdata); + lockdep_assert_wiphy(sdata->local->hw.wiphy); /* mesh already went down */ if (!sdata->u.mesh.mesh_id_len) - goto out; + return; if (ifmsh->preq_queue_len && time_after(jiffies, @@ -1767,8 +1769,6 @@ void ieee80211_mesh_work(struct ieee80211_sub_if_data *sdata) if (test_and_clear_bit(MESH_WORK_MBSS_CHANGED, &ifmsh->wrkq_flags)) mesh_bss_info_changed(sdata); -out: - sdata_unlock(sdata); } diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 5136907298..775d52561c 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -230,6 +230,8 @@ static void prepare_frame_for_deferred_tx(struct ieee80211_sub_if_data *sdata, * Note: This function may be called with driver locks taken that the driver * also acquires in the TX path. To avoid a deadlock we don't transmit the * frame directly but add it to the pending queue instead. + * + * Returns: 0 on success */ int mesh_path_error_tx(struct ieee80211_sub_if_data *sdata, u8 ttl, const u8 *target, u32 target_sn, diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 3e52aaa57b..8a3f44ce3e 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2008, 2009 open80211s Ltd. + * Copyright (C) 2023 Intel Corporation * Author: Luis Carlos Cobo <luisca@cozybit.com> */ @@ -173,6 +174,11 @@ static void prepare_for_gate(struct sk_buff *skb, char *dst_addr, /** * mesh_path_move_to_queue - Move or copy frames from one mpath queue to another * + * @gate_mpath: An active mpath the frames will be sent to (i.e. the gate) + * @from_mpath: The failed mpath + * @copy: When true, copy all the frames to the new mpath queue. When false, + * move them. + * * This function is used to transfer or copy frames from an unresolved mpath to * a gate mpath. The function also adds the Address Extension field and * updates the next hop. 
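The mesh hunks in this area normalize kernel-doc: @parameter lines move directly under the one-line summary, and return values get an explicit "Returns:" section so scripts/kernel-doc can attach the return documentation. A hypothetical helper documented in that layout (example_count_peers does not exist in net/mac80211; only the comment format follows the kernel-doc convention):

/**
 * example_count_peers - count established peers on a mesh interface
 * @sdata: interface data to match
 * @max: stop counting after this many peers
 *
 * Walks the station list and counts entries whose peer link is in the
 * ESTABLISHED state.
 *
 * Returns: the number of established peers, at most @max
 */
unsigned int example_count_peers(struct ieee80211_sub_if_data *sdata,
				 unsigned int max);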
@@ -181,11 +187,6 @@ static void prepare_for_gate(struct sk_buff *skb, char *dst_addr, * destination addresses are updated. * * The gate mpath must be an active mpath with a valid mpath->next_hop. - * - * @gate_mpath: An active mpath the frames will be sent to (i.e. the gate) - * @from_mpath: The failed mpath - * @copy: When true, copy all the frames to the new mpath queue. When false, - * move them. */ static void mesh_path_move_to_queue(struct mesh_path *gate_mpath, struct mesh_path *from_mpath, @@ -330,6 +331,8 @@ mpp_path_lookup_by_idx(struct ieee80211_sub_if_data *sdata, int idx) /** * mesh_path_add_gate - add the given mpath to a mesh gate to our path table * @mpath: gate path to add to table + * + * Returns: 0 on success, -EEXIST */ int mesh_path_add_gate(struct mesh_path *mpath) { @@ -388,6 +391,8 @@ static void mesh_gate_del(struct mesh_table *tbl, struct mesh_path *mpath) /** * mesh_gate_num - number of gates known to this interface * @sdata: subif data + * + * Returns: The number of gates */ int mesh_gate_num(struct ieee80211_sub_if_data *sdata) { @@ -861,10 +866,9 @@ static void table_flush_by_iface(struct mesh_table *tbl) /** * mesh_path_flush_by_iface - Deletes all mesh paths associated with a given iface * - * This function deletes both mesh paths as well as mesh portal paths. - * * @sdata: interface data to match * + * This function deletes both mesh paths as well as mesh portal paths. */ void mesh_path_flush_by_iface(struct ieee80211_sub_if_data *sdata) { @@ -944,6 +948,8 @@ void mesh_path_tx_pending(struct mesh_path *mpath) * queue to that gate's queue. If there are more than one gates, the frames * are copied from each gate to the next. After frames are copied, the * mpath queues are emptied onto the transmission queue. + * + * Returns: 0 on success, -EHOSTUNREACH */ int mesh_path_send_to_gates(struct mesh_path *mpath) { diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index cc62c2a01f..28bf794f67 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -153,6 +153,8 @@ out: * selected if any non-HT peers are present in our MBSS. 20MHz-protection mode * is selected if all peers in our 20/40MHz MBSS support HT and at least one * HT20 peer is present. Otherwise no-protection mode is selected. + * + * Returns: BSS_CHANGED_HT or 0 for no change */ static u64 mesh_set_ht_prot_mode(struct ieee80211_sub_if_data *sdata) { @@ -362,7 +364,7 @@ free: * Mesh paths with this peer as next hop should be flushed * by the caller outside of plink_lock. * - * Returns beacon changed flag if the beacon content changed. + * Returns: beacon changed flag if the beacon content changed. * * Locking: the caller must hold sta->mesh->plink_lock */ @@ -390,6 +392,8 @@ static u64 __mesh_plink_deactivate(struct sta_info *sta) * @sta: mesh peer link to deactivate * * All mesh paths with this peer as next hop will be flushed + * + * Returns: beacon changed flag if the beacon content changed. 
*/ u64 mesh_plink_deactivate(struct sta_info *sta) { diff --git a/net/mac80211/mesh_ps.c b/net/mac80211/mesh_ps.c index 35eacca43e..20e022a039 100644 --- a/net/mac80211/mesh_ps.c +++ b/net/mac80211/mesh_ps.c @@ -15,6 +15,8 @@ /** * mps_qos_null_get - create pre-addressed QoS Null frame for mesh powersave * @sta: the station to get the frame for + * + * Returns: A newly allocated SKB */ static struct sk_buff *mps_qos_null_get(struct sta_info *sta) { @@ -77,6 +79,8 @@ static void mps_qos_null_tx(struct sta_info *sta) * * sets the non-peer power mode and triggers the driver PS (re-)configuration * Return BSS_CHANGED_BEACON if a beacon update is necessary. + * + * Returns: BSS_CHANGED_BEACON if a beacon update is in order. */ u64 ieee80211_mps_local_status_update(struct ieee80211_sub_if_data *sdata) { @@ -147,7 +151,7 @@ u64 ieee80211_mps_local_status_update(struct ieee80211_sub_if_data *sdata) * * @sta: mesh STA * @pm: the power mode to set - * Return BSS_CHANGED_BEACON if a beacon update is in order. + * Returns: BSS_CHANGED_BEACON if a beacon update is in order. */ u64 ieee80211_mps_set_sta_local_pm(struct sta_info *sta, enum nl80211_mesh_power_mode pm) diff --git a/net/mac80211/mesh_sync.c b/net/mac80211/mesh_sync.c index 9e342cc250..8cf3f395f5 100644 --- a/net/mac80211/mesh_sync.c +++ b/net/mac80211/mesh_sync.c @@ -3,7 +3,7 @@ * Copyright 2011-2012, Pavel Zubarev <pavel.zubarev@gmail.com> * Copyright 2011-2012, Marco Porsch <marco.porsch@s2005.tu-chemnitz.de> * Copyright 2011-2012, cozybit Inc. - * Copyright (C) 2021 Intel Corporation + * Copyright (C) 2021,2023 Intel Corporation */ #include "ieee80211_i.h" @@ -37,6 +37,8 @@ struct sync_method { * mesh_peer_tbtt_adjusting - check if an mp is currently adjusting its TBTT * * @cfg: mesh config element from the mesh peer (or %NULL) + * + * Returns: %true if the mesh peer is currently adjusting its TBTT */ static bool mesh_peer_tbtt_adjusting(const struct ieee80211_meshconf_ie *cfg) { diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 73f8df03d1..241e615189 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -8,7 +8,7 @@ * Copyright 2007, Michael Wu <flamingice@sourmilk.net> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright (C) 2015 - 2017 Intel Deutschland GmbH - * Copyright (C) 2018 - 2023 Intel Corporation + * Copyright (C) 2018 - 2024 Intel Corporation */ #include <linux/delay.h> @@ -43,6 +43,9 @@ #define IEEE80211_ASSOC_TIMEOUT_SHORT (HZ / 10) #define IEEE80211_ASSOC_MAX_TRIES 3 +#define IEEE80211_ADV_TTLM_SAFETY_BUFFER_MS msecs_to_jiffies(100) +#define IEEE80211_ADV_TTLM_ST_UNDERFLOW 0xff00 + static int max_nullfunc_tries = 2; module_param(max_nullfunc_tries, int, 0644); MODULE_PARM_DESC(max_nullfunc_tries, @@ -110,7 +113,8 @@ ieee80211_extract_dis_subch_bmap(const struct ieee80211_eht_operation *eht_oper, return 0; /* set 160/320 supported to get the full AP definition */ - ieee80211_chandef_eht_oper(eht_oper, true, true, &ap_chandef); + ieee80211_chandef_eht_oper((const void *)eht_oper->optional, + true, true, &ap_chandef); ap_center_freq = ap_chandef.center_freq1; ap_bw = 20 * BIT(u8_get_bits(info->control, IEEE80211_EHT_OPER_CHAN_WIDTH)); @@ -175,7 +179,7 @@ ieee80211_handle_puncturing_bitmap(struct ieee80211_link_data *link, static void run_again(struct ieee80211_sub_if_data *sdata, unsigned long timeout) { - sdata_assert_lock(sdata); + lockdep_assert_wiphy(sdata->local->hw.wiphy); if (!timer_pending(&sdata->u.mgd.timer) || time_before(timeout, sdata->u.mgd.timer.expires)) @@ -388,7 +392,7 @@
ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, if (eht_oper && (eht_oper->params & IEEE80211_EHT_OPER_INFO_PRESENT)) { struct cfg80211_chan_def eht_chandef = *chandef; - ieee80211_chandef_eht_oper(eht_oper, + ieee80211_chandef_eht_oper((const void *)eht_oper->optional, eht_chandef.width == NL80211_CHAN_WIDTH_160, false, &eht_chandef); @@ -830,7 +834,6 @@ static void ieee80211_assoc_add_rates(struct sk_buff *skb, struct ieee80211_supported_band *sband, struct ieee80211_mgd_assoc_data *assoc_data) { - unsigned int shift = ieee80211_chanwidth_get_shift(width); unsigned int rates_len, supp_rates_len; u32 rates = 0; int i, count; @@ -869,8 +872,7 @@ static void ieee80211_assoc_add_rates(struct sk_buff *skb, count = 0; for (i = 0; i < sband->n_bitrates; i++) { if (BIT(i) & rates) { - int rate = DIV_ROUND_UP(sband->bitrates[i].bitrate, - 5 * (1 << shift)); + int rate = DIV_ROUND_UP(sband->bitrates[i].bitrate, 5); *pos++ = (u8)rate; if (++count == 8) break; @@ -886,8 +888,7 @@ static void ieee80211_assoc_add_rates(struct sk_buff *skb, if (BIT(i) & rates) { int rate; - rate = DIV_ROUND_UP(sband->bitrates[i].bitrate, - 5 * (1 << shift)); + rate = DIV_ROUND_UP(sband->bitrates[i].bitrate, 5); *pos++ = (u8)rate; } } @@ -1401,7 +1402,7 @@ static int ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) assoc_data->ie, assoc_data->ie_len); - sdata_assert_lock(sdata); + lockdep_assert_wiphy(sdata->local->hw.wiphy); size = local->hw.extra_tx_headroom + sizeof(*mgmt) + /* bit too much but doesn't matter */ @@ -1586,6 +1587,7 @@ static int ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) ifmgd->assoc_req_ies_len = pos - ie_start; + info.link_id = assoc_data->assoc_link_id; drv_mgd_prepare_tx(local, sdata, &info); IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; @@ -1689,15 +1691,13 @@ static void ieee80211_chswitch_work(struct wiphy *wiphy, if (!ieee80211_sdata_running(sdata)) return; - sdata_lock(sdata); - mutex_lock(&local->mtx); - mutex_lock(&local->chanctx_mtx); + lockdep_assert_wiphy(local->hw.wiphy); if (!ifmgd->associated) - goto out; + return; if (!link->conf->csa_active) - goto out; + return; /* * using reservation isn't immediate as it may be deferred until later @@ -1713,7 +1713,7 @@ static void ieee80211_chswitch_work(struct wiphy *wiphy, * reservations */ if (link->reserved_ready) - goto out; + return; ret = ieee80211_link_use_reserved_context(link); if (ret) { @@ -1722,10 +1722,8 @@ static void ieee80211_chswitch_work(struct wiphy *wiphy, ret); wiphy_work_queue(sdata->local->hw.wiphy, &ifmgd->csa_connection_drop_work); - goto out; } - - goto out; + return; } if (!cfg80211_chandef_identical(&link->conf->chandef, @@ -1734,18 +1732,13 @@ static void ieee80211_chswitch_work(struct wiphy *wiphy, "failed to finalize channel switch, disconnecting\n"); wiphy_work_queue(sdata->local->hw.wiphy, &ifmgd->csa_connection_drop_work); - goto out; + return; } link->u.mgd.csa_waiting_bcn = true; ieee80211_sta_reset_beacon_monitor(sdata); ieee80211_sta_reset_conn_monitor(sdata); - -out: - mutex_unlock(&local->chanctx_mtx); - mutex_unlock(&local->mtx); - sdata_unlock(sdata); } static void ieee80211_chswitch_post_beacon(struct ieee80211_link_data *link) @@ -1755,7 +1748,7 @@ static void ieee80211_chswitch_post_beacon(struct ieee80211_link_data *link) struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; int ret; - sdata_assert_lock(sdata); + lockdep_assert_wiphy(sdata->local->hw.wiphy); WARN_ON(!link->conf->csa_active); @@ -1773,7 +1766,7 @@ static void 
ieee80211_chswitch_post_beacon(struct ieee80211_link_data *link) */ link->u.mgd.beacon_crc_valid = false; - ret = drv_post_channel_switch(sdata); + ret = drv_post_channel_switch(link); if (ret) { sdata_info(sdata, "driver post channel switch failed, disconnecting\n"); @@ -1782,28 +1775,38 @@ static void ieee80211_chswitch_post_beacon(struct ieee80211_link_data *link) return; } - cfg80211_ch_switch_notify(sdata->dev, &link->reserved_chandef, 0, 0); + cfg80211_ch_switch_notify(sdata->dev, &link->reserved_chandef, + link->link_id, 0); } -void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success) +void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success, + unsigned int link_id) { struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); - struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - if (WARN_ON(ieee80211_vif_is_mld(&sdata->vif))) - success = false; + trace_api_chswitch_done(sdata, success, link_id); + + rcu_read_lock(); - trace_api_chswitch_done(sdata, success); if (!success) { sdata_info(sdata, "driver channel switch failed, disconnecting\n"); wiphy_work_queue(sdata->local->hw.wiphy, - &ifmgd->csa_connection_drop_work); + &sdata->u.mgd.csa_connection_drop_work); } else { + struct ieee80211_link_data *link = + rcu_dereference(sdata->link[link_id]); + + if (WARN_ON(!link)) { + rcu_read_unlock(); + return; + } + wiphy_delayed_work_queue(sdata->local->hw.wiphy, - &sdata->deflink.u.mgd.chswitch_work, - 0); + &link->u.mgd.chswitch_work, 0); } + + rcu_read_unlock(); } EXPORT_SYMBOL(ieee80211_chswitch_done); @@ -1813,14 +1816,12 @@ ieee80211_sta_abort_chanswitch(struct ieee80211_link_data *link) struct ieee80211_sub_if_data *sdata = link->sdata; struct ieee80211_local *local = sdata->local; + lockdep_assert_wiphy(local->hw.wiphy); + if (!local->ops->abort_channel_switch) return; - mutex_lock(&local->mtx); - - mutex_lock(&local->chanctx_mtx); ieee80211_link_unreserve_chanctx(link); - mutex_unlock(&local->chanctx_mtx); if (link->csa_block_tx) ieee80211_wake_vif_queues(local, sdata, @@ -1829,8 +1830,6 @@ ieee80211_sta_abort_chanswitch(struct ieee80211_link_data *link) link->csa_block_tx = false; link->conf->csa_active = false; - mutex_unlock(&local->mtx); - drv_abort_channel_switch(sdata); } @@ -1853,7 +1852,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, unsigned long timeout; int res; - sdata_assert_lock(sdata); + lockdep_assert_wiphy(local->hw.wiphy); if (!cbss) return; @@ -1875,7 +1874,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, } if (res < 0) - goto lock_and_drop_connection; + goto drop_connection; if (beacon && link->conf->csa_active && !link->u.mgd.csa_waiting_bcn) { @@ -1897,7 +1896,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, csa_ie.chandef.chan->center_freq, csa_ie.chandef.width, csa_ie.chandef.center_freq1, csa_ie.chandef.center_freq2); - goto lock_and_drop_connection; + goto drop_connection; } if (!cfg80211_chandef_usable(local->hw.wiphy, &csa_ie.chandef, @@ -1912,7 +1911,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, csa_ie.chandef.width, csa_ie.chandef.center_freq1, csa_ie.chandef.freq1_offset, csa_ie.chandef.center_freq2); - goto lock_and_drop_connection; + goto drop_connection; } if (cfg80211_chandef_identical(&csa_ie.chandef, @@ -1935,10 +1934,8 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, */ ieee80211_teardown_tdls_peers(sdata); - mutex_lock(&local->mtx); - mutex_lock(&local->chanctx_mtx); conf = 
rcu_dereference_protected(link->conf->chanctx_conf, - lockdep_is_held(&local->chanctx_mtx)); + lockdep_is_held(&local->hw.wiphy->mtx)); if (!conf) { sdata_info(sdata, "no channel context assigned to vif?, disconnecting\n"); @@ -1968,7 +1965,6 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, res); goto drop_connection; } - mutex_unlock(&local->chanctx_mtx); link->conf->csa_active = true; link->csa_chandef = csa_ie.chandef; @@ -1979,7 +1975,6 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, if (link->csa_block_tx) ieee80211_stop_vif_queues(local, sdata, IEEE80211_QUEUE_STOP_REASON_CSA); - mutex_unlock(&local->mtx); cfg80211_ch_switch_started_notify(sdata->dev, &csa_ie.chandef, link->link_id, csa_ie.count, @@ -1998,9 +1993,6 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, &link->u.mgd.chswitch_work, timeout); return; - lock_and_drop_connection: - mutex_lock(&local->mtx); - mutex_lock(&local->chanctx_mtx); drop_connection: /* * This is just so that the disconnect flow will know that @@ -2014,8 +2006,6 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, wiphy_work_queue(sdata->local->hw.wiphy, &ifmgd->csa_connection_drop_work); - mutex_unlock(&local->chanctx_mtx); - mutex_unlock(&local->mtx); } static bool @@ -2211,7 +2201,8 @@ static void ieee80211_change_ps(struct ieee80211_local *local) conf->flags &= ~IEEE80211_CONF_PS; ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); del_timer_sync(&local->dynamic_ps_timer); - cancel_work_sync(&local->dynamic_ps_enable_work); + wiphy_work_cancel(local->hw.wiphy, + &local->dynamic_ps_enable_work); } } @@ -2308,7 +2299,8 @@ void ieee80211_recalc_ps_vif(struct ieee80211_sub_if_data *sdata) } } -void ieee80211_dynamic_ps_disable_work(struct work_struct *work) +void ieee80211_dynamic_ps_disable_work(struct wiphy *wiphy, + struct wiphy_work *work) { struct ieee80211_local *local = container_of(work, struct ieee80211_local, @@ -2325,7 +2317,8 @@ void ieee80211_dynamic_ps_disable_work(struct work_struct *work) false); } -void ieee80211_dynamic_ps_enable_work(struct work_struct *work) +void ieee80211_dynamic_ps_enable_work(struct wiphy *wiphy, + struct wiphy_work *work) { struct ieee80211_local *local = container_of(work, struct ieee80211_local, @@ -2398,26 +2391,25 @@ void ieee80211_dynamic_ps_timer(struct timer_list *t) { struct ieee80211_local *local = from_timer(local, t, dynamic_ps_timer); - ieee80211_queue_work(&local->hw, &local->dynamic_ps_enable_work); + wiphy_work_queue(local->hw.wiphy, &local->dynamic_ps_enable_work); } -void ieee80211_dfs_cac_timer_work(struct work_struct *work) +void ieee80211_dfs_cac_timer_work(struct wiphy *wiphy, struct wiphy_work *work) { - struct delayed_work *delayed_work = to_delayed_work(work); struct ieee80211_link_data *link = - container_of(delayed_work, struct ieee80211_link_data, - dfs_cac_timer_work); + container_of(work, struct ieee80211_link_data, + dfs_cac_timer_work.work); struct cfg80211_chan_def chandef = link->conf->chandef; struct ieee80211_sub_if_data *sdata = link->sdata; - mutex_lock(&sdata->local->mtx); + lockdep_assert_wiphy(sdata->local->hw.wiphy); + if (sdata->wdev.cac_started) { ieee80211_link_release_channel(link); cfg80211_cac_event(sdata->dev, &chandef, NL80211_RADAR_CAC_FINISHED, GFP_KERNEL); } - mutex_unlock(&sdata->local->mtx); } static bool @@ -2487,8 +2479,10 @@ __ieee80211_sta_handle_tspec_ac_params(struct ieee80211_sub_if_data *sdata) ac); tx_tspec->action = TX_TSPEC_ACTION_NONE; ret = true; - 
schedule_delayed_work(&ifmgd->tx_tspec_wk, - tx_tspec->time_slice_start + HZ - now + 1); + wiphy_delayed_work_queue(local->hw.wiphy, + &ifmgd->tx_tspec_wk, + tx_tspec->time_slice_start + + HZ - now + 1); break; case TX_TSPEC_ACTION_NONE: /* nothing now */ @@ -2506,7 +2500,8 @@ void ieee80211_sta_handle_tspec_ac_params(struct ieee80211_sub_if_data *sdata) BSS_CHANGED_QOS); } -static void ieee80211_sta_handle_tspec_ac_params_wk(struct work_struct *work) +static void ieee80211_sta_handle_tspec_ac_params_wk(struct wiphy *wiphy, + struct wiphy_work *work) { struct ieee80211_sub_if_data *sdata; @@ -2681,7 +2676,7 @@ ieee80211_sta_wmm_params(struct ieee80211_local *local, static void __ieee80211_stop_poll(struct ieee80211_sub_if_data *sdata) { - lockdep_assert_held(&sdata->local->mtx); + lockdep_assert_wiphy(sdata->local->hw.wiphy); sdata->u.mgd.flags &= ~IEEE80211_STA_CONNECTION_POLL; ieee80211_run_deferred_scan(sdata->local); @@ -2689,9 +2684,9 @@ static void __ieee80211_stop_poll(struct ieee80211_sub_if_data *sdata) static void ieee80211_stop_poll(struct ieee80211_sub_if_data *sdata) { - mutex_lock(&sdata->local->mtx); + lockdep_assert_wiphy(sdata->local->hw.wiphy); + __ieee80211_stop_poll(sdata); - mutex_unlock(&sdata->local->mtx); } static u64 ieee80211_handle_bss_capability(struct ieee80211_link_data *link, @@ -2809,6 +2804,8 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, u64 vif_changed = BSS_CHANGED_ASSOC; unsigned int link_id; + lockdep_assert_wiphy(local->hw.wiphy); + sdata->u.mgd.associated = true; for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) { @@ -2870,9 +2867,7 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, vif_changed | changed[0]); } - mutex_lock(&local->iflist_mtx); ieee80211_recalc_ps(local); - mutex_unlock(&local->iflist_mtx); /* leave this here to not change ordering in non-MLO cases */ if (!ieee80211_vif_is_mld(&sdata->vif)) @@ -2894,7 +2889,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, .subtype = stype, }; - sdata_assert_lock(sdata); + lockdep_assert_wiphy(local->hw.wiphy); if (WARN_ON_ONCE(tx && !frame_buf)) return; @@ -2908,6 +2903,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, /* other links will be destroyed */ sdata->deflink.u.mgd.bss = NULL; + sdata->deflink.smps_mode = IEEE80211_SMPS_OFF; netif_carrier_off(sdata->dev); @@ -2945,9 +2941,22 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, * deauthentication frame by calling mgd_prepare_tx, if the * driver requested so. 
*/ - if (ieee80211_hw_check(&local->hw, DEAUTH_NEED_MGD_TX_PREP) && - !sdata->deflink.u.mgd.have_beacon) { - drv_mgd_prepare_tx(sdata->local, sdata, &info); + if (ieee80211_hw_check(&local->hw, DEAUTH_NEED_MGD_TX_PREP)) { + for (link_id = 0; link_id < ARRAY_SIZE(sdata->link); + link_id++) { + struct ieee80211_link_data *link; + + link = sdata_dereference(sdata->link[link_id], + sdata); + if (!link) + continue; + if (link->u.mgd.have_beacon) + break; + } + if (link_id == IEEE80211_MLD_MAX_NUM_LINKS) { + info.link_id = ffs(sdata->vif.active_links) - 1; + drv_mgd_prepare_tx(sdata->local, sdata, &info); + } } ieee80211_send_deauth_disassoc(sdata, sdata->vif.cfg.ap_addr, @@ -3003,7 +3012,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, sdata->deflink.ap_power_level = IEEE80211_UNSET_POWER_LEVEL; del_timer_sync(&local->dynamic_ps_timer); - cancel_work_sync(&local->dynamic_ps_enable_work); + wiphy_work_cancel(local->hw.wiphy, &local->dynamic_ps_enable_work); /* Disable ARP filtering */ if (sdata->vif.cfg.arp_addr_cnt) @@ -3035,7 +3044,6 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, ifmgd->flags = 0; sdata->deflink.u.mgd.conn_flags = 0; - mutex_lock(&local->mtx); for (link_id = 0; link_id < ARRAY_SIZE(sdata->link); link_id++) { struct ieee80211_link_data *link; @@ -3054,17 +3062,19 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, IEEE80211_QUEUE_STOP_REASON_CSA); sdata->deflink.csa_block_tx = false; } - mutex_unlock(&local->mtx); /* existing TX TSPEC sessions no longer exist */ memset(ifmgd->tx_tspec, 0, sizeof(ifmgd->tx_tspec)); - cancel_delayed_work_sync(&ifmgd->tx_tspec_wk); + wiphy_delayed_work_cancel(local->hw.wiphy, &ifmgd->tx_tspec_wk); sdata->vif.bss_conf.pwr_reduction = 0; sdata->vif.bss_conf.tx_pwr_env_num = 0; memset(sdata->vif.bss_conf.tx_pwr_env, 0, sizeof(sdata->vif.bss_conf.tx_pwr_env)); + memset(&sdata->u.mgd.ttlm_info, 0, + sizeof(sdata->u.mgd.ttlm_info)); + wiphy_delayed_work_cancel(sdata->local->hw.wiphy, &ifmgd->ttlm_work); ieee80211_vif_set_links(sdata, 0, 0); } @@ -3073,18 +3083,17 @@ static void ieee80211_reset_ap_probe(struct ieee80211_sub_if_data *sdata) struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_local *local = sdata->local; - mutex_lock(&local->mtx); + lockdep_assert_wiphy(local->hw.wiphy); + if (!(ifmgd->flags & IEEE80211_STA_CONNECTION_POLL)) - goto out; + return; __ieee80211_stop_poll(sdata); - mutex_lock(&local->iflist_mtx); ieee80211_recalc_ps(local); - mutex_unlock(&local->iflist_mtx); if (ieee80211_hw_check(&sdata->local->hw, CONNECTION_MONITOR)) - goto out; + return; /* * We've received a probe response, but are not sure whether @@ -3096,8 +3105,6 @@ static void ieee80211_reset_ap_probe(struct ieee80211_sub_if_data *sdata) mod_timer(&ifmgd->conn_mon_timer, round_jiffies_up(jiffies + IEEE80211_CONNECTION_IDLE_TIME)); -out: - mutex_unlock(&local->mtx); } static void ieee80211_sta_tx_wmm_ac_notify(struct ieee80211_sub_if_data *sdata, @@ -3126,7 +3133,8 @@ static void ieee80211_sta_tx_wmm_ac_notify(struct ieee80211_sub_if_data *sdata, if (tx_tspec->downgraded) { tx_tspec->action = TX_TSPEC_ACTION_STOP_DOWNGRADE; - schedule_delayed_work(&ifmgd->tx_tspec_wk, 0); + wiphy_delayed_work_queue(sdata->local->hw.wiphy, + &ifmgd->tx_tspec_wk, 0); } } @@ -3138,7 +3146,8 @@ static void ieee80211_sta_tx_wmm_ac_notify(struct ieee80211_sub_if_data *sdata, if (tx_tspec->consumed_tx_time >= tx_tspec->admitted_time) { tx_tspec->downgraded = true; tx_tspec->action = 
TX_TSPEC_ACTION_DOWNGRADE; - schedule_delayed_work(&ifmgd->tx_tspec_wk, 0); + wiphy_delayed_work_queue(sdata->local->hw.wiphy, + &ifmgd->tx_tspec_wk, 0); } } @@ -3179,6 +3188,8 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata) u8 unicast_limit = max(1, max_probe_tries - 3); struct sta_info *sta; + lockdep_assert_wiphy(sdata->local->hw.wiphy); + if (WARN_ON(ieee80211_vif_is_mld(&sdata->vif))) return; @@ -3200,11 +3211,9 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata) ifmgd->probe_send_count++; if (dst) { - mutex_lock(&sdata->local->sta_mtx); sta = sta_info_get(sdata, dst); if (!WARN_ON(!sta)) ieee80211_check_fast_rx(sta); - mutex_unlock(&sdata->local->sta_mtx); } if (ieee80211_hw_check(&sdata->local->hw, REPORTS_TX_ACK_STATUS)) { @@ -3227,29 +3236,24 @@ static void ieee80211_mgd_probe_ap(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; bool already = false; + lockdep_assert_wiphy(sdata->local->hw.wiphy); + if (WARN_ON_ONCE(ieee80211_vif_is_mld(&sdata->vif))) return; if (!ieee80211_sdata_running(sdata)) return; - sdata_lock(sdata); - if (!ifmgd->associated) - goto out; - - mutex_lock(&sdata->local->mtx); + return; - if (sdata->local->tmp_channel || sdata->local->scanning) { - mutex_unlock(&sdata->local->mtx); - goto out; - } + if (sdata->local->tmp_channel || sdata->local->scanning) + return; if (sdata->local->suspending) { /* reschedule after resume */ - mutex_unlock(&sdata->local->mtx); ieee80211_reset_ap_probe(sdata); - goto out; + return; } if (beacon) { @@ -3276,19 +3280,13 @@ static void ieee80211_mgd_probe_ap(struct ieee80211_sub_if_data *sdata, ifmgd->flags |= IEEE80211_STA_CONNECTION_POLL; - mutex_unlock(&sdata->local->mtx); - if (already) - goto out; + return; - mutex_lock(&sdata->local->iflist_mtx); ieee80211_recalc_ps(sdata->local); - mutex_unlock(&sdata->local->iflist_mtx); ifmgd->probe_send_count = 0; ieee80211_mgd_probe_ap_send(sdata); - out: - sdata_unlock(sdata); } struct sk_buff *ieee80211_ap_probereq_get(struct ieee80211_hw *hw, @@ -3301,12 +3299,12 @@ struct sk_buff *ieee80211_ap_probereq_get(struct ieee80211_hw *hw, const struct element *ssid; int ssid_len; + lockdep_assert_wiphy(sdata->local->hw.wiphy); + if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_STATION || ieee80211_vif_is_mld(&sdata->vif))) return NULL; - sdata_assert_lock(sdata); - if (ifmgd->associated) cbss = sdata->deflink.u.mgd.bss; else if (ifmgd->auth_data) @@ -3353,13 +3351,15 @@ static void ieee80211_report_disconnect(struct ieee80211_sub_if_data *sdata, drv_event_callback(sdata->local, sdata, &event); } -static void ___ieee80211_disconnect(struct ieee80211_sub_if_data *sdata) +static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata) { struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; bool tx; + lockdep_assert_wiphy(local->hw.wiphy); + if (!ifmgd->associated) return; @@ -3395,7 +3395,6 @@ static void ___ieee80211_disconnect(struct ieee80211_sub_if_data *sdata) WLAN_REASON_DEAUTH_LEAVING : WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY, tx, frame_buf); - mutex_lock(&local->mtx); /* the other links will be destroyed */ sdata->vif.bss_conf.csa_active = false; sdata->deflink.u.mgd.csa_waiting_bcn = false; @@ -3404,7 +3403,6 @@ static void ___ieee80211_disconnect(struct ieee80211_sub_if_data *sdata) IEEE80211_QUEUE_STOP_REASON_CSA); sdata->deflink.csa_block_tx = false; } - mutex_unlock(&local->mtx); 
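[Editorial illustration, not part of the patch] The mlme.c hunks above and below all apply one conversion pattern: the fine-grained locks (local->mtx, local->iflist_mtx, local->sta_mtx and sdata_lock()) are replaced by running under the single wiphy mutex, and plain work items become wiphy_work so that cfg80211 acquires that mutex before invoking the handler. A minimal before/after sketch, assuming only the wiphy_work API visible in these hunks; the some_work field and both function names are hypothetical:

/* before: a workqueue handler takes the driver-local mutex itself */
static void old_style_work(struct work_struct *work)
{
	struct ieee80211_local *local =
		container_of(work, struct ieee80211_local, some_work);

	mutex_lock(&local->mtx);
	/* ... locked section ... */
	mutex_unlock(&local->mtx);
}

/* after: the field type becomes struct wiphy_work; cfg80211 already
 * holds wiphy->mtx when calling the handler, so the body only asserts it
 */
static void new_style_work(struct wiphy *wiphy, struct wiphy_work *work)
{
	struct ieee80211_local *local =
		container_of(work, struct ieee80211_local, some_work);

	lockdep_assert_wiphy(local->hw.wiphy);
	/* ... locked section, no explicit lock/unlock needed ... */
}

Queueing follows suit: schedule_work(&local->some_work) becomes wiphy_work_queue(local->hw.wiphy, &local->some_work), and cancel_work_sync() becomes wiphy_work_cancel(), which must itself be called with the wiphy mutex held.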
ieee80211_report_disconnect(sdata, frame_buf, sizeof(frame_buf), tx, WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY, @@ -3412,13 +3410,6 @@ static void ___ieee80211_disconnect(struct ieee80211_sub_if_data *sdata) ifmgd->reconnect = false; } -static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata) -{ - sdata_lock(sdata); - ___ieee80211_disconnect(sdata); - sdata_unlock(sdata); -} - static void ieee80211_beacon_connection_loss_work(struct wiphy *wiphy, struct wiphy_work *work) { @@ -3500,7 +3491,7 @@ static void ieee80211_destroy_auth_data(struct ieee80211_sub_if_data *sdata, { struct ieee80211_mgd_auth_data *auth_data = sdata->u.mgd.auth_data; - sdata_assert_lock(sdata); + lockdep_assert_wiphy(sdata->local->hw.wiphy); if (!assoc) { /* @@ -3518,10 +3509,8 @@ static void ieee80211_destroy_auth_data(struct ieee80211_sub_if_data *sdata, BSS_CHANGED_BSSID); sdata->u.mgd.flags = 0; - mutex_lock(&sdata->local->mtx); ieee80211_link_release_channel(&sdata->deflink); ieee80211_vif_set_links(sdata, 0, 0); - mutex_unlock(&sdata->local->mtx); } cfg80211_put_bss(sdata->local->hw.wiphy, auth_data->bss); @@ -3541,7 +3530,7 @@ static void ieee80211_destroy_assoc_data(struct ieee80211_sub_if_data *sdata, { struct ieee80211_mgd_assoc_data *assoc_data = sdata->u.mgd.assoc_data; - sdata_assert_lock(sdata); + lockdep_assert_wiphy(sdata->local->hw.wiphy); if (status != ASSOC_SUCCESS) { /* @@ -3577,10 +3566,8 @@ static void ieee80211_destroy_assoc_data(struct ieee80211_sub_if_data *sdata, cfg80211_assoc_failure(sdata->dev, &data); } - mutex_lock(&sdata->local->mtx); ieee80211_link_release_channel(&sdata->deflink); ieee80211_vif_set_links(sdata, 0, 0); - mutex_unlock(&sdata->local->mtx); } kfree(assoc_data); @@ -3597,6 +3584,7 @@ static void ieee80211_auth_challenge(struct ieee80211_sub_if_data *sdata, u32 tx_flags = 0; struct ieee80211_prep_tx_info info = { .subtype = IEEE80211_STYPE_AUTH, + .link_id = auth_data->link_id, }; pos = mgmt->u.auth.variable; @@ -3622,7 +3610,8 @@ static bool ieee80211_mark_sta_auth(struct ieee80211_sub_if_data *sdata) struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; const u8 *ap_addr = ifmgd->auth_data->ap_addr; struct sta_info *sta; - bool result = true; + + lockdep_assert_wiphy(sdata->local->hw.wiphy); sdata_info(sdata, "authenticated\n"); ifmgd->auth_data->done = true; @@ -3631,22 +3620,17 @@ static bool ieee80211_mark_sta_auth(struct ieee80211_sub_if_data *sdata) run_again(sdata, ifmgd->auth_data->timeout); /* move station state to auth */ - mutex_lock(&sdata->local->sta_mtx); sta = sta_info_get(sdata, ap_addr); if (!sta) { WARN_ONCE(1, "%s: STA %pM not found", sdata->name, ap_addr); - result = false; - goto out; + return false; } if (sta_info_move_state(sta, IEEE80211_STA_AUTH)) { sdata_info(sdata, "failed moving %pM to auth\n", ap_addr); - result = false; - goto out; + return false; } -out: - mutex_unlock(&sdata->local->sta_mtx); - return result; + return true; } static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, @@ -3662,7 +3646,7 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, .subtype = IEEE80211_STYPE_AUTH, }; - sdata_assert_lock(sdata); + lockdep_assert_wiphy(sdata->local->hw.wiphy); if (len < 24 + 6) return; @@ -3820,7 +3804,7 @@ static void ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; u16 reason_code = le16_to_cpu(mgmt->u.deauth.reason_code); - sdata_assert_lock(sdata); + lockdep_assert_wiphy(sdata->local->hw.wiphy); if (len < 24 + 2) return; @@ 
-3864,7 +3848,7 @@ static void ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; u16 reason_code; - sdata_assert_lock(sdata); + lockdep_assert_wiphy(sdata->local->hw.wiphy); if (len < 24 + 2) return; @@ -3894,8 +3878,7 @@ static void ieee80211_get_rates(struct ieee80211_supported_band *sband, u8 *supp_rates, unsigned int supp_rates_len, u32 *rates, u32 *basic_rates, bool *have_higher_than_11mbit, - int *min_rate, int *min_rate_index, - int shift) + int *min_rate, int *min_rate_index) { int i, j; @@ -3903,7 +3886,7 @@ static void ieee80211_get_rates(struct ieee80211_supported_band *sband, int rate = supp_rates[i] & 0x7f; bool is_basic = !!(supp_rates[i] & 0x80); - if ((rate * 5 * (1 << shift)) > 110) + if ((rate * 5) > 110) *have_higher_than_11mbit = true; /* @@ -3927,7 +3910,7 @@ static void ieee80211_get_rates(struct ieee80211_supported_band *sband, br = &sband->bitrates[j]; - brate = DIV_ROUND_UP(br->bitrate, (1 << shift) * 5); + brate = DIV_ROUND_UP(br->bitrate, 5); if (brate == rate) { *rates |= BIT(j); if (is_basic) @@ -4394,8 +4377,6 @@ static int ieee80211_mgd_setup_link_sta(struct ieee80211_link_data *link, u32 rates = 0, basic_rates = 0; bool have_higher_than_11mbit = false; int min_rate = INT_MAX, min_rate_index = -1; - /* this is clearly wrong for MLO but we'll just remove it later */ - int shift = ieee80211_vif_get_shift(&sdata->vif); struct ieee80211_supported_band *sband; memcpy(link_sta->addr, cbss->bssid, ETH_ALEN); @@ -4411,7 +4392,7 @@ static int ieee80211_mgd_setup_link_sta(struct ieee80211_link_data *link, ieee80211_get_rates(sband, bss->supp_rates, bss->supp_rates_len, &rates, &basic_rates, &have_higher_than_11mbit, - &min_rate, &min_rate_index, shift); + &min_rate, &min_rate_index); /* * This used to be a workaround for basic rates missing @@ -4817,6 +4798,7 @@ ieee80211_verify_sta_eht_mcs_support(struct ieee80211_sub_if_data *sdata, static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, struct ieee80211_link_data *link, struct cfg80211_bss *cbss, + bool mlo, ieee80211_conn_flags_t *conn_flags) { struct ieee80211_local *local = sdata->local; @@ -4830,6 +4812,7 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, struct cfg80211_chan_def chandef; bool is_6ghz = cbss->channel->band == NL80211_BAND_6GHZ; bool is_5ghz = cbss->channel->band == NL80211_BAND_5GHZ; + bool supports_mlo = false; struct ieee80211_bss *bss = (void *)cbss->priv; struct ieee80211_elems_parse_params parse_params = { .link_id = -1, @@ -4841,6 +4824,8 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, u32 i; bool have_80mhz; + lockdep_assert_wiphy(local->hw.wiphy); + rcu_read_lock(); ies = rcu_dereference(cbss->ies); @@ -4981,6 +4966,8 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, ieee80211_mle_type_ok(eht_ml_elem->data + 1, IEEE80211_ML_CONTROL_TYPE_BASIC, eht_ml_elem->datalen - 1)) { + supports_mlo = true; + sdata->vif.cfg.eml_cap = ieee80211_mle_get_eml_cap(eht_ml_elem->data + 1); sdata->vif.cfg.eml_med_sync_delay = @@ -5036,13 +5023,14 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, return -EINVAL; } + if (mlo && !supports_mlo) { + sdata_info(sdata, "Rejecting MLO as it is not supported by AP\n"); + return -EINVAL; + } + if (!link) return 0; - /* will change later if needed */ - link->smps_mode = IEEE80211_SMPS_OFF; - - mutex_lock(&local->mtx); /* * If this fails (possibly due to channel context sharing * on incompatible 
channels, e.g. 80+80 and 160 sharing the @@ -5063,7 +5051,6 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, IEEE80211_CHANCTX_SHARED); } out: - mutex_unlock(&local->mtx); return ret; } @@ -5115,7 +5102,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, u16 valid_links = 0, dormant_links = 0; int err; - mutex_lock(&sdata->local->sta_mtx); + lockdep_assert_wiphy(sdata->local->hw.wiphy); /* * station info was already allocated and inserted before * the association and should be available to us @@ -5164,7 +5151,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, " (assoc)" : ""); link_sta = rcu_dereference_protected(sta->link[link_id], - lockdep_is_held(&local->sta_mtx)); + lockdep_is_held(&local->hw.wiphy->mtx)); if (WARN_ON(!link_sta)) goto out_err; @@ -5187,7 +5174,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, link->conf->dtim_period = link->u.mgd.dtim_period ?: 1; if (link_id != assoc_data->assoc_link_id) { - err = ieee80211_prep_channel(sdata, link, cbss, + err = ieee80211_prep_channel(sdata, link, cbss, true, &link->u.mgd.conn_flags); if (err) { link_info(link, "prep_channel failed\n"); @@ -5251,8 +5238,6 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, if (sdata->wdev.use_4addr) drv_sta_set_4addr(local, sdata, &sta->sta, true); - mutex_unlock(&sdata->local->sta_mtx); - ieee80211_set_associated(sdata, assoc_data, changed); /* @@ -5272,7 +5257,6 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, return true; out_err: eth_zero_addr(sdata->vif.cfg.ap_addr); - mutex_unlock(&sdata->local->sta_mtx); return false; } @@ -5298,13 +5282,13 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, .u.mlme.data = ASSOC_EVENT, }; struct ieee80211_prep_tx_info info = {}; - struct cfg80211_rx_assoc_resp resp = { + struct cfg80211_rx_assoc_resp_data resp = { .uapsd_queues = -1, }; u8 ap_mld_addr[ETH_ALEN] __aligned(2); unsigned int link_id; - sdata_assert_lock(sdata); + lockdep_assert_wiphy(sdata->local->hw.wiphy); if (!assoc_data) return; @@ -5505,7 +5489,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_link_data *link, struct ieee80211_bss *bss; struct ieee80211_channel *channel; - sdata_assert_lock(sdata); + lockdep_assert_wiphy(sdata->local->hw.wiphy); channel = ieee80211_get_channel_khz(local->hw.wiphy, ieee80211_rx_status_to_khz(rx_status)); @@ -5532,7 +5516,7 @@ static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_link_data *link, ifmgd = &sdata->u.mgd; - sdata_assert_lock(sdata); + lockdep_assert_wiphy(sdata->local->hw.wiphy); /* * According to Draft P802.11ax D6.0 clause 26.17.2.3.2: @@ -5743,21 +5727,16 @@ static void ieee80211_ml_reconf_work(struct wiphy *wiphy, u16 new_valid_links, new_active_links, new_dormant_links; int ret; - sdata_lock(sdata); - if (!sdata->u.mgd.removed_links) { - sdata_unlock(sdata); + if (!sdata->u.mgd.removed_links) return; - } sdata_info(sdata, "MLO Reconfiguration: work: valid=0x%x, removed=0x%x\n", sdata->vif.valid_links, sdata->u.mgd.removed_links); new_valid_links = sdata->vif.valid_links & ~sdata->u.mgd.removed_links; - if (new_valid_links == sdata->vif.valid_links) { - sdata_unlock(sdata); + if (new_valid_links == sdata->vif.valid_links) return; - } if (!new_valid_links || !(new_valid_links & ~sdata->vif.dormant_links)) { @@ -5773,8 +5752,7 @@ static void ieee80211_ml_reconf_work(struct wiphy *wiphy, BIT(ffs(new_valid_links & ~sdata->vif.dormant_links) - 1); - ret = 
__ieee80211_set_active_links(&sdata->vif, - new_active_links); + ret = ieee80211_set_active_links(&sdata->vif, new_active_links); if (ret) { sdata_info(sdata, "Failed setting active links\n"); @@ -5789,15 +5767,15 @@ static void ieee80211_ml_reconf_work(struct wiphy *wiphy, if (ret) sdata_info(sdata, "Failed setting valid links\n"); + ieee80211_vif_cfg_change_notify(sdata, BSS_CHANGED_MLD_VALID_LINKS); + out: if (!ret) cfg80211_links_removed(sdata->dev, sdata->u.mgd.removed_links); else - ___ieee80211_disconnect(sdata); + __ieee80211_disconnect(sdata); sdata->u.mgd.removed_links = 0; - - sdata_unlock(sdata); } static void ieee80211_ml_reconfiguration(struct ieee80211_sub_if_data *sdata, @@ -5899,6 +5877,222 @@ static void ieee80211_ml_reconfiguration(struct ieee80211_sub_if_data *sdata, TU_TO_JIFFIES(delay)); } +static void ieee80211_tid_to_link_map_work(struct wiphy *wiphy, + struct wiphy_work *work) +{ + u16 new_active_links, new_dormant_links; + struct ieee80211_sub_if_data *sdata = + container_of(work, struct ieee80211_sub_if_data, + u.mgd.ttlm_work.work); + int ret; + + new_active_links = sdata->u.mgd.ttlm_info.map & + sdata->vif.valid_links; + new_dormant_links = ~sdata->u.mgd.ttlm_info.map & + sdata->vif.valid_links; + if (!new_active_links) { + ieee80211_disconnect(&sdata->vif, false); + return; + } + + ieee80211_vif_set_links(sdata, sdata->vif.valid_links, 0); + new_active_links = BIT(ffs(new_active_links) - 1); + ieee80211_set_active_links(&sdata->vif, new_active_links); + + ret = ieee80211_vif_set_links(sdata, sdata->vif.valid_links, + new_dormant_links); + + sdata->u.mgd.ttlm_info.active = true; + sdata->u.mgd.ttlm_info.switch_time = 0; + + if (!ret) + ieee80211_vif_cfg_change_notify(sdata, + BSS_CHANGED_MLD_VALID_LINKS); +} + +static u16 ieee80211_get_ttlm(u8 bm_size, u8 *data) +{ + if (bm_size == 1) + return *data; + else + return get_unaligned_le16(data); +} + +static int +ieee80211_parse_adv_t2l(struct ieee80211_sub_if_data *sdata, + const struct ieee80211_ttlm_elem *ttlm, + struct ieee80211_adv_ttlm_info *ttlm_info) +{ + /* The element size was already validated in + * ieee80211_tid_to_link_map_size_ok() + */ + u8 control, link_map_presence, map_size, tid; + u8 *pos; + + memset(ttlm_info, 0, sizeof(*ttlm_info)); + pos = (void *)ttlm->optional; + control = ttlm->control; + + if ((control & IEEE80211_TTLM_CONTROL_DEF_LINK_MAP) || + !(control & IEEE80211_TTLM_CONTROL_SWITCH_TIME_PRESENT)) + return 0; + + if ((control & IEEE80211_TTLM_CONTROL_DIRECTION) != + IEEE80211_TTLM_DIRECTION_BOTH) { + sdata_info(sdata, "Invalid advertised T2L map direction\n"); + return -EINVAL; + } + + link_map_presence = *pos; + pos++; + + ttlm_info->switch_time = get_unaligned_le16(pos); + + /* Since ttlm_info->switch_time == 0 means no switch time, bump it + * by 1. 
+ */ + if (!ttlm_info->switch_time) + ttlm_info->switch_time = 1; + + pos += 2; + + if (control & IEEE80211_TTLM_CONTROL_EXPECTED_DUR_PRESENT) { + ttlm_info->duration = pos[0] | pos[1] << 8 | pos[2] << 16; + pos += 3; + } + + if (control & IEEE80211_TTLM_CONTROL_LINK_MAP_SIZE) + map_size = 1; + else + map_size = 2; + + /* According to Draft P802.11be_D3.0 clause 35.3.7.1.7, an AP MLD shall + * not advertise a TID-to-link mapping that does not map all TIDs to the + * same link set, reject frame if not all links have mapping + */ + if (link_map_presence != 0xff) { + sdata_info(sdata, + "Invalid advertised T2L mapping presence indicator\n"); + return -EINVAL; + } + + ttlm_info->map = ieee80211_get_ttlm(map_size, pos); + if (!ttlm_info->map) { + sdata_info(sdata, + "Invalid advertised T2L map for TID 0\n"); + return -EINVAL; + } + + pos += map_size; + + for (tid = 1; tid < 8; tid++) { + u16 map = ieee80211_get_ttlm(map_size, pos); + + if (map != ttlm_info->map) { + sdata_info(sdata, "Invalid advertised T2L map for tid %d\n", + tid); + return -EINVAL; + } + + pos += map_size; + } + return 0; +} + +static void ieee80211_process_adv_ttlm(struct ieee80211_sub_if_data *sdata, + struct ieee802_11_elems *elems, + u64 beacon_ts) +{ + u8 i; + int ret; + + if (!ieee80211_vif_is_mld(&sdata->vif)) + return; + + if (!elems->ttlm_num) { + if (sdata->u.mgd.ttlm_info.switch_time) { + /* if a planned TID-to-link mapping was cancelled - + * abort it + */ + wiphy_delayed_work_cancel(sdata->local->hw.wiphy, + &sdata->u.mgd.ttlm_work); + } else if (sdata->u.mgd.ttlm_info.active) { + /* if no TID-to-link element, set to default mapping in + * which all TIDs are mapped to all setup links + */ + ret = ieee80211_vif_set_links(sdata, + sdata->vif.valid_links, + 0); + if (ret) { + sdata_info(sdata, "Failed setting valid/dormant links\n"); + return; + } + ieee80211_vif_cfg_change_notify(sdata, + BSS_CHANGED_MLD_VALID_LINKS); + } + memset(&sdata->u.mgd.ttlm_info, 0, + sizeof(sdata->u.mgd.ttlm_info)); + return; + } + + for (i = 0; i < elems->ttlm_num; i++) { + struct ieee80211_adv_ttlm_info ttlm_info; + u32 res; + + res = ieee80211_parse_adv_t2l(sdata, elems->ttlm[i], + &ttlm_info); + + if (res) { + __ieee80211_disconnect(sdata); + return; + } + + if (ttlm_info.switch_time) { + u16 beacon_ts_tu, st_tu, delay; + u32 delay_jiffies; + u64 mask; + + /* The t2l map switch time is indicated with a partial + * TSF value (bits 10 to 25), get the partial beacon TS + * as well, and calc the delay to the start time. + */ + mask = GENMASK_ULL(25, 10); + beacon_ts_tu = (beacon_ts & mask) >> 10; + st_tu = ttlm_info.switch_time; + delay = st_tu - beacon_ts_tu; + + /* + * If the switch time is far in the future, then it + * could also be the previous switch still being + * announced. + * We can simply ignore it for now, if it is a future + * switch the AP will continue to announce it anyway. 
+ */ + if (delay > IEEE80211_ADV_TTLM_ST_UNDERFLOW) + return; + + delay_jiffies = TU_TO_JIFFIES(delay); + + /* Link switching can take time, so schedule it + * 100ms before to be ready on time + */ + if (delay_jiffies > IEEE80211_ADV_TTLM_SAFETY_BUFFER_MS) + delay_jiffies -= + IEEE80211_ADV_TTLM_SAFETY_BUFFER_MS; + else + delay_jiffies = 0; + + sdata->u.mgd.ttlm_info = ttlm_info; + wiphy_delayed_work_cancel(sdata->local->hw.wiphy, + &sdata->u.mgd.ttlm_work); + wiphy_delayed_work_queue(sdata->local->hw.wiphy, + &sdata->u.mgd.ttlm_work, + delay_jiffies); + return; + } + } +} + static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link, struct ieee80211_hdr *hdr, size_t len, struct ieee80211_rx_status *rx_status) @@ -5927,7 +6121,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link, .from_ap = true, }; - sdata_assert_lock(sdata); + lockdep_assert_wiphy(local->hw.wiphy); /* Process beacon from the current BSS */ bssid = ieee80211_get_bssid(hdr, len, sdata->vif.type); @@ -6143,9 +6337,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link, changed |= BSS_CHANGED_BEACON_INFO; link->u.mgd.have_beacon = true; - mutex_lock(&local->iflist_mtx); ieee80211_recalc_ps(local); - mutex_unlock(&local->iflist_mtx); ieee80211_recalc_ps_vif(sdata); } @@ -6162,16 +6354,13 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link, le16_to_cpu(mgmt->u.beacon.capab_info), erp_valid, erp_value); - mutex_lock(&local->sta_mtx); sta = sta_info_get(sdata, sdata->vif.cfg.ap_addr); if (WARN_ON(!sta)) { - mutex_unlock(&local->sta_mtx); goto free; } link_sta = rcu_dereference_protected(sta->link[link->link_id], - lockdep_is_held(&local->sta_mtx)); + lockdep_is_held(&local->hw.wiphy->mtx)); if (WARN_ON(!link_sta)) { - mutex_unlock(&local->sta_mtx); goto free; } @@ -6187,7 +6376,6 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link, elems->vht_operation, elems->he_operation, elems->eht_operation, elems->s1g_oper, bssid, &changed)) { - mutex_unlock(&local->sta_mtx); sdata_info(sdata, "failed to follow AP %pM bandwidth change, disconnect\n", bssid); @@ -6205,7 +6393,6 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link, ieee80211_vht_handle_opmode(sdata, link_sta, *elems->opmode_notif, rx_status->band); - mutex_unlock(&local->sta_mtx); changed |= ieee80211_handle_pwr_constr(link, chan, mgmt, elems->country_elem, @@ -6229,6 +6416,8 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link, } ieee80211_ml_reconfiguration(sdata, elems); + ieee80211_process_adv_ttlm(sdata, elems, + le64_to_cpu(mgmt->u.beacon.timestamp)); ieee80211_link_info_change_notify(sdata, link, changed); free: @@ -6243,17 +6432,17 @@ void ieee80211_sta_rx_queued_ext(struct ieee80211_sub_if_data *sdata, struct ieee80211_hdr *hdr; u16 fc; + lockdep_assert_wiphy(sdata->local->hw.wiphy); + rx_status = (struct ieee80211_rx_status *) skb->cb; hdr = (struct ieee80211_hdr *) skb->data; fc = le16_to_cpu(hdr->frame_control); - sdata_lock(sdata); switch (fc & IEEE80211_FCTL_STYPE) { case IEEE80211_STYPE_S1G_BEACON: ieee80211_rx_mgmt_beacon(link, hdr, skb->len, rx_status); break; } - sdata_unlock(sdata); } void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, @@ -6265,17 +6454,17 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, u16 fc; int ies_len; + lockdep_assert_wiphy(sdata->local->hw.wiphy); + rx_status = (struct ieee80211_rx_status *) skb->cb; mgmt = (struct ieee80211_mgmt *) skb->data; fc = 
le16_to_cpu(mgmt->frame_control); - sdata_lock(sdata); - if (rx_status->link_valid) { link = sdata_dereference(sdata->link[rx_status->link_id], sdata); if (!link) - goto out; + return; } switch (fc & IEEE80211_FCTL_STYPE) { @@ -6358,8 +6547,6 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, } break; } -out: - sdata_unlock(sdata); } static void ieee80211_sta_timer(struct timer_list *t) @@ -6394,7 +6581,7 @@ static int ieee80211_auth(struct ieee80211_sub_if_data *sdata) .subtype = IEEE80211_STYPE_AUTH, }; - sdata_assert_lock(sdata); + lockdep_assert_wiphy(sdata->local->hw.wiphy); if (WARN_ON_ONCE(!auth_data)) return -EINVAL; @@ -6417,6 +6604,7 @@ static int ieee80211_auth(struct ieee80211_sub_if_data *sdata) if (auth_data->algorithm == WLAN_AUTH_SAE) info.duration = jiffies_to_msecs(IEEE80211_AUTH_TIMEOUT_SAE); + info.link_id = auth_data->link_id; drv_mgd_prepare_tx(local, sdata, &info); sdata_info(sdata, "send auth to %pM (try %d/%d)\n", @@ -6463,7 +6651,7 @@ static int ieee80211_do_assoc(struct ieee80211_sub_if_data *sdata) struct ieee80211_local *local = sdata->local; int ret; - sdata_assert_lock(sdata); + lockdep_assert_wiphy(sdata->local->hw.wiphy); assoc_data->tries++; if (assoc_data->tries > IEEE80211_ASSOC_MAX_TRIES) { @@ -6519,7 +6707,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - sdata_lock(sdata); + lockdep_assert_wiphy(sdata->local->hw.wiphy); if (ifmgd->status_received) { __le16 fc = ifmgd->status_fc; @@ -6654,8 +6842,6 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY, false); } } - - sdata_unlock(sdata); } static void ieee80211_sta_bcn_mon_timer(struct timer_list *t) @@ -6711,10 +6897,11 @@ static void ieee80211_sta_conn_mon_timer(struct timer_list *t) return; } - ieee80211_queue_work(&local->hw, &ifmgd->monitor_work); + wiphy_work_queue(local->hw.wiphy, &sdata->u.mgd.monitor_work); } -static void ieee80211_sta_monitor_work(struct work_struct *work) +static void ieee80211_sta_monitor_work(struct wiphy *wiphy, + struct wiphy_work *work) { struct ieee80211_sub_if_data *sdata = container_of(work, struct ieee80211_sub_if_data, @@ -6730,8 +6917,8 @@ static void ieee80211_restart_sta_timer(struct ieee80211_sub_if_data *sdata) /* let's probe the connection once */ if (!ieee80211_hw_check(&sdata->local->hw, CONNECTION_MONITOR)) - ieee80211_queue_work(&sdata->local->hw, - &sdata->u.mgd.monitor_work); + wiphy_work_queue(sdata->local->hw.wiphy, + &sdata->u.mgd.monitor_work); } } @@ -6741,7 +6928,7 @@ void ieee80211_mgd_quiesce(struct ieee80211_sub_if_data *sdata) struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; - sdata_lock(sdata); + lockdep_assert_wiphy(sdata->local->hw.wiphy); if (ifmgd->auth_data || ifmgd->assoc_data) { const u8 *ap_addr = ifmgd->auth_data ? 
@@ -6793,8 +6980,6 @@ void ieee80211_mgd_quiesce(struct ieee80211_sub_if_data *sdata) memcpy(bssid, sdata->vif.cfg.ap_addr, ETH_ALEN); ieee80211_mgd_deauth(sdata, &req); } - - sdata_unlock(sdata); } #endif @@ -6802,11 +6987,10 @@ void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - sdata_lock(sdata); - if (!ifmgd->associated) { - sdata_unlock(sdata); + lockdep_assert_wiphy(sdata->local->hw.wiphy); + + if (!ifmgd->associated) return; - } if (sdata->flags & IEEE80211_SDATA_DISCONNECT_RESUME) { sdata->flags &= ~IEEE80211_SDATA_DISCONNECT_RESUME; @@ -6814,7 +6998,6 @@ void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata) ieee80211_sta_connection_lost(sdata, WLAN_REASON_UNSPECIFIED, true); - sdata_unlock(sdata); return; } @@ -6824,11 +7007,8 @@ void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata) ieee80211_sta_connection_lost(sdata, WLAN_REASON_UNSPECIFIED, true); - sdata_unlock(sdata); return; } - - sdata_unlock(sdata); } static void ieee80211_request_smps_mgd_work(struct wiphy *wiphy, @@ -6838,10 +7018,8 @@ static void ieee80211_request_smps_mgd_work(struct wiphy *wiphy, container_of(work, struct ieee80211_link_data, u.mgd.request_smps_work); - sdata_lock(link->sdata); __ieee80211_request_smps_mgd(link->sdata, link, link->u.mgd.driver_smps_mode); - sdata_unlock(link->sdata); } /* interface setup */ @@ -6849,20 +7027,22 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - INIT_WORK(&ifmgd->monitor_work, ieee80211_sta_monitor_work); + wiphy_work_init(&ifmgd->monitor_work, ieee80211_sta_monitor_work); wiphy_work_init(&ifmgd->beacon_connection_loss_work, ieee80211_beacon_connection_loss_work); wiphy_work_init(&ifmgd->csa_connection_drop_work, ieee80211_csa_connection_drop_work); - INIT_DELAYED_WORK(&ifmgd->tdls_peer_del_work, - ieee80211_tdls_peer_del_work); + wiphy_delayed_work_init(&ifmgd->tdls_peer_del_work, + ieee80211_tdls_peer_del_work); wiphy_delayed_work_init(&ifmgd->ml_reconf_work, ieee80211_ml_reconf_work); timer_setup(&ifmgd->timer, ieee80211_sta_timer, 0); timer_setup(&ifmgd->bcn_mon_timer, ieee80211_sta_bcn_mon_timer, 0); timer_setup(&ifmgd->conn_mon_timer, ieee80211_sta_conn_mon_timer, 0); - INIT_DELAYED_WORK(&ifmgd->tx_tspec_wk, - ieee80211_sta_handle_tspec_ac_params_wk); + wiphy_delayed_work_init(&ifmgd->tx_tspec_wk, + ieee80211_sta_handle_tspec_ac_params_wk); + wiphy_delayed_work_init(&ifmgd->ttlm_work, + ieee80211_tid_to_link_map_work); ifmgd->flags = 0; ifmgd->powersave = sdata->wdev.ps; @@ -6874,6 +7054,16 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata) ifmgd->orig_teardown_skb = NULL; } +static void ieee80211_recalc_smps_work(struct wiphy *wiphy, + struct wiphy_work *work) +{ + struct ieee80211_link_data *link = + container_of(work, struct ieee80211_link_data, + u.mgd.recalc_smps); + + ieee80211_recalc_smps(link->sdata, link); +} + void ieee80211_mgd_setup_link(struct ieee80211_link_data *link) { struct ieee80211_sub_if_data *sdata = link->sdata; @@ -6883,9 +7073,12 @@ void ieee80211_mgd_setup_link(struct ieee80211_link_data *link) link->u.mgd.p2p_noa_index = -1; link->u.mgd.conn_flags = 0; link->conf->bssid = link->u.mgd.bssid; + link->smps_mode = IEEE80211_SMPS_OFF; wiphy_work_init(&link->u.mgd.request_smps_work, ieee80211_request_smps_mgd_work); + wiphy_work_init(&link->u.mgd.recalc_smps, + ieee80211_recalc_smps_work); if (local->hw.wiphy->features & NL80211_FEATURE_DYNAMIC_SMPS) 
link->u.mgd.req_smps = IEEE80211_SMPS_AUTOMATIC; else @@ -7049,7 +7242,7 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, } if (new_sta || override) { - err = ieee80211_prep_channel(sdata, link, cbss, + err = ieee80211_prep_channel(sdata, link, cbss, mlo, &link->u.mgd.conn_flags); if (err) { if (new_sta) @@ -7094,6 +7287,75 @@ out_err: return err; } +static bool ieee80211_mgd_csa_present(struct ieee80211_sub_if_data *sdata, + const struct cfg80211_bss_ies *ies, + u8 cur_channel, bool ignore_ecsa) +{ + const struct element *csa_elem, *ecsa_elem; + struct ieee80211_channel_sw_ie *csa = NULL; + struct ieee80211_ext_chansw_ie *ecsa = NULL; + + if (!ies) + return false; + + csa_elem = cfg80211_find_elem(WLAN_EID_CHANNEL_SWITCH, + ies->data, ies->len); + if (csa_elem && csa_elem->datalen == sizeof(*csa)) + csa = (void *)csa_elem->data; + + ecsa_elem = cfg80211_find_elem(WLAN_EID_EXT_CHANSWITCH_ANN, + ies->data, ies->len); + if (ecsa_elem && ecsa_elem->datalen == sizeof(*ecsa)) + ecsa = (void *)ecsa_elem->data; + + if (csa && csa->count == 0) + csa = NULL; + if (csa && !csa->mode && csa->new_ch_num == cur_channel) + csa = NULL; + + if (ecsa && ecsa->count == 0) + ecsa = NULL; + if (ecsa && !ecsa->mode && ecsa->new_ch_num == cur_channel) + ecsa = NULL; + + if (ignore_ecsa && ecsa) { + sdata_info(sdata, + "Ignoring ECSA in probe response - was considered stuck!\n"); + return csa; + } + + return csa || ecsa; +} + +static bool ieee80211_mgd_csa_in_process(struct ieee80211_sub_if_data *sdata, + struct cfg80211_bss *bss) +{ + u8 cur_channel; + bool ret; + + cur_channel = ieee80211_frequency_to_channel(bss->channel->center_freq); + + rcu_read_lock(); + if (ieee80211_mgd_csa_present(sdata, + rcu_dereference(bss->beacon_ies), + cur_channel, false)) { + ret = true; + goto out; + } + + if (ieee80211_mgd_csa_present(sdata, + rcu_dereference(bss->proberesp_ies), + cur_channel, bss->proberesp_ecsa_stuck)) { + ret = true; + goto out; + } + + ret = false; +out: + rcu_read_unlock(); + return ret; +} + /* config hooks */ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, struct cfg80211_auth_request *req) @@ -7101,10 +7363,13 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_mgd_auth_data *auth_data; + struct ieee80211_link_data *link; u16 auth_alg; int err; bool cont_auth; + lockdep_assert_wiphy(sdata->local->hw.wiphy); + /* prepare auth data structure */ switch (req->auth_type) { @@ -7141,6 +7406,11 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, if (ifmgd->assoc_data) return -EBUSY; + if (ieee80211_mgd_csa_in_process(sdata, req->bss)) { + sdata_info(sdata, "AP is in CSA process, reject auth\n"); + return -EINVAL; + } + auth_data = kzalloc(sizeof(*auth_data) + req->auth_data_len + req->ie_len, GFP_KERNEL); if (!auth_data) @@ -7224,8 +7494,6 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, false); } - sdata_info(sdata, "authenticate with %pM\n", auth_data->ap_addr); - /* needed for transmitting the auth frame(s) properly */ memcpy(sdata->vif.cfg.ap_addr, auth_data->ap_addr, ETH_ALEN); @@ -7234,6 +7502,19 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, if (err) goto err_clear; + if (req->link_id > 0) + link = sdata_dereference(sdata->link[req->link_id], sdata); + else + link = sdata_dereference(sdata->link[0], sdata); + + if (WARN_ON(!link)) { + err = -ENOLINK; + goto err_clear; + } + + sdata_info(sdata, 
"authenticate with %pM (local address=%pM)\n", + auth_data->ap_addr, link->conf->addr); + err = ieee80211_auth(sdata); if (err) { sta_info_destroy_addr(sdata, auth_data->ap_addr); @@ -7249,9 +7530,7 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, eth_zero_addr(sdata->deflink.u.mgd.bssid); ieee80211_link_info_change_notify(sdata, &sdata->deflink, BSS_CHANGED_BSSID); - mutex_lock(&sdata->local->mtx); ieee80211_link_release_channel(&sdata->deflink); - mutex_unlock(&sdata->local->mtx); } ifmgd->auth_data = NULL; kfree(auth_data); @@ -7266,7 +7545,7 @@ ieee80211_setup_assoc_link(struct ieee80211_sub_if_data *sdata, unsigned int link_id) { struct ieee80211_local *local = sdata->local; - const struct cfg80211_bss_ies *beacon_ies; + const struct cfg80211_bss_ies *bss_ies; struct ieee80211_supported_band *sband; const struct element *ht_elem, *vht_elem; struct ieee80211_link_data *link; @@ -7341,32 +7620,37 @@ ieee80211_setup_assoc_link(struct ieee80211_sub_if_data *sdata, link->conf->eht_puncturing = 0; rcu_read_lock(); - beacon_ies = rcu_dereference(cbss->beacon_ies); - if (beacon_ies) { - const struct ieee80211_eht_operation *eht_oper; - const struct element *elem; + bss_ies = rcu_dereference(cbss->beacon_ies); + if (bss_ies) { u8 dtim_count = 0; - ieee80211_get_dtim(beacon_ies, &dtim_count, + ieee80211_get_dtim(bss_ies, &dtim_count, &link->u.mgd.dtim_period); sdata->deflink.u.mgd.have_beacon = true; if (ieee80211_hw_check(&local->hw, TIMING_BEACON_ONLY)) { - link->conf->sync_tsf = beacon_ies->tsf; + link->conf->sync_tsf = bss_ies->tsf; link->conf->sync_device_ts = bss->device_ts_beacon; link->conf->sync_dtim_count = dtim_count; } + } else { + bss_ies = rcu_dereference(cbss->ies); + } + + if (bss_ies) { + const struct ieee80211_eht_operation *eht_oper; + const struct element *elem; elem = cfg80211_find_ext_elem(WLAN_EID_EXT_MULTIPLE_BSSID_CONFIGURATION, - beacon_ies->data, beacon_ies->len); + bss_ies->data, bss_ies->len); if (elem && elem->datalen >= 3) link->conf->profile_periodicity = elem->data[2]; else link->conf->profile_periodicity = 0; elem = cfg80211_find_elem(WLAN_EID_EXT_CAPABILITY, - beacon_ies->data, beacon_ies->len); + bss_ies->data, bss_ies->len); if (elem && elem->datalen >= 11 && (elem->data[10] & WLAN_EXT_CAPA11_EMA_SUPPORT)) link->conf->ema_ap = true; @@ -7374,7 +7658,7 @@ ieee80211_setup_assoc_link(struct ieee80211_sub_if_data *sdata, link->conf->ema_ap = false; elem = cfg80211_find_ext_elem(WLAN_EID_EXT_EHT_OPERATION, - beacon_ies->data, beacon_ies->len); + bss_ies->data, bss_ies->len); eht_oper = (const void *)(elem->data + 1); if (elem && @@ -7457,6 +7741,12 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, cbss = req->link_id < 0 ? 
req->bss : req->links[req->link_id].bss; + if (ieee80211_mgd_csa_in_process(sdata, cbss)) { + sdata_info(sdata, "AP is in CSA process, reject assoc\n"); + kfree(assoc_data); + return -EINVAL; + } + rcu_read_lock(); ssid_elem = ieee80211_bss_get_elem(cbss, WLAN_EID_SSID); if (!ssid_elem || ssid_elem->datalen > sizeof(assoc_data->ssid)) { @@ -7464,6 +7754,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, kfree(assoc_data); return -EINVAL; } + memcpy(assoc_data->ssid, ssid_elem->data, ssid_elem->datalen); assoc_data->ssid_len = ssid_elem->datalen; memcpy(vif_cfg->ssid, assoc_data->ssid, assoc_data->ssid_len); @@ -7524,7 +7815,10 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, match = ether_addr_equal(ifmgd->auth_data->ap_addr, assoc_data->ap_addr) && ifmgd->auth_data->link_id == req->link_id; - ieee80211_destroy_auth_data(sdata, match); + + /* Cleanup is delayed if auth_data matches */ + if (!match) + ieee80211_destroy_auth_data(sdata, false); } /* prepare assoc data */ @@ -7705,10 +7999,13 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, if (i == assoc_data->assoc_link_id) continue; /* only calculate the flags, hence link == NULL */ - err = ieee80211_prep_channel(sdata, NULL, assoc_data->link[i].bss, + err = ieee80211_prep_channel(sdata, NULL, + assoc_data->link[i].bss, true, &assoc_data->link[i].conn_flags); - if (err) + if (err) { + req->links[i].error = err; goto err_clear; + } } /* needed for transmitting the assoc frames properly */ @@ -7727,8 +8024,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, rcu_read_lock(); beacon_ies = rcu_dereference(req->bss->beacon_ies); - - if (beacon_ies) { + if (!beacon_ies) { /* * Wait up to one beacon interval ... * should this be more if we miss one? 
@@ -7744,11 +8040,17 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, run_again(sdata, assoc_data->timeout); + /* We are associating, clean up auth_data */ + if (ifmgd->auth_data) + ieee80211_destroy_auth_data(sdata, true); + return 0; err_clear: - eth_zero_addr(sdata->deflink.u.mgd.bssid); - ieee80211_link_info_change_notify(sdata, &sdata->deflink, - BSS_CHANGED_BSSID); + if (!ifmgd->auth_data) { + eth_zero_addr(sdata->deflink.u.mgd.bssid); + ieee80211_link_info_change_notify(sdata, &sdata->deflink, + BSS_CHANGED_BSSID); + } ifmgd->assoc_data = NULL; err_free: kfree(assoc_data); @@ -7772,6 +8074,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, req->bssid, req->reason_code, ieee80211_get_reason_code_string(req->reason_code)); + info.link_id = ifmgd->auth_data->link_id; drv_mgd_prepare_tx(sdata->local, sdata, &info); ieee80211_send_deauth_disassoc(sdata, req->bssid, req->bssid, IEEE80211_STYPE_DEAUTH, @@ -7792,6 +8095,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, req->bssid, req->reason_code, ieee80211_get_reason_code_string(req->reason_code)); + info.link_id = ifmgd->assoc_data->assoc_link_id; drv_mgd_prepare_tx(sdata->local, sdata, &info); ieee80211_send_deauth_disassoc(sdata, req->bssid, req->bssid, IEEE80211_STYPE_DEAUTH, @@ -7801,6 +8105,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, ieee80211_report_disconnect(sdata, frame_buf, sizeof(frame_buf), true, req->reason_code, false); + drv_mgd_complete_tx(sdata->local, sdata, &info); return 0; } @@ -7851,6 +8156,8 @@ void ieee80211_mgd_stop_link(struct ieee80211_link_data *link) { wiphy_work_cancel(link->sdata->local->hw.wiphy, &link->u.mgd.request_smps_work); + wiphy_work_cancel(link->sdata->local->hw.wiphy, + &link->u.mgd.recalc_smps); wiphy_delayed_work_cancel(link->sdata->local->hw.wiphy, &link->u.mgd.chswitch_work); } @@ -7864,16 +8171,18 @@ void ieee80211_mgd_stop(struct ieee80211_sub_if_data *sdata) * they will not do anything but might not have been * cancelled when disconnecting. 
*/ - cancel_work_sync(&ifmgd->monitor_work); + wiphy_work_cancel(sdata->local->hw.wiphy, + &ifmgd->monitor_work); wiphy_work_cancel(sdata->local->hw.wiphy, &ifmgd->beacon_connection_loss_work); wiphy_work_cancel(sdata->local->hw.wiphy, &ifmgd->csa_connection_drop_work); - cancel_delayed_work_sync(&ifmgd->tdls_peer_del_work); + wiphy_delayed_work_cancel(sdata->local->hw.wiphy, + &ifmgd->tdls_peer_del_work); wiphy_delayed_work_cancel(sdata->local->hw.wiphy, &ifmgd->ml_reconf_work); + wiphy_delayed_work_cancel(sdata->local->hw.wiphy, &ifmgd->ttlm_work); - sdata_lock(sdata); if (ifmgd->assoc_data) ieee80211_destroy_assoc_data(sdata, ASSOC_TIMEOUT); if (ifmgd->auth_data) @@ -7889,7 +8198,6 @@ void ieee80211_mgd_stop(struct ieee80211_sub_if_data *sdata) ifmgd->assoc_req_ies_len = 0; spin_unlock_bh(&ifmgd->teardown_lock); del_timer_sync(&ifmgd->timer); - sdata_unlock(sdata); } void ieee80211_cqm_rssi_notify(struct ieee80211_vif *vif, diff --git a/net/mac80211/ocb.c b/net/mac80211/ocb.c index b44896e145..449af4e1cc 100644 --- a/net/mac80211/ocb.c +++ b/net/mac80211/ocb.c @@ -44,7 +44,6 @@ void ieee80211_ocb_rx_no_sta(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct ieee80211_chanctx_conf *chanctx_conf; struct ieee80211_supported_band *sband; - enum nl80211_bss_scan_width scan_width; struct sta_info *sta; int band; @@ -66,7 +65,6 @@ void ieee80211_ocb_rx_no_sta(struct ieee80211_sub_if_data *sdata, return; } band = chanctx_conf->def.chan->band; - scan_width = cfg80211_chandef_to_scan_width(&chanctx_conf->def); rcu_read_unlock(); sta = sta_info_alloc(sdata, addr, GFP_ATOMIC); @@ -75,8 +73,7 @@ void ieee80211_ocb_rx_no_sta(struct ieee80211_sub_if_data *sdata, /* Add only mandatory rates for now */ sband = local->hw.wiphy->bands[band]; - sta->sta.deflink.supp_rates[band] = - ieee80211_mandatory_rates(sband, scan_width); + sta->sta.deflink.supp_rates[band] = ieee80211_mandatory_rates(sband); spin_lock(&ifocb->incomplete_lock); list_add(&sta->list, &ifocb->incomplete_stations); @@ -124,11 +121,11 @@ void ieee80211_ocb_work(struct ieee80211_sub_if_data *sdata) struct ieee80211_if_ocb *ifocb = &sdata->u.ocb; struct sta_info *sta; + lockdep_assert_wiphy(sdata->local->hw.wiphy); + if (ifocb->joined != true) return; - sdata_lock(sdata); - spin_lock_bh(&ifocb->incomplete_lock); while (!list_empty(&ifocb->incomplete_stations)) { sta = list_first_entry(&ifocb->incomplete_stations, @@ -144,8 +141,6 @@ void ieee80211_ocb_work(struct ieee80211_sub_if_data *sdata) if (test_and_clear_bit(OCB_WORK_HOUSEKEEPING, &ifocb->wrkq_flags)) ieee80211_ocb_housekeeping(sdata); - - sdata_unlock(sdata); } static void ieee80211_ocb_housekeeping_timer(struct timer_list *t) @@ -178,6 +173,8 @@ int ieee80211_ocb_join(struct ieee80211_sub_if_data *sdata, u64 changed = BSS_CHANGED_OCB | BSS_CHANGED_BSSID; int err; + lockdep_assert_wiphy(sdata->local->hw.wiphy); + if (ifocb->joined == true) return -EINVAL; @@ -185,10 +182,8 @@ int ieee80211_ocb_join(struct ieee80211_sub_if_data *sdata, sdata->deflink.smps_mode = IEEE80211_SMPS_OFF; sdata->deflink.needed_rx_chains = sdata->local->rx_chains; - mutex_lock(&sdata->local->mtx); err = ieee80211_link_use_channel(&sdata->deflink, &setup->chandef, IEEE80211_CHANCTX_SHARED); - mutex_unlock(&sdata->local->mtx); if (err) return err; @@ -209,6 +204,8 @@ int ieee80211_ocb_leave(struct ieee80211_sub_if_data *sdata) struct ieee80211_local *local = sdata->local; struct sta_info *sta; + lockdep_assert_wiphy(sdata->local->hw.wiphy); + ifocb->joined = false; 
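[Editorial illustration, not part of the patch] Delayed work items get the same treatment in the hunks above: INIT_DELAYED_WORK(), schedule_delayed_work() and cancel_delayed_work_sync() become their wiphy_delayed_work counterparts, with the handler again running under the wiphy mutex. A sketch under the same assumptions; struct example_priv and its example_wk field are hypothetical:

struct example_priv {
	struct wiphy *wiphy;
	struct wiphy_delayed_work example_wk;
};

static void example_wk_fn(struct wiphy *wiphy, struct wiphy_work *work)
{
	/* a wiphy_delayed_work embeds a plain wiphy_work, hence the
	 * '.work' member here, matching container_of(work, ...,
	 * u.mgd.ttlm_work.work) in the mlme.c hunks above
	 */
	struct example_priv *priv =
		container_of(work, struct example_priv, example_wk.work);

	lockdep_assert_wiphy(priv->wiphy);
	/* ... timed, wiphy-locked processing ... */
}

static void example_usage(struct example_priv *priv)
{
	wiphy_delayed_work_init(&priv->example_wk, example_wk_fn);

	/* replaces schedule_delayed_work(..., HZ): run in about 1s */
	wiphy_delayed_work_queue(priv->wiphy, &priv->example_wk, HZ);

	/* replaces cancel_delayed_work_sync(); wiphy mutex must be held */
	wiphy_delayed_work_cancel(priv->wiphy, &priv->example_wk);
}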
sta_info_flush(sdata); @@ -228,9 +225,7 @@ int ieee80211_ocb_leave(struct ieee80211_sub_if_data *sdata) clear_bit(SDATA_STATE_OFFCHANNEL, &sdata->state); ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_OCB); - mutex_lock(&sdata->local->mtx); ieee80211_link_release_channel(&sdata->deflink); - mutex_unlock(&sdata->local->mtx); skb_queue_purge(&sdata->skb_queue); diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index 5bedd9cef4..6c40802025 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -34,7 +34,7 @@ static void ieee80211_offchannel_ps_enable(struct ieee80211_sub_if_data *sdata) del_timer_sync(&ifmgd->bcn_mon_timer); del_timer_sync(&ifmgd->conn_mon_timer); - cancel_work_sync(&local->dynamic_ps_enable_work); + wiphy_work_cancel(local->hw.wiphy, &local->dynamic_ps_enable_work); if (local->hw.conf.flags & IEEE80211_CONF_PS) { offchannel_ps_enabled = true; @@ -84,6 +84,8 @@ void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local) { struct ieee80211_sub_if_data *sdata; + lockdep_assert_wiphy(local->hw.wiphy); + if (WARN_ON(local->use_chanctx)) return; @@ -101,7 +103,6 @@ void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local) false); ieee80211_flush_queues(local, NULL, false); - mutex_lock(&local->iflist_mtx); list_for_each_entry(sdata, &local->interfaces, list) { if (!ieee80211_sdata_running(sdata)) continue; @@ -127,17 +128,17 @@ void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local) sdata->u.mgd.associated) ieee80211_offchannel_ps_enable(sdata); } - mutex_unlock(&local->iflist_mtx); } void ieee80211_offchannel_return(struct ieee80211_local *local) { struct ieee80211_sub_if_data *sdata; + lockdep_assert_wiphy(local->hw.wiphy); + if (WARN_ON(local->use_chanctx)) return; - mutex_lock(&local->iflist_mtx); list_for_each_entry(sdata, &local->interfaces, list) { if (sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE) continue; @@ -161,7 +162,6 @@ void ieee80211_offchannel_return(struct ieee80211_local *local) BSS_CHANGED_BEACON_ENABLED); } } - mutex_unlock(&local->iflist_mtx); ieee80211_wake_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_OFFCHANNEL, @@ -197,7 +197,7 @@ static unsigned long ieee80211_end_finished_rocs(struct ieee80211_local *local, struct ieee80211_roc_work *roc, *tmp; long remaining_dur_min = LONG_MAX; - lockdep_assert_held(&local->mtx); + lockdep_assert_wiphy(local->hw.wiphy); list_for_each_entry_safe(roc, tmp, &local->roc_list, list) { long remaining; @@ -264,7 +264,7 @@ static void ieee80211_hw_roc_start(struct wiphy *wiphy, struct wiphy_work *work) container_of(work, struct ieee80211_local, hw_roc_start); struct ieee80211_roc_work *roc; - mutex_lock(&local->mtx); + lockdep_assert_wiphy(local->hw.wiphy); list_for_each_entry(roc, &local->roc_list, list) { if (!roc->started) @@ -273,8 +273,6 @@ static void ieee80211_hw_roc_start(struct wiphy *wiphy, struct wiphy_work *work) roc->hw_begun = true; ieee80211_handle_roc_started(roc, local->hw_roc_start_time); } - - mutex_unlock(&local->mtx); } void ieee80211_ready_on_channel(struct ieee80211_hw *hw) @@ -295,7 +293,7 @@ static void _ieee80211_start_next_roc(struct ieee80211_local *local) enum ieee80211_roc_type type; u32 min_dur, max_dur; - lockdep_assert_held(&local->mtx); + lockdep_assert_wiphy(local->hw.wiphy); if (WARN_ON(list_empty(&local->roc_list))) return; @@ -386,7 +384,7 @@ void ieee80211_start_next_roc(struct ieee80211_local *local) { struct ieee80211_roc_work *roc; - lockdep_assert_held(&local->mtx); + 
lockdep_assert_wiphy(local->hw.wiphy); if (list_empty(&local->roc_list)) { ieee80211_run_deferred_scan(local); @@ -417,7 +415,7 @@ static void __ieee80211_roc_work(struct ieee80211_local *local) struct ieee80211_roc_work *roc; bool on_channel; - lockdep_assert_held(&local->mtx); + lockdep_assert_wiphy(local->hw.wiphy); if (WARN_ON(local->ops->remain_on_channel)) return; @@ -456,9 +454,9 @@ static void ieee80211_roc_work(struct wiphy *wiphy, struct wiphy_work *work) struct ieee80211_local *local = container_of(work, struct ieee80211_local, roc_work.work); - mutex_lock(&local->mtx); + lockdep_assert_wiphy(local->hw.wiphy); + __ieee80211_roc_work(local); - mutex_unlock(&local->mtx); } static void ieee80211_hw_roc_done(struct wiphy *wiphy, struct wiphy_work *work) @@ -466,14 +464,12 @@ static void ieee80211_hw_roc_done(struct wiphy *wiphy, struct wiphy_work *work) struct ieee80211_local *local = container_of(work, struct ieee80211_local, hw_roc_done); - mutex_lock(&local->mtx); + lockdep_assert_wiphy(local->hw.wiphy); ieee80211_end_finished_rocs(local, jiffies); /* if there's another roc, start it now */ ieee80211_start_next_roc(local); - - mutex_unlock(&local->mtx); } void ieee80211_remain_on_channel_expired(struct ieee80211_hw *hw) @@ -537,7 +533,7 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local, bool queued = false, combine_started = true; int ret; - lockdep_assert_held(&local->mtx); + lockdep_assert_wiphy(local->hw.wiphy); if (channel->freq_offset) /* this may work, but is untested */ @@ -675,15 +671,12 @@ int ieee80211_remain_on_channel(struct wiphy *wiphy, struct wireless_dev *wdev, { struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); struct ieee80211_local *local = sdata->local; - int ret; - mutex_lock(&local->mtx); - ret = ieee80211_start_roc_work(local, sdata, chan, - duration, cookie, NULL, - IEEE80211_ROC_TYPE_NORMAL); - mutex_unlock(&local->mtx); + lockdep_assert_wiphy(local->hw.wiphy); - return ret; + return ieee80211_start_roc_work(local, sdata, chan, + duration, cookie, NULL, + IEEE80211_ROC_TYPE_NORMAL); } static int ieee80211_cancel_roc(struct ieee80211_local *local, @@ -692,12 +685,13 @@ static int ieee80211_cancel_roc(struct ieee80211_local *local, struct ieee80211_roc_work *roc, *tmp, *found = NULL; int ret; + lockdep_assert_wiphy(local->hw.wiphy); + if (!cookie) return -ENOENT; wiphy_work_flush(local->hw.wiphy, &local->hw_roc_start); - mutex_lock(&local->mtx); list_for_each_entry_safe(roc, tmp, &local->roc_list, list) { if (!mgmt_tx && roc->cookie != cookie) continue; @@ -709,7 +703,6 @@ static int ieee80211_cancel_roc(struct ieee80211_local *local, } if (!found) { - mutex_unlock(&local->mtx); return -ENOENT; } @@ -721,10 +714,26 @@ static int ieee80211_cancel_roc(struct ieee80211_local *local, if (local->ops->remain_on_channel) { ret = drv_cancel_remain_on_channel(local, roc->sdata); if (WARN_ON_ONCE(ret)) { - mutex_unlock(&local->mtx); return ret; } + /* + * We could be racing against the notification from the driver: + * + driver is handling the notification on CPU0 + * + user space is cancelling the remain on channel and + * schedules the hw_roc_done worker. + * + * Now hw_roc_done might start to run after the next roc will + * start and mac80211 will think that this second roc has + * ended prematurely. + * Cancel the work to make sure that all the pending workers + * have completed execution. 
+ * Note that this assumes that by the time the driver returns + * from drv_cancel_remain_on_channel, it has completed all + * the processing of related notifications. + */ + wiphy_work_cancel(local->hw.wiphy, &local->hw_roc_done); + /* TODO: * if multiple items were combined here then we really shouldn't * cancel them all - we should wait for as much time as needed @@ -749,7 +758,6 @@ static int ieee80211_cancel_roc(struct ieee80211_local *local, } out_unlock: - mutex_unlock(&local->mtx); return 0; } @@ -778,6 +786,8 @@ int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, int ret; u8 *data; + lockdep_assert_wiphy(local->hw.wiphy); + if (params->dont_wait_for_ack) flags = IEEE80211_TX_CTL_NO_ACK; else @@ -833,13 +843,16 @@ int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, break; case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_P2P_CLIENT: - sdata_lock(sdata); if (!sdata->u.mgd.associated || (params->offchan && params->wait && local->ops->remain_on_channel && - memcmp(sdata->vif.cfg.ap_addr, mgmt->bssid, ETH_ALEN))) + memcmp(sdata->vif.cfg.ap_addr, mgmt->bssid, ETH_ALEN))) { need_offchan = true; - sdata_unlock(sdata); + } else if (sdata->u.mgd.associated && + ether_addr_equal(sdata->vif.cfg.ap_addr, mgmt->da)) { + sta = sta_info_get_bss(sdata, mgmt->da); + mlo_sta = sta && sta->sta.mlo; + } break; case NL80211_IFTYPE_P2P_DEVICE: need_offchan = true; @@ -855,8 +868,6 @@ int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, if (need_offchan && !params->chan) return -EINVAL; - mutex_lock(&local->mtx); - /* Check if the operating channel is the requested channel */ if (!params->chan && mlo_sta) { need_offchan = false; @@ -980,7 +991,6 @@ int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, if (ret) ieee80211_free_txskb(&local->hw, skb); out_unlock: - mutex_unlock(&local->mtx); return ret; } @@ -1006,7 +1016,8 @@ void ieee80211_roc_purge(struct ieee80211_local *local, struct ieee80211_roc_work *roc, *tmp; bool work_to_do = false; - mutex_lock(&local->mtx); + lockdep_assert_wiphy(local->hw.wiphy); + list_for_each_entry_safe(roc, tmp, &local->roc_list, list) { if (sdata && roc->sdata != sdata) continue; @@ -1026,5 +1037,4 @@ void ieee80211_roc_purge(struct ieee80211_local *local, } if (work_to_do) __ieee80211_roc_work(local); - mutex_unlock(&local->mtx); } diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index 0ccb5701c7..c1fa26e094 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* * Portions - * Copyright (C) 2020-2021 Intel Corporation + * Copyright (C) 2020-2021, 2023 Intel Corporation */ #include <net/mac80211.h> #include <net/rtnetlink.h> @@ -40,13 +40,12 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) if (ieee80211_hw_check(hw, AMPDU_AGGREGATION) && !(wowlan && wowlan->any)) { - mutex_lock(&local->sta_mtx); + lockdep_assert_wiphy(local->hw.wiphy); list_for_each_entry(sta, &local->sta_list, list) { set_sta_flag(sta, WLAN_STA_BLOCK_BA); ieee80211_sta_tear_down_BA_sessions( sta, AGG_STOP_LOCAL_REQUEST); } - mutex_unlock(&local->sta_mtx); } /* keep sched_scan only in case of 'any' trigger */ @@ -76,7 +75,7 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) * Note that this particular timer doesn't need to be * restarted at resume. 
*/ - cancel_work_sync(&local->dynamic_ps_enable_work); + wiphy_work_cancel(local->hw.wiphy, &local->dynamic_ps_enable_work); del_timer_sync(&local->dynamic_ps_timer); local->wowlan = wowlan; @@ -119,12 +118,11 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) local->quiescing = false; local->wowlan = false; if (ieee80211_hw_check(hw, AMPDU_AGGREGATION)) { - mutex_lock(&local->sta_mtx); + lockdep_assert_wiphy(local->hw.wiphy); list_for_each_entry(sta, &local->sta_list, list) { clear_sta_flag(sta, WLAN_STA_BLOCK_BA); } - mutex_unlock(&local->sta_mtx); } ieee80211_wake_queues_by_reason(hw, IEEE80211_MAX_QUEUE_MAP, @@ -161,7 +159,8 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) break; } - flush_delayed_work(&sdata->dec_tailroom_needed_wk); + wiphy_delayed_work_flush(local->hw.wiphy, + &sdata->dec_tailroom_needed_wk); drv_remove_interface(local, sdata); } diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index b34c805220..6bf3b4444a 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -1725,16 +1725,15 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband, mi->band = sband->band; mi->last_stats_update = jiffies; - ack_dur = ieee80211_frame_duration(sband->band, 10, 60, 1, 1, 0); - mi->overhead = ieee80211_frame_duration(sband->band, 0, 60, 1, 1, 0); + ack_dur = ieee80211_frame_duration(sband->band, 10, 60, 1, 1); + mi->overhead = ieee80211_frame_duration(sband->band, 0, 60, 1, 1); mi->overhead += ack_dur; mi->overhead_rtscts = mi->overhead + 2 * ack_dur; ctl_rate = &sband->bitrates[rate_lowest_index(sband, sta)]; erp = ctl_rate->flags & IEEE80211_RATE_ERP_G; ack_dur = ieee80211_frame_duration(sband->band, 10, - ctl_rate->bitrate, erp, 1, - ieee80211_chandef_get_shift(chandef)); + ctl_rate->bitrate, erp, 1); mi->overhead_legacy = ack_dur; mi->overhead_legacy_rtscts = mi->overhead_legacy + 2 * ack_dur; diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 26ca2f5dc5..64352e4e6d 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1436,7 +1436,7 @@ ieee80211_rx_h_check_dup(struct ieee80211_rx_data *rx) rx->sta->last_seq_ctrl[rx->seqno_idx] == hdr->seq_ctrl)) { I802_DEBUG_INC(rx->local->dot11FrameDuplicateCount); rx->link_sta->rx_stats.num_duplicates++; - return RX_DROP_UNUSABLE; + return RX_DROP_U_DUP; } else if (!(status->flag & RX_FLAG_AMSDU_MORE)) { rx->sta->last_seq_ctrl[rx->seqno_idx] = hdr->seq_ctrl; } @@ -1490,7 +1490,7 @@ ieee80211_rx_h_check(struct ieee80211_rx_data *rx) cfg80211_rx_spurious_frame(rx->sdata->dev, hdr->addr2, GFP_ATOMIC)) - return RX_DROP_UNUSABLE; + return RX_DROP_U_SPURIOUS; return RX_DROP_MONITOR; } @@ -1883,7 +1883,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; int keyidx; - ieee80211_rx_result result = RX_DROP_UNUSABLE; + ieee80211_rx_result result = RX_DROP_U_DECRYPT_FAIL; struct ieee80211_key *sta_ptk = NULL; struct ieee80211_key *ptk_idx = NULL; int mmie_keyidx = -1; @@ -1933,7 +1933,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) keyid = ieee80211_get_keyid(rx->skb); if (unlikely(keyid < 0)) - return RX_DROP_UNUSABLE; + return RX_DROP_U_NO_KEY_ID; ptk_idx = rcu_dereference(rx->sta->ptk[keyid]); } @@ -2038,7 +2038,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) keyidx = ieee80211_get_keyid(rx->skb); if (unlikely(keyidx < 0)) - return 
RX_DROP_UNUSABLE; + return RX_DROP_U_NO_KEY_ID; /* check per-station GTK first, if multicast packet */ if (is_multicast_ether_addr(hdr->addr1) && rx->link_sta) @@ -2104,7 +2104,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) result = ieee80211_crypto_gcmp_decrypt(rx); break; default: - result = RX_DROP_UNUSABLE; + result = RX_DROP_U_BAD_CIPHER; } /* the hdr variable is invalid after the decrypt handlers */ @@ -2249,7 +2249,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) I802_DEBUG_INC(rx->local->rx_handlers_fragments); if (skb_linearize(rx->skb)) - return RX_DROP_UNUSABLE; + return RX_DROP_U_OOM; /* * skb_linearize() might change the skb->data and @@ -2312,11 +2312,11 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) u8 pn[IEEE80211_CCMP_PN_LEN], *rpn; if (!requires_sequential_pn(rx, fc)) - return RX_DROP_UNUSABLE; + return RX_DROP_U_NONSEQ_PN; /* Prevent mixed key and fragment cache attacks */ if (entry->key_color != rx->key->color) - return RX_DROP_UNUSABLE; + return RX_DROP_U_BAD_KEY_COLOR; memcpy(pn, entry->last_pn, IEEE80211_CCMP_PN_LEN); for (i = IEEE80211_CCMP_PN_LEN - 1; i >= 0; i--) { @@ -2327,7 +2327,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) rpn = rx->ccm_gcm.pn; if (memcmp(pn, rpn, IEEE80211_CCMP_PN_LEN)) - return RX_DROP_UNUSABLE; + return RX_DROP_U_REPLAY; memcpy(entry->last_pn, pn, IEEE80211_CCMP_PN_LEN); } else if (entry->is_protected && (!rx->key || @@ -2338,11 +2338,11 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) * if for TKIP Michael MIC should protect us, and WEP is a * lost cause anyway. */ - return RX_DROP_UNUSABLE; + return RX_DROP_U_EXPECT_DEFRAG_PROT; } else if (entry->is_protected && rx->key && entry->key_color != rx->key->color && (status->flag & RX_FLAG_DECRYPTED)) { - return RX_DROP_UNUSABLE; + return RX_DROP_U_BAD_KEY_COLOR; } skb_pull(rx->skb, ieee80211_hdrlen(fc)); @@ -2361,7 +2361,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) GFP_ATOMIC))) { I802_DEBUG_INC(rx->local->rx_handlers_drop_defrag); __skb_queue_purge(&entry->skb_list); - return RX_DROP_UNUSABLE; + return RX_DROP_U_OOM; } } while ((skb = __skb_dequeue(&entry->skb_list))) { @@ -2405,7 +2405,8 @@ static int ieee80211_drop_unencrypted(struct ieee80211_rx_data *rx, __le16 fc) return 0; } -static int ieee80211_drop_unencrypted_mgmt(struct ieee80211_rx_data *rx) +static ieee80211_rx_result +ieee80211_drop_unencrypted_mgmt(struct ieee80211_rx_data *rx) { struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb); struct ieee80211_mgmt *mgmt = (void *)rx->skb->data; @@ -2416,12 +2417,12 @@ static int ieee80211_drop_unencrypted_mgmt(struct ieee80211_rx_data *rx) * decrypted them already. */ if (status->flag & RX_FLAG_DECRYPTED) - return 0; + return RX_CONTINUE; /* drop unicast protected dual (that wasn't protected) */ if (ieee80211_is_action(fc) && mgmt->u.action.category == WLAN_CATEGORY_PROTECTED_DUAL_OF_ACTION) - return -EACCES; + return RX_DROP_U_UNPROT_DUAL; if (rx->sta && test_sta_flag(rx->sta, WLAN_STA_MFP)) { if (unlikely(!ieee80211_has_protected(fc) && @@ -2433,13 +2434,13 @@ static int ieee80211_drop_unencrypted_mgmt(struct ieee80211_rx_data *rx) * during 4-way-HS (key is installed after HS). 
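The rx.c hunks replace the catch-all RX_DROP_UNUSABLE with per-cause values (RX_DROP_U_DUP, RX_DROP_U_OOM, RX_DROP_U_REPLAY, ...), so each drop point reports a distinct reason to the skb drop-monitor infrastructure instead of one opaque code. A hedged sketch of the idea; the real values live in mac80211's drop.h, and the EX_ names below are stand-ins:

    #include <linux/types.h>

    typedef unsigned int ieee80211_rx_result;

    /* Stand-in encoding: a common "unusable" base plus a per-site
     * reason index, roughly what mac80211's drop.h macros produce.
     */
    enum {
            EX_RX_CONTINUE          = 0,
            EX_RX_DROP_U_BASE       = 0x100,
            EX_RX_DROP_U_DUP,
            EX_RX_DROP_U_OOM,
    };

    static ieee80211_rx_result ex_check_dup(bool duplicate)
    {
            if (duplicate)
                    return EX_RX_DROP_U_DUP; /* was: RX_DROP_UNUSABLE */
            return EX_RX_CONTINUE;
    }

Alongside the mechanical renames, helpers such as ieee80211_drop_unencrypted_mgmt() and __ieee80211_data_to_8023() switch from int error codes to ieee80211_rx_result, letting callers forward the typed reason directly.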
*/ if (!rx->key) - return 0; + return RX_CONTINUE; cfg80211_rx_unprot_mlme_mgmt(rx->sdata->dev, rx->skb->data, rx->skb->len); } - return -EACCES; + return RX_DROP_U_UNPROT_UCAST_MGMT; } /* BIP does not use Protected field, so need to check MMIE */ if (unlikely(ieee80211_is_multicast_robust_mgmt_frame(rx->skb) && @@ -2449,14 +2450,14 @@ static int ieee80211_drop_unencrypted_mgmt(struct ieee80211_rx_data *rx) cfg80211_rx_unprot_mlme_mgmt(rx->sdata->dev, rx->skb->data, rx->skb->len); - return -EACCES; + return RX_DROP_U_UNPROT_MCAST_MGMT; } if (unlikely(ieee80211_is_beacon(fc) && rx->key && ieee80211_get_mmie_keyidx(rx->skb) < 0)) { cfg80211_rx_unprot_mlme_mgmt(rx->sdata->dev, rx->skb->data, rx->skb->len); - return -EACCES; + return RX_DROP_U_UNPROT_BEACON; } /* * When using MFP, Action frames are not allowed prior to @@ -2464,18 +2465,27 @@ static int ieee80211_drop_unencrypted_mgmt(struct ieee80211_rx_data *rx) */ if (unlikely(ieee80211_is_action(fc) && !rx->key && ieee80211_is_robust_mgmt_frame(rx->skb))) - return -EACCES; + return RX_DROP_U_UNPROT_ACTION; /* drop unicast public action frames when using MPF */ if (is_unicast_ether_addr(mgmt->da) && ieee80211_is_protected_dual_of_public_action(rx->skb)) - return -EACCES; + return RX_DROP_U_UNPROT_UNICAST_PUB_ACTION; } - return 0; + /* + * Drop robust action frames before assoc regardless of MFP state, + * after assoc we also have decided on MFP or not. + */ + if (ieee80211_is_action(fc) && + ieee80211_is_robust_mgmt_frame(rx->skb) && + (!rx->sta || !test_sta_flag(rx->sta, WLAN_STA_ASSOC))) + return RX_DROP_U_UNPROT_ROBUST_ACTION; + + return RX_CONTINUE; } -static int +static ieee80211_rx_result __ieee80211_data_to_8023(struct ieee80211_rx_data *rx, bool *port_control) { struct ieee80211_sub_if_data *sdata = rx->sdata; @@ -2487,32 +2497,31 @@ __ieee80211_data_to_8023(struct ieee80211_rx_data *rx, bool *port_control) *port_control = false; if (ieee80211_has_a4(hdr->frame_control) && sdata->vif.type == NL80211_IFTYPE_AP_VLAN && !sdata->u.vlan.sta) - return -1; + return RX_DROP_U_UNEXPECTED_VLAN_4ADDR; if (sdata->vif.type == NL80211_IFTYPE_STATION && !!sdata->u.mgd.use_4addr != !!ieee80211_has_a4(hdr->frame_control)) { - if (!sdata->u.mgd.use_4addr) - return -1; + return RX_DROP_U_UNEXPECTED_STA_4ADDR; else if (!ether_addr_equal(hdr->addr1, sdata->vif.addr)) check_port_control = true; } if (is_multicast_ether_addr(hdr->addr1) && sdata->vif.type == NL80211_IFTYPE_AP_VLAN && sdata->u.vlan.sta) - return -1; + return RX_DROP_U_UNEXPECTED_VLAN_MCAST; ret = ieee80211_data_to_8023(rx->skb, sdata->vif.addr, sdata->vif.type); if (ret < 0) - return ret; + return RX_DROP_U_INVALID_8023; ehdr = (struct ethhdr *) rx->skb->data; if (ehdr->h_proto == rx->sdata->control_port_protocol) *port_control = true; else if (check_port_control) - return -1; + return RX_DROP_U_NOT_PORT_CONTROL; - return 0; + return RX_CONTINUE; } bool ieee80211_is_our_addr(struct ieee80211_sub_if_data *sdata, @@ -2903,10 +2912,10 @@ ieee80211_rx_mesh_data(struct ieee80211_sub_if_data *sdata, struct sta_info *sta skb = NULL; if (skb_cow_head(fwd_skb, hdrlen - sizeof(struct ethhdr))) - return RX_DROP_UNUSABLE; + return RX_DROP_U_OOM; if (skb_linearize(fwd_skb)) - return RX_DROP_UNUSABLE; + return RX_DROP_U_OOM; } fwd_hdr = skb_push(fwd_skb, hdrlen - sizeof(struct ethhdr)); @@ -3002,7 +3011,7 @@ __ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx, u8 data_offset) rx->sdata->vif.addr, rx->sdata->vif.type, data_offset, true)) - return RX_DROP_UNUSABLE; + return RX_DROP_U_BAD_AMSDU; if 
(rx->sta->amsdu_mesh_control < 0) { s8 valid = -1; @@ -3077,21 +3086,21 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx) switch (rx->sdata->vif.type) { case NL80211_IFTYPE_AP_VLAN: if (!rx->sdata->u.vlan.sta) - return RX_DROP_UNUSABLE; + return RX_DROP_U_BAD_4ADDR; break; case NL80211_IFTYPE_STATION: if (!rx->sdata->u.mgd.use_4addr) - return RX_DROP_UNUSABLE; + return RX_DROP_U_BAD_4ADDR; break; case NL80211_IFTYPE_MESH_POINT: break; default: - return RX_DROP_UNUSABLE; + return RX_DROP_U_BAD_4ADDR; } } if (is_multicast_ether_addr(hdr->addr1) || !rx->sta) - return RX_DROP_UNUSABLE; + return RX_DROP_U_BAD_AMSDU; if (rx->key) { /* @@ -3104,7 +3113,7 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx) case WLAN_CIPHER_SUITE_WEP40: case WLAN_CIPHER_SUITE_WEP104: case WLAN_CIPHER_SUITE_TKIP: - return RX_DROP_UNUSABLE; + return RX_DROP_U_BAD_AMSDU_CIPHER; default: break; } @@ -3123,7 +3132,6 @@ ieee80211_rx_h_data(struct ieee80211_rx_data *rx) __le16 fc = hdr->frame_control; ieee80211_rx_result res; bool port_control; - int err; if (unlikely(!ieee80211_is_data(hdr->frame_control))) return RX_CONTINUE; @@ -3144,9 +3152,9 @@ ieee80211_rx_h_data(struct ieee80211_rx_data *rx) return RX_DROP_MONITOR; } - err = __ieee80211_data_to_8023(rx, &port_control); - if (unlikely(err)) - return RX_DROP_UNUSABLE; + res = __ieee80211_data_to_8023(rx, &port_control); + if (unlikely(res != RX_CONTINUE)) + return res; res = ieee80211_rx_mesh_data(rx->sdata, rx->sta, rx->skb); if (res != RX_CONTINUE) @@ -3378,7 +3386,7 @@ ieee80211_rx_h_mgmt_check(struct ieee80211_rx_data *rx) /* drop too small action frames */ if (ieee80211_is_action(mgmt->frame_control) && rx->skb->len < IEEE80211_MIN_ACTION_SIZE) - return RX_DROP_UNUSABLE; + return RX_DROP_U_RUNT_ACTION; if (rx->sdata->vif.type == NL80211_IFTYPE_AP && ieee80211_is_beacon(mgmt->frame_control) && @@ -3399,10 +3407,7 @@ ieee80211_rx_h_mgmt_check(struct ieee80211_rx_data *rx) rx->flags |= IEEE80211_RX_BEACON_REPORTED; } - if (ieee80211_drop_unencrypted_mgmt(rx)) - return RX_DROP_UNUSABLE; - - return RX_CONTINUE; + return ieee80211_drop_unencrypted_mgmt(rx); } static bool @@ -3472,7 +3477,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) if (!rx->sta && mgmt->u.action.category != WLAN_CATEGORY_PUBLIC && mgmt->u.action.category != WLAN_CATEGORY_SELF_PROTECTED && mgmt->u.action.category != WLAN_CATEGORY_SPECTRUM_MGMT) - return RX_DROP_UNUSABLE; + return RX_DROP_U_ACTION_UNKNOWN_SRC; switch (mgmt->u.action.category) { case WLAN_CATEGORY_HT: @@ -3877,7 +3882,7 @@ ieee80211_rx_h_action_return(struct ieee80211_rx_data *rx) /* do not return rejected action frames */ if (mgmt->u.action.category & 0x80) - return RX_DROP_UNUSABLE; + return RX_DROP_U_REJECTED_ACTION_RESPONSE; nskb = skb_copy_expand(rx->skb, local->hw.extra_tx_headroom, 0, GFP_ATOMIC); @@ -4668,7 +4673,7 @@ void __ieee80211_check_fast_rx_iface(struct ieee80211_sub_if_data *sdata) struct ieee80211_local *local = sdata->local; struct sta_info *sta; - lockdep_assert_held(&local->sta_mtx); + lockdep_assert_wiphy(local->hw.wiphy); list_for_each_entry(sta, &local->sta_list, list) { if (sdata != sta->sdata && @@ -4682,9 +4687,9 @@ void ieee80211_check_fast_rx_iface(struct ieee80211_sub_if_data *sdata) { struct ieee80211_local *local = sdata->local; - mutex_lock(&local->sta_mtx); + lockdep_assert_wiphy(local->hw.wiphy); + __ieee80211_check_fast_rx_iface(sdata); - mutex_unlock(&local->sta_mtx); } static void ieee80211_rx_8023(struct ieee80211_rx_data *rx, diff --git a/net/mac80211/s1g.c b/net/mac80211/s1g.c 
index c1f964e999..d4ed0c0a33 100644 --- a/net/mac80211/s1g.c +++ b/net/mac80211/s1g.c @@ -2,6 +2,7 @@ /* * S1G handling * Copyright(c) 2020 Adapt-IP + * Copyright (C) 2023 Intel Corporation */ #include <linux/ieee80211.h> #include <net/mac80211.h> @@ -153,11 +154,11 @@ void ieee80211_s1g_rx_twt_action(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct sta_info *sta; - mutex_lock(&local->sta_mtx); + lockdep_assert_wiphy(local->hw.wiphy); sta = sta_info_get_bss(sdata, mgmt->sa); if (!sta) - goto out; + return; switch (mgmt->u.action.u.s1g.action_code) { case WLAN_S1G_TWT_SETUP: @@ -169,9 +170,6 @@ void ieee80211_s1g_rx_twt_action(struct ieee80211_sub_if_data *sdata, default: break; } - -out: - mutex_unlock(&local->sta_mtx); } void ieee80211_s1g_status_twt_action(struct ieee80211_sub_if_data *sdata, @@ -181,11 +179,11 @@ void ieee80211_s1g_status_twt_action(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct sta_info *sta; - mutex_lock(&local->sta_mtx); + lockdep_assert_wiphy(local->hw.wiphy); sta = sta_info_get_bss(sdata, mgmt->da); if (!sta) - goto out; + return; switch (mgmt->u.action.u.s1g.action_code) { case WLAN_S1G_TWT_SETUP: @@ -195,7 +193,4 @@ void ieee80211_s1g_status_twt_action(struct ieee80211_sub_if_data *sdata, default: break; } - -out: - mutex_unlock(&local->sta_mtx); } diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 68ec2124c3..fca3f67ac0 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -9,7 +9,7 @@ * Copyright 2007, Michael Wu <flamingice@sourmilk.net> * Copyright 2013-2015 Intel Mobile Communications GmbH * Copyright 2016-2017 Intel Deutschland GmbH - * Copyright (C) 2018-2023 Intel Corporation + * Copyright (C) 2018-2024 Intel Corporation */ #include <linux/if_arp.h> @@ -187,12 +187,6 @@ ieee80211_bss_info_update(struct ieee80211_local *local, else if (ieee80211_hw_check(&local->hw, SIGNAL_UNSPEC)) bss_meta.signal = (rx_status->signal * 100) / local->hw.max_signal; - bss_meta.scan_width = NL80211_BSS_CHAN_WIDTH_20; - if (rx_status->bw == RATE_INFO_BW_5) - bss_meta.scan_width = NL80211_BSS_CHAN_WIDTH_5; - else if (rx_status->bw == RATE_INFO_BW_10) - bss_meta.scan_width = NL80211_BSS_CHAN_WIDTH_10; - bss_meta.chan = channel; rcu_read_lock(); @@ -222,14 +216,18 @@ ieee80211_bss_info_update(struct ieee80211_local *local, } static bool ieee80211_scan_accept_presp(struct ieee80211_sub_if_data *sdata, + struct ieee80211_channel *channel, u32 scan_flags, const u8 *da) { if (!sdata) return false; - /* accept broadcast for OCE */ - if (scan_flags & NL80211_SCAN_FLAG_ACCEPT_BCAST_PROBE_RESP && - is_broadcast_ether_addr(da)) + + /* accept broadcast on 6 GHz and for OCE */ + if (is_broadcast_ether_addr(da) && + (channel->band == NL80211_BAND_6GHZ || + scan_flags & NL80211_SCAN_FLAG_ACCEPT_BCAST_PROBE_RESP)) return true; + if (scan_flags & NL80211_SCAN_FLAG_RANDOM_ADDR) return true; return ether_addr_equal(da, sdata->vif.addr); @@ -278,6 +276,12 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb) wiphy_delayed_work_queue(local->hw.wiphy, &local->scan_work, 0); } + channel = ieee80211_get_channel_khz(local->hw.wiphy, + ieee80211_rx_status_to_khz(rx_status)); + + if (!channel || channel->flags & IEEE80211_CHAN_DISABLED) + return; + if (ieee80211_is_probe_resp(mgmt->frame_control)) { struct cfg80211_scan_request *scan_req; struct cfg80211_sched_scan_request *sched_scan_req; @@ -295,19 +299,15 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff 
*skb) /* ignore ProbeResp to foreign address or non-bcast (OCE) * unless scanning with randomised address */ - if (!ieee80211_scan_accept_presp(sdata1, scan_req_flags, + if (!ieee80211_scan_accept_presp(sdata1, channel, + scan_req_flags, mgmt->da) && - !ieee80211_scan_accept_presp(sdata2, sched_scan_req_flags, + !ieee80211_scan_accept_presp(sdata2, channel, + sched_scan_req_flags, mgmt->da)) return; } - channel = ieee80211_get_channel_khz(local->hw.wiphy, - ieee80211_rx_status_to_khz(rx_status)); - - if (!channel || channel->flags & IEEE80211_CHAN_DISABLED) - return; - bss = ieee80211_bss_info_update(local, rx_status, mgmt, skb->len, channel); @@ -315,22 +315,11 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb) ieee80211_rx_bss_put(local, bss); } -static void -ieee80211_prepare_scan_chandef(struct cfg80211_chan_def *chandef, - enum nl80211_bss_scan_width scan_width) +static void ieee80211_prepare_scan_chandef(struct cfg80211_chan_def *chandef) { memset(chandef, 0, sizeof(*chandef)); - switch (scan_width) { - case NL80211_BSS_CHAN_WIDTH_5: - chandef->width = NL80211_CHAN_WIDTH_5; - break; - case NL80211_BSS_CHAN_WIDTH_10: - chandef->width = NL80211_CHAN_WIDTH_10; - break; - default: - chandef->width = NL80211_CHAN_WIDTH_20_NOHT; - break; - } + + chandef->width = NL80211_CHAN_WIDTH_20_NOHT; } /* return false if no more work */ @@ -344,7 +333,7 @@ static bool ieee80211_prep_hw_scan(struct ieee80211_sub_if_data *sdata) u32 flags = 0; req = rcu_dereference_protected(local->scan_req, - lockdep_is_held(&local->mtx)); + lockdep_is_held(&local->hw.wiphy->mtx)); if (test_bit(SCAN_HW_CANCELLED, &local->scanning)) return false; @@ -378,7 +367,7 @@ static bool ieee80211_prep_hw_scan(struct ieee80211_sub_if_data *sdata) } local->hw_scan_req->req.n_channels = n_chans; - ieee80211_prepare_scan_chandef(&chandef, req->scan_width); + ieee80211_prepare_scan_chandef(&chandef); if (req->flags & NL80211_SCAN_FLAG_MIN_PREQ_CONTENT) flags |= IEEE80211_PROBE_FLAG_MIN_CONTENT; @@ -409,7 +398,7 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) struct ieee80211_sub_if_data *scan_sdata; struct ieee80211_sub_if_data *sdata; - lockdep_assert_held(&local->mtx); + lockdep_assert_wiphy(local->hw.wiphy); /* * It's ok to abort a not-yet-running scan (that @@ -424,7 +413,7 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) return; scan_sdata = rcu_dereference_protected(local->scan_sdata, - lockdep_is_held(&local->mtx)); + lockdep_is_held(&local->hw.wiphy->mtx)); if (hw_scan && !aborted && !ieee80211_hw_check(&local->hw, SINGLE_SCAN_ON_ALL_BANDS) && @@ -433,7 +422,7 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) rc = drv_hw_scan(local, rcu_dereference_protected(local->scan_sdata, - lockdep_is_held(&local->mtx)), + lockdep_is_held(&local->hw.wiphy->mtx)), local->hw_scan_req); if (rc == 0) @@ -450,7 +439,7 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) local->hw_scan_req = NULL; scan_req = rcu_dereference_protected(local->scan_req, - lockdep_is_held(&local->mtx)); + lockdep_is_held(&local->hw.wiphy->mtx)); RCU_INIT_POINTER(local->scan_req, NULL); RCU_INIT_POINTER(local->scan_sdata, NULL); @@ -555,20 +544,18 @@ static bool __ieee80211_can_leave_ch(struct ieee80211_sub_if_data *sdata) struct ieee80211_local *local = sdata->local; struct ieee80211_sub_if_data *sdata_iter; + lockdep_assert_wiphy(local->hw.wiphy); + if (!ieee80211_is_radar_required(local)) return true; if 
(!regulatory_pre_cac_allowed(local->hw.wiphy)) return false; - mutex_lock(&local->iflist_mtx); list_for_each_entry(sdata_iter, &local->interfaces, list) { - if (sdata_iter->wdev.cac_started) { - mutex_unlock(&local->iflist_mtx); + if (sdata_iter->wdev.cac_started) return false; - } } - mutex_unlock(&local->iflist_mtx); return true; } @@ -591,7 +578,7 @@ static bool ieee80211_can_scan(struct ieee80211_local *local, void ieee80211_run_deferred_scan(struct ieee80211_local *local) { - lockdep_assert_held(&local->mtx); + lockdep_assert_wiphy(local->hw.wiphy); if (!local->scan_req || local->scanning) return; @@ -599,7 +586,7 @@ void ieee80211_run_deferred_scan(struct ieee80211_local *local) if (!ieee80211_can_scan(local, rcu_dereference_protected( local->scan_sdata, - lockdep_is_held(&local->mtx)))) + lockdep_is_held(&local->hw.wiphy->mtx)))) return; wiphy_delayed_work_queue(local->hw.wiphy, &local->scan_work, @@ -644,7 +631,7 @@ static void ieee80211_scan_state_send_probe(struct ieee80211_local *local, u32 flags = 0, tx_flags; scan_req = rcu_dereference_protected(local->scan_req, - lockdep_is_held(&local->mtx)); + lockdep_is_held(&local->hw.wiphy->mtx)); tx_flags = IEEE80211_TX_INTFL_OFFCHAN_TX_OK; if (scan_req->no_cck) @@ -655,7 +642,7 @@ static void ieee80211_scan_state_send_probe(struct ieee80211_local *local, flags |= IEEE80211_PROBE_FLAG_RANDOM_SN; sdata = rcu_dereference_protected(local->scan_sdata, - lockdep_is_held(&local->mtx)); + lockdep_is_held(&local->hw.wiphy->mtx)); for (i = 0; i < scan_req->n_ssids; i++) ieee80211_send_scan_probe_req( @@ -680,7 +667,7 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata, bool hw_scan = local->ops->hw_scan; int rc; - lockdep_assert_held(&local->mtx); + lockdep_assert_wiphy(local->hw.wiphy); if (local->scan_req) return -EBUSY; @@ -860,12 +847,13 @@ static void ieee80211_scan_state_decision(struct ieee80211_local *local, enum mac80211_scan_state next_scan_state; struct cfg80211_scan_request *scan_req; + lockdep_assert_wiphy(local->hw.wiphy); + /* * check if at least one STA interface is associated, * check if at least one STA interface has pending tx frames * and grab the lowest used beacon interval */ - mutex_lock(&local->iflist_mtx); list_for_each_entry(sdata, &local->interfaces, list) { if (!ieee80211_sdata_running(sdata)) continue; @@ -881,10 +869,9 @@ static void ieee80211_scan_state_decision(struct ieee80211_local *local, } } } - mutex_unlock(&local->iflist_mtx); scan_req = rcu_dereference_protected(local->scan_req, - lockdep_is_held(&local->mtx)); + lockdep_is_held(&local->hw.wiphy->mtx)); next_chan = scan_req->channels[local->scan_channel_idx]; @@ -921,11 +908,10 @@ static void ieee80211_scan_state_set_channel(struct ieee80211_local *local, { int skip; struct ieee80211_channel *chan; - enum nl80211_bss_scan_width oper_scan_width; struct cfg80211_scan_request *scan_req; scan_req = rcu_dereference_protected(local->scan_req, - lockdep_is_held(&local->mtx)); + lockdep_is_held(&local->hw.wiphy->mtx)); skip = 0; chan = scan_req->channels[local->scan_channel_idx]; @@ -935,42 +921,21 @@ static void ieee80211_scan_state_set_channel(struct ieee80211_local *local, local->scan_chandef.freq1_offset = chan->freq_offset; local->scan_chandef.center_freq2 = 0; - /* For scanning on the S1G band, ignore scan_width (which is constant - * across all channels) for now since channel width is specific to each - * channel. Detect the required channel width here and likely revisit - * later. 
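The same locking simplification reaches the interface list here: walks of local->interfaces that previously took iflist_mtx now run under the wiphy mutex alone. Illustrative shape of the converted check, condensed from the hunk above:

    static bool ex_can_leave_ch(struct ieee80211_local *local)
    {
            struct ieee80211_sub_if_data *sdata_iter;

            lockdep_assert_wiphy(local->hw.wiphy);

            list_for_each_entry(sdata_iter, &local->interfaces, list) {
                    /* no mutex_unlock() needed on this early return */
                    if (sdata_iter->wdev.cac_started)
                            return false;
            }
            return true;
    }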
Maybe scan_width could be used to build the channel scan list? + /* For scanning on the S1G band, detect the channel width according to + * the channel being scanned. */ if (chan->band == NL80211_BAND_S1GHZ) { local->scan_chandef.width = ieee80211_s1g_channel_width(chan); goto set_channel; } - switch (scan_req->scan_width) { - case NL80211_BSS_CHAN_WIDTH_5: - local->scan_chandef.width = NL80211_CHAN_WIDTH_5; - break; - case NL80211_BSS_CHAN_WIDTH_10: - local->scan_chandef.width = NL80211_CHAN_WIDTH_10; - break; - default: - case NL80211_BSS_CHAN_WIDTH_20: - /* If scanning on oper channel, use whatever channel-type - * is currently in use. - */ - oper_scan_width = cfg80211_chandef_to_scan_width( - &local->_oper_chandef); - if (chan == local->_oper_chandef.chan && - oper_scan_width == scan_req->scan_width) - local->scan_chandef = local->_oper_chandef; - else - local->scan_chandef.width = NL80211_CHAN_WIDTH_20_NOHT; - break; - case NL80211_BSS_CHAN_WIDTH_1: - case NL80211_BSS_CHAN_WIDTH_2: - /* shouldn't get here, S1G handled above */ - WARN_ON(1); - break; - } + /* If scanning on oper channel, use whatever channel-type + * is currently in use. + */ + if (chan == local->_oper_chandef.chan) + local->scan_chandef = local->_oper_chandef; + else + local->scan_chandef.width = NL80211_CHAN_WIDTH_20_NOHT; set_channel: if (ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL)) @@ -1051,7 +1016,7 @@ void ieee80211_scan_work(struct wiphy *wiphy, struct wiphy_work *work) unsigned long next_delay = 0; bool aborted; - mutex_lock(&local->mtx); + lockdep_assert_wiphy(local->hw.wiphy); if (!ieee80211_can_run_worker(local)) { aborted = true; @@ -1059,9 +1024,9 @@ void ieee80211_scan_work(struct wiphy *wiphy, struct wiphy_work *work) } sdata = rcu_dereference_protected(local->scan_sdata, - lockdep_is_held(&local->mtx)); + lockdep_is_held(&local->hw.wiphy->mtx)); scan_req = rcu_dereference_protected(local->scan_req, - lockdep_is_held(&local->mtx)); + lockdep_is_held(&local->hw.wiphy->mtx)); /* When scanning on-channel, the first-callback means completed. 
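Several hunks in this file, like the minstrel_ht and radiotap changes elsewhere, are fallout from removing 5 and 10 MHz "scan width" support (NL80211_BSS_CHAN_WIDTH_5/10): scan chandef selection collapses to "reuse the operating chandef on the operating channel, else 20 MHz no-HT", and rate helpers lose their shift argument. Hedged before/after fragments, not complete functions:

    /* Frame duration helper: the trailing shift argument (derived
     * from 5/10 MHz chandefs) is gone.
     */
    ack_dur = ieee80211_frame_duration(sband->band, 10, 60, 1, 1);
                                    /* was: ..., 1, 1, 0); */

    /* Scan chandef selection, condensed from the hunk above. */
    if (chan == local->_oper_chandef.chan)
            local->scan_chandef = local->_oper_chandef;
    else
            local->scan_chandef.width = NL80211_CHAN_WIDTH_20_NOHT;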
*/ if (test_bit(SCAN_ONCHANNEL_SCANNING, &local->scanning)) { @@ -1075,7 +1040,7 @@ void ieee80211_scan_work(struct wiphy *wiphy, struct wiphy_work *work) } if (!sdata || !scan_req) - goto out; + return; if (!local->scanning) { int rc; @@ -1084,13 +1049,12 @@ void ieee80211_scan_work(struct wiphy *wiphy, struct wiphy_work *work) RCU_INIT_POINTER(local->scan_sdata, NULL); rc = __ieee80211_start_scan(sdata, scan_req); - if (rc) { - /* need to complete scan in cfg80211 */ - rcu_assign_pointer(local->scan_req, scan_req); - aborted = true; - goto out_complete; - } else - goto out; + if (!rc) + return; + /* need to complete scan in cfg80211 */ + rcu_assign_pointer(local->scan_req, scan_req); + aborted = true; + goto out_complete; } clear_bit(SCAN_BEACON_WAIT, &local->scanning); @@ -1138,37 +1102,30 @@ void ieee80211_scan_work(struct wiphy *wiphy, struct wiphy_work *work) wiphy_delayed_work_queue(local->hw.wiphy, &local->scan_work, next_delay); - goto out; + return; out_complete: __ieee80211_scan_completed(&local->hw, aborted); -out: - mutex_unlock(&local->mtx); } int ieee80211_request_scan(struct ieee80211_sub_if_data *sdata, struct cfg80211_scan_request *req) { - int res; + lockdep_assert_wiphy(sdata->local->hw.wiphy); - mutex_lock(&sdata->local->mtx); - res = __ieee80211_start_scan(sdata, req); - mutex_unlock(&sdata->local->mtx); - - return res; + return __ieee80211_start_scan(sdata, req); } int ieee80211_request_ibss_scan(struct ieee80211_sub_if_data *sdata, const u8 *ssid, u8 ssid_len, struct ieee80211_channel **channels, - unsigned int n_channels, - enum nl80211_bss_scan_width scan_width) + unsigned int n_channels) { struct ieee80211_local *local = sdata->local; int ret = -EBUSY, i, n_ch = 0; enum nl80211_band band; - mutex_lock(&local->mtx); + lockdep_assert_wiphy(local->hw.wiphy); /* busy scanning */ if (local->scan_req) @@ -1219,13 +1176,11 @@ int ieee80211_request_ibss_scan(struct ieee80211_sub_if_data *sdata, local->int_scan_req->ssids = &local->scan_ssid; local->int_scan_req->n_ssids = 1; - local->int_scan_req->scan_width = scan_width; memcpy(local->int_scan_req->ssids[0].ssid, ssid, IEEE80211_MAX_SSID_LEN); local->int_scan_req->ssids[0].ssid_len = ssid_len; ret = __ieee80211_start_scan(sdata, sdata->local->int_scan_req); unlock: - mutex_unlock(&local->mtx); return ret; } @@ -1252,9 +1207,8 @@ void ieee80211_scan_cancel(struct ieee80211_local *local) * after the scan was completed/aborted. 
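ieee80211_scan_work() shows the callback side of the conversion: it is now a wiphy_work, and wiphy_(delayed_)work items are invoked with wiphy->mtx already held, so the handler replaces mutex_lock()/unlock() with an assertion and plain returns. A sketch of the assumed calling convention:

    static void ex_scan_work(struct wiphy *wiphy, struct wiphy_work *work)
    {
            struct ieee80211_local *local =
                    container_of(work, struct ieee80211_local,
                                 scan_work.work);

            /* the wiphy_work infrastructure took wiphy->mtx for us */
            lockdep_assert_wiphy(local->hw.wiphy);

            /* ... scan state machine, no goto-out unlock dance ... */
    }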
*/ - mutex_lock(&local->mtx); if (!local->scan_req) - goto out; + return; /* * We have a scan running and the driver already reported completion, @@ -1264,7 +1218,7 @@ void ieee80211_scan_cancel(struct ieee80211_local *local) if (test_bit(SCAN_HW_SCANNING, &local->scanning) && test_bit(SCAN_COMPLETED, &local->scanning)) { set_bit(SCAN_HW_CANCELLED, &local->scanning); - goto out; + return; } if (test_bit(SCAN_HW_SCANNING, &local->scanning)) { @@ -1276,16 +1230,14 @@ void ieee80211_scan_cancel(struct ieee80211_local *local) if (local->ops->cancel_hw_scan) drv_cancel_hw_scan(local, rcu_dereference_protected(local->scan_sdata, - lockdep_is_held(&local->mtx))); - goto out; + lockdep_is_held(&local->hw.wiphy->mtx))); + return; } wiphy_delayed_work_cancel(local->hw.wiphy, &local->scan_work); /* and clean up */ memset(&local->scan_info, 0, sizeof(local->scan_info)); __ieee80211_scan_completed(&local->hw, true); -out: - mutex_unlock(&local->mtx); } int __ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata, @@ -1300,9 +1252,9 @@ int __ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata, u8 *ie; u32 flags = 0; - iebufsz = local->scan_ies_len + req->ie_len; + lockdep_assert_wiphy(local->hw.wiphy); - lockdep_assert_held(&local->mtx); + iebufsz = local->scan_ies_len + req->ie_len; if (!local->ops->sched_scan_start) return -ENOTSUPP; @@ -1324,7 +1276,7 @@ int __ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata, goto out; } - ieee80211_prepare_scan_chandef(&chandef, req->scan_width); + ieee80211_prepare_scan_chandef(&chandef); ieee80211_build_preq_ies(sdata, ie, num_bands * iebufsz, &sched_scan_ies, req->ie, @@ -1353,19 +1305,13 @@ int ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata, struct cfg80211_sched_scan_request *req) { struct ieee80211_local *local = sdata->local; - int ret; - mutex_lock(&local->mtx); + lockdep_assert_wiphy(local->hw.wiphy); - if (rcu_access_pointer(local->sched_scan_sdata)) { - mutex_unlock(&local->mtx); + if (rcu_access_pointer(local->sched_scan_sdata)) return -EBUSY; - } - - ret = __ieee80211_request_sched_scan_start(sdata, req); - mutex_unlock(&local->mtx); - return ret; + return __ieee80211_request_sched_scan_start(sdata, req); } int ieee80211_request_sched_scan_stop(struct ieee80211_local *local) @@ -1373,25 +1319,21 @@ int ieee80211_request_sched_scan_stop(struct ieee80211_local *local) struct ieee80211_sub_if_data *sched_scan_sdata; int ret = -ENOENT; - mutex_lock(&local->mtx); + lockdep_assert_wiphy(local->hw.wiphy); - if (!local->ops->sched_scan_stop) { - ret = -ENOTSUPP; - goto out; - } + if (!local->ops->sched_scan_stop) + return -ENOTSUPP; /* We don't want to restart sched scan anymore. 
*/ RCU_INIT_POINTER(local->sched_scan_req, NULL); sched_scan_sdata = rcu_dereference_protected(local->sched_scan_sdata, - lockdep_is_held(&local->mtx)); + lockdep_is_held(&local->hw.wiphy->mtx)); if (sched_scan_sdata) { ret = drv_sched_scan_stop(local, sched_scan_sdata); if (!ret) RCU_INIT_POINTER(local->sched_scan_sdata, NULL); } -out: - mutex_unlock(&local->mtx); return ret; } @@ -1408,20 +1350,16 @@ EXPORT_SYMBOL(ieee80211_sched_scan_results); void ieee80211_sched_scan_end(struct ieee80211_local *local) { - mutex_lock(&local->mtx); + lockdep_assert_wiphy(local->hw.wiphy); - if (!rcu_access_pointer(local->sched_scan_sdata)) { - mutex_unlock(&local->mtx); + if (!rcu_access_pointer(local->sched_scan_sdata)) return; - } RCU_INIT_POINTER(local->sched_scan_sdata, NULL); /* If sched scan was aborted by the driver. */ RCU_INIT_POINTER(local->sched_scan_req, NULL); - mutex_unlock(&local->mtx); - cfg80211_sched_scan_stopped_locked(local->hw.wiphy, 0); } diff --git a/net/mac80211/spectmgmt.c b/net/mac80211/spectmgmt.c index 871cdac2d0..55959b0b24 100644 --- a/net/mac80211/spectmgmt.c +++ b/net/mac80211/spectmgmt.c @@ -9,7 +9,7 @@ * Copyright 2007, Michael Wu <flamingice@sourmilk.net> * Copyright 2007-2008, Intel Corporation * Copyright 2008, Johannes Berg <johannes@sipsolutions.net> - * Copyright (C) 2018, 2020, 2022 Intel Corporation + * Copyright (C) 2018, 2020, 2022-2023 Intel Corporation */ #include <linux/ieee80211.h> @@ -33,12 +33,14 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata, struct cfg80211_chan_def new_vht_chandef = {}; const struct ieee80211_sec_chan_offs_ie *sec_chan_offs; const struct ieee80211_wide_bw_chansw_ie *wide_bw_chansw_ie; + const struct ieee80211_bandwidth_indication *bwi; int secondary_channel_offset = -1; memset(csa_ie, 0, sizeof(*csa_ie)); sec_chan_offs = elems->sec_chan_offs; wide_bw_chansw_ie = elems->wide_bw_chansw_ie; + bwi = elems->bandwidth_indication; if (conn_flags & (IEEE80211_CONN_DISABLE_HT | IEEE80211_CONN_DISABLE_40MHZ)) { @@ -132,7 +134,14 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata, break; } - if (wide_bw_chansw_ie) { + if (bwi) { + /* start with the CSA one */ + new_vht_chandef = csa_ie->chandef; + /* and update the width accordingly */ + /* FIXME: support 160/320 */ + ieee80211_chandef_eht_oper(&bwi->info, true, true, + &new_vht_chandef); + } else if (wide_bw_chansw_ie) { u8 new_seg1 = wide_bw_chansw_ie->new_center_freq_seg1; struct ieee80211_vht_operation vht_oper = { .chan_width = diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index e112300caa..bcf3f727fc 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -88,7 +88,6 @@ static const struct rhashtable_params link_sta_rht_params = { .max_size = CONFIG_MAC80211_STA_HASH_MAX_SIZE, }; -/* Caller must hold local->sta_mtx */ static int sta_info_hash_del(struct ieee80211_local *local, struct sta_info *sta) { @@ -99,19 +98,36 @@ static int sta_info_hash_del(struct ieee80211_local *local, static int link_sta_info_hash_add(struct ieee80211_local *local, struct link_sta_info *link_sta) { - lockdep_assert_held(&local->sta_mtx); + lockdep_assert_wiphy(local->hw.wiphy); + return rhltable_insert(&local->link_sta_hash, - &link_sta->link_hash_node, - link_sta_rht_params); + &link_sta->link_hash_node, link_sta_rht_params); } static int link_sta_info_hash_del(struct ieee80211_local *local, struct link_sta_info *link_sta) { - lockdep_assert_held(&local->sta_mtx); + lockdep_assert_wiphy(local->hw.wiphy); + return 
rhltable_remove(&local->link_sta_hash, - &link_sta->link_hash_node, - link_sta_rht_params); + &link_sta->link_hash_node, link_sta_rht_params); +} + +void ieee80211_purge_sta_txqs(struct sta_info *sta) +{ + struct ieee80211_local *local = sta->sdata->local; + int i; + + for (i = 0; i < ARRAY_SIZE(sta->sta.txq); i++) { + struct txq_info *txqi; + + if (!sta->sta.txq[i]) + continue; + + txqi = to_txq_info(sta->sta.txq[i]); + + ieee80211_txq_purge(local, txqi); + } } static void __cleanup_single_sta(struct sta_info *sta) @@ -140,16 +156,7 @@ static void __cleanup_single_sta(struct sta_info *sta) atomic_dec(&ps->num_sta_ps); } - for (i = 0; i < ARRAY_SIZE(sta->sta.txq); i++) { - struct txq_info *txqi; - - if (!sta->sta.txq[i]) - continue; - - txqi = to_txq_info(sta->sta.txq[i]); - - ieee80211_txq_purge(local, txqi); - } + ieee80211_purge_sta_txqs(sta); for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { local->total_ps_buffered -= skb_queue_len(&sta->ps_tx_buf[ac]); @@ -331,7 +338,7 @@ struct sta_info *sta_info_get_by_idx(struct ieee80211_sub_if_data *sdata, int i = 0; list_for_each_entry_rcu(sta, &local->sta_list, list, - lockdep_is_held(&local->sta_mtx)) { + lockdep_is_held(&local->hw.wiphy->mtx)) { if (sdata != sta->sdata) continue; if (i < idx) { @@ -355,10 +362,9 @@ static void sta_remove_link(struct sta_info *sta, unsigned int link_id, struct sta_link_alloc *alloc = NULL; struct link_sta_info *link_sta; - link_sta = rcu_access_pointer(sta->link[link_id]); - if (link_sta != &sta->deflink) - lockdep_assert_held(&sta->local->sta_mtx); + lockdep_assert_wiphy(sta->local->hw.wiphy); + link_sta = rcu_access_pointer(sta->link[link_id]); if (WARN_ON(!link_sta)) return; @@ -440,7 +446,6 @@ void sta_info_free(struct ieee80211_local *local, struct sta_info *sta) kfree(sta); } -/* Caller must hold local->sta_mtx */ static int sta_info_hash_add(struct ieee80211_local *local, struct sta_info *sta) { @@ -559,8 +564,7 @@ __sta_info_alloc(struct ieee80211_sub_if_data *sdata, spin_lock_init(&sta->lock); spin_lock_init(&sta->ps_lock); INIT_WORK(&sta->drv_deliver_wk, sta_deliver_ps_frames); - INIT_WORK(&sta->ampdu_mlme.work, ieee80211_ba_session_work); - mutex_init(&sta->ampdu_mlme.mtx); + wiphy_work_init(&sta->ampdu_mlme.work, ieee80211_ba_session_work); #ifdef CONFIG_MAC80211_MESH if (ieee80211_vif_is_mesh(&sdata->vif)) { sta->mesh = kzalloc(sizeof(*sta->mesh), gfp); @@ -720,6 +724,8 @@ static int sta_info_insert_check(struct sta_info *sta) { struct ieee80211_sub_if_data *sdata = sta->sdata; + lockdep_assert_wiphy(sdata->local->hw.wiphy); + /* * Can't be a WARN_ON because it can be triggered through a race: * something inserts a STA (on one CPU) without holding the RTNL @@ -737,7 +743,6 @@ static int sta_info_insert_check(struct sta_info *sta) * for correctness. 
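ieee80211_purge_sta_txqs(), introduced above, lifts the per-TXQ purge loop out of __cleanup_single_sta() into a named, reusable helper. A hypothetical caller (ex_drop_sta_traffic() is a stand-in; the only caller visible in this hunk is __cleanup_single_sta()):

    /* Drop everything still queued for a station, e.g. during a
     * teardown or restart path; reuses the helper added above.
     */
    static void ex_drop_sta_traffic(struct sta_info *sta)
    {
            ieee80211_purge_sta_txqs(sta);
    }

Beyond the one-line call sites, exporting the loop under a descriptive name also makes the intent greppable from other purge paths.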
*/ rcu_read_lock(); - lockdep_assert_held(&sdata->local->sta_mtx); if (ieee80211_hw_check(&sdata->local->hw, NEEDS_UNIQUE_STA_ADDR) && ieee80211_find_sta_by_ifaddr(&sdata->local->hw, sta->addr, NULL)) { rcu_read_unlock(); @@ -811,11 +816,6 @@ ieee80211_recalc_p2p_go_ps_allowed(struct ieee80211_sub_if_data *sdata) } } -/* - * should be called with sta_mtx locked - * this function replaces the mutex lock - * with a RCU lock - */ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU) { struct ieee80211_local *local = sta->local; @@ -823,7 +823,7 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU) struct station_info *sinfo = NULL; int err = 0; - lockdep_assert_held(&local->sta_mtx); + lockdep_assert_wiphy(local->hw.wiphy); /* check if STA exists already */ if (sta_info_get_bss(sdata, sta->sta.addr)) { @@ -887,7 +887,7 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU) struct link_sta_info *link_sta; link_sta = rcu_dereference_protected(sta->link[i], - lockdep_is_held(&local->sta_mtx)); + lockdep_is_held(&local->hw.wiphy->mtx)); if (!link_sta) continue; @@ -909,11 +909,12 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU) /* move reference to rcu-protected */ rcu_read_lock(); - mutex_unlock(&local->sta_mtx); if (ieee80211_vif_is_mesh(&sdata->vif)) mesh_accept_plinks_update(sdata); + ieee80211_check_fast_xmit(sta); + return 0; out_remove: if (sta->sta.valid_links) @@ -925,7 +926,6 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU) synchronize_net(); out_cleanup: cleanup_single_sta(sta); - mutex_unlock(&local->sta_mtx); kfree(sinfo); rcu_read_lock(); return err; @@ -937,13 +937,11 @@ int sta_info_insert_rcu(struct sta_info *sta) __acquires(RCU) int err; might_sleep(); - - mutex_lock(&local->sta_mtx); + lockdep_assert_wiphy(local->hw.wiphy); err = sta_info_insert_check(sta); if (err) { sta_info_free(local, sta); - mutex_unlock(&local->sta_mtx); rcu_read_lock(); return err; } @@ -1222,7 +1220,7 @@ static int __must_check __sta_info_destroy_part1(struct sta_info *sta) local = sta->local; sdata = sta->sdata; - lockdep_assert_held(&local->sta_mtx); + lockdep_assert_wiphy(local->hw.wiphy); /* * Before removing the station from the driver and @@ -1247,7 +1245,7 @@ static int __must_check __sta_info_destroy_part1(struct sta_info *sta) continue; link_sta = rcu_dereference_protected(sta->link[i], - lockdep_is_held(&local->sta_mtx)); + lockdep_is_held(&local->hw.wiphy->mtx)); link_sta_info_hash_del(local, link_sta); } @@ -1282,6 +1280,8 @@ static int _sta_info_move_state(struct sta_info *sta, enum ieee80211_sta_state new_state, bool recalc) { + struct ieee80211_local *local = sta->local; + might_sleep(); if (sta->sta_state == new_state) @@ -1357,6 +1357,24 @@ static int _sta_info_move_state(struct sta_info *sta, } else if (sta->sta_state == IEEE80211_STA_AUTHORIZED) { ieee80211_vif_dec_num_mcast(sta->sdata); clear_bit(WLAN_STA_AUTHORIZED, &sta->_flags); + + /* + * If we have encryption offload, flush (station) queues + * (after ensuring concurrent TX completed) so we won't + * transmit anything later unencrypted if/when keys are + * also removed, which might otherwise happen depending + * on how the hardware offload works. 
+ */ + if (local->ops->set_key) { + synchronize_net(); + if (local->ops->flush_sta) + drv_flush_sta(local, sta->sdata, sta); + else + ieee80211_flush_queues(local, + sta->sdata, + false); + } + ieee80211_clear_fast_xmit(sta); ieee80211_clear_fast_rx(sta); } @@ -1400,26 +1418,28 @@ static void __sta_info_destroy_part2(struct sta_info *sta, bool recalc) * after _part1 and before _part2! */ + /* + * There's a potential race in _part1 where we set WLAN_STA_BLOCK_BA + * but someone might have just gotten past a check, and not yet into + * queuing the work/creating the data/etc. + * + * Do another round of destruction so that the worker is certainly + * canceled before we later free the station. + * + * Since this is after synchronize_rcu()/synchronize_net() we're now + * certain that nobody can actually hold a reference to the STA and + * be calling e.g. ieee80211_start_tx_ba_session(). + */ + ieee80211_sta_tear_down_BA_sessions(sta, AGG_STOP_DESTROY_STA); + might_sleep(); - lockdep_assert_held(&local->sta_mtx); + lockdep_assert_wiphy(local->hw.wiphy); if (sta->sta_state == IEEE80211_STA_AUTHORIZED) { ret = _sta_info_move_state(sta, IEEE80211_STA_ASSOC, recalc); WARN_ON_ONCE(ret); } - /* Flush queues before removing keys, as that might remove them - * from hardware, and then depending on the offload method, any - * frames sitting on hardware queues might be sent out without - * any encryption at all. - */ - if (local->ops->set_key) { - if (local->ops->flush_sta) - drv_flush_sta(local, sta->sdata, sta); - else - ieee80211_flush_queues(local, sta->sdata, false); - } - /* now keys can no longer be reached */ ieee80211_free_sta_keys(local, sta); @@ -1477,28 +1497,22 @@ int __must_check __sta_info_destroy(struct sta_info *sta) int sta_info_destroy_addr(struct ieee80211_sub_if_data *sdata, const u8 *addr) { struct sta_info *sta; - int ret; - mutex_lock(&sdata->local->sta_mtx); - sta = sta_info_get(sdata, addr); - ret = __sta_info_destroy(sta); - mutex_unlock(&sdata->local->sta_mtx); + lockdep_assert_wiphy(sdata->local->hw.wiphy); - return ret; + sta = sta_info_get(sdata, addr); + return __sta_info_destroy(sta); } int sta_info_destroy_addr_bss(struct ieee80211_sub_if_data *sdata, const u8 *addr) { struct sta_info *sta; - int ret; - mutex_lock(&sdata->local->sta_mtx); - sta = sta_info_get_bss(sdata, addr); - ret = __sta_info_destroy(sta); - mutex_unlock(&sdata->local->sta_mtx); + lockdep_assert_wiphy(sdata->local->hw.wiphy); - return ret; + sta = sta_info_get_bss(sdata, addr); + return __sta_info_destroy(sta); } static void sta_info_cleanup(struct timer_list *t) @@ -1538,7 +1552,6 @@ int sta_info_init(struct ieee80211_local *local) } spin_lock_init(&local->tim_lock); - mutex_init(&local->sta_mtx); INIT_LIST_HEAD(&local->sta_list); timer_setup(&local->sta_cleanup, sta_info_cleanup, 0); @@ -1561,11 +1574,11 @@ int __sta_info_flush(struct ieee80211_sub_if_data *sdata, bool vlans) int ret = 0; might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); WARN_ON(vlans && sdata->vif.type != NL80211_IFTYPE_AP); WARN_ON(vlans && !sdata->bss); - mutex_lock(&local->sta_mtx); list_for_each_entry_safe(sta, tmp, &local->sta_list, list) { if (sdata == sta->sdata || (vlans && sdata->bss == sta->sdata->bss)) { @@ -1589,7 +1602,6 @@ int __sta_info_flush(struct ieee80211_sub_if_data *sdata, bool vlans) if (!support_p2p_ps) ieee80211_recalc_p2p_go_ps_allowed(sdata); } - mutex_unlock(&local->sta_mtx); return ret; } @@ -1600,7 +1612,7 @@ void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata, struct ieee80211_local 
*local = sdata->local; struct sta_info *sta, *tmp; - mutex_lock(&local->sta_mtx); + lockdep_assert_wiphy(local->hw.wiphy); list_for_each_entry_safe(sta, tmp, &local->sta_list, list) { unsigned long last_active = ieee80211_sta_last_active(sta); @@ -1619,8 +1631,6 @@ void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata, WARN_ON(__sta_info_destroy(sta)); } } - - mutex_unlock(&local->sta_mtx); } struct ieee80211_sta *ieee80211_find_sta_by_ifaddr(struct ieee80211_hw *hw, @@ -2714,7 +2724,8 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo, } if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_TX_BITRATE)) && - !sta->sta.valid_links) { + !sta->sta.valid_links && + ieee80211_rate_valid(&sta->deflink.tx_stats.last_rate)) { sta_set_rate_info_tx(sta, &sta->deflink.tx_stats.last_rate, &sinfo->txrate); sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_BITRATE); @@ -2875,7 +2886,9 @@ int ieee80211_sta_allocate_link(struct sta_info *sta, unsigned int link_id) struct sta_link_alloc *alloc; int ret; - lockdep_assert_held(&sdata->local->sta_mtx); + lockdep_assert_wiphy(sdata->local->hw.wiphy); + + WARN_ON(!test_sta_flag(sta, WLAN_STA_INSERTED)); /* must represent an MLD from the start */ if (WARN_ON(!sta->sta.valid_links)) @@ -2904,7 +2917,9 @@ int ieee80211_sta_allocate_link(struct sta_info *sta, unsigned int link_id) void ieee80211_sta_free_link(struct sta_info *sta, unsigned int link_id) { - lockdep_assert_held(&sta->sdata->local->sta_mtx); + lockdep_assert_wiphy(sta->sdata->local->hw.wiphy); + + WARN_ON(!test_sta_flag(sta, WLAN_STA_INSERTED)); sta_remove_link(sta, link_id, false); } @@ -2918,7 +2933,7 @@ int ieee80211_sta_activate_link(struct sta_info *sta, unsigned int link_id) int ret; link_sta = rcu_dereference_protected(sta->link[link_id], - lockdep_is_held(&sdata->local->sta_mtx)); + lockdep_is_held(&sdata->local->hw.wiphy->mtx)); if (WARN_ON(old_links == new_links || !link_sta)) return -EINVAL; @@ -2933,7 +2948,7 @@ int ieee80211_sta_activate_link(struct sta_info *sta, unsigned int link_id) sta->sta.valid_links = new_links; - if (!test_sta_flag(sta, WLAN_STA_INSERTED)) + if (WARN_ON(!test_sta_flag(sta, WLAN_STA_INSERTED))) goto hash; ieee80211_recalc_min_chandef(sdata, link_id); @@ -2962,11 +2977,11 @@ void ieee80211_sta_remove_link(struct sta_info *sta, unsigned int link_id) struct ieee80211_sub_if_data *sdata = sta->sdata; u16 old_links = sta->sta.valid_links; - lockdep_assert_held(&sdata->local->sta_mtx); + lockdep_assert_wiphy(sdata->local->hw.wiphy); sta->sta.valid_links &= ~BIT(link_id); - if (test_sta_flag(sta, WLAN_STA_INSERTED)) + if (!WARN_ON(!test_sta_flag(sta, WLAN_STA_INSERTED))) drv_change_sta_links(sdata->local, sdata, &sta->sta, old_links, sta->sta.valid_links); @@ -3001,7 +3016,7 @@ bool lockdep_sta_mutex_held(struct ieee80211_sta *pubsta) { struct sta_info *sta = container_of(pubsta, struct sta_info, sta); - return lockdep_is_held(&sta->local->sta_mtx); + return lockdep_is_held(&sta->local->hw.wiphy->mtx); } EXPORT_SYMBOL(lockdep_sta_mutex_held); #endif diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 195b563132..7acf2223e4 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -3,7 +3,7 @@ * Copyright 2002-2005, Devicescape Software, Inc. 
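One subtle behavioural change in the sta_info.c hunks above: the queue flush that used to sit in __sta_info_destroy_part2() moves into _sta_info_move_state(), into the AUTHORIZED to ASSOC transition. With encryption offload, keys may be removed right after a station loses authorization, so the flush must happen while the keys still exist; synchronize_net() first ensures concurrent TX has drained. Condensed restatement of the new ordering:

    /* On AUTHORIZED -> ASSOC, with hardware crypto in play: */
    if (local->ops->set_key) {
            synchronize_net();      /* let in-flight TX finish */
            if (local->ops->flush_sta)
                    drv_flush_sta(local, sta->sdata, sta);
            else
                    ieee80211_flush_queues(local, sta->sdata, false);
    }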
* Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright(c) 2015-2017 Intel Deutschland GmbH - * Copyright(c) 2020-2022 Intel Corporation + * Copyright(c) 2020-2023 Intel Corporation */ #ifndef STA_INFO_H @@ -259,9 +259,6 @@ struct tid_ampdu_rx { /** * struct sta_ampdu_mlme - STA aggregation information. * - * @mtx: mutex to protect all TX data (except non-NULL assignments - * to tid_tx[idx], which are protected by the sta spinlock) - * tid_start_tx is also protected by sta->lock. * @tid_rx: aggregation info for Rx per TID -- RCU protected * @tid_rx_token: dialog tokens for valid aggregation sessions * @tid_rx_timer_expired: bitmap indicating on which TIDs the @@ -275,13 +272,13 @@ struct tid_ampdu_rx { * unexpected aggregation related frames outside a session * @work: work struct for starting/stopping aggregation * @tid_tx: aggregation info for Tx per TID - * @tid_start_tx: sessions where start was requested + * @tid_start_tx: sessions where start was requested, not just protected + * by wiphy mutex but also sta->lock * @last_addba_req_time: timestamp of the last addBA request. * @addba_req_num: number of times addBA request has been sent. * @dialog_token_allocator: dialog token enumerator for each new session; */ struct sta_ampdu_mlme { - struct mutex mtx; /* rx */ struct tid_ampdu_rx __rcu *tid_rx[IEEE80211_NUM_TIDS]; u8 tid_rx_token[IEEE80211_NUM_TIDS]; @@ -291,7 +288,7 @@ struct sta_ampdu_mlme { unsigned long agg_session_valid[BITS_TO_LONGS(IEEE80211_NUM_TIDS)]; unsigned long unexpected_agg[BITS_TO_LONGS(IEEE80211_NUM_TIDS)]; /* tx */ - struct work_struct work; + struct wiphy_work work; struct tid_ampdu_tx __rcu *tid_tx[IEEE80211_NUM_TIDS]; struct tid_ampdu_tx *tid_start_tx[IEEE80211_NUM_TIDS]; unsigned long last_addba_req_time[IEEE80211_NUM_TIDS]; @@ -618,8 +615,6 @@ struct link_sta_info { * @sta: station information we share with the driver * @sta_state: duplicates information about station state (for debug) * @rcu_head: RCU head used for freeing this station struct - * @cur_max_bandwidth: maximum bandwidth to use for TX to the station, - * taken from HT/VHT capabilities or VHT operating mode notification * @cparams: CoDel parameters for this station. 
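The sta_ampdu_mlme documentation changes here pair with the sta_info.c hunks: the aggregation work item becomes a wiphy_work and the per-station ampdu mutex is deleted, its role absorbed by the wiphy mutex. A sketch of the member conversion and its init/queue calls, assuming the surrounding mac80211 types:

    struct sta_ampdu_mlme {
            /* was: struct mutex mtx; -- now covered by wiphy->mtx */
            struct wiphy_work work; /* was: struct work_struct */
            /* ... */
    };

    wiphy_work_init(&sta->ampdu_mlme.work, ieee80211_ba_session_work);
    wiphy_work_queue(local->hw.wiphy, &sta->ampdu_mlme.work);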
* @reserved_tid: reserved TID (if any, otherwise IEEE80211_TID_UNRESERVED) * @amsdu_mesh_control: track the mesh A-MSDU format used by the peer: @@ -796,13 +791,10 @@ static inline void sta_info_pre_move_state(struct sta_info *sta, void ieee80211_assign_tid_tx(struct sta_info *sta, int tid, struct tid_ampdu_tx *tid_tx); -static inline struct tid_ampdu_tx * -rcu_dereference_protected_tid_tx(struct sta_info *sta, int tid) -{ - return rcu_dereference_protected(sta->ampdu_mlme.tid_tx[tid], - lockdep_is_held(&sta->lock) || - lockdep_is_held(&sta->ampdu_mlme.mtx)); -} +#define rcu_dereference_protected_tid_tx(sta, tid) \ + rcu_dereference_protected((sta)->ampdu_mlme.tid_tx[tid], \ + lockdep_is_held(&(sta)->lock) || \ + lockdep_is_held(&(sta)->local->hw.wiphy->mtx)); /* Maximum number of frames to buffer per power saving station per AC */ #define STA_MAX_TX_BUFFER 64 @@ -827,7 +819,7 @@ struct sta_info *sta_info_get(struct ieee80211_sub_if_data *sdata, struct sta_info *sta_info_get_bss(struct ieee80211_sub_if_data *sdata, const u8 *addr); -/* user must hold sta_mtx or be in RCU critical section */ +/* user must hold wiphy mutex or be in RCU critical section */ struct sta_info *sta_info_get_by_addrs(struct ieee80211_local *local, const u8 *sta_addr, const u8 *vif_addr); diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 44d83da60a..1708b33cdc 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -184,8 +184,6 @@ static void ieee80211_check_pending_bar(struct sta_info *sta, u8 *addr, u8 tid) static void ieee80211_frame_acked(struct sta_info *sta, struct sk_buff *skb) { struct ieee80211_mgmt *mgmt = (void *) skb->data; - struct ieee80211_local *local = sta->local; - struct ieee80211_sub_if_data *sdata = sta->sdata; if (ieee80211_is_data_qos(mgmt->frame_control)) { struct ieee80211_hdr *hdr = (void *) skb->data; @@ -194,39 +192,6 @@ static void ieee80211_frame_acked(struct sta_info *sta, struct sk_buff *skb) ieee80211_check_pending_bar(sta, hdr->addr1, tid); } - - if (ieee80211_is_action(mgmt->frame_control) && - !ieee80211_has_protected(mgmt->frame_control) && - mgmt->u.action.category == WLAN_CATEGORY_HT && - mgmt->u.action.u.ht_smps.action == WLAN_HT_ACTION_SMPS && - ieee80211_sdata_running(sdata)) { - enum ieee80211_smps_mode smps_mode; - - switch (mgmt->u.action.u.ht_smps.smps_control) { - case WLAN_HT_SMPS_CONTROL_DYNAMIC: - smps_mode = IEEE80211_SMPS_DYNAMIC; - break; - case WLAN_HT_SMPS_CONTROL_STATIC: - smps_mode = IEEE80211_SMPS_STATIC; - break; - case WLAN_HT_SMPS_CONTROL_DISABLED: - default: /* shouldn't happen since we don't send that */ - smps_mode = IEEE80211_SMPS_OFF; - break; - } - - if (sdata->vif.type == NL80211_IFTYPE_STATION) { - /* - * This update looks racy, but isn't -- if we come - * here we've definitely got a station that we're - * talking to, and on a managed interface that can - * only be the AP. And the only other place updating - * this variable in managed mode is before association. 
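rcu_dereference_protected_tid_tx() turns from a static inline into a macro above, presumably so the lockdep expression can name the wiphy mutex without header-ordering trouble in sta_info.h. Restated below; the trailing semicolon visible in the hunk is dropped here, since it would break use of the macro inside a larger expression:

    #define rcu_dereference_protected_tid_tx(sta, tid)                    \
            rcu_dereference_protected((sta)->ampdu_mlme.tid_tx[tid],      \
                                      lockdep_is_held(&(sta)->lock) ||    \
                                      lockdep_is_held(&(sta)->local->hw.wiphy->mtx))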
- */ - sdata->deflink.smps_mode = smps_mode; - ieee80211_queue_work(&local->hw, &sdata->recalc_smps); - } - } } static void ieee80211_set_bar_pending(struct sta_info *sta, u8 tid, u16 ssn) @@ -291,7 +256,7 @@ static int ieee80211_tx_radiotap_len(struct ieee80211_tx_info *info, static void ieee80211_add_tx_radiotap_header(struct ieee80211_local *local, struct sk_buff *skb, int retry_count, - int rtap_len, int shift, + int rtap_len, struct ieee80211_tx_status *status) { struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); @@ -342,7 +307,7 @@ ieee80211_add_tx_radiotap_header(struct ieee80211_local *local, if (legacy_rate) { rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_RATE)); - *pos = DIV_ROUND_UP(legacy_rate, 5 * (1 << shift)); + *pos = DIV_ROUND_UP(legacy_rate, 5); /* padding for tx flags */ pos += 2; } @@ -633,7 +598,7 @@ static void ieee80211_report_ack_skb(struct ieee80211_local *local, unsigned long flags; spin_lock_irqsave(&local->ack_status_lock, flags); - skb = idr_remove(&local->ack_status_frames, info->ack_frame_id); + skb = idr_remove(&local->ack_status_frames, info->status_data); spin_unlock_irqrestore(&local->ack_status_lock, flags); if (!skb) @@ -695,6 +660,42 @@ static void ieee80211_report_ack_skb(struct ieee80211_local *local, } } +static void ieee80211_handle_smps_status(struct ieee80211_sub_if_data *sdata, + bool acked, u16 status_data) +{ + u16 sub_data = u16_get_bits(status_data, IEEE80211_STATUS_SUBDATA_MASK); + enum ieee80211_smps_mode smps_mode = sub_data & 3; + int link_id = (sub_data >> 2); + struct ieee80211_link_data *link; + + if (!sdata || !ieee80211_sdata_running(sdata)) + return; + + if (!acked) + return; + + if (sdata->vif.type != NL80211_IFTYPE_STATION) + return; + + if (WARN(link_id >= ARRAY_SIZE(sdata->link), + "bad SMPS status link: %d\n", link_id)) + return; + + link = rcu_dereference(sdata->link[link_id]); + if (!link) + return; + + /* + * This update looks racy, but isn't, the only other place + * updating this variable is in managed mode before assoc, + * and we have to be associated to have a status from the + * action frame TX, since we cannot send it while we're not + * associated yet. 
+ */ + link->smps_mode = smps_mode; + wiphy_work_queue(sdata->local->hw.wiphy, &link->u.mgd.recalc_smps); +} + static void ieee80211_report_used_skb(struct ieee80211_local *local, struct sk_buff *skb, bool dropped, ktime_t ack_hwtstamp) @@ -730,12 +731,9 @@ static void ieee80211_report_used_skb(struct ieee80211_local *local, if (!sdata) { skb->dev = NULL; } else if (!dropped) { - unsigned int hdr_size = - ieee80211_hdrlen(hdr->frame_control); - /* Check to see if packet is a TDLS teardown packet */ if (ieee80211_is_data(hdr->frame_control) && - (ieee80211_get_tdls_action(skb, hdr_size) == + (ieee80211_get_tdls_action(skb) == WLAN_TDLS_TEARDOWN)) { ieee80211_tdls_td_tx_handle(local, sdata, skb, info->flags); @@ -759,9 +757,24 @@ static void ieee80211_report_used_skb(struct ieee80211_local *local, } rcu_read_unlock(); - } else if (info->ack_frame_id) { + } else if (info->status_data_idr) { ieee80211_report_ack_skb(local, skb, acked, dropped, ack_hwtstamp); + } else if (info->status_data) { + struct ieee80211_sub_if_data *sdata; + + rcu_read_lock(); + + sdata = ieee80211_sdata_from_skb(local, skb); + + switch (u16_get_bits(info->status_data, + IEEE80211_STATUS_TYPE_MASK)) { + case IEEE80211_STATUS_TYPE_SMPS: + ieee80211_handle_smps_status(sdata, acked, + info->status_data); + break; + } + rcu_read_unlock(); } if (!dropped && skb->destructor) { @@ -862,7 +875,7 @@ static int ieee80211_tx_get_rates(struct ieee80211_hw *hw, } void ieee80211_tx_monitor(struct ieee80211_local *local, struct sk_buff *skb, - int retry_count, int shift, bool send_to_cooked, + int retry_count, bool send_to_cooked, struct ieee80211_tx_status *status) { struct sk_buff *skb2; @@ -879,7 +892,7 @@ void ieee80211_tx_monitor(struct ieee80211_local *local, struct sk_buff *skb, return; } ieee80211_add_tx_radiotap_header(local, skb, retry_count, - rtap_len, shift, status); + rtap_len, status); /* XXX: is this sufficient for BPF? 
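The status.c rework replaces re-parsing the ACKed frame with a small cookie: info->status_data carries a type field plus per-type sub-data, and for SMPS the sub-data appears to pack the mode in the low two bits and the link ID above it, as ieee80211_handle_smps_status() decodes. Stand-in decode, with the mask names taken from the hunk and ex_handle_smps() a hypothetical consumer:

    u16 sub = u16_get_bits(status_data, IEEE80211_STATUS_SUBDATA_MASK);
    enum ieee80211_smps_mode mode = sub & 3;
    int link_id = sub >> 2;

    switch (u16_get_bits(status_data, IEEE80211_STATUS_TYPE_MASK)) {
    case IEEE80211_STATUS_TYPE_SMPS:
            ex_handle_smps(sdata, acked, mode, link_id);
            break;
    }

This also makes the handling MLO-aware: the removed code in ieee80211_frame_acked() could only update deflink, while the decoded link_id selects the correct link. In the hunks that follow, the skb-based status entry point is also renamed, ieee80211_tx_status() to ieee80211_tx_status_skb(), with unchanged semantics, apparently to align with ieee80211_tx_status_ext().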
*/ skb_reset_mac_header(skb); @@ -932,14 +945,12 @@ static void __ieee80211_tx_status(struct ieee80211_hw *hw, bool acked; bool noack_success; struct ieee80211_bar *bar; - int shift = 0; int tid = IEEE80211_NUM_TIDS; fc = hdr->frame_control; if (status->sta) { sta = container_of(status->sta, struct sta_info, sta); - shift = ieee80211_vif_get_shift(&sta->sdata->vif); if (info->flags & IEEE80211_TX_STATUS_EOSP) clear_sta_flag(sta, WLAN_STA_SP); @@ -1077,11 +1088,11 @@ static void __ieee80211_tx_status(struct ieee80211_hw *hw, } /* send to monitor interfaces */ - ieee80211_tx_monitor(local, skb, retry_count, shift, + ieee80211_tx_monitor(local, skb, retry_count, send_to_cooked, status); } -void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) +void ieee80211_tx_status_skb(struct ieee80211_hw *hw, struct sk_buff *skb) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; struct ieee80211_local *local = hw_to_local(hw); @@ -1100,7 +1111,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) ieee80211_tx_status_ext(hw, &status); rcu_read_unlock(); } -EXPORT_SYMBOL(ieee80211_tx_status); +EXPORT_SYMBOL(ieee80211_tx_status_skb); void ieee80211_tx_status_ext(struct ieee80211_hw *hw, struct ieee80211_tx_status *status) diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c index a4af3b7675..05a7dff69f 100644 --- a/net/mac80211/tdls.c +++ b/net/mac80211/tdls.c @@ -21,7 +21,7 @@ /* give usermode some time for retries in setting up the TDLS session */ #define TDLS_PEER_SETUP_TIMEOUT (15 * HZ) -void ieee80211_tdls_peer_del_work(struct work_struct *wk) +void ieee80211_tdls_peer_del_work(struct wiphy *wiphy, struct wiphy_work *wk) { struct ieee80211_sub_if_data *sdata; struct ieee80211_local *local; @@ -30,13 +30,13 @@ void ieee80211_tdls_peer_del_work(struct work_struct *wk) u.mgd.tdls_peer_del_work.work); local = sdata->local; - mutex_lock(&local->mtx); + lockdep_assert_wiphy(local->hw.wiphy); + if (!is_zero_ether_addr(sdata->u.mgd.tdls_peer)) { tdls_dbg(sdata, "TDLS del peer %pM\n", sdata->u.mgd.tdls_peer); sta_info_destroy_addr(sdata, sdata->u.mgd.tdls_peer); eth_zero_addr(sdata->u.mgd.tdls_peer); } - mutex_unlock(&local->mtx); } static void ieee80211_tdls_add_ext_capab(struct ieee80211_link_data *link, @@ -309,7 +309,7 @@ ieee80211_tdls_chandef_vht_upgrade(struct ieee80211_sub_if_data *sdata, struct sta_info *sta) { /* IEEE802.11ac-2013 Table E-4 */ - u16 centers_80mhz[] = { 5210, 5290, 5530, 5610, 5690, 5775 }; + static const u16 centers_80mhz[] = { 5210, 5290, 5530, 5610, 5690, 5775 }; struct cfg80211_chan_def uc = sta->tdls_chandef; enum nl80211_chan_width max_width = ieee80211_sta_cap_chan_bw(&sta->deflink); @@ -1180,7 +1180,7 @@ ieee80211_tdls_mgmt_setup(struct wiphy *wiphy, struct net_device *dev, return -ENOTSUPP; } - mutex_lock(&local->mtx); + lockdep_assert_wiphy(local->hw.wiphy); /* we don't support concurrent TDLS peer setups */ if (!is_zero_ether_addr(sdata->u.mgd.tdls_peer) && @@ -1208,7 +1208,6 @@ ieee80211_tdls_mgmt_setup(struct wiphy *wiphy, struct net_device *dev, ieee80211_flush_queues(local, sdata, false); memcpy(sdata->u.mgd.tdls_peer, peer, ETH_ALEN); - mutex_unlock(&local->mtx); /* we cannot take the mutex while preparing the setup packet */ ret = ieee80211_tdls_prep_mgmt_packet(wiphy, dev, peer, @@ -1218,19 +1217,16 @@ ieee80211_tdls_mgmt_setup(struct wiphy *wiphy, struct net_device *dev, extra_ies, extra_ies_len, 0, NULL); if (ret < 0) { - mutex_lock(&local->mtx); eth_zero_addr(sdata->u.mgd.tdls_peer); - 
mutex_unlock(&local->mtx); return ret; } - ieee80211_queue_delayed_work(&sdata->local->hw, - &sdata->u.mgd.tdls_peer_del_work, - TDLS_PEER_SETUP_TIMEOUT); + wiphy_delayed_work_queue(sdata->local->hw.wiphy, + &sdata->u.mgd.tdls_peer_del_work, + TDLS_PEER_SETUP_TIMEOUT); return 0; out_unlock: - mutex_unlock(&local->mtx); return ret; } @@ -1322,7 +1318,7 @@ int ieee80211_tdls_mgmt(struct wiphy *wiphy, struct net_device *dev, * response frame. It is transmitted directly and not buffered * by the AP. */ - drv_mgd_protect_tdls_discover(sdata->local, sdata); + drv_mgd_protect_tdls_discover(sdata->local, sdata, link_id); fallthrough; case WLAN_TDLS_SETUP_CONFIRM: case WLAN_PUB_ACTION_TDLS_DISCOVER_RES: @@ -1354,9 +1350,10 @@ static void iee80211_tdls_recalc_chanctx(struct ieee80211_sub_if_data *sdata, enum nl80211_chan_width width; struct ieee80211_supported_band *sband; - mutex_lock(&local->chanctx_mtx); + lockdep_assert_wiphy(local->hw.wiphy); + conf = rcu_dereference_protected(sdata->vif.bss_conf.chanctx_conf, - lockdep_is_held(&local->chanctx_mtx)); + lockdep_is_held(&local->hw.wiphy->mtx)); if (conf) { width = conf->def.width; sband = local->hw.wiphy->bands[conf->def.chan->band]; @@ -1384,7 +1381,6 @@ static void iee80211_tdls_recalc_chanctx(struct ieee80211_sub_if_data *sdata, } } - mutex_unlock(&local->chanctx_mtx); } static int iee80211_tdls_have_ht_peers(struct ieee80211_sub_if_data *sdata) @@ -1447,6 +1443,8 @@ int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_local *local = sdata->local; int ret; + lockdep_assert_wiphy(local->hw.wiphy); + if (!(wiphy->flags & WIPHY_FLAG_SUPPORTS_TDLS)) return -ENOTSUPP; @@ -1467,35 +1465,26 @@ int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev, /* protect possible bss_conf changes and avoid concurrency in * ieee80211_bss_info_change_notify() */ - sdata_lock(sdata); - mutex_lock(&local->mtx); tdls_dbg(sdata, "TDLS oper %d peer %pM\n", oper, peer); switch (oper) { case NL80211_TDLS_ENABLE_LINK: if (sdata->vif.bss_conf.csa_active) { tdls_dbg(sdata, "TDLS: disallow link during CSA\n"); - ret = -EBUSY; - break; + return -EBUSY; } - mutex_lock(&local->sta_mtx); sta = sta_info_get(sdata, peer); - if (!sta) { - mutex_unlock(&local->sta_mtx); - ret = -ENOLINK; - break; - } + if (!sta) + return -ENOLINK; iee80211_tdls_recalc_chanctx(sdata, sta); iee80211_tdls_recalc_ht_protection(sdata, sta); set_sta_flag(sta, WLAN_STA_TDLS_PEER_AUTH); - mutex_unlock(&local->sta_mtx); WARN_ON_ONCE(is_zero_ether_addr(sdata->u.mgd.tdls_peer) || !ether_addr_equal(sdata->u.mgd.tdls_peer, peer)); - ret = 0; break; case NL80211_TDLS_DISABLE_LINK: /* @@ -1514,29 +1503,26 @@ int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev, ret = sta_info_destroy_addr(sdata, peer); - mutex_lock(&local->sta_mtx); iee80211_tdls_recalc_ht_protection(sdata, NULL); - mutex_unlock(&local->sta_mtx); iee80211_tdls_recalc_chanctx(sdata, NULL); + if (ret) + return ret; break; default: - ret = -ENOTSUPP; - break; + return -ENOTSUPP; } - if (ret == 0 && ether_addr_equal(sdata->u.mgd.tdls_peer, peer)) { - cancel_delayed_work(&sdata->u.mgd.tdls_peer_del_work); + if (ether_addr_equal(sdata->u.mgd.tdls_peer, peer)) { + wiphy_delayed_work_cancel(sdata->local->hw.wiphy, + &sdata->u.mgd.tdls_peer_del_work); eth_zero_addr(sdata->u.mgd.tdls_peer); } - if (ret == 0) - wiphy_work_queue(sdata->local->hw.wiphy, - &sdata->deflink.u.mgd.request_smps_work); + wiphy_work_queue(sdata->local->hw.wiphy, + &sdata->deflink.u.mgd.request_smps_work); - 
mutex_unlock(&local->mtx); - sdata_unlock(sdata); - return ret; + return 0; } void ieee80211_tdls_oper_request(struct ieee80211_vif *vif, const u8 *peer, @@ -1669,11 +1655,12 @@ ieee80211_tdls_channel_switch(struct wiphy *wiphy, struct net_device *dev, u32 ch_sw_tm_ie; int ret; + lockdep_assert_wiphy(local->hw.wiphy); + if (chandef->chan->freq_offset) /* this may work, but is untested */ return -EOPNOTSUPP; - mutex_lock(&local->sta_mtx); sta = sta_info_get(sdata, addr); if (!sta) { tdls_dbg(sdata, @@ -1703,7 +1690,6 @@ ieee80211_tdls_channel_switch(struct wiphy *wiphy, struct net_device *dev, set_sta_flag(sta, WLAN_STA_TDLS_OFF_CHANNEL); out: - mutex_unlock(&local->sta_mtx); dev_kfree_skb_any(skb); return ret; } @@ -1717,26 +1703,24 @@ ieee80211_tdls_cancel_channel_switch(struct wiphy *wiphy, struct ieee80211_local *local = sdata->local; struct sta_info *sta; - mutex_lock(&local->sta_mtx); + lockdep_assert_wiphy(local->hw.wiphy); + sta = sta_info_get(sdata, addr); if (!sta) { tdls_dbg(sdata, "Invalid TDLS peer %pM for channel switch cancel\n", addr); - goto out; + return; } if (!test_sta_flag(sta, WLAN_STA_TDLS_OFF_CHANNEL)) { tdls_dbg(sdata, "TDLS channel switch not initiated by %pM\n", addr); - goto out; + return; } drv_tdls_cancel_channel_switch(local, sdata, &sta->sta); clear_sta_flag(sta, WLAN_STA_TDLS_OFF_CHANNEL); - -out: - mutex_unlock(&local->sta_mtx); } static struct sk_buff * @@ -1798,6 +1782,8 @@ ieee80211_process_tdls_channel_switch_resp(struct ieee80211_sub_if_data *sdata, struct ieee80211_tdls_ch_sw_params params = {}; int ret; + lockdep_assert_wiphy(local->hw.wiphy); + params.action_code = WLAN_TDLS_CHANNEL_SWITCH_RESPONSE; params.timestamp = rx_status->device_timestamp; @@ -1807,7 +1793,6 @@ ieee80211_process_tdls_channel_switch_resp(struct ieee80211_sub_if_data *sdata, return -EINVAL; } - mutex_lock(&local->sta_mtx); sta = sta_info_get(sdata, tf->sa); if (!sta || !test_sta_flag(sta, WLAN_STA_TDLS_PEER_AUTH)) { tdls_dbg(sdata, "TDLS chan switch from non-peer sta %pM\n", @@ -1870,7 +1855,6 @@ call_drv: tf->sa, params.status); out: - mutex_unlock(&local->sta_mtx); dev_kfree_skb_any(params.tmpl_skb); kfree(elems); return ret; @@ -1896,6 +1880,8 @@ ieee80211_process_tdls_channel_switch_req(struct ieee80211_sub_if_data *sdata, struct ieee80211_tdls_ch_sw_params params = {}; int ret = 0; + lockdep_assert_wiphy(local->hw.wiphy); + params.action_code = WLAN_TDLS_CHANNEL_SWITCH_REQUEST; params.timestamp = rx_status->device_timestamp; @@ -1984,7 +1970,6 @@ ieee80211_process_tdls_channel_switch_req(struct ieee80211_sub_if_data *sdata, goto free; } - mutex_lock(&local->sta_mtx); sta = sta_info_get(sdata, tf->sa); if (!sta || !test_sta_flag(sta, WLAN_STA_TDLS_PEER_AUTH)) { tdls_dbg(sdata, "TDLS chan switch from non-peer sta %pM\n", @@ -2031,7 +2016,6 @@ ieee80211_process_tdls_channel_switch_req(struct ieee80211_sub_if_data *sdata, tf->sa, params.chandef->chan->center_freq, params.chandef->width); out: - mutex_unlock(&local->sta_mtx); dev_kfree_skb_any(params.tmpl_skb); free: kfree(elems); diff --git a/net/mac80211/tests/Makefile b/net/mac80211/tests/Makefile new file mode 100644 index 0000000000..4814584f8a --- /dev/null +++ b/net/mac80211/tests/Makefile @@ -0,0 +1,3 @@ +mac80211-tests-y += module.o elems.o + +obj-$(CONFIG_MAC80211_KUNIT_TEST) += mac80211-tests.o diff --git a/net/mac80211/tests/elems.c b/net/mac80211/tests/elems.c new file mode 100644 index 0000000000..997d0cd27b --- /dev/null +++ b/net/mac80211/tests/elems.c @@ -0,0 +1,101 @@ +// SPDX-License-Identifier: GPL-2.0-only 
+/* + * KUnit tests for element parsing + * + * Copyright (C) 2023 Intel Corporation + */ +#include <kunit/test.h> +#include "../ieee80211_i.h" + +MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING); + +static void mle_defrag(struct kunit *test) +{ + struct ieee80211_elems_parse_params parse_params = { + .link_id = 12, + .from_ap = true, + }; + struct ieee802_11_elems *parsed; + struct sk_buff *skb; + u8 *len_mle, *len_prof; + int i; + + skb = alloc_skb(1024, GFP_KERNEL); + KUNIT_ASSERT_NOT_NULL(test, skb); + + if (skb_pad(skb, skb_tailroom(skb))) { + KUNIT_FAIL(test, "failed to pad skb"); + return; + } + + /* build a multi-link element */ + skb_put_u8(skb, WLAN_EID_EXTENSION); + len_mle = skb_put(skb, 1); + skb_put_u8(skb, WLAN_EID_EXT_EHT_MULTI_LINK); + + put_unaligned_le16(IEEE80211_ML_CONTROL_TYPE_BASIC, + skb_put(skb, 2)); + /* struct ieee80211_mle_basic_common_info */ + skb_put_u8(skb, 7); /* includes len field */ + skb_put_data(skb, "\x00\x00\x00\x00\x00\x00", ETH_ALEN); /* MLD addr */ + + /* with a STA profile inside */ + skb_put_u8(skb, IEEE80211_MLE_SUBELEM_PER_STA_PROFILE); + len_prof = skb_put(skb, 1); + put_unaligned_le16(IEEE80211_MLE_STA_CONTROL_COMPLETE_PROFILE | + parse_params.link_id, + skb_put(skb, 2)); + skb_put_u8(skb, 1); /* fake sta_info_len - includes itself */ + /* put a bunch of useless elements into it */ + for (i = 0; i < 20; i++) { + skb_put_u8(skb, WLAN_EID_SSID); + skb_put_u8(skb, 20); + skb_put(skb, 20); + } + + /* fragment STA profile */ + ieee80211_fragment_element(skb, len_prof, + IEEE80211_MLE_SUBELEM_FRAGMENT); + /* fragment MLE */ + ieee80211_fragment_element(skb, len_mle, WLAN_EID_FRAGMENT); + + parse_params.start = skb->data; + parse_params.len = skb->len; + parsed = ieee802_11_parse_elems_full(&parse_params); + /* should return ERR_PTR or valid, not NULL */ + KUNIT_EXPECT_NOT_NULL(test, parsed); + + if (IS_ERR_OR_NULL(parsed)) + goto free_skb; + + KUNIT_EXPECT_NOT_NULL(test, parsed->ml_basic_elem); + KUNIT_EXPECT_EQ(test, + parsed->ml_basic_len, + 2 /* control */ + + 7 /* common info */ + + 2 /* sta profile element header */ + + 3 /* sta profile header */ + + 20 * 22 /* sta profile data */ + + 2 /* sta profile fragment element */); + KUNIT_EXPECT_NOT_NULL(test, parsed->prof); + KUNIT_EXPECT_EQ(test, + parsed->sta_prof_len, + 3 /* sta profile header */ + + 20 * 22 /* sta profile data */); + + kfree(parsed); +free_skb: + kfree_skb(skb); +} + +static struct kunit_case element_parsing_test_cases[] = { + KUNIT_CASE(mle_defrag), + {} +}; + +static struct kunit_suite element_parsing = { + .name = "mac80211-element-parsing", + .test_cases = element_parsing_test_cases, +}; + +kunit_test_suite(element_parsing); diff --git a/net/mac80211/tests/module.c b/net/mac80211/tests/module.c new file mode 100644 index 0000000000..9d05f29439 --- /dev/null +++ b/net/mac80211/tests/module.c @@ -0,0 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * This is just module boilerplate for the mac80211 kunit module. 
+ * + * Copyright (C) 2023 Intel Corporation + */ +#include <linux/module.h> + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("tests for mac80211"); diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index b8c53b4a71..032718d5b2 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -2839,23 +2839,26 @@ TRACE_EVENT(api_sta_block_awake, ); TRACE_EVENT(api_chswitch_done, - TP_PROTO(struct ieee80211_sub_if_data *sdata, bool success), + TP_PROTO(struct ieee80211_sub_if_data *sdata, bool success, + unsigned int link_id), - TP_ARGS(sdata, success), + TP_ARGS(sdata, success, link_id), TP_STRUCT__entry( VIF_ENTRY __field(bool, success) + __field(unsigned int, link_id) ), TP_fast_assign( VIF_ASSIGN; __entry->success = success; + __entry->link_id = link_id; ), TP_printk( - VIF_PR_FMT " success=%d", - VIF_PR_ARG, __entry->success + VIF_PR_FMT " success=%d link_id=%d", + VIF_PR_ARG, __entry->success, __entry->link_id ) ); diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index d45d4be63d..a85918594c 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -5,7 +5,7 @@ * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> * Copyright 2007 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH - * Copyright (C) 2018-2022 Intel Corporation + * Copyright (C) 2018-2024 Intel Corporation * * Transmit and frame generation functions. */ @@ -43,7 +43,7 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, struct sk_buff *skb, int group_addr, int next_frag_len) { - int rate, mrate, erp, dur, i, shift = 0; + int rate, mrate, erp, dur, i; struct ieee80211_rate *txrate; struct ieee80211_local *local = tx->local; struct ieee80211_supported_band *sband; @@ -58,10 +58,8 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, rcu_read_lock(); chanctx_conf = rcu_dereference(tx->sdata->vif.bss_conf.chanctx_conf); - if (chanctx_conf) { - shift = ieee80211_chandef_get_shift(&chanctx_conf->def); + if (chanctx_conf) rate_flags = ieee80211_chandef_rate_flags(&chanctx_conf->def); - } rcu_read_unlock(); /* uh huh? 
*/ @@ -143,7 +141,7 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, continue; if (tx->sdata->vif.bss_conf.basic_rates & BIT(i)) - rate = DIV_ROUND_UP(r->bitrate, 1 << shift); + rate = r->bitrate; switch (sband->band) { case NL80211_BAND_2GHZ: @@ -173,7 +171,7 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, if (rate == -1) { /* No matching basic rate found; use highest suitable mandatory * PHY rate */ - rate = DIV_ROUND_UP(mrate, 1 << shift); + rate = mrate; } /* Don't calculate ACKs for QoS Frames with NoAck Policy set */ @@ -185,8 +183,7 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, * (10 bytes + 4-byte FCS = 112 bits) plus SIFS; rounded up * to closest integer */ dur = ieee80211_frame_duration(sband->band, 10, rate, erp, - tx->sdata->vif.bss_conf.use_short_preamble, - shift); + tx->sdata->vif.bss_conf.use_short_preamble); if (next_frag_len) { /* Frame is fragmented: duration increases with time needed to @@ -195,8 +192,7 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, /* next fragment */ dur += ieee80211_frame_duration(sband->band, next_frag_len, txrate->bitrate, erp, - tx->sdata->vif.bss_conf.use_short_preamble, - shift); + tx->sdata->vif.bss_conf.use_short_preamble); } return cpu_to_le16(dur); @@ -266,8 +262,8 @@ ieee80211_tx_h_dynamic_ps(struct ieee80211_tx_data *tx) IEEE80211_QUEUE_STOP_REASON_PS, false); ifmgd->flags &= ~IEEE80211_STA_NULLFUNC_ACKED; - ieee80211_queue_work(&local->hw, - &local->dynamic_ps_disable_work); + wiphy_work_queue(local->hw.wiphy, + &local->dynamic_ps_disable_work); } /* Don't restart the timer if we're not disassociated */ @@ -2167,6 +2163,11 @@ bool ieee80211_parse_tx_radiotap(struct sk_buff *skb, rate_found = true; break; + case IEEE80211_RADIOTAP_ANTENNA: + /* this can appear multiple times, keep a bitmap */ + info->control.antennas |= BIT(*iterator.this_arg); + break; + case IEEE80211_RADIOTAP_DATA_RETRIES: rate_retries = *iterator.this_arg; break; @@ -2261,8 +2262,17 @@ bool ieee80211_parse_tx_radiotap(struct sk_buff *skb, } if (rate_flags & IEEE80211_TX_RC_MCS) { + /* reset antennas if not enough */ + if (IEEE80211_HT_MCS_CHAINS(rate) > + hweight8(info->control.antennas)) + info->control.antennas = 0; + info->control.rates[0].idx = rate; } else if (rate_flags & IEEE80211_TX_RC_VHT_MCS) { + /* reset antennas if not enough */ + if (vht_nss > hweight8(info->control.antennas)) + info->control.antennas = 0; + ieee80211_rate_set_vht(info->control.rates, vht_mcs, vht_nss); } else if (sband) { @@ -2856,9 +2866,10 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata, goto free; } - if (unlikely(!multicast && ((skb->sk && - skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS) || - ctrl_flags & IEEE80211_TX_CTL_REQ_TX_STATUS))) + if (unlikely(!multicast && + ((skb->sk && + skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS) || + ctrl_flags & IEEE80211_TX_CTL_REQ_TX_STATUS))) info_id = ieee80211_store_ack_skb(local, skb, &info_flags, cookie); @@ -2942,7 +2953,10 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata, memset(info, 0, sizeof(*info)); info->flags = info_flags; - info->ack_frame_id = info_id; + if (info_id) { + info->status_data = info_id; + info->status_data_idr = 1; + } info->band = band; if (likely(!cookie)) { @@ -3034,7 +3048,7 @@ void ieee80211_check_fast_xmit(struct sta_info *sta) sdata->vif.type == NL80211_IFTYPE_STATION) goto out; - if (!test_sta_flag(sta, WLAN_STA_AUTHORIZED)) + if (!test_sta_flag(sta, WLAN_STA_AUTHORIZED) 
|| !sta->uploaded) goto out; if (test_sta_flag(sta, WLAN_STA_PS_STA) || @@ -3086,10 +3100,11 @@ void ieee80211_check_fast_xmit(struct sta_info *sta) /* DA SA BSSID */ build.da_offs = offsetof(struct ieee80211_hdr, addr1); build.sa_offs = offsetof(struct ieee80211_hdr, addr2); + rcu_read_lock(); link = rcu_dereference(sdata->link[tdls_link_id]); - if (WARN_ON_ONCE(!link)) - break; - memcpy(hdr->addr3, link->u.mgd.bssid, ETH_ALEN); + if (!WARN_ON_ONCE(!link)) + memcpy(hdr->addr3, link->u.mgd.bssid, ETH_ALEN); + rcu_read_unlock(); build.hdr_len = 24; break; } @@ -3912,6 +3927,7 @@ begin: goto begin; skb = __skb_dequeue(&tx.skbs); + info = IEEE80211_SKB_CB(skb); if (!skb_queue_empty(&tx.skbs)) { spin_lock_bh(&fq->lock); @@ -3956,7 +3972,7 @@ begin: } encap_out: - IEEE80211_SKB_CB(skb)->control.vif = vif; + info->control.vif = vif; if (tx.sta && wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL)) { @@ -4475,6 +4491,8 @@ static void ieee80211_mlo_multicast_tx(struct net_device *dev, * @dev: incoming interface * * On failure skb will be freed. + * + * Returns: the netdev TX status (but really only %NETDEV_TX_OK) */ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, struct net_device *dev) @@ -4639,9 +4657,12 @@ static void ieee80211_8023_xmit(struct ieee80211_sub_if_data *sdata, } if (unlikely(skb->sk && - skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS)) - info->ack_frame_id = ieee80211_store_ack_skb(local, skb, - &info->flags, NULL); + skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS)) { + info->status_data = ieee80211_store_ack_skb(local, skb, + &info->flags, NULL); + if (info->status_data) + info->status_data_idr = 1; + } dev_sw_netstats_tx_add(dev, skbs, len); sta->deflink.tx_stats.packets[queue] += skbs; @@ -5550,7 +5571,6 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, IEEE80211_INCLUDE_ALL_MBSSID_ELEMS, NULL); struct sk_buff *copy; - int shift; if (!bcn) return bcn; @@ -5570,8 +5590,7 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, if (!copy) return bcn; - shift = ieee80211_vif_get_shift(vif); - ieee80211_tx_monitor(hw_to_local(hw), copy, 1, shift, false, NULL); + ieee80211_tx_monitor(hw_to_local(hw), copy, 1, false, NULL); return bcn; } @@ -5921,7 +5940,7 @@ int ieee80211_reserve_tid(struct ieee80211_sta *pubsta, u8 tid) int ret; u32 queues; - lockdep_assert_held(&local->sta_mtx); + lockdep_assert_wiphy(local->hw.wiphy); /* only some cases are supported right now */ switch (sdata->vif.type) { @@ -5982,7 +6001,7 @@ void ieee80211_unreserve_tid(struct ieee80211_sta *pubsta, u8 tid) struct sta_info *sta = container_of(pubsta, struct sta_info, sta); struct ieee80211_sub_if_data *sdata = sta->sdata; - lockdep_assert_held(&sdata->local->sta_mtx); + lockdep_assert_wiphy(sdata->local->hw.wiphy); /* only some cases are supported right now */ switch (sdata->vif.type) { @@ -6103,6 +6122,9 @@ int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev, u32 flags = 0; int err; + /* mutex lock is only needed for incrementing the cookie counter */ + lockdep_assert_wiphy(local->hw.wiphy); + /* Only accept CONTROL_PORT_PROTOCOL configured in CONNECT/ASSOCIATE * or Pre-Authentication */ @@ -6193,15 +6215,10 @@ int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev, rcu_read_unlock(); start_xmit: - /* mutex lock is only needed for incrementing the cookie counter */ - mutex_lock(&local->mtx); - local_bh_disable(); __ieee80211_subif_start_xmit(skb, skb->dev, flags, ctrl_flags, cookie); local_bh_enable(); - 
mutex_unlock(&local->mtx); - return 0; } diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 172173b2a9..ed680120d5 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -24,6 +24,7 @@ #include <net/net_namespace.h> #include <net/cfg80211.h> #include <net/rtnetlink.h> +#include <kunit/visibility.h> #include "ieee80211_i.h" #include "driver-ops.h" @@ -109,8 +110,7 @@ void ieee80211_tx_set_protected(struct ieee80211_tx_data *tx) } int ieee80211_frame_duration(enum nl80211_band band, size_t len, - int rate, int erp, int short_preamble, - int shift) + int rate, int erp, int short_preamble) { int dur; @@ -121,9 +121,6 @@ int ieee80211_frame_duration(enum nl80211_band band, size_t len, * * rate is in 100 kbps, so divident is multiplied by 10 in the * DIV_ROUND_UP() operations. - * - * shift may be 2 for 5 MHz channels or 1 for 10 MHz channels, and - * is assumed to be 0 otherwise. */ if (band == NL80211_BAND_5GHZ || erp) { @@ -144,12 +141,6 @@ int ieee80211_frame_duration(enum nl80211_band band, size_t len, dur += 16; /* IEEE 802.11-2012 18.3.2.4: T_PREAMBLE = 16 usec */ dur += 4; /* IEEE 802.11-2012 18.3.2.4: T_SIGNAL = 4 usec */ - /* IEEE 802.11-2012 18.3.2.4: all values above are: - * * times 4 for 5 MHz - * * times 2 for 10 MHz - */ - dur *= 1 << shift; - /* rates should already consider the channel bandwidth, * don't apply divisor again. */ @@ -184,7 +175,7 @@ __le16 ieee80211_generic_frame_duration(struct ieee80211_hw *hw, { struct ieee80211_sub_if_data *sdata; u16 dur; - int erp, shift = 0; + int erp; bool short_preamble = false; erp = 0; @@ -193,11 +184,10 @@ __le16 ieee80211_generic_frame_duration(struct ieee80211_hw *hw, short_preamble = sdata->vif.bss_conf.use_short_preamble; if (sdata->deflink.operating_11g_mode) erp = rate->flags & IEEE80211_RATE_ERP_G; - shift = ieee80211_vif_get_shift(vif); } dur = ieee80211_frame_duration(band, frame_len, rate->bitrate, erp, - short_preamble, shift); + short_preamble); return cpu_to_le16(dur); } @@ -211,7 +201,7 @@ __le16 ieee80211_rts_duration(struct ieee80211_hw *hw, struct ieee80211_rate *rate; struct ieee80211_sub_if_data *sdata; bool short_preamble; - int erp, shift = 0, bitrate; + int erp, bitrate; u16 dur; struct ieee80211_supported_band *sband; @@ -227,20 +217,19 @@ __le16 ieee80211_rts_duration(struct ieee80211_hw *hw, short_preamble = sdata->vif.bss_conf.use_short_preamble; if (sdata->deflink.operating_11g_mode) erp = rate->flags & IEEE80211_RATE_ERP_G; - shift = ieee80211_vif_get_shift(vif); } - bitrate = DIV_ROUND_UP(rate->bitrate, 1 << shift); + bitrate = rate->bitrate; /* CTS duration */ dur = ieee80211_frame_duration(sband->band, 10, bitrate, - erp, short_preamble, shift); + erp, short_preamble); /* Data frame duration */ dur += ieee80211_frame_duration(sband->band, frame_len, bitrate, - erp, short_preamble, shift); + erp, short_preamble); /* ACK duration */ dur += ieee80211_frame_duration(sband->band, 10, bitrate, - erp, short_preamble, shift); + erp, short_preamble); return cpu_to_le16(dur); } @@ -255,7 +244,7 @@ __le16 ieee80211_ctstoself_duration(struct ieee80211_hw *hw, struct ieee80211_rate *rate; struct ieee80211_sub_if_data *sdata; bool short_preamble; - int erp, shift = 0, bitrate; + int erp, bitrate; u16 dur; struct ieee80211_supported_band *sband; @@ -270,18 +259,17 @@ __le16 ieee80211_ctstoself_duration(struct ieee80211_hw *hw, short_preamble = sdata->vif.bss_conf.use_short_preamble; if (sdata->deflink.operating_11g_mode) erp = rate->flags & IEEE80211_RATE_ERP_G; - shift = ieee80211_vif_get_shift(vif); 
} - bitrate = DIV_ROUND_UP(rate->bitrate, 1 << shift); + bitrate = rate->bitrate; /* Data frame duration */ dur = ieee80211_frame_duration(sband->band, frame_len, bitrate, - erp, short_preamble, shift); + erp, short_preamble); if (!(frame_txctl->flags & IEEE80211_TX_CTL_NO_ACK)) { /* ACK duration */ dur += ieee80211_frame_duration(sband->band, 10, bitrate, - erp, short_preamble, shift); + erp, short_preamble); } return cpu_to_le16(dur); @@ -705,6 +693,19 @@ void __ieee80211_flush_queues(struct ieee80211_local *local, IEEE80211_QUEUE_STOP_REASON_FLUSH, false); + if (drop) { + struct sta_info *sta; + + /* Purge the queues, so the frames on them won't be + * sent during __ieee80211_wake_queue() + */ + list_for_each_entry(sta, &local->sta_list, list) { + if (sdata != sta->sdata) + continue; + ieee80211_purge_sta_txqs(sta); + } + } + drv_flush(local, sdata, queues, drop); ieee80211_wake_queues_by_reason(&local->hw, queues, @@ -1002,6 +1003,19 @@ ieee80211_parse_extension_element(u32 *crc, } } break; + case WLAN_EID_EXT_BANDWIDTH_INDICATION: + if (ieee80211_bandwidth_indication_size_ok(data, len)) + elems->bandwidth_indication = data; + calc_crc = true; + break; + case WLAN_EID_EXT_TID_TO_LINK_MAPPING: + calc_crc = true; + if (ieee80211_tid_to_link_map_size_ok(data, len) && + elems->ttlm_num < ARRAY_SIZE(elems->ttlm)) { + elems->ttlm[elems->ttlm_num] = (void *)data; + elems->ttlm_num++; + } + break; } if (crc && calc_crc) @@ -1017,11 +1031,11 @@ _ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params, bool calc_crc = params->filter != 0; DECLARE_BITMAP(seen_elems, 256); u32 crc = params->crc; - const u8 *ie; bitmap_zero(seen_elems, 256); for_each_element(elem, params->start, params->len) { + const struct element *subelem; bool elem_parse_failed; u8 id = elem->id; u8 elen = elem->datalen; @@ -1279,15 +1293,27 @@ _ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params, } /* * This is a bit tricky, but as we only care about - * the wide bandwidth channel switch element, so - * just parse it out manually. + * a few elements, parse them out manually. 
*/ - ie = cfg80211_find_ie(WLAN_EID_WIDE_BW_CHANNEL_SWITCH, - pos, elen); - if (ie) { - if (ie[1] >= sizeof(*elems->wide_bw_chansw_ie)) + subelem = cfg80211_find_elem(WLAN_EID_WIDE_BW_CHANNEL_SWITCH, + pos, elen); + if (subelem) { + if (subelem->datalen >= sizeof(*elems->wide_bw_chansw_ie)) elems->wide_bw_chansw_ie = - (void *)(ie + 2); + (void *)subelem->data; + else + elem_parse_failed = true; + } + + subelem = cfg80211_find_ext_elem(WLAN_EID_EXT_BANDWIDTH_INDICATION, + pos, elen); + if (subelem) { + const void *edata = subelem->data + 1; + u8 edatalen = subelem->datalen - 1; + + if (ieee80211_bandwidth_indication_size_ok(edata, + edatalen)) + elems->bandwidth_indication = edata; else elem_parse_failed = true; } @@ -1599,7 +1625,7 @@ ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params) int nontransmitted_profile_len = 0; size_t scratch_len = 3 * params->len; - elems = kzalloc(sizeof(*elems) + scratch_len, GFP_ATOMIC); + elems = kzalloc(struct_size(elems, scratch, scratch_len), GFP_ATOMIC); if (!elems) return NULL; elems->ie_start = params->start; @@ -1654,6 +1680,7 @@ ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params) return elems; } +EXPORT_SYMBOL_IF_KUNIT(ieee802_11_parse_elems_full); void ieee80211_regulatory_limit_wmm_params(struct ieee80211_sub_if_data *sdata, struct ieee80211_tx_queue_params @@ -1942,7 +1969,6 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_sub_if_data *sdata, u8 rates[32]; int num_rates; int ext_rates_len; - int shift; u32 rate_flags; bool have_80mhz = false; @@ -1953,7 +1979,6 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_sub_if_data *sdata, return 0; rate_flags = ieee80211_chandef_rate_flags(chandef); - shift = ieee80211_chandef_get_shift(chandef); /* For direct scan add S1G IE and consider its override bits */ if (band == NL80211_BAND_S1GHZ) { @@ -1971,8 +1996,7 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_sub_if_data *sdata, continue; rates[num_rates++] = - (u8) DIV_ROUND_UP(sband->bitrates[i].bitrate, - (1 << shift) * 5); + (u8) DIV_ROUND_UP(sband->bitrates[i].bitrate, 5); } supp_rates_len = min_t(int, num_rates, 8); @@ -2265,14 +2289,13 @@ u32 ieee80211_sta_get_rates(struct ieee80211_sub_if_data *sdata, struct ieee80211_supported_band *sband; size_t num_rates; u32 supp_rates, rate_flags; - int i, j, shift; + int i, j; sband = sdata->local->hw.wiphy->bands[band]; if (WARN_ON(!sband)) return 1; rate_flags = ieee80211_chandef_rate_flags(&sdata->vif.bss_conf.chandef); - shift = ieee80211_vif_get_shift(&sdata->vif); num_rates = sband->n_bitrates; supp_rates = 0; @@ -2298,8 +2321,7 @@ u32 ieee80211_sta_get_rates(struct ieee80211_sub_if_data *sdata, != rate_flags) continue; - brate = DIV_ROUND_UP(sband->bitrates[j].bitrate, - 1 << shift); + brate = sband->bitrates[j].bitrate; if (brate == own_rate) { supp_rates |= BIT(j); @@ -2316,9 +2338,10 @@ void ieee80211_stop_device(struct ieee80211_local *local) ieee80211_led_radio(local, false); ieee80211_mod_tpt_led_trig(local, 0, IEEE80211_TPT_LEDTRIG_FL_RADIO); - cancel_work_sync(&local->reconfig_filter); + wiphy_work_cancel(local->hw.wiphy, &local->reconfig_filter); flush_workqueue(local->workqueue); + wiphy_work_flush(local->hw.wiphy, NULL); drv_stop(local); } @@ -2350,6 +2373,8 @@ static void ieee80211_handle_reconfig_failure(struct ieee80211_local *local) struct ieee80211_sub_if_data *sdata; struct ieee80211_chanctx *ctx; + lockdep_assert_wiphy(local->hw.wiphy); + /* * We get here if during resume the device can't be restarted 
properly. * We might also get here if this happens during HW reset, which is a @@ -2378,10 +2403,8 @@ static void ieee80211_handle_reconfig_failure(struct ieee80211_local *local) /* Mark channel contexts as not being in the driver any more to avoid * removing them from the driver during the shutdown process... */ - mutex_lock(&local->chanctx_mtx); list_for_each_entry(ctx, &local->chanctx_list, list) ctx->driver_present = false; - mutex_unlock(&local->chanctx_mtx); } static void ieee80211_assign_chanctx(struct ieee80211_local *local, @@ -2391,17 +2414,17 @@ static void ieee80211_assign_chanctx(struct ieee80211_local *local, struct ieee80211_chanctx_conf *conf; struct ieee80211_chanctx *ctx; + lockdep_assert_wiphy(local->hw.wiphy); + if (!local->use_chanctx) return; - mutex_lock(&local->chanctx_mtx); conf = rcu_dereference_protected(link->conf->chanctx_conf, - lockdep_is_held(&local->chanctx_mtx)); + lockdep_is_held(&local->hw.wiphy->mtx)); if (conf) { ctx = container_of(conf, struct ieee80211_chanctx, conf); drv_assign_vif_chanctx(local, sdata, link->conf, ctx); } - mutex_unlock(&local->chanctx_mtx); } static void ieee80211_reconfig_stations(struct ieee80211_sub_if_data *sdata) @@ -2409,8 +2432,9 @@ static void ieee80211_reconfig_stations(struct ieee80211_sub_if_data *sdata) struct ieee80211_local *local = sdata->local; struct sta_info *sta; + lockdep_assert_wiphy(local->hw.wiphy); + /* add STAs back */ - mutex_lock(&local->sta_mtx); list_for_each_entry(sta, &local->sta_list, list) { enum ieee80211_sta_state state; @@ -2422,7 +2446,6 @@ static void ieee80211_reconfig_stations(struct ieee80211_sub_if_data *sdata) WARN_ON(drv_sta_state(local, sta->sdata, sta, state, state + 1)); } - mutex_unlock(&local->sta_mtx); } static int ieee80211_reconfig_nan(struct ieee80211_sub_if_data *sdata) @@ -2509,6 +2532,8 @@ int ieee80211_reconfig(struct ieee80211_local *local) bool suspended = local->suspended; bool in_reconfig = false; + lockdep_assert_wiphy(local->hw.wiphy); + /* nothing to do if HW shouldn't run */ if (!local->open_count) goto wake_up; @@ -2624,12 +2649,10 @@ int ieee80211_reconfig(struct ieee80211_local *local) /* add channel contexts */ if (local->use_chanctx) { - mutex_lock(&local->chanctx_mtx); list_for_each_entry(ctx, &local->chanctx_list, list) if (ctx->replace_state != IEEE80211_CHANCTX_REPLACES_OTHER) WARN_ON(drv_add_chanctx(local, ctx)); - mutex_unlock(&local->chanctx_mtx); sdata = wiphy_dereference(local->hw.wiphy, local->monitor_sdata); @@ -2663,7 +2686,6 @@ int ieee80211_reconfig(struct ieee80211_local *local) if (!ieee80211_sdata_running(sdata)) continue; - sdata_lock(sdata); if (ieee80211_vif_is_mld(&sdata->vif)) { struct ieee80211_bss_conf *old[IEEE80211_MLD_MAX_NUM_LINKS] = { [0] = &sdata->vif.bss_conf, @@ -2795,7 +2817,6 @@ int ieee80211_reconfig(struct ieee80211_local *local) case NL80211_IFTYPE_NAN: res = ieee80211_reconfig_nan(sdata); if (res < 0) { - sdata_unlock(sdata); ieee80211_handle_reconfig_failure(local); return res; } @@ -2813,7 +2834,6 @@ int ieee80211_reconfig(struct ieee80211_local *local) WARN_ON(1); break; } - sdata_unlock(sdata); if (active_links) ieee80211_set_active_links(&sdata->vif, active_links); @@ -2843,7 +2863,6 @@ int ieee80211_reconfig(struct ieee80211_local *local) if (!ieee80211_sdata_running(sdata)) continue; - sdata_lock(sdata); switch (sdata->vif.type) { case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_AP: @@ -2852,7 +2871,6 @@ int ieee80211_reconfig(struct ieee80211_local *local) default: break; } - sdata_unlock(sdata); } /* add back keys 
*/ @@ -2860,11 +2878,10 @@ int ieee80211_reconfig(struct ieee80211_local *local) ieee80211_reenable_keys(sdata); /* Reconfigure sched scan if it was interrupted by FW restart */ - mutex_lock(&local->mtx); sched_scan_sdata = rcu_dereference_protected(local->sched_scan_sdata, - lockdep_is_held(&local->mtx)); + lockdep_is_held(&local->hw.wiphy->mtx)); sched_scan_req = rcu_dereference_protected(local->sched_scan_req, - lockdep_is_held(&local->mtx)); + lockdep_is_held(&local->hw.wiphy->mtx)); if (sched_scan_sdata && sched_scan_req) /* * Sched scan stopped, but we don't want to report it. Instead, @@ -2880,7 +2897,6 @@ int ieee80211_reconfig(struct ieee80211_local *local) RCU_INIT_POINTER(local->sched_scan_req, NULL); sched_scan_stopped = true; } - mutex_unlock(&local->mtx); if (sched_scan_stopped) cfg80211_sched_scan_stopped_locked(local->hw.wiphy, 0); @@ -2901,16 +2917,12 @@ int ieee80211_reconfig(struct ieee80211_local *local) * are active. This is really a workaround though. */ if (ieee80211_hw_check(hw, AMPDU_AGGREGATION)) { - mutex_lock(&local->sta_mtx); - list_for_each_entry(sta, &local->sta_list, list) { if (!local->resuming) ieee80211_sta_tear_down_BA_sessions( sta, AGG_STOP_LOCAL_REQUEST); clear_sta_flag(sta, WLAN_STA_BLOCK_BA); } - - mutex_unlock(&local->sta_mtx); } /* @@ -2926,9 +2938,7 @@ int ieee80211_reconfig(struct ieee80211_local *local) barrier(); /* Restart deferred ROCs */ - mutex_lock(&local->mtx); ieee80211_start_next_roc(local); - mutex_unlock(&local->mtx); /* Requeue all works */ list_for_each_entry(sdata, &local->interfaces, list) @@ -2989,6 +2999,8 @@ static void ieee80211_reconfig_disconnect(struct ieee80211_vif *vif, u8 flag) sdata = vif_to_sdata(vif); local = sdata->local; + lockdep_assert_wiphy(local->hw.wiphy); + if (WARN_ON(flag & IEEE80211_SDATA_DISCONNECT_RESUME && !local->resuming)) return; @@ -3002,10 +3014,8 @@ static void ieee80211_reconfig_disconnect(struct ieee80211_vif *vif, u8 flag) sdata->flags |= flag; - mutex_lock(&local->key_mtx); list_for_each_entry(key, &sdata->key_list, list) key->flags |= KEY_FLAG_TAINTED; - mutex_unlock(&local->key_mtx); } void ieee80211_hw_restart_disconnect(struct ieee80211_vif *vif) @@ -3027,10 +3037,10 @@ void ieee80211_recalc_smps(struct ieee80211_sub_if_data *sdata, struct ieee80211_chanctx_conf *chanctx_conf; struct ieee80211_chanctx *chanctx; - mutex_lock(&local->chanctx_mtx); + lockdep_assert_wiphy(local->hw.wiphy); chanctx_conf = rcu_dereference_protected(link->conf->chanctx_conf, - lockdep_is_held(&local->chanctx_mtx)); + lockdep_is_held(&local->hw.wiphy->mtx)); /* * This function can be called from a work, thus it may be possible @@ -3039,12 +3049,10 @@ void ieee80211_recalc_smps(struct ieee80211_sub_if_data *sdata, * So nothing should be done in such case. 
*/ if (!chanctx_conf) - goto unlock; + return; chanctx = container_of(chanctx_conf, struct ieee80211_chanctx, conf); ieee80211_recalc_smps_chanctx(local, chanctx); - unlock: - mutex_unlock(&local->chanctx_mtx); } void ieee80211_recalc_min_chandef(struct ieee80211_sub_if_data *sdata, @@ -3055,7 +3063,7 @@ void ieee80211_recalc_min_chandef(struct ieee80211_sub_if_data *sdata, struct ieee80211_chanctx *chanctx; int i; - mutex_lock(&local->chanctx_mtx); + lockdep_assert_wiphy(local->hw.wiphy); for (i = 0; i < ARRAY_SIZE(sdata->vif.link_conf); i++) { struct ieee80211_bss_conf *bss_conf; @@ -3071,9 +3079,9 @@ void ieee80211_recalc_min_chandef(struct ieee80211_sub_if_data *sdata, } chanctx_conf = rcu_dereference_protected(bss_conf->chanctx_conf, - lockdep_is_held(&local->chanctx_mtx)); + lockdep_is_held(&local->hw.wiphy->mtx)); /* - * Since we hold the chanctx_mtx (checked above) + * Since we hold the wiphy mutex (checked above) * we can take the chanctx_conf pointer out of the * RCU critical section, it cannot go away without * the mutex. Just the way we reached it could - in @@ -3083,14 +3091,12 @@ void ieee80211_recalc_min_chandef(struct ieee80211_sub_if_data *sdata, rcu_read_unlock(); if (!chanctx_conf) - goto unlock; + return; chanctx = container_of(chanctx_conf, struct ieee80211_chanctx, conf); ieee80211_recalc_chanctx_min_def(local, chanctx, NULL); } - unlock: - mutex_unlock(&local->chanctx_mtx); } size_t ieee80211_ie_split_vendor(const u8 *ies, size_t ielen, size_t offset) @@ -3778,12 +3784,10 @@ bool ieee80211_chandef_vht_oper(struct ieee80211_hw *hw, u32 vht_cap_info, return true; } -void ieee80211_chandef_eht_oper(const struct ieee80211_eht_operation *eht_oper, +void ieee80211_chandef_eht_oper(const struct ieee80211_eht_operation_info *info, bool support_160, bool support_320, struct cfg80211_chan_def *chandef) { - struct ieee80211_eht_operation_info *info = (void *)eht_oper->optional; - chandef->center_freq1 = ieee80211_channel_to_frequency(info->ccfs0, chandef->chan->band); @@ -3952,8 +3956,9 @@ bool ieee80211_chandef_he_6ghz_oper(struct ieee80211_sub_if_data *sdata, support_320 = eht_phy_cap & IEEE80211_EHT_PHY_CAP0_320MHZ_IN_6GHZ; - ieee80211_chandef_eht_oper(eht_oper, support_160, - support_320, &he_chandef); + ieee80211_chandef_eht_oper((const void *)eht_oper->optional, + support_160, support_320, + &he_chandef); } if (!cfg80211_chandef_valid(&he_chandef)) { @@ -4012,7 +4017,6 @@ int ieee80211_parse_bitrates(enum nl80211_chan_width width, const u8 *srates, int srates_len, u32 *rates) { u32 rate_flags = ieee80211_chanwidth_rate_flags(width); - int shift = ieee80211_chanwidth_get_shift(width); struct ieee80211_rate *br; int brate, rate, i, j, count = 0; @@ -4026,7 +4030,7 @@ int ieee80211_parse_bitrates(enum nl80211_chan_width width, if ((rate_flags & br->flags) != rate_flags) continue; - brate = DIV_ROUND_UP(br->bitrate, (1 << shift) * 5); + brate = DIV_ROUND_UP(br->bitrate, 5); if (brate == rate) { *rates |= BIT(j); count++; @@ -4043,12 +4047,11 @@ int ieee80211_add_srates_ie(struct ieee80211_sub_if_data *sdata, { struct ieee80211_local *local = sdata->local; struct ieee80211_supported_band *sband; - int rate, shift; + int rate; u8 i, rates, *pos; u32 basic_rates = sdata->vif.bss_conf.basic_rates; u32 rate_flags; - shift = ieee80211_vif_get_shift(&sdata->vif); rate_flags = ieee80211_chandef_rate_flags(&sdata->vif.bss_conf.chandef); sband = local->hw.wiphy->bands[band]; rates = 0; @@ -4073,8 +4076,7 @@ int ieee80211_add_srates_ie(struct ieee80211_sub_if_data *sdata, if (need_basic 
&& basic_rates & BIT(i)) basic = 0x80; - rate = DIV_ROUND_UP(sband->bitrates[i].bitrate, - 5 * (1 << shift)); + rate = DIV_ROUND_UP(sband->bitrates[i].bitrate, 5); *pos++ = basic | (u8) rate; } @@ -4087,13 +4089,12 @@ int ieee80211_add_ext_srates_ie(struct ieee80211_sub_if_data *sdata, { struct ieee80211_local *local = sdata->local; struct ieee80211_supported_band *sband; - int rate, shift; + int rate; u8 i, exrates, *pos; u32 basic_rates = sdata->vif.bss_conf.basic_rates; u32 rate_flags; rate_flags = ieee80211_chandef_rate_flags(&sdata->vif.bss_conf.chandef); - shift = ieee80211_vif_get_shift(&sdata->vif); sband = local->hw.wiphy->bands[band]; exrates = 0; @@ -4122,8 +4123,7 @@ int ieee80211_add_ext_srates_ie(struct ieee80211_sub_if_data *sdata, continue; if (need_basic && basic_rates & BIT(i)) basic = 0x80; - rate = DIV_ROUND_UP(sband->bitrates[i].bitrate, - 5 * (1 << shift)); + rate = DIV_ROUND_UP(sband->bitrates[i].bitrate, 5); *pos++ = basic | (u8) rate; } } @@ -4167,6 +4167,8 @@ u8 ieee80211_mcs_to_chains(const struct ieee80211_mcs_info *mcs) * This function calculates the RX timestamp at the given MPDU offset, taking * into account what the RX timestamp was. An offset of 0 will just normalize * the timestamp to TSF at beginning of MPDU reception. + * + * Returns: the calculated timestamp */ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local, struct ieee80211_rx_status *status, @@ -4282,25 +4284,13 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local, fallthrough; case RX_ENC_LEGACY: { struct ieee80211_supported_band *sband; - int shift = 0; - int bitrate; - - switch (status->bw) { - case RATE_INFO_BW_10: - shift = 1; - break; - case RATE_INFO_BW_5: - shift = 2; - break; - } sband = local->hw.wiphy->bands[status->band]; - bitrate = sband->bitrates[status->rate_idx].bitrate; - ri.legacy = DIV_ROUND_UP(bitrate, (1 << shift)); + ri.legacy = sband->bitrates[status->rate_idx].bitrate; if (status->flag & RX_FLAG_MACTIME_PLCP_START) { if (status->band == NL80211_BAND_5GHZ) { - ts += 20 << shift; + ts += 20; mpdu_offset += 2; } else if (status->enc_flags & RX_ENC_FLAG_SHORTPRE) { ts += 96; @@ -4333,16 +4323,15 @@ void ieee80211_dfs_cac_cancel(struct ieee80211_local *local) struct ieee80211_sub_if_data *sdata; struct cfg80211_chan_def chandef; - /* for interface list, to avoid linking iflist_mtx and chanctx_mtx */ lockdep_assert_wiphy(local->hw.wiphy); - mutex_lock(&local->mtx); list_for_each_entry(sdata, &local->interfaces, list) { /* it might be waiting for the local->mtx, but then * by the time it gets it, sdata->wdev.cac_started * will no longer be true */ - cancel_delayed_work(&sdata->deflink.dfs_cac_timer_work); + wiphy_delayed_work_cancel(local->hw.wiphy, + &sdata->deflink.dfs_cac_timer_work); if (sdata->wdev.cac_started) { chandef = sdata->vif.bss_conf.chandef; @@ -4353,7 +4342,6 @@ void ieee80211_dfs_cac_cancel(struct ieee80211_local *local) GFP_KERNEL); } } - mutex_unlock(&local->mtx); } void ieee80211_dfs_radar_detected_work(struct wiphy *wiphy, @@ -4365,7 +4353,8 @@ void ieee80211_dfs_radar_detected_work(struct wiphy *wiphy, struct ieee80211_chanctx *ctx; int num_chanctx = 0; - mutex_lock(&local->chanctx_mtx); + lockdep_assert_wiphy(local->hw.wiphy); + list_for_each_entry(ctx, &local->chanctx_list, list) { if (ctx->replace_state == IEEE80211_CHANCTX_REPLACES_OTHER) continue; @@ -4373,7 +4362,6 @@ void ieee80211_dfs_radar_detected_work(struct wiphy *wiphy, num_chanctx++; chandef = ctx->conf.def; } - mutex_unlock(&local->chanctx_mtx); 
ieee80211_dfs_cac_cancel(local); @@ -4774,7 +4762,7 @@ static u8 ieee80211_chanctx_radar_detect(struct ieee80211_local *local, struct ieee80211_link_data *link; u8 radar_detect = 0; - lockdep_assert_held(&local->chanctx_mtx); + lockdep_assert_wiphy(local->hw.wiphy); if (WARN_ON(ctx->replace_state == IEEE80211_CHANCTX_WILL_BE_REPLACED)) return 0; @@ -4815,7 +4803,7 @@ int ieee80211_check_combinations(struct ieee80211_sub_if_data *sdata, .radar_detect = radar_detect, }; - lockdep_assert_held(&local->chanctx_mtx); + lockdep_assert_wiphy(local->hw.wiphy); if (WARN_ON(hweight32(radar_detect) > 1)) return -EINVAL; @@ -4905,7 +4893,7 @@ int ieee80211_max_num_channels(struct ieee80211_local *local) int err; struct iface_combination_params params = {0}; - lockdep_assert_held(&local->chanctx_mtx); + lockdep_assert_wiphy(local->hw.wiphy); list_for_each_entry(ctx, &local->chanctx_list, list) { if (ctx->replace_state == IEEE80211_CHANCTX_WILL_BE_REPLACED) @@ -5117,31 +5105,3 @@ u8 *ieee80211_ie_build_eht_cap(u8 *pos, return pos; } - -void ieee80211_fragment_element(struct sk_buff *skb, u8 *len_pos, u8 frag_id) -{ - unsigned int elem_len; - - if (!len_pos) - return; - - elem_len = skb->data + skb->len - len_pos - 1; - - while (elem_len > 255) { - /* this one is 255 */ - *len_pos = 255; - /* remaining data gets smaller */ - elem_len -= 255; - /* make space for the fragment ID/len in SKB */ - skb_put(skb, 2); - /* shift back the remaining data to place fragment ID/len */ - memmove(len_pos + 255 + 3, len_pos + 255 + 1, elem_len); - /* place the fragment ID */ - len_pos += 255 + 1; - *len_pos = frag_id; - /* and point to fragment length to update later */ - len_pos++; - } - - *len_pos = elem_len; -} diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c index 9a6e11d7b4..5c01e12148 100644 --- a/net/mac80211/wep.c +++ b/net/mac80211/wep.c @@ -3,6 +3,7 @@ * Software WEP encryption implementation * Copyright 2002, Jouni Malinen <jkmaline@cc.hut.fi> * Copyright 2003, Instant802 Networks, Inc. + * Copyright (C) 2023 Intel Corporation */ #include <linux/netdevice.h> @@ -250,18 +251,18 @@ ieee80211_crypto_wep_decrypt(struct ieee80211_rx_data *rx) if (!(status->flag & RX_FLAG_DECRYPTED)) { if (skb_linearize(rx->skb)) - return RX_DROP_UNUSABLE; + return RX_DROP_U_OOM; if (ieee80211_wep_decrypt(rx->local, rx->skb, rx->key)) - return RX_DROP_UNUSABLE; + return RX_DROP_U_WEP_DEC_FAIL; } else if (!(status->flag & RX_FLAG_IV_STRIPPED)) { if (!pskb_may_pull(rx->skb, ieee80211_hdrlen(fc) + IEEE80211_WEP_IV_LEN)) - return RX_DROP_UNUSABLE; + return RX_DROP_U_NO_IV; ieee80211_wep_remove_iv(rx->local, rx->skb, rx->key); /* remove ICV */ if (!(status->flag & RX_FLAG_ICV_STRIPPED) && pskb_trim(rx->skb, rx->skb->len - IEEE80211_WEP_ICV_LEN)) - return RX_DROP_UNUSABLE; + return RX_DROP_U_NO_ICV; } return RX_CONTINUE; diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index 2d8e38b3bc..94dae7cb6d 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -3,7 +3,7 @@ * Copyright 2002-2004, Instant802 Networks, Inc. * Copyright 2008, Jouni Malinen <j@w1.fi> * Copyright (C) 2016-2017 Intel Deutschland GmbH - * Copyright (C) 2020-2022 Intel Corporation + * Copyright (C) 2020-2023 Intel Corporation */ #include <linux/netdevice.h> @@ -142,7 +142,7 @@ ieee80211_rx_h_michael_mic_verify(struct ieee80211_rx_data *rx) * group keys and only the AP is sending real multicast * frames in the BSS. 
*/ - return RX_DROP_UNUSABLE; + return RX_DROP_U_AP_RX_GROUPCAST; } if (status->flag & RX_FLAG_MMIC_ERROR) @@ -150,10 +150,10 @@ ieee80211_rx_h_michael_mic_verify(struct ieee80211_rx_data *rx) hdrlen = ieee80211_hdrlen(hdr->frame_control); if (skb->len < hdrlen + MICHAEL_MIC_LEN) - return RX_DROP_UNUSABLE; + return RX_DROP_U_SHORT_MMIC; if (skb_linearize(rx->skb)) - return RX_DROP_UNUSABLE; + return RX_DROP_U_OOM; hdr = (void *)skb->data; data = skb->data + hdrlen; @@ -188,7 +188,7 @@ mic_fail_no_key: NL80211_KEYTYPE_PAIRWISE, rx->key ? rx->key->conf.keyidx : -1, NULL, GFP_ATOMIC); - return RX_DROP_UNUSABLE; + return RX_DROP_U_MMIC_FAIL; } static int tkip_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) @@ -276,11 +276,11 @@ ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx) return RX_CONTINUE; if (!rx->sta || skb->len - hdrlen < 12) - return RX_DROP_UNUSABLE; + return RX_DROP_U_SHORT_TKIP; /* it may be possible to optimize this a bit more */ if (skb_linearize(rx->skb)) - return RX_DROP_UNUSABLE; + return RX_DROP_U_OOM; hdr = (void *)skb->data; /* @@ -298,7 +298,7 @@ ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx) &rx->tkip.iv32, &rx->tkip.iv16); if (res != TKIP_DECRYPT_OK) - return RX_DROP_UNUSABLE; + return RX_DROP_U_TKIP_FAIL; /* Trim ICV */ if (!(status->flag & RX_FLAG_ICV_STRIPPED)) @@ -523,12 +523,12 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx, if (status->flag & RX_FLAG_DECRYPTED) { if (!pskb_may_pull(rx->skb, hdrlen + IEEE80211_CCMP_HDR_LEN)) - return RX_DROP_UNUSABLE; + return RX_DROP_U_SHORT_CCMP; if (status->flag & RX_FLAG_MIC_STRIPPED) mic_len = 0; } else { if (skb_linearize(rx->skb)) - return RX_DROP_UNUSABLE; + return RX_DROP_U_OOM; } /* reload hdr - skb might have been reallocated */ @@ -536,7 +536,7 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx, data_len = skb->len - hdrlen - IEEE80211_CCMP_HDR_LEN - mic_len; if (!rx->sta || data_len < 0) - return RX_DROP_UNUSABLE; + return RX_DROP_U_SHORT_CCMP; if (!(status->flag & RX_FLAG_PN_VALIDATED)) { int res; @@ -574,7 +574,7 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx, /* Remove CCMP header and MIC */ if (pskb_trim(skb, skb->len - mic_len)) - return RX_DROP_UNUSABLE; + return RX_DROP_U_SHORT_CCMP_MIC; memmove(skb->data + IEEE80211_CCMP_HDR_LEN, skb->data, hdrlen); skb_pull(skb, IEEE80211_CCMP_HDR_LEN); @@ -719,12 +719,12 @@ ieee80211_crypto_gcmp_decrypt(struct ieee80211_rx_data *rx) if (status->flag & RX_FLAG_DECRYPTED) { if (!pskb_may_pull(rx->skb, hdrlen + IEEE80211_GCMP_HDR_LEN)) - return RX_DROP_UNUSABLE; + return RX_DROP_U_SHORT_GCMP; if (status->flag & RX_FLAG_MIC_STRIPPED) mic_len = 0; } else { if (skb_linearize(rx->skb)) - return RX_DROP_UNUSABLE; + return RX_DROP_U_OOM; } /* reload hdr - skb might have been reallocated */ @@ -732,7 +732,7 @@ ieee80211_crypto_gcmp_decrypt(struct ieee80211_rx_data *rx) data_len = skb->len - hdrlen - IEEE80211_GCMP_HDR_LEN - mic_len; if (!rx->sta || data_len < 0) - return RX_DROP_UNUSABLE; + return RX_DROP_U_SHORT_GCMP; if (!(status->flag & RX_FLAG_PN_VALIDATED)) { int res; @@ -771,7 +771,7 @@ ieee80211_crypto_gcmp_decrypt(struct ieee80211_rx_data *rx) /* Remove GCMP header and MIC */ if (pskb_trim(skb, skb->len - mic_len)) - return RX_DROP_UNUSABLE; + return RX_DROP_U_SHORT_GCMP_MIC; memmove(skb->data + IEEE80211_GCMP_HDR_LEN, skb->data, hdrlen); skb_pull(skb, IEEE80211_GCMP_HDR_LEN); @@ -924,7 +924,7 @@ ieee80211_crypto_aes_cmac_decrypt(struct ieee80211_rx_data *rx) /* management frames are already linear 
*/ if (skb->len < 24 + sizeof(*mmie)) - return RX_DROP_UNUSABLE; + return RX_DROP_U_SHORT_CMAC; mmie = (struct ieee80211_mmie *) (skb->data + skb->len - sizeof(*mmie)); @@ -974,13 +974,13 @@ ieee80211_crypto_aes_cmac_256_decrypt(struct ieee80211_rx_data *rx) /* management frames are already linear */ if (skb->len < 24 + sizeof(*mmie)) - return RX_DROP_UNUSABLE; + return RX_DROP_U_SHORT_CMAC256; mmie = (struct ieee80211_mmie_16 *) (skb->data + skb->len - sizeof(*mmie)); if (mmie->element_id != WLAN_EID_MMIE || mmie->length != sizeof(*mmie) - 2) - return RX_DROP_UNUSABLE; /* Invalid MMIE */ + return RX_DROP_U_BAD_MMIE; /* Invalid MMIE */ bip_ipn_swap(ipn, mmie->sequence_number); @@ -1073,7 +1073,7 @@ ieee80211_crypto_aes_gmac_decrypt(struct ieee80211_rx_data *rx) /* management frames are already linear */ if (skb->len < 24 + sizeof(*mmie)) - return RX_DROP_UNUSABLE; + return RX_DROP_U_SHORT_GMAC; mmie = (struct ieee80211_mmie_16 *) (skb->data + skb->len - sizeof(*mmie)); @@ -1097,7 +1097,7 @@ ieee80211_crypto_aes_gmac_decrypt(struct ieee80211_rx_data *rx) mic = kmalloc(GMAC_MIC_LEN, GFP_ATOMIC); if (!mic) - return RX_DROP_UNUSABLE; + return RX_DROP_U_OOM; if (ieee80211_aes_gmac(key->u.aes_gmac.tfm, aad, nonce, skb->data + 24, skb->len - 24, mic) < 0 || diff --git a/net/mctp/route.c b/net/mctp/route.c index 7a47a58aa5..6218dcd07e 100644 --- a/net/mctp/route.c +++ b/net/mctp/route.c @@ -663,7 +663,7 @@ struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk, spin_unlock_irqrestore(&mns->keys_lock, flags); if (!tagbits) { - kfree(key); + mctp_key_unref(key); return ERR_PTR(-EBUSY); } diff --git a/net/mptcp/Makefile b/net/mptcp/Makefile index 84e531f86b..bcf1dbf3a4 100644 --- a/net/mptcp/Makefile +++ b/net/mptcp/Makefile @@ -2,7 +2,8 @@ obj-$(CONFIG_MPTCP) += mptcp.o mptcp-y := protocol.o subflow.o options.o token.o crypto.o ctrl.o pm.o diag.o \ - mib.o pm_netlink.o sockopt.o pm_userspace.o fastopen.o sched.o + mib.o pm_netlink.o sockopt.o pm_userspace.o fastopen.o sched.o \ + mptcp_pm_gen.o obj-$(CONFIG_SYN_COOKIES) += syncookies.o obj-$(CONFIG_INET_MPTCP_DIAG) += mptcp_diag.o diff --git a/net/mptcp/crypto_test.c b/net/mptcp/crypto_test.c index 017248dea0..220414e5c8 100644 --- a/net/mptcp/crypto_test.c +++ b/net/mptcp/crypto_test.c @@ -70,3 +70,4 @@ static struct kunit_suite mptcp_crypto_suite = { kunit_test_suite(mptcp_crypto_suite); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("KUnit tests for MPTCP Crypto"); diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c index e72b518c5d..13fe0748dd 100644 --- a/net/mptcp/ctrl.c +++ b/net/mptcp/ctrl.c @@ -27,6 +27,7 @@ struct mptcp_pernet { #endif unsigned int add_addr_timeout; + unsigned int close_timeout; unsigned int stale_loss_cnt; u8 mptcp_enabled; u8 checksum_enabled; @@ -65,6 +66,13 @@ unsigned int mptcp_stale_loss_cnt(const struct net *net) return mptcp_get_pernet(net)->stale_loss_cnt; } +unsigned int mptcp_close_timeout(const struct sock *sk) +{ + if (sock_flag(sk, SOCK_DEAD)) + return TCP_TIMEWAIT_LEN; + return mptcp_get_pernet(sock_net(sk))->close_timeout; +} + int mptcp_get_pm_type(const struct net *net) { return mptcp_get_pernet(net)->pm_type; @@ -79,6 +87,7 @@ static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet) { pernet->mptcp_enabled = 1; pernet->add_addr_timeout = TCP_RTO_MAX; + pernet->close_timeout = TCP_TIMEWAIT_LEN; pernet->checksum_enabled = 0; pernet->allow_join_initial_addr_port = 1; pernet->stale_loss_cnt = 4; @@ -141,6 +150,12 @@ static struct ctl_table mptcp_sysctl_table[] = { .mode = 0644, .proc_handler = 
proc_dostring, }, + { + .procname = "close_timeout", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, {} }; @@ -163,6 +178,7 @@ static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet) table[4].data = &pernet->stale_loss_cnt; table[5].data = &pernet->pm_type; table[6].data = &pernet->scheduler; + table[7].data = &pernet->close_timeout; hdr = register_net_sysctl_sz(net, MPTCP_SYSCTL_PATH, table, ARRAY_SIZE(mptcp_sysctl_table)); diff --git a/net/mptcp/diag.c b/net/mptcp/diag.c index a536586742..6ff6f14674 100644 --- a/net/mptcp/diag.c +++ b/net/mptcp/diag.c @@ -13,17 +13,19 @@ #include <uapi/linux/mptcp.h> #include "protocol.h" -static int subflow_get_info(const struct sock *sk, struct sk_buff *skb) +static int subflow_get_info(struct sock *sk, struct sk_buff *skb) { struct mptcp_subflow_context *sf; struct nlattr *start; u32 flags = 0; + bool slow; int err; start = nla_nest_start_noflag(skb, INET_ULP_INFO_MPTCP); if (!start) return -EMSGSIZE; + slow = lock_sock_fast(sk); rcu_read_lock(); sf = rcu_dereference(inet_csk(sk)->icsk_ulp_data); if (!sf) { @@ -63,17 +65,19 @@ static int subflow_get_info(const struct sock *sk, struct sk_buff *skb) sf->map_data_len) || nla_put_u32(skb, MPTCP_SUBFLOW_ATTR_FLAGS, flags) || nla_put_u8(skb, MPTCP_SUBFLOW_ATTR_ID_REM, sf->remote_id) || - nla_put_u8(skb, MPTCP_SUBFLOW_ATTR_ID_LOC, sf->local_id)) { + nla_put_u8(skb, MPTCP_SUBFLOW_ATTR_ID_LOC, subflow_get_local_id(sf))) { err = -EMSGSIZE; goto nla_failure; } rcu_read_unlock(); + unlock_sock_fast(sk, slow); nla_nest_end(skb, start); return 0; nla_failure: rcu_read_unlock(); + unlock_sock_fast(sk, slow); nla_nest_cancel(skb, start); return err; } diff --git a/net/mptcp/fastopen.c b/net/mptcp/fastopen.c index 74698582a2..ad28da655f 100644 --- a/net/mptcp/fastopen.c +++ b/net/mptcp/fastopen.c @@ -59,13 +59,12 @@ void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subf mptcp_data_unlock(sk); } -void mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow, - const struct mptcp_options_received *mp_opt) +void __mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow, + const struct mptcp_options_received *mp_opt) { struct sock *sk = (struct sock *)msk; struct sk_buff *skb; - mptcp_data_lock(sk); skb = skb_peek_tail(&sk->sk_receive_queue); if (skb) { WARN_ON_ONCE(MPTCP_SKB_CB(skb)->end_seq); @@ -77,5 +76,4 @@ void mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_ } pr_debug("msk=%p ack_seq=%llx", msk, msk->ack_seq); - mptcp_data_unlock(sk); } diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c index a0990c365a..c30405e768 100644 --- a/net/mptcp/mib.c +++ b/net/mptcp/mib.c @@ -66,6 +66,7 @@ static const struct snmp_mib mptcp_snmp_list[] = { SNMP_MIB_ITEM("RcvWndShared", MPTCP_MIB_RCVWNDSHARED), SNMP_MIB_ITEM("RcvWndConflictUpdate", MPTCP_MIB_RCVWNDCONFLICTUPDATE), SNMP_MIB_ITEM("RcvWndConflict", MPTCP_MIB_RCVWNDCONFLICT), + SNMP_MIB_ITEM("MPCurrEstab", MPTCP_MIB_CURRESTAB), SNMP_MIB_SENTINEL }; diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h index cae71d9472..dd7fd1f246 100644 --- a/net/mptcp/mib.h +++ b/net/mptcp/mib.h @@ -65,6 +65,7 @@ enum linux_mptcp_mib_field { * conflict with another subflow while updating msk rcv wnd */ MPTCP_MIB_RCVWNDCONFLICT, /* Conflict with while updating msk rcv wnd */ + MPTCP_MIB_CURRESTAB, /* Current established MPTCP connections */ __MPTCP_MIB_MAX }; @@ -95,4 +96,11 @@ static inline void 
__MPTCP_INC_STATS(struct net *net, __SNMP_INC_STATS(net->mib.mptcp_statistics, field); } +static inline void MPTCP_DEC_STATS(struct net *net, + enum linux_mptcp_mib_field field) +{ + if (likely(net->mib.mptcp_statistics)) + SNMP_DEC_STATS(net->mib.mptcp_statistics, field); +} + bool mptcp_mib_alloc(struct net *net); diff --git a/net/mptcp/mptcp_diag.c b/net/mptcp/mptcp_diag.c index 8df1bdb647..5409c2ea3f 100644 --- a/net/mptcp/mptcp_diag.c +++ b/net/mptcp/mptcp_diag.c @@ -245,4 +245,5 @@ static void __exit mptcp_diag_exit(void) module_init(mptcp_diag_init); module_exit(mptcp_diag_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("MPTCP socket monitoring via SOCK_DIAG"); MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-262 /* AF_INET - IPPROTO_MPTCP */); diff --git a/net/mptcp/mptcp_pm_gen.c b/net/mptcp/mptcp_pm_gen.c new file mode 100644 index 0000000000..a2325e70dd --- /dev/null +++ b/net/mptcp/mptcp_pm_gen.c @@ -0,0 +1,179 @@ +// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) +/* Do not edit directly, auto-generated from: */ +/* Documentation/netlink/specs/mptcp.yaml */ +/* YNL-GEN kernel source */ + +#include <net/netlink.h> +#include <net/genetlink.h> + +#include "mptcp_pm_gen.h" + +#include <uapi/linux/mptcp_pm.h> + +/* Common nested types */ +const struct nla_policy mptcp_pm_address_nl_policy[MPTCP_PM_ADDR_ATTR_IF_IDX + 1] = { + [MPTCP_PM_ADDR_ATTR_FAMILY] = { .type = NLA_U16, }, + [MPTCP_PM_ADDR_ATTR_ID] = { .type = NLA_U8, }, + [MPTCP_PM_ADDR_ATTR_ADDR4] = { .type = NLA_U32, }, + [MPTCP_PM_ADDR_ATTR_ADDR6] = NLA_POLICY_EXACT_LEN(16), + [MPTCP_PM_ADDR_ATTR_PORT] = { .type = NLA_U16, }, + [MPTCP_PM_ADDR_ATTR_FLAGS] = { .type = NLA_U32, }, + [MPTCP_PM_ADDR_ATTR_IF_IDX] = { .type = NLA_S32, }, +}; + +/* MPTCP_PM_CMD_ADD_ADDR - do */ +const struct nla_policy mptcp_pm_add_addr_nl_policy[MPTCP_PM_ENDPOINT_ADDR + 1] = { + [MPTCP_PM_ENDPOINT_ADDR] = NLA_POLICY_NESTED(mptcp_pm_address_nl_policy), +}; + +/* MPTCP_PM_CMD_DEL_ADDR - do */ +const struct nla_policy mptcp_pm_del_addr_nl_policy[MPTCP_PM_ENDPOINT_ADDR + 1] = { + [MPTCP_PM_ENDPOINT_ADDR] = NLA_POLICY_NESTED(mptcp_pm_address_nl_policy), +}; + +/* MPTCP_PM_CMD_GET_ADDR - do */ +const struct nla_policy mptcp_pm_get_addr_nl_policy[MPTCP_PM_ENDPOINT_ADDR + 1] = { + [MPTCP_PM_ENDPOINT_ADDR] = NLA_POLICY_NESTED(mptcp_pm_address_nl_policy), +}; + +/* MPTCP_PM_CMD_FLUSH_ADDRS - do */ +const struct nla_policy mptcp_pm_flush_addrs_nl_policy[MPTCP_PM_ENDPOINT_ADDR + 1] = { + [MPTCP_PM_ENDPOINT_ADDR] = NLA_POLICY_NESTED(mptcp_pm_address_nl_policy), +}; + +/* MPTCP_PM_CMD_SET_LIMITS - do */ +const struct nla_policy mptcp_pm_set_limits_nl_policy[MPTCP_PM_ATTR_SUBFLOWS + 1] = { + [MPTCP_PM_ATTR_RCV_ADD_ADDRS] = { .type = NLA_U32, }, + [MPTCP_PM_ATTR_SUBFLOWS] = { .type = NLA_U32, }, +}; + +/* MPTCP_PM_CMD_GET_LIMITS - do */ +const struct nla_policy mptcp_pm_get_limits_nl_policy[MPTCP_PM_ATTR_SUBFLOWS + 1] = { + [MPTCP_PM_ATTR_RCV_ADD_ADDRS] = { .type = NLA_U32, }, + [MPTCP_PM_ATTR_SUBFLOWS] = { .type = NLA_U32, }, +}; + +/* MPTCP_PM_CMD_SET_FLAGS - do */ +const struct nla_policy mptcp_pm_set_flags_nl_policy[MPTCP_PM_ATTR_ADDR_REMOTE + 1] = { + [MPTCP_PM_ATTR_ADDR] = NLA_POLICY_NESTED(mptcp_pm_address_nl_policy), + [MPTCP_PM_ATTR_TOKEN] = { .type = NLA_U32, }, + [MPTCP_PM_ATTR_ADDR_REMOTE] = NLA_POLICY_NESTED(mptcp_pm_address_nl_policy), +}; + +/* MPTCP_PM_CMD_ANNOUNCE - do */ +const struct nla_policy mptcp_pm_announce_nl_policy[MPTCP_PM_ATTR_TOKEN + 1] = { + [MPTCP_PM_ATTR_ADDR] = 
NLA_POLICY_NESTED(mptcp_pm_address_nl_policy), + [MPTCP_PM_ATTR_TOKEN] = { .type = NLA_U32, }, +}; + +/* MPTCP_PM_CMD_REMOVE - do */ +const struct nla_policy mptcp_pm_remove_nl_policy[MPTCP_PM_ATTR_LOC_ID + 1] = { + [MPTCP_PM_ATTR_TOKEN] = { .type = NLA_U32, }, + [MPTCP_PM_ATTR_LOC_ID] = { .type = NLA_U8, }, +}; + +/* MPTCP_PM_CMD_SUBFLOW_CREATE - do */ +const struct nla_policy mptcp_pm_subflow_create_nl_policy[MPTCP_PM_ATTR_ADDR_REMOTE + 1] = { + [MPTCP_PM_ATTR_ADDR] = NLA_POLICY_NESTED(mptcp_pm_address_nl_policy), + [MPTCP_PM_ATTR_TOKEN] = { .type = NLA_U32, }, + [MPTCP_PM_ATTR_ADDR_REMOTE] = NLA_POLICY_NESTED(mptcp_pm_address_nl_policy), +}; + +/* MPTCP_PM_CMD_SUBFLOW_DESTROY - do */ +const struct nla_policy mptcp_pm_subflow_destroy_nl_policy[MPTCP_PM_ATTR_ADDR_REMOTE + 1] = { + [MPTCP_PM_ATTR_ADDR] = NLA_POLICY_NESTED(mptcp_pm_address_nl_policy), + [MPTCP_PM_ATTR_TOKEN] = { .type = NLA_U32, }, + [MPTCP_PM_ATTR_ADDR_REMOTE] = NLA_POLICY_NESTED(mptcp_pm_address_nl_policy), +}; + +/* Ops table for mptcp_pm */ +const struct genl_ops mptcp_pm_nl_ops[11] = { + { + .cmd = MPTCP_PM_CMD_ADD_ADDR, + .validate = GENL_DONT_VALIDATE_STRICT, + .doit = mptcp_pm_nl_add_addr_doit, + .policy = mptcp_pm_add_addr_nl_policy, + .maxattr = MPTCP_PM_ENDPOINT_ADDR, + .flags = GENL_UNS_ADMIN_PERM, + }, + { + .cmd = MPTCP_PM_CMD_DEL_ADDR, + .validate = GENL_DONT_VALIDATE_STRICT, + .doit = mptcp_pm_nl_del_addr_doit, + .policy = mptcp_pm_del_addr_nl_policy, + .maxattr = MPTCP_PM_ENDPOINT_ADDR, + .flags = GENL_UNS_ADMIN_PERM, + }, + { + .cmd = MPTCP_PM_CMD_GET_ADDR, + .validate = GENL_DONT_VALIDATE_STRICT, + .doit = mptcp_pm_nl_get_addr_doit, + .dumpit = mptcp_pm_nl_get_addr_dumpit, + .policy = mptcp_pm_get_addr_nl_policy, + .maxattr = MPTCP_PM_ENDPOINT_ADDR, + .flags = GENL_UNS_ADMIN_PERM, + }, + { + .cmd = MPTCP_PM_CMD_FLUSH_ADDRS, + .validate = GENL_DONT_VALIDATE_STRICT, + .doit = mptcp_pm_nl_flush_addrs_doit, + .policy = mptcp_pm_flush_addrs_nl_policy, + .maxattr = MPTCP_PM_ENDPOINT_ADDR, + .flags = GENL_UNS_ADMIN_PERM, + }, + { + .cmd = MPTCP_PM_CMD_SET_LIMITS, + .validate = GENL_DONT_VALIDATE_STRICT, + .doit = mptcp_pm_nl_set_limits_doit, + .policy = mptcp_pm_set_limits_nl_policy, + .maxattr = MPTCP_PM_ATTR_SUBFLOWS, + .flags = GENL_UNS_ADMIN_PERM, + }, + { + .cmd = MPTCP_PM_CMD_GET_LIMITS, + .validate = GENL_DONT_VALIDATE_STRICT, + .doit = mptcp_pm_nl_get_limits_doit, + .policy = mptcp_pm_get_limits_nl_policy, + .maxattr = MPTCP_PM_ATTR_SUBFLOWS, + }, + { + .cmd = MPTCP_PM_CMD_SET_FLAGS, + .validate = GENL_DONT_VALIDATE_STRICT, + .doit = mptcp_pm_nl_set_flags_doit, + .policy = mptcp_pm_set_flags_nl_policy, + .maxattr = MPTCP_PM_ATTR_ADDR_REMOTE, + .flags = GENL_UNS_ADMIN_PERM, + }, + { + .cmd = MPTCP_PM_CMD_ANNOUNCE, + .validate = GENL_DONT_VALIDATE_STRICT, + .doit = mptcp_pm_nl_announce_doit, + .policy = mptcp_pm_announce_nl_policy, + .maxattr = MPTCP_PM_ATTR_TOKEN, + .flags = GENL_UNS_ADMIN_PERM, + }, + { + .cmd = MPTCP_PM_CMD_REMOVE, + .validate = GENL_DONT_VALIDATE_STRICT, + .doit = mptcp_pm_nl_remove_doit, + .policy = mptcp_pm_remove_nl_policy, + .maxattr = MPTCP_PM_ATTR_LOC_ID, + .flags = GENL_UNS_ADMIN_PERM, + }, + { + .cmd = MPTCP_PM_CMD_SUBFLOW_CREATE, + .validate = GENL_DONT_VALIDATE_STRICT, + .doit = mptcp_pm_nl_subflow_create_doit, + .policy = mptcp_pm_subflow_create_nl_policy, + .maxattr = MPTCP_PM_ATTR_ADDR_REMOTE, + .flags = GENL_UNS_ADMIN_PERM, + }, + { + .cmd = MPTCP_PM_CMD_SUBFLOW_DESTROY, + .validate = GENL_DONT_VALIDATE_STRICT, + .doit = mptcp_pm_nl_subflow_destroy_doit, + .policy = 
mptcp_pm_subflow_destroy_nl_policy, + .maxattr = MPTCP_PM_ATTR_ADDR_REMOTE, + .flags = GENL_UNS_ADMIN_PERM, + }, +}; diff --git a/net/mptcp/mptcp_pm_gen.h b/net/mptcp/mptcp_pm_gen.h new file mode 100644 index 0000000000..10579d1845 --- /dev/null +++ b/net/mptcp/mptcp_pm_gen.h @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ +/* Do not edit directly, auto-generated from: */ +/* Documentation/netlink/specs/mptcp.yaml */ +/* YNL-GEN kernel header */ + +#ifndef _LINUX_MPTCP_PM_GEN_H +#define _LINUX_MPTCP_PM_GEN_H + +#include <net/netlink.h> +#include <net/genetlink.h> + +#include <uapi/linux/mptcp_pm.h> + +/* Common nested types */ +extern const struct nla_policy mptcp_pm_address_nl_policy[MPTCP_PM_ADDR_ATTR_IF_IDX + 1]; + +extern const struct nla_policy mptcp_pm_add_addr_nl_policy[MPTCP_PM_ENDPOINT_ADDR + 1]; + +extern const struct nla_policy mptcp_pm_del_addr_nl_policy[MPTCP_PM_ENDPOINT_ADDR + 1]; + +extern const struct nla_policy mptcp_pm_get_addr_nl_policy[MPTCP_PM_ENDPOINT_ADDR + 1]; + +extern const struct nla_policy mptcp_pm_flush_addrs_nl_policy[MPTCP_PM_ENDPOINT_ADDR + 1]; + +extern const struct nla_policy mptcp_pm_set_limits_nl_policy[MPTCP_PM_ATTR_SUBFLOWS + 1]; + +extern const struct nla_policy mptcp_pm_get_limits_nl_policy[MPTCP_PM_ATTR_SUBFLOWS + 1]; + +extern const struct nla_policy mptcp_pm_set_flags_nl_policy[MPTCP_PM_ATTR_ADDR_REMOTE + 1]; + +extern const struct nla_policy mptcp_pm_announce_nl_policy[MPTCP_PM_ATTR_TOKEN + 1]; + +extern const struct nla_policy mptcp_pm_remove_nl_policy[MPTCP_PM_ATTR_LOC_ID + 1]; + +extern const struct nla_policy mptcp_pm_subflow_create_nl_policy[MPTCP_PM_ATTR_ADDR_REMOTE + 1]; + +extern const struct nla_policy mptcp_pm_subflow_destroy_nl_policy[MPTCP_PM_ATTR_ADDR_REMOTE + 1]; + +/* Ops table for mptcp_pm */ +extern const struct genl_ops mptcp_pm_nl_ops[11]; + +int mptcp_pm_nl_add_addr_doit(struct sk_buff *skb, struct genl_info *info); +int mptcp_pm_nl_del_addr_doit(struct sk_buff *skb, struct genl_info *info); +int mptcp_pm_nl_get_addr_doit(struct sk_buff *skb, struct genl_info *info); +int mptcp_pm_nl_get_addr_dumpit(struct sk_buff *skb, + struct netlink_callback *cb); +int mptcp_pm_nl_flush_addrs_doit(struct sk_buff *skb, struct genl_info *info); +int mptcp_pm_nl_set_limits_doit(struct sk_buff *skb, struct genl_info *info); +int mptcp_pm_nl_get_limits_doit(struct sk_buff *skb, struct genl_info *info); +int mptcp_pm_nl_set_flags_doit(struct sk_buff *skb, struct genl_info *info); +int mptcp_pm_nl_announce_doit(struct sk_buff *skb, struct genl_info *info); +int mptcp_pm_nl_remove_doit(struct sk_buff *skb, struct genl_info *info); +int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, + struct genl_info *info); +int mptcp_pm_nl_subflow_destroy_doit(struct sk_buff *skb, + struct genl_info *info); + +#endif /* _LINUX_MPTCP_PM_GEN_H */ diff --git a/net/mptcp/options.c b/net/mptcp/options.c index d2527d189a..e3e96a49f9 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -962,9 +962,7 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk, /* subflows are fully established as soon as we get any * additional ack, including ADD_ADDR. 
*/ - subflow->fully_established = 1; - WRITE_ONCE(msk->fully_established, true); - goto check_notify; + goto set_fully_established; } /* If the first established packet does not contain MP_CAPABLE + data @@ -986,7 +984,10 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk, set_fully_established: if (unlikely(!READ_ONCE(msk->pm.server_side))) pr_warn_once("bogus mpc option on established client sk"); - mptcp_subflow_fully_established(subflow, mp_opt); + + mptcp_data_lock((struct sock *)msk); + __mptcp_subflow_fully_established(msk, subflow, mp_opt); + mptcp_data_unlock((struct sock *)msk); check_notify: /* if the subflow is not already linked into the conn_list, we can't diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index d8da5374d9..4ae19113b8 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -184,7 +184,7 @@ void mptcp_pm_subflow_established(struct mptcp_sock *msk) spin_unlock_bh(&pm->lock); } -void mptcp_pm_subflow_check_next(struct mptcp_sock *msk, const struct sock *ssk, +void mptcp_pm_subflow_check_next(struct mptcp_sock *msk, const struct mptcp_subflow_context *subflow) { struct mptcp_pm_data *pm = &msk->pm; diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 3011bc3784..cccb720c1c 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -396,19 +396,6 @@ void mptcp_pm_free_anno_list(struct mptcp_sock *msk) } } -static bool lookup_address_in_vec(const struct mptcp_addr_info *addrs, unsigned int nr, - const struct mptcp_addr_info *addr) -{ - int i; - - for (i = 0; i < nr; i++) { - if (addrs[i].id == addr->id) - return true; - } - - return false; -} - /* Fill all the remote addresses into the array addrs[], * and return the array size. */ @@ -440,18 +427,34 @@ static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, msk->pm.subflows++; addrs[i++] = remote; } else { + DECLARE_BITMAP(unavail_id, MPTCP_PM_MAX_ADDR_ID + 1); + + /* Forbid creation of new subflows matching existing + * ones, possibly already created by incoming ADD_ADDR + */ + bitmap_zero(unavail_id, MPTCP_PM_MAX_ADDR_ID + 1); + mptcp_for_each_subflow(msk, subflow) + if (READ_ONCE(subflow->local_id) == local->id) + __set_bit(subflow->remote_id, unavail_id); + mptcp_for_each_subflow(msk, subflow) { ssk = mptcp_subflow_tcp_sock(subflow); remote_address((struct sock_common *)ssk, &addrs[i]); - addrs[i].id = subflow->remote_id; + addrs[i].id = READ_ONCE(subflow->remote_id); if (deny_id0 && !addrs[i].id) continue; + if (test_bit(addrs[i].id, unavail_id)) + continue; + if (!mptcp_pm_addr_families_match(sk, local, &addrs[i])) continue; - if (!lookup_address_in_vec(addrs, i, &addrs[i]) && - msk->pm.subflows < subflows_max) { + if (msk->pm.subflows < subflows_max) { + /* forbid creating multiple addresses towards + * this id + */ + __set_bit(addrs[i].id, unavail_id); msk->pm.subflows++; i++; } @@ -799,18 +802,18 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, mptcp_for_each_subflow_safe(msk, subflow, tmp) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + u8 remote_id = READ_ONCE(subflow->remote_id); int how = RCV_SHUTDOWN | SEND_SHUTDOWN; - u8 id = subflow->local_id; + u8 id = subflow_get_local_id(subflow); - if (rm_type == MPTCP_MIB_RMADDR && subflow->remote_id != rm_id) + if (rm_type == MPTCP_MIB_RMADDR && remote_id != rm_id) continue; if (rm_type == MPTCP_MIB_RMSUBFLOW && !mptcp_local_id_match(msk, id, rm_id)) continue; pr_debug(" -> %s rm_list_ids[%d]=%u local_id=%u remote_id=%u mpc_id=%u", rm_type == MPTCP_MIB_RMADDR ?
"address" : "subflow", - i, rm_id, subflow->local_id, subflow->remote_id, - msk->mpc_endpoint_id); + i, rm_id, id, remote_id, msk->mpc_endpoint_id); spin_unlock_bh(&msk->pm.lock); mptcp_subflow_shutdown(sk, ssk, how); @@ -901,7 +904,8 @@ static void __mptcp_pm_release_addr_entry(struct mptcp_pm_addr_entry *entry) } static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet, - struct mptcp_pm_addr_entry *entry) + struct mptcp_pm_addr_entry *entry, + bool needs_id) { struct mptcp_pm_addr_entry *cur, *del_entry = NULL; unsigned int addr_max; @@ -949,7 +953,7 @@ static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet, } } - if (!entry->addr.id) { + if (!entry->addr.id && needs_id) { find_next: entry->addr.id = find_next_zero_bit(pernet->id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1, @@ -960,7 +964,7 @@ find_next: } } - if (!entry->addr.id) + if (!entry->addr.id && needs_id) goto out; __set_bit(entry->addr.id, pernet->id_bitmap); @@ -1048,6 +1052,11 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk, if (err) return err; + /* We don't use mptcp_set_state() here because it needs to be called + * under the msk socket lock. For the moment, that will not bring + * anything more than only calling inet_sk_state_store(), because the + * old status is known (TCP_CLOSE). + */ inet_sk_state_store(newsk, TCP_LISTEN); lock_sock(ssk); err = __inet_listen_sk(ssk, backlog); @@ -1087,7 +1096,7 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc entry->ifindex = 0; entry->flags = MPTCP_PM_ADDR_FLAG_IMPLICIT; entry->lsk = NULL; - ret = mptcp_pm_nl_append_new_local_addr(pernet, entry); + ret = mptcp_pm_nl_append_new_local_addr(pernet, entry, true); if (ret < 0) kfree(entry); @@ -1104,29 +1113,6 @@ static const struct genl_multicast_group mptcp_pm_mcgrps[] = { }, }; -static const struct nla_policy -mptcp_pm_addr_policy[MPTCP_PM_ADDR_ATTR_MAX + 1] = { - [MPTCP_PM_ADDR_ATTR_FAMILY] = { .type = NLA_U16, }, - [MPTCP_PM_ADDR_ATTR_ID] = { .type = NLA_U8, }, - [MPTCP_PM_ADDR_ATTR_ADDR4] = { .type = NLA_U32, }, - [MPTCP_PM_ADDR_ATTR_ADDR6] = - NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)), - [MPTCP_PM_ADDR_ATTR_PORT] = { .type = NLA_U16 }, - [MPTCP_PM_ADDR_ATTR_FLAGS] = { .type = NLA_U32 }, - [MPTCP_PM_ADDR_ATTR_IF_IDX] = { .type = NLA_S32 }, -}; - -static const struct nla_policy mptcp_pm_policy[MPTCP_PM_ATTR_MAX + 1] = { - [MPTCP_PM_ATTR_ADDR] = - NLA_POLICY_NESTED(mptcp_pm_addr_policy), - [MPTCP_PM_ATTR_RCV_ADD_ADDRS] = { .type = NLA_U32, }, - [MPTCP_PM_ATTR_SUBFLOWS] = { .type = NLA_U32, }, - [MPTCP_PM_ATTR_TOKEN] = { .type = NLA_U32, }, - [MPTCP_PM_ATTR_LOC_ID] = { .type = NLA_U8, }, - [MPTCP_PM_ATTR_ADDR_REMOTE] = - NLA_POLICY_NESTED(mptcp_pm_addr_policy), -}; - void mptcp_pm_nl_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk) { struct mptcp_subflow_context *iter, *subflow = mptcp_subflow_ctx(ssk); @@ -1188,7 +1174,7 @@ static int mptcp_pm_parse_pm_addr_attr(struct nlattr *tb[], /* no validation needed - was already done via nested policy */ err = nla_parse_nested_deprecated(tb, MPTCP_PM_ADDR_ATTR_MAX, attr, - mptcp_pm_addr_policy, info->extack); + mptcp_pm_address_nl_policy, info->extack); if (err) return err; @@ -1303,9 +1289,21 @@ next: return 0; } -static int mptcp_nl_cmd_add_addr(struct sk_buff *skb, struct genl_info *info) +static bool mptcp_pm_has_addr_attr_id(const struct nlattr *attr, + struct genl_info *info) { - struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR]; + struct nlattr *tb[MPTCP_PM_ADDR_ATTR_MAX + 1]; + + if 
(!nla_parse_nested_deprecated(tb, MPTCP_PM_ADDR_ATTR_MAX, attr, + mptcp_pm_address_nl_policy, info->extack) && + tb[MPTCP_PM_ADDR_ATTR_ID]) + return true; + return false; +} + +int mptcp_pm_nl_add_addr_doit(struct sk_buff *skb, struct genl_info *info) +{ + struct nlattr *attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR]; struct pm_nl_pernet *pernet = genl_info_pm_nl(info); struct mptcp_pm_addr_entry addr, *entry; int ret; @@ -1344,7 +1342,8 @@ static int mptcp_nl_cmd_add_addr(struct sk_buff *skb, struct genl_info *info) goto out_free; } } - ret = mptcp_pm_nl_append_new_local_addr(pernet, entry); + ret = mptcp_pm_nl_append_new_local_addr(pernet, entry, + !mptcp_pm_has_addr_attr_id(attr, info)); if (ret < 0) { GENL_SET_ERR_MSG_FMT(info, "too many addresses or duplicate one: %d", ret); goto out_free; @@ -1484,9 +1483,9 @@ next: return 0; } -static int mptcp_nl_cmd_del_addr(struct sk_buff *skb, struct genl_info *info) +int mptcp_pm_nl_del_addr_doit(struct sk_buff *skb, struct genl_info *info) { - struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR]; + struct nlattr *attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR]; struct pm_nl_pernet *pernet = genl_info_pm_nl(info); struct mptcp_pm_addr_entry addr, *entry; unsigned int addr_max; @@ -1620,7 +1619,7 @@ static void __reset_counters(struct pm_nl_pernet *pernet) pernet->addrs = 0; } -static int mptcp_nl_cmd_flush_addrs(struct sk_buff *skb, struct genl_info *info) +int mptcp_pm_nl_flush_addrs_doit(struct sk_buff *skb, struct genl_info *info) { struct pm_nl_pernet *pernet = genl_info_pm_nl(info); LIST_HEAD(free_list); @@ -1676,9 +1675,9 @@ nla_put_failure: return -EMSGSIZE; } -static int mptcp_nl_cmd_get_addr(struct sk_buff *skb, struct genl_info *info) +int mptcp_pm_nl_get_addr_doit(struct sk_buff *skb, struct genl_info *info) { - struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR]; + struct nlattr *attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR]; struct pm_nl_pernet *pernet = genl_info_pm_nl(info); struct mptcp_pm_addr_entry addr, *entry; struct sk_buff *msg; @@ -1726,8 +1725,8 @@ fail: return ret; } -static int mptcp_nl_cmd_dump_addrs(struct sk_buff *msg, - struct netlink_callback *cb) +int mptcp_pm_nl_get_addr_dumpit(struct sk_buff *msg, + struct netlink_callback *cb) { struct net *net = sock_net(msg->sk); struct mptcp_pm_addr_entry *entry; @@ -1784,8 +1783,7 @@ static int parse_limit(struct genl_info *info, int id, unsigned int *limit) return 0; } -static int -mptcp_nl_cmd_set_limits(struct sk_buff *skb, struct genl_info *info) +int mptcp_pm_nl_set_limits_doit(struct sk_buff *skb, struct genl_info *info) { struct pm_nl_pernet *pernet = genl_info_pm_nl(info); unsigned int rcv_addrs, subflows; @@ -1810,8 +1808,7 @@ unlock: return ret; } -static int -mptcp_nl_cmd_get_limits(struct sk_buff *skb, struct genl_info *info) +int mptcp_pm_nl_get_limits_doit(struct sk_buff *skb, struct genl_info *info) { struct pm_nl_pernet *pernet = genl_info_pm_nl(info); struct sk_buff *msg; @@ -1920,7 +1917,7 @@ int mptcp_pm_nl_set_flags(struct net *net, struct mptcp_pm_addr_entry *addr, u8 return 0; } -static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info) +int mptcp_pm_nl_set_flags_doit(struct sk_buff *skb, struct genl_info *info) { struct mptcp_pm_addr_entry remote = { .addr = { .family = AF_UNSPEC }, }; struct mptcp_pm_addr_entry addr = { .addr = { .family = AF_UNSPEC }, }; @@ -2000,7 +1997,7 @@ static int mptcp_event_add_subflow(struct sk_buff *skb, const struct sock *ssk) if (WARN_ON_ONCE(!sf)) return -EINVAL; - if (nla_put_u8(skb, MPTCP_ATTR_LOC_ID, 
sf->local_id)) + if (nla_put_u8(skb, MPTCP_ATTR_LOC_ID, subflow_get_local_id(sf))) return -EMSGSIZE; if (nla_put_u8(skb, MPTCP_ATTR_REM_ID, sf->remote_id)) @@ -2284,72 +2281,13 @@ nla_put_failure: nlmsg_free(skb); } -static const struct genl_small_ops mptcp_pm_ops[] = { - { - .cmd = MPTCP_PM_CMD_ADD_ADDR, - .doit = mptcp_nl_cmd_add_addr, - .flags = GENL_UNS_ADMIN_PERM, - }, - { - .cmd = MPTCP_PM_CMD_DEL_ADDR, - .doit = mptcp_nl_cmd_del_addr, - .flags = GENL_UNS_ADMIN_PERM, - }, - { - .cmd = MPTCP_PM_CMD_FLUSH_ADDRS, - .doit = mptcp_nl_cmd_flush_addrs, - .flags = GENL_UNS_ADMIN_PERM, - }, - { - .cmd = MPTCP_PM_CMD_GET_ADDR, - .doit = mptcp_nl_cmd_get_addr, - .dumpit = mptcp_nl_cmd_dump_addrs, - }, - { - .cmd = MPTCP_PM_CMD_SET_LIMITS, - .doit = mptcp_nl_cmd_set_limits, - .flags = GENL_UNS_ADMIN_PERM, - }, - { - .cmd = MPTCP_PM_CMD_GET_LIMITS, - .doit = mptcp_nl_cmd_get_limits, - }, - { - .cmd = MPTCP_PM_CMD_SET_FLAGS, - .doit = mptcp_nl_cmd_set_flags, - .flags = GENL_UNS_ADMIN_PERM, - }, - { - .cmd = MPTCP_PM_CMD_ANNOUNCE, - .doit = mptcp_nl_cmd_announce, - .flags = GENL_UNS_ADMIN_PERM, - }, - { - .cmd = MPTCP_PM_CMD_REMOVE, - .doit = mptcp_nl_cmd_remove, - .flags = GENL_UNS_ADMIN_PERM, - }, - { - .cmd = MPTCP_PM_CMD_SUBFLOW_CREATE, - .doit = mptcp_nl_cmd_sf_create, - .flags = GENL_UNS_ADMIN_PERM, - }, - { - .cmd = MPTCP_PM_CMD_SUBFLOW_DESTROY, - .doit = mptcp_nl_cmd_sf_destroy, - .flags = GENL_UNS_ADMIN_PERM, - }, -}; - static struct genl_family mptcp_genl_family __ro_after_init = { .name = MPTCP_PM_NAME, .version = MPTCP_PM_VER, - .maxattr = MPTCP_PM_ATTR_MAX, - .policy = mptcp_pm_policy, .netnsok = true, .module = THIS_MODULE, - .small_ops = mptcp_pm_ops, - .n_small_ops = ARRAY_SIZE(mptcp_pm_ops), + .ops = mptcp_pm_nl_ops, + .n_ops = ARRAY_SIZE(mptcp_pm_nl_ops), .resv_start_op = MPTCP_PM_CMD_SUBFLOW_DESTROY + 1, .mcgrps = mptcp_pm_mcgrps, .n_mcgrps = ARRAY_SIZE(mptcp_pm_mcgrps), diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index d042d32beb..01b3a8f2f0 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -26,7 +26,8 @@ void mptcp_free_local_addr_list(struct mptcp_sock *msk) } static int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk, - struct mptcp_pm_addr_entry *entry) + struct mptcp_pm_addr_entry *entry, + bool needs_id) { DECLARE_BITMAP(id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); struct mptcp_pm_addr_entry *match = NULL; @@ -41,7 +42,7 @@ static int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk, spin_lock_bh(&msk->pm.lock); list_for_each_entry(e, &msk->pm.userspace_pm_local_addr_list, list) { addr_match = mptcp_addresses_equal(&e->addr, &entry->addr, true); - if (addr_match && entry->addr.id == 0) + if (addr_match && entry->addr.id == 0 && needs_id) entry->addr.id = e->addr.id; id_match = (e->addr.id == entry->addr.id); if (addr_match && id_match) { @@ -64,7 +65,7 @@ static int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk, } *e = *entry; - if (!e->addr.id) + if (!e->addr.id && needs_id) e->addr.id = find_next_zero_bit(id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1, 1); @@ -130,10 +131,21 @@ int mptcp_userspace_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc) { - struct mptcp_pm_addr_entry new_entry; + struct mptcp_pm_addr_entry *entry = NULL, *e, new_entry; __be16 msk_sport = ((struct inet_sock *) inet_sk((struct sock *)msk))->inet_sport; + spin_lock_bh(&msk->pm.lock); + list_for_each_entry(e, 
&msk->pm.userspace_pm_local_addr_list, list) { + if (mptcp_addresses_equal(&e->addr, skc, false)) { + entry = e; + break; + } + } + spin_unlock_bh(&msk->pm.lock); + if (entry) + return entry->addr.id; + memset(&new_entry, 0, sizeof(struct mptcp_pm_addr_entry)); new_entry.addr = *skc; new_entry.addr.id = 0; @@ -142,16 +154,17 @@ int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, if (new_entry.addr.port == msk_sport) new_entry.addr.port = 0; - return mptcp_userspace_pm_append_new_local_addr(msk, &new_entry); + return mptcp_userspace_pm_append_new_local_addr(msk, &new_entry, true); } -int mptcp_nl_cmd_announce(struct sk_buff *skb, struct genl_info *info) +int mptcp_pm_nl_announce_doit(struct sk_buff *skb, struct genl_info *info) { struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN]; struct nlattr *addr = info->attrs[MPTCP_PM_ATTR_ADDR]; struct mptcp_pm_addr_entry addr_val; struct mptcp_sock *msk; int err = -EINVAL; + struct sock *sk; u32 token_val; if (!addr || !token) { @@ -167,6 +180,8 @@ int mptcp_nl_cmd_announce(struct sk_buff *skb, struct genl_info *info) return err; } + sk = (struct sock *)msk; + if (!mptcp_pm_is_userspace(msk)) { GENL_SET_ERR_MSG(info, "invalid request; userspace PM not selected"); goto announce_err; @@ -184,13 +199,13 @@ int mptcp_nl_cmd_announce(struct sk_buff *skb, struct genl_info *info) goto announce_err; } - err = mptcp_userspace_pm_append_new_local_addr(msk, &addr_val); + err = mptcp_userspace_pm_append_new_local_addr(msk, &addr_val, false); if (err < 0) { GENL_SET_ERR_MSG(info, "did not match address and id"); goto announce_err; } - lock_sock((struct sock *)msk); + lock_sock(sk); spin_lock_bh(&msk->pm.lock); if (mptcp_pm_alloc_anno_list(msk, &addr_val.addr)) { @@ -200,15 +215,49 @@ int mptcp_nl_cmd_announce(struct sk_buff *skb, struct genl_info *info) } spin_unlock_bh(&msk->pm.lock); - release_sock((struct sock *)msk); + release_sock(sk); err = 0; announce_err: - sock_put((struct sock *)msk); + sock_put(sk); return err; } -int mptcp_nl_cmd_remove(struct sk_buff *skb, struct genl_info *info) +static int mptcp_userspace_pm_remove_id_zero_address(struct mptcp_sock *msk, + struct genl_info *info) +{ + struct mptcp_rm_list list = { .nr = 0 }; + struct mptcp_subflow_context *subflow; + struct sock *sk = (struct sock *)msk; + bool has_id_0 = false; + int err = -EINVAL; + + lock_sock(sk); + mptcp_for_each_subflow(msk, subflow) { + if (READ_ONCE(subflow->local_id) == 0) { + has_id_0 = true; + break; + } + } + if (!has_id_0) { + GENL_SET_ERR_MSG(info, "address with id 0 not found"); + goto remove_err; + } + + list.ids[list.nr++] = 0; + + spin_lock_bh(&msk->pm.lock); + mptcp_pm_remove_addr(msk, &list); + spin_unlock_bh(&msk->pm.lock); + + err = 0; + +remove_err: + release_sock(sk); + return err; +} + +int mptcp_pm_nl_remove_doit(struct sk_buff *skb, struct genl_info *info) { struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN]; struct nlattr *id = info->attrs[MPTCP_PM_ATTR_LOC_ID]; @@ -217,6 +266,7 @@ int mptcp_nl_cmd_remove(struct sk_buff *skb, struct genl_info *info) struct mptcp_sock *msk; LIST_HEAD(free_list); int err = -EINVAL; + struct sock *sk; u32 token_val; u8 id_val; @@ -234,12 +284,19 @@ int mptcp_nl_cmd_remove(struct sk_buff *skb, struct genl_info *info) return err; } + sk = (struct sock *)msk; + if (!mptcp_pm_is_userspace(msk)) { GENL_SET_ERR_MSG(info, "invalid request; userspace PM not selected"); goto remove_err; } - lock_sock((struct sock *)msk); + if (id_val == 0) { + err = mptcp_userspace_pm_remove_id_zero_address(msk, info); + goto 
remove_err; + } + + lock_sock(sk); list_for_each_entry(entry, &msk->pm.userspace_pm_local_addr_list, list) { if (entry->addr.id == id_val) { @@ -250,7 +307,7 @@ int mptcp_nl_cmd_remove(struct sk_buff *skb, struct genl_info *info) if (!match) { GENL_SET_ERR_MSG(info, "address with specified id not found"); - release_sock((struct sock *)msk); + release_sock(sk); goto remove_err; } @@ -258,19 +315,19 @@ int mptcp_nl_cmd_remove(struct sk_buff *skb, struct genl_info *info) mptcp_pm_remove_addrs(msk, &free_list); - release_sock((struct sock *)msk); + release_sock(sk); list_for_each_entry_safe(match, entry, &free_list, list) { - sock_kfree_s((struct sock *)msk, match, sizeof(*match)); + sock_kfree_s(sk, match, sizeof(*match)); } err = 0; remove_err: - sock_put((struct sock *)msk); + sock_put(sk); return err; } -int mptcp_nl_cmd_sf_create(struct sk_buff *skb, struct genl_info *info) +int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, struct genl_info *info) { struct nlattr *raddr = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE]; struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN]; @@ -296,6 +353,8 @@ int mptcp_nl_cmd_sf_create(struct sk_buff *skb, struct genl_info *info) return err; } + sk = (struct sock *)msk; + if (!mptcp_pm_is_userspace(msk)) { GENL_SET_ERR_MSG(info, "invalid request; userspace PM not selected"); goto create_err; @@ -313,8 +372,6 @@ int mptcp_nl_cmd_sf_create(struct sk_buff *skb, struct genl_info *info) goto create_err; } - sk = (struct sock *)msk; - if (!mptcp_pm_addr_families_match(sk, &addr_l, &addr_r)) { GENL_SET_ERR_MSG(info, "families mismatch"); err = -EINVAL; @@ -322,7 +379,7 @@ int mptcp_nl_cmd_sf_create(struct sk_buff *skb, struct genl_info *info) } local.addr = addr_l; - err = mptcp_userspace_pm_append_new_local_addr(msk, &local); + err = mptcp_userspace_pm_append_new_local_addr(msk, &local, false); if (err < 0) { GENL_SET_ERR_MSG(info, "did not match address and id"); goto create_err; @@ -342,7 +399,7 @@ int mptcp_nl_cmd_sf_create(struct sk_buff *skb, struct genl_info *info) spin_unlock_bh(&msk->pm.lock); create_err: - sock_put((struct sock *)msk); + sock_put(sk); return err; } @@ -394,7 +451,7 @@ static struct sock *mptcp_nl_find_ssk(struct mptcp_sock *msk, return NULL; } -int mptcp_nl_cmd_sf_destroy(struct sk_buff *skb, struct genl_info *info) +int mptcp_pm_nl_subflow_destroy_doit(struct sk_buff *skb, struct genl_info *info) { struct nlattr *raddr = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE]; struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN]; @@ -419,6 +476,8 @@ int mptcp_nl_cmd_sf_destroy(struct sk_buff *skb, struct genl_info *info) return err; } + sk = (struct sock *)msk; + if (!mptcp_pm_is_userspace(msk)) { GENL_SET_ERR_MSG(info, "invalid request; userspace PM not selected"); goto destroy_err; @@ -448,7 +507,6 @@ int mptcp_nl_cmd_sf_destroy(struct sk_buff *skb, struct genl_info *info) goto destroy_err; } - sk = (struct sock *)msk; lock_sock(sk); ssk = mptcp_nl_find_ssk(msk, &addr_l, &addr_r); if (ssk) { @@ -468,7 +526,7 @@ int mptcp_nl_cmd_sf_destroy(struct sk_buff *skb, struct genl_info *info) release_sock(sk); destroy_err: - sock_put((struct sock *)msk); + sock_put(sk); return err; } @@ -478,6 +536,7 @@ int mptcp_userspace_pm_set_flags(struct net *net, struct nlattr *token, { struct mptcp_sock *msk; int ret = -EINVAL; + struct sock *sk; u32 token_val; token_val = nla_get_u32(token); @@ -486,6 +545,8 @@ int mptcp_userspace_pm_set_flags(struct net *net, struct nlattr *token, if (!msk) return ret; + sk = (struct sock *)msk; + if (!mptcp_pm_is_userspace(msk)) 
goto set_flags_err; @@ -493,11 +554,11 @@ int mptcp_userspace_pm_set_flags(struct net *net, struct nlattr *token, rem->addr.family == AF_UNSPEC) goto set_flags_err; - lock_sock((struct sock *)msk); + lock_sock(sk); ret = mptcp_pm_nl_mp_prio_send_ack(msk, &loc->addr, &rem->addr, bkup); - release_sock((struct sock *)msk); + release_sock(sk); set_flags_err: - sock_put((struct sock *)msk); + sock_put(sk); return ret; } diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 5c003a0f0f..5305f2ff0f 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -99,7 +99,7 @@ static int __mptcp_socket_create(struct mptcp_sock *msk) subflow->subflow_id = msk->subflow_id++; /* This is the first subflow, always with id 0 */ - subflow->local_id_valid = 1; + WRITE_ONCE(subflow->local_id, 0); mptcp_sock_graft(msk->first, sk->sk_socket); iput(SOCK_INODE(ssock)); @@ -121,8 +121,6 @@ struct sock *__mptcp_nmpc_sk(struct mptcp_sock *msk) ret = __mptcp_socket_create(msk); if (ret) return ERR_PTR(ret); - - mptcp_sockopt_sync(msk, msk->first); } return msk->first; @@ -445,11 +443,11 @@ static void mptcp_check_data_fin_ack(struct sock *sk) switch (sk->sk_state) { case TCP_FIN_WAIT1: - inet_sk_state_store(sk, TCP_FIN_WAIT2); + mptcp_set_state(sk, TCP_FIN_WAIT2); break; case TCP_CLOSING: case TCP_LAST_ACK: - inet_sk_state_store(sk, TCP_CLOSE); + mptcp_set_state(sk, TCP_CLOSE); break; } @@ -610,13 +608,13 @@ static bool mptcp_check_data_fin(struct sock *sk) switch (sk->sk_state) { case TCP_ESTABLISHED: - inet_sk_state_store(sk, TCP_CLOSE_WAIT); + mptcp_set_state(sk, TCP_CLOSE_WAIT); break; case TCP_FIN_WAIT1: - inet_sk_state_store(sk, TCP_CLOSING); + mptcp_set_state(sk, TCP_CLOSING); break; case TCP_FIN_WAIT2: - inet_sk_state_store(sk, TCP_CLOSE); + mptcp_set_state(sk, TCP_CLOSE); break; default: /* Other states not expected */ @@ -791,7 +789,7 @@ static bool __mptcp_subflow_error_report(struct sock *sk, struct sock *ssk) */ ssk_state = inet_sk_state_load(ssk); if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD)) - inet_sk_state_store(sk, ssk_state); + mptcp_set_state(sk, ssk_state); WRITE_ONCE(sk->sk_err, -err); /* This barrier is coupled with smp_rmb() in mptcp_poll() */ @@ -863,9 +861,8 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk) /* Wake-up the reader only for in-sequence data */ mptcp_data_lock(sk); - if (move_skbs_to_msk(msk, ssk)) + if (move_skbs_to_msk(msk, ssk) && mptcp_epollin_ready(sk)) sk->sk_data_ready(sk); - mptcp_data_unlock(sk); } @@ -1274,7 +1271,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk, * queue management operation, to avoid breaking the ext <-> * SSN association set here */ - mpext = skb_ext_find(skb, SKB_EXT_MPTCP); + mpext = mptcp_get_ext(skb); if (!mptcp_skb_can_collapse_to(data_seq, skb, mpext)) { TCP_SKB_CB(skb)->eor = 1; goto alloc_skb; @@ -1296,7 +1293,7 @@ alloc_skb: i = skb_shinfo(skb)->nr_frags; reuse_skb = false; - mpext = skb_ext_find(skb, SKB_EXT_MPTCP); + mpext = mptcp_get_ext(skb); } /* Zero window and all data acked? Probe. 
*/ @@ -1522,8 +1519,11 @@ static void mptcp_update_post_push(struct mptcp_sock *msk, void mptcp_check_and_set_pending(struct sock *sk) { - if (mptcp_send_head(sk)) - mptcp_sk(sk)->push_pending |= BIT(MPTCP_PUSH_PENDING); + if (mptcp_send_head(sk)) { + mptcp_data_lock(sk); + mptcp_sk(sk)->cb_flags |= BIT(MPTCP_PUSH_PENDING); + mptcp_data_unlock(sk); + } } static int __subflow_push_pending(struct sock *sk, struct sock *ssk, @@ -1767,6 +1767,18 @@ static int mptcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, return ret; } +static int do_copy_data_nocache(struct sock *sk, int copy, + struct iov_iter *from, char *to) +{ + if (sk->sk_route_caps & NETIF_F_NOCACHE_COPY) { + if (!copy_from_iter_full_nocache(to, copy, from)) + return -EFAULT; + } else if (!copy_from_iter_full(to, copy, from)) { + return -EFAULT; + } + return 0; +} + static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { struct mptcp_sock *msk = mptcp_sk(sk); @@ -1840,11 +1852,10 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (!sk_wmem_schedule(sk, total_ts)) goto wait_for_memory; - if (copy_page_from_iter(dfrag->page, offset, psize, - &msg->msg_iter) != psize) { - ret = -EFAULT; + ret = do_copy_data_nocache(sk, psize, &msg->msg_iter, + page_address(dfrag->page) + offset); + if (ret) goto do_error; - } /* data successfully copied into the write queue */ sk_forward_alloc_add(sk, -total_ts); @@ -1928,6 +1939,7 @@ static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk, if (!(flags & MSG_PEEK)) { MPTCP_SKB_CB(skb)->offset += count; MPTCP_SKB_CB(skb)->map_seq += count; + msk->bytes_consumed += count; } break; } @@ -1938,6 +1950,7 @@ static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk, WRITE_ONCE(msk->rmem_released, msk->rmem_released + skb->truesize); __skb_unlink(skb, &msk->receive_queue); __kfree_skb(skb); + msk->bytes_consumed += count; } if (copied >= len) @@ -1964,6 +1977,9 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied) if (copied <= 0) return; + if (!msk->rcvspace_init) + mptcp_rcv_space_init(msk, msk->first); + msk->rcvq_space.copied += copied; mstamp = div_u64(tcp_clock_ns(), NSEC_PER_USEC); @@ -2318,9 +2334,6 @@ bool __mptcp_retransmit_pending_data(struct sock *sk) if (__mptcp_check_fallback(msk)) return false; - if (tcp_rtx_and_write_queues_empty(sk)) - return false; - /* the closing socket has some data untransmitted and/or unacked: * some data in the mptcp rtx queue has not really xmitted yet. 
* keep it simple and re-inject the whole mptcp level rtx queue @@ -2397,8 +2410,8 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, if (msk->in_accept_queue && msk->first == ssk && (sock_flag(sk, SOCK_DEAD) || sock_flag(ssk, SOCK_DEAD))) { /* ensure later check in mptcp_worker() will dispose the msk */ - mptcp_set_close_tout(sk, tcp_jiffies32 - (TCP_TIMEWAIT_LEN + 1)); sock_set_flag(sk, SOCK_DEAD); + mptcp_set_close_tout(sk, tcp_jiffies32 - (mptcp_close_timeout(sk) + 1)); lock_sock_nested(ssk, SINGLE_DEPTH_NESTING); mptcp_subflow_drop_ctx(ssk); goto out_release; @@ -2467,7 +2480,7 @@ out: inet_sk_state_load(msk->first) == TCP_CLOSE) { if (sk->sk_state != TCP_ESTABLISHED || msk->in_accept_queue || sock_flag(sk, SOCK_DEAD)) { - inet_sk_state_store(sk, TCP_CLOSE); + mptcp_set_state(sk, TCP_CLOSE); mptcp_close_wake_up(sk); } else { mptcp_start_tout_timer(sk); @@ -2484,7 +2497,7 @@ void mptcp_close_ssk(struct sock *sk, struct sock *ssk, /* subflow aborted before reaching the fully_established status * attempt the creation of the next subflow */ - mptcp_pm_subflow_check_next(mptcp_sk(sk), ssk, subflow); + mptcp_pm_subflow_check_next(mptcp_sk(sk), subflow); __mptcp_close_ssk(sk, ssk, subflow, MPTCP_CF_PUSH); } @@ -2523,7 +2536,7 @@ static bool mptcp_close_tout_expired(const struct sock *sk) return false; return time_after32(tcp_jiffies32, - inet_csk(sk)->icsk_mtup.probe_timestamp + TCP_TIMEWAIT_LEN); + inet_csk(sk)->icsk_mtup.probe_timestamp + mptcp_close_timeout(sk)); } static void mptcp_check_fastclose(struct mptcp_sock *msk) @@ -2562,7 +2575,7 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk) WRITE_ONCE(sk->sk_err, ECONNRESET); } - inet_sk_state_store(sk, TCP_CLOSE); + mptcp_set_state(sk, TCP_CLOSE); WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK); smp_mb__before_atomic(); /* SHUTDOWN must be visible first */ set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags); @@ -2666,7 +2679,7 @@ void mptcp_reset_tout_timer(struct mptcp_sock *msk, unsigned long fail_tout) return; close_timeout = inet_csk(sk)->icsk_mtup.probe_timestamp - tcp_jiffies32 + jiffies + - TCP_TIMEWAIT_LEN; + mptcp_close_timeout(sk); /* the close timeout takes precedence on the fail one, and here at least one of * them is active @@ -2697,7 +2710,7 @@ static void mptcp_do_fastclose(struct sock *sk) struct mptcp_subflow_context *subflow, *tmp; struct mptcp_sock *msk = mptcp_sk(sk); - inet_sk_state_store(sk, TCP_CLOSE); + mptcp_set_state(sk, TCP_CLOSE); mptcp_for_each_subflow_safe(msk, subflow, tmp) __mptcp_close_ssk(sk, mptcp_subflow_tcp_sock(subflow), subflow, MPTCP_CF_FASTCLOSE); @@ -2762,6 +2775,7 @@ static void __mptcp_init_sock(struct sock *sk) msk->rmem_fwd_alloc = 0; WRITE_ONCE(msk->rmem_released, 0); msk->timer_ival = TCP_RTO_MIN; + msk->scaling_ratio = TCP_DEFAULT_SCALING_RATIO; WRITE_ONCE(msk->first, NULL); inet_csk(sk)->icsk_sync_mss = mptcp_sync_mss; @@ -2874,6 +2888,24 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how) release_sock(ssk); } +void mptcp_set_state(struct sock *sk, int state) +{ + int oldstate = sk->sk_state; + + switch (state) { + case TCP_ESTABLISHED: + if (oldstate != TCP_ESTABLISHED) + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_CURRESTAB); + break; + + default: + if (oldstate == TCP_ESTABLISHED) + MPTCP_DEC_STATS(sock_net(sk), MPTCP_MIB_CURRESTAB); + } + + inet_sk_state_store(sk, state); +} + static const unsigned char new_state[16] = { /* current state: new state: action: */ [0 /* (Invalid) */] = TCP_CLOSE, @@ -2896,7 +2928,7 @@ static int mptcp_close_state(struct 
sock *sk) int next = (int)new_state[sk->sk_state]; int ns = next & TCP_STATE_MASK; - inet_sk_state_store(sk, ns); + mptcp_set_state(sk, ns); return next & TCP_ACTION_FIN; } @@ -2971,16 +3003,9 @@ void __mptcp_unaccepted_force_close(struct sock *sk) __mptcp_destroy_sock(sk); } -static __poll_t mptcp_check_readable(struct mptcp_sock *msk) +static __poll_t mptcp_check_readable(struct sock *sk) { - /* Concurrent splices from sk_receive_queue into receive_queue will - * always show at least one non-empty queue when checked in this order. - */ - if (skb_queue_empty_lockless(&((struct sock *)msk)->sk_receive_queue) && - skb_queue_empty_lockless(&msk->receive_queue)) - return 0; - - return EPOLLIN | EPOLLRDNORM; + return mptcp_epollin_ready(sk) ? EPOLLIN | EPOLLRDNORM : 0; } static void mptcp_check_listen_stop(struct sock *sk) @@ -3014,11 +3039,11 @@ bool __mptcp_close(struct sock *sk, long timeout) if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) { mptcp_check_listen_stop(sk); - inet_sk_state_store(sk, TCP_CLOSE); + mptcp_set_state(sk, TCP_CLOSE); goto cleanup; } - if (mptcp_check_readable(msk) || timeout < 0) { + if (mptcp_data_avail(msk) || timeout < 0) { /* If the msk has read data, or the caller explicitly asks it, * do the MPTCP equivalent of TCP reset, aka MPTCP fastclose */ @@ -3057,7 +3082,7 @@ cleanup: * state, let's not keep resources busy for no reason */ if (subflows_alive == 0) - inet_sk_state_store(sk, TCP_CLOSE); + mptcp_set_state(sk, TCP_CLOSE); sock_hold(sk); pr_debug("msk=%p state=%d", sk, sk->sk_state); @@ -3123,7 +3148,7 @@ static int mptcp_disconnect(struct sock *sk, int flags) return -EBUSY; mptcp_check_listen_stop(sk); - inet_sk_state_store(sk, TCP_CLOSE); + mptcp_set_state(sk, TCP_CLOSE); mptcp_stop_rtx_timer(sk); mptcp_stop_tout_timer(sk); @@ -3137,7 +3162,6 @@ static int mptcp_disconnect(struct sock *sk, int flags) mptcp_destroy_common(msk, MPTCP_CF_FASTCLOSE); WRITE_ONCE(msk->flags, 0); msk->cb_flags = 0; - msk->push_pending = 0; msk->recovery = false; msk->can_ack = false; msk->fully_established = false; @@ -3145,6 +3169,7 @@ static int mptcp_disconnect(struct sock *sk, int flags) msk->snd_data_fin_enable = false; msk->rcv_fastclose = false; msk->use_64bit_ack = false; + msk->bytes_consumed = 0; WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk))); mptcp_pm_data_reset(msk); mptcp_ca_reset(sk); @@ -3152,6 +3177,7 @@ static int mptcp_disconnect(struct sock *sk, int flags) msk->bytes_received = 0; msk->bytes_sent = 0; msk->bytes_retrans = 0; + msk->rcvspace_init = 0; WRITE_ONCE(sk->sk_shutdown, 0); sk_error_report(sk); @@ -3174,6 +3200,7 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk, { struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req); struct sock *nsk = sk_clone_lock(sk, GFP_ATOMIC); + struct mptcp_subflow_context *subflow; struct mptcp_sock *msk; if (!nsk) @@ -3210,11 +3237,12 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk, /* this can't race with mptcp_close(), as the msk is * not yet exposed to user-space */ - inet_sk_state_store(nsk, TCP_ESTABLISHED); + mptcp_set_state(nsk, TCP_ESTABLISHED); /* The msk maintains a ref to each subflow in the connections list */ WRITE_ONCE(msk->first, ssk); - list_add(&mptcp_subflow_ctx(ssk)->node, &msk->conn_list); + subflow = mptcp_subflow_ctx(ssk); + list_add(&subflow->node, &msk->conn_list); sock_hold(ssk); /* new mpc subflow takes ownership of the newly @@ -3229,6 +3257,9 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk, __mptcp_propagate_sndbuf(nsk,
ssk); mptcp_rcv_space_init(msk, ssk); + + if (mp_opt->suboptions & OPTION_MPTCP_MPC_ACK) + __mptcp_subflow_fully_established(msk, subflow, mp_opt); bh_unlock_sock(nsk); /* note: the newly allocated socket refcount is 2 now */ @@ -3239,6 +3270,7 @@ void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk) { const struct tcp_sock *tp = tcp_sk(ssk); + msk->rcvspace_init = 1; msk->rcvq_space.copied = 0; msk->rcvq_space.rtt_us = 0; @@ -3249,8 +3281,6 @@ void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk) TCP_INIT_CWND * tp->advmss); if (msk->rcvq_space.space == 0) msk->rcvq_space.space = TCP_INIT_CWND * TCP_MSS_DEFAULT; - - WRITE_ONCE(msk->wnd_end, msk->snd_nxt + tcp_sk(ssk)->snd_wnd); } static struct sock *mptcp_accept(struct sock *ssk, int flags, int *err, @@ -3362,8 +3392,7 @@ static void mptcp_release_cb(struct sock *sk) struct mptcp_sock *msk = mptcp_sk(sk); for (;;) { - unsigned long flags = (msk->cb_flags & MPTCP_FLAGS_PROCESS_CTX_NEED) | - msk->push_pending; + unsigned long flags = (msk->cb_flags & MPTCP_FLAGS_PROCESS_CTX_NEED); struct list_head join_list; if (!flags) @@ -3379,7 +3408,6 @@ static void mptcp_release_cb(struct sock *sk) * datapath acquires the msk socket spinlock while helding * the subflow socket lock */ - msk->push_pending = 0; msk->cb_flags &= ~flags; spin_unlock_bh(&sk->sk_lock.slock); @@ -3507,13 +3535,8 @@ void mptcp_finish_connect(struct sock *ssk) * accessing the field below */ WRITE_ONCE(msk->local_key, subflow->local_key); - WRITE_ONCE(msk->write_seq, subflow->idsn + 1); - WRITE_ONCE(msk->snd_nxt, msk->write_seq); - WRITE_ONCE(msk->snd_una, msk->write_seq); mptcp_pm_new_connection(msk, ssk, 0); - - mptcp_rcv_space_init(msk, ssk); } void mptcp_sock_graft(struct sock *sk, struct socket *parent) @@ -3669,7 +3692,7 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (IS_ERR(ssk)) return PTR_ERR(ssk); - inet_sk_state_store(sk, TCP_SYN_SENT); + mptcp_set_state(sk, TCP_SYN_SENT); subflow = mptcp_subflow_ctx(ssk); #ifdef CONFIG_TCP_MD5SIG /* no MPTCP if MD5SIG is enabled on this socket or we may run out of @@ -3719,7 +3742,7 @@ out: if (unlikely(err)) { /* avoid leaving a dangling token in an unconnected socket */ mptcp_token_destroy(msk); - inet_sk_state_store(sk, TCP_CLOSE); + mptcp_set_state(sk, TCP_CLOSE); return err; } @@ -3809,13 +3832,13 @@ static int mptcp_listen(struct socket *sock, int backlog) goto unlock; } - inet_sk_state_store(sk, TCP_LISTEN); + mptcp_set_state(sk, TCP_LISTEN); sock_set_flag(sk, SOCK_RCU_FREE); lock_sock(ssk); err = __inet_listen_sk(ssk, backlog); release_sock(ssk); - inet_sk_state_store(sk, inet_sk_state_load(ssk)); + mptcp_set_state(sk, inet_sk_state_load(ssk)); if (!err) { sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); @@ -3875,7 +3898,7 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, __mptcp_close_ssk(newsk, msk->first, mptcp_subflow_ctx(msk->first), 0); if (unlikely(list_is_singular(&msk->conn_list))) - inet_sk_state_store(newsk, TCP_CLOSE); + mptcp_set_state(newsk, TCP_CLOSE); } } release_sock(newsk); @@ -3928,7 +3951,7 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock, mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP; if (state != TCP_SYN_SENT && state != TCP_SYN_RECV) { - mask |= mptcp_check_readable(msk); + mask |= mptcp_check_readable(sk); if (shutdown & SEND_SHUTDOWN) mask |= EPOLLOUT | EPOLLWRNORM; else @@ -3966,6 +3989,7 @@ static const struct proto_ops mptcp_stream_ops = { .sendmsg = inet_sendmsg, .recvmsg = 
inet_recvmsg, .mmap = sock_no_mmap, + .set_rcvlowat = mptcp_set_rcvlowat, }; static struct inet_protosw mptcp_protosw = { @@ -4067,6 +4091,7 @@ static const struct proto_ops mptcp_v6_stream_ops = { #ifdef CONFIG_COMPAT .compat_ioctl = inet6_compat_ioctl, #endif + .set_rcvlowat = mptcp_set_rcvlowat, }; static struct proto mptcp_v6_prot; diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 07c5ac37d0..3e50baba1b 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -13,6 +13,8 @@ #include <uapi/linux/mptcp.h> #include <net/genetlink.h> +#include "mptcp_pm_gen.h" + #define MPTCP_SUPPORTED_VERSION 1 /* MPTCP option bits */ @@ -268,6 +270,7 @@ struct mptcp_sock { atomic64_t rcv_wnd_sent; u64 rcv_data_fin_seq; u64 bytes_retrans; + u64 bytes_consumed; int rmem_fwd_alloc; int snd_burst; int old_wspace; @@ -283,7 +286,6 @@ struct mptcp_sock { int rmem_released; unsigned long flags; unsigned long cb_flags; - unsigned long push_pending; bool recovery; /* closing subflow write queue reinjected */ bool can_ack; bool fully_established; @@ -302,7 +304,8 @@ struct mptcp_sock { nodelay:1, fastopening:1, in_accept_queue:1, - free_first:1; + free_first:1, + rcvspace_init:1; struct work_struct work; struct sk_buff *ooo_last_skb; struct rb_root out_of_order_queue; @@ -436,11 +439,6 @@ mptcp_subflow_rsk(const struct request_sock *rsk) return (struct mptcp_subflow_request_sock *)rsk; } -enum mptcp_data_avail { - MPTCP_SUBFLOW_NODATA, - MPTCP_SUBFLOW_DATA_AVAIL, -}; - struct mptcp_delegated_action { struct napi_struct napi; struct list_head head; @@ -493,11 +491,10 @@ struct mptcp_subflow_context { remote_key_valid : 1, /* received the peer key from */ disposable : 1, /* ctx can be free at ulp release time */ stale : 1, /* unable to snd/rcv data, do not use for xmit */ - local_id_valid : 1, /* local_id is correctly initialized */ valid_csum_seen : 1, /* at least one csum validated */ is_mptfo : 1, /* subflow is doing TFO */ - __unused : 9; - enum mptcp_data_avail data_avail; + __unused : 10; + bool data_avail; bool scheduled; u32 remote_nonce; u64 thmac; @@ -507,7 +504,7 @@ struct mptcp_subflow_context { u8 hmac[MPTCPOPT_HMAC_LEN]; /* MPJ subflow only */ u64 iasn; /* initial ack sequence number, MPC subflows only */ }; - u8 local_id; + s16 local_id; /* if negative not initialized yet */ u8 remote_id; u8 reset_seen:1; u8 reset_transient:1; @@ -558,6 +555,7 @@ mptcp_subflow_ctx_reset(struct mptcp_subflow_context *subflow) { memset(&subflow->reset, 0, sizeof(subflow->reset)); subflow->request_mptcp = 1; + WRITE_ONCE(subflow->local_id, -1); } static inline u64 @@ -621,10 +619,12 @@ unsigned int mptcp_get_add_addr_timeout(const struct net *net); int mptcp_is_checksum_enabled(const struct net *net); int mptcp_allow_join_id0(const struct net *net); unsigned int mptcp_stale_loss_cnt(const struct net *net); +unsigned int mptcp_close_timeout(const struct sock *sk); int mptcp_get_pm_type(const struct net *net); const char *mptcp_get_scheduler(const struct net *net); -void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow, - const struct mptcp_options_received *mp_opt); +void __mptcp_subflow_fully_established(struct mptcp_sock *msk, + struct mptcp_subflow_context *subflow, + const struct mptcp_options_received *mp_opt); bool __mptcp_retransmit_pending_data(struct sock *sk); void mptcp_check_and_set_pending(struct sock *sk); void __mptcp_push_pending(struct sock *sk, unsigned int flags); @@ -642,6 +642,7 @@ bool __mptcp_close(struct sock *sk, long timeout); void mptcp_cancel_work(struct 
sock *sk); void __mptcp_unaccepted_force_close(struct sock *sk); void mptcp_set_owner_r(struct sk_buff *skb, struct sock *sk); +void mptcp_set_state(struct sock *sk, int state); bool mptcp_addresses_equal(const struct mptcp_addr_info *a, const struct mptcp_addr_info *b, bool use_port); @@ -669,6 +670,24 @@ struct sock *mptcp_subflow_get_retrans(struct mptcp_sock *msk); int mptcp_sched_get_send(struct mptcp_sock *msk); int mptcp_sched_get_retrans(struct mptcp_sock *msk); +static inline u64 mptcp_data_avail(const struct mptcp_sock *msk) +{ + return READ_ONCE(msk->bytes_received) - READ_ONCE(msk->bytes_consumed); +} + +static inline bool mptcp_epollin_ready(const struct sock *sk) +{ + /* mptcp doesn't have to deal with small skbs in the receive queue, + * as it can always coalesce them + */ + return (mptcp_data_avail(mptcp_sk(sk)) >= sk->sk_rcvlowat) || + (mem_cgroup_sockets_enabled && sk->sk_memcg && + mem_cgroup_under_socket_pressure(sk->sk_memcg)) || + READ_ONCE(tcp_memory_pressure); +} + +int mptcp_set_rcvlowat(struct sock *sk, int val); + static inline bool __tcp_can_send(const struct sock *ssk) { /* only send if our side has not closed yet */ @@ -743,6 +762,7 @@ static inline bool mptcp_is_fully_established(struct sock *sk) return inet_sk_state_load(sk) == TCP_ESTABLISHED && READ_ONCE(mptcp_sk(sk)->fully_established); } + void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk); void mptcp_data_ready(struct sock *sk, struct sock *ssk); bool mptcp_finish_join(struct sock *sk); @@ -873,7 +893,7 @@ bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk); void mptcp_pm_connection_closed(struct mptcp_sock *msk); void mptcp_pm_subflow_established(struct mptcp_sock *msk); bool mptcp_pm_nl_check_work_pending(struct mptcp_sock *msk); -void mptcp_pm_subflow_check_next(struct mptcp_sock *msk, const struct sock *ssk, +void mptcp_pm_subflow_check_next(struct mptcp_sock *msk, const struct mptcp_subflow_context *subflow); void mptcp_pm_add_addr_received(const struct sock *ssk, const struct mptcp_addr_info *addr); @@ -924,10 +944,6 @@ void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk, struct list_head *rm_list); void mptcp_free_local_addr_list(struct mptcp_sock *msk); -int mptcp_nl_cmd_announce(struct sk_buff *skb, struct genl_info *info); -int mptcp_nl_cmd_remove(struct sk_buff *skb, struct genl_info *info); -int mptcp_nl_cmd_sf_create(struct sk_buff *skb, struct genl_info *info); -int mptcp_nl_cmd_sf_destroy(struct sk_buff *skb, struct genl_info *info); void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk, const struct sock *ssk, gfp_t gfp); @@ -937,8 +953,8 @@ void mptcp_event_pm_listener(const struct sock *ssk, enum mptcp_event_type event); bool mptcp_userspace_pm_active(const struct mptcp_sock *msk); -void mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow, - const struct mptcp_options_received *mp_opt); +void __mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow, + const struct mptcp_options_received *mp_opt); void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subflow, struct request_sock *req); @@ -1006,6 +1022,15 @@ int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc); int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc); int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc); +static inline u8 subflow_get_local_id(const struct mptcp_subflow_context
*subflow) +{ + int local_id = READ_ONCE(subflow->local_id); + + if (local_id < 0) + return 0; + return local_id; +} + void __init mptcp_pm_nl_init(void); void mptcp_pm_nl_work(struct mptcp_sock *msk); void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, @@ -1054,7 +1079,7 @@ static inline bool mptcp_check_fallback(const struct sock *sk) static inline void __mptcp_do_fallback(struct mptcp_sock *msk) { - if (test_bit(MPTCP_FALLBACK_DONE, &msk->flags)) { + if (__mptcp_check_fallback(msk)) { pr_debug("TCP fallback already done (msk=%p)", msk); return; } @@ -1104,7 +1129,8 @@ static inline bool subflow_simultaneous_connect(struct sock *sk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); - return (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_FIN_WAIT1) && + return (1 << sk->sk_state) & + (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2 | TCPF_CLOSING) && is_active_ssk(subflow) && !subflow->conn_finished; } diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index 116e300823..3536807337 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -89,7 +89,7 @@ static void mptcp_sol_socket_sync_intval(struct mptcp_sock *msk, int optname, in sock_valbool_flag(ssk, SOCK_KEEPOPEN, !!val); break; case SO_PRIORITY: - ssk->sk_priority = val; + WRITE_ONCE(ssk->sk_priority, val); break; case SO_SNDBUF: case SO_SNDBUFFORCE: @@ -735,7 +735,7 @@ static int mptcp_setsockopt_v4_set_tos(struct mptcp_sock *msk, int optname, lock_sock(sk); sockopt_seq_inc(msk); - val = inet_sk(sk)->tos; + val = READ_ONCE(inet_sk(sk)->tos); mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); bool slow; @@ -919,7 +919,7 @@ void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info) mptcp_pm_get_local_addr_max(msk); } - if (test_bit(MPTCP_FALLBACK_DONE, &msk->flags)) + if (__mptcp_check_fallback(msk)) flags |= MPTCP_INFO_FLAG_FALLBACK; if (READ_ONCE(msk->can_ack)) flags |= MPTCP_INFO_FLAG_REMOTE_KEY_RECEIVED; @@ -1347,7 +1347,7 @@ static int mptcp_getsockopt_v4(struct mptcp_sock *msk, int optname, switch (optname) { case IP_TOS: - return mptcp_put_int_option(msk, optval, optlen, inet_sk(sk)->tos); + return mptcp_put_int_option(msk, optval, optlen, READ_ONCE(inet_sk(sk)->tos)); } return -EOPNOTSUPP; @@ -1450,37 +1450,63 @@ static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk) inet_assign_bit(FREEBIND, ssk, inet_test_bit(FREEBIND, sk)); } -static void __mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk) -{ - bool slow = lock_sock_fast(ssk); - - sync_socket_options(msk, ssk); - - unlock_sock_fast(ssk, slow); -} - -void mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk) +void mptcp_sockopt_sync_locked(struct mptcp_sock *msk, struct sock *ssk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); msk_owned_by_me(msk); + ssk->sk_rcvlowat = 0; + + /* subflows must ignore any latency-related settings: will not affect + * the user-space - only the msk is relevant - but will foul the + * mptcp scheduler + */ + tcp_sk(ssk)->notsent_lowat = UINT_MAX; + if (READ_ONCE(subflow->setsockopt_seq) != msk->setsockopt_seq) { - __mptcp_sockopt_sync(msk, ssk); + sync_socket_options(msk, ssk); subflow->setsockopt_seq = msk->setsockopt_seq; } } -void mptcp_sockopt_sync_locked(struct mptcp_sock *msk, struct sock *ssk) +/* unfortunately this is different enough from the tcp version so + * that we can't factor it out + */ +int mptcp_set_rcvlowat(struct sock *sk, int val) { - struct mptcp_subflow_context *subflow = 
mptcp_subflow_ctx(ssk); + struct mptcp_subflow_context *subflow; + int space, cap; - msk_owned_by_me(msk); + if (sk->sk_userlocks & SOCK_RCVBUF_LOCK) + cap = sk->sk_rcvbuf >> 1; + else + cap = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1; + val = min(val, cap); + WRITE_ONCE(sk->sk_rcvlowat, val ? : 1); - if (READ_ONCE(subflow->setsockopt_seq) != msk->setsockopt_seq) { - sync_socket_options(msk, ssk); + /* Check if we need to signal EPOLLIN right now */ + if (mptcp_epollin_ready(sk)) + sk->sk_data_ready(sk); - subflow->setsockopt_seq = msk->setsockopt_seq; + if (sk->sk_userlocks & SOCK_RCVBUF_LOCK) + return 0; + + space = __tcp_space_from_win(mptcp_sk(sk)->scaling_ratio, val); + if (space <= sk->sk_rcvbuf) + return 0; + + /* propagate the rcvbuf changes to all the subflows */ + WRITE_ONCE(sk->sk_rcvbuf, space); + mptcp_for_each_subflow(mptcp_sk(sk), subflow) { + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + bool slow; + + slow = lock_sock_fast(ssk); + WRITE_ONCE(ssk->sk_rcvbuf, space); + tcp_sk(ssk)->window_clamp = val; + unlock_sock_fast(ssk, slow); } + return 0; } diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index d3c5ecf8dd..71ba86246f 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -9,8 +9,8 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/netdevice.h> -#include <crypto/algapi.h> #include <crypto/sha2.h> +#include <crypto/utils.h> #include <net/sock.h> #include <net/inet_common.h> #include <net/inet_hashtables.h> @@ -421,29 +421,26 @@ static bool subflow_use_different_dport(struct mptcp_sock *msk, const struct soc void __mptcp_sync_state(struct sock *sk, int state) { + struct mptcp_subflow_context *subflow; struct mptcp_sock *msk = mptcp_sk(sk); + struct sock *ssk = msk->first; + + subflow = mptcp_subflow_ctx(ssk); + __mptcp_propagate_sndbuf(sk, ssk); + if (!msk->rcvspace_init) + mptcp_rcv_space_init(msk, ssk); - __mptcp_propagate_sndbuf(sk, msk->first); if (sk->sk_state == TCP_SYN_SENT) { - inet_sk_state_store(sk, state); + /* subflow->idsn is always available in TCP_SYN_SENT state, + * even for the FASTOPEN scenarios + */ + WRITE_ONCE(msk->write_seq, subflow->idsn + 1); + WRITE_ONCE(msk->snd_nxt, msk->write_seq); + mptcp_set_state(sk, state); sk->sk_state_change(sk); } } -static void mptcp_propagate_state(struct sock *sk, struct sock *ssk) -{ - struct mptcp_sock *msk = mptcp_sk(sk); - - mptcp_data_lock(sk); - if (!sock_owned_by_user(sk)) { - __mptcp_sync_state(sk, ssk->sk_state); - } else { - msk->pending_state = ssk->sk_state; - __set_bit(MPTCP_SYNC_STATE, &msk->cb_flags); - } - mptcp_data_unlock(sk); -} - static void subflow_set_remote_key(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow, const struct mptcp_options_received *mp_opt) @@ -465,6 +462,31 @@ static void subflow_set_remote_key(struct mptcp_sock *msk, atomic64_set(&msk->rcv_wnd_sent, subflow->iasn); } +static void mptcp_propagate_state(struct sock *sk, struct sock *ssk, + struct mptcp_subflow_context *subflow, + const struct mptcp_options_received *mp_opt) +{ + struct mptcp_sock *msk = mptcp_sk(sk); + + mptcp_data_lock(sk); + if (mp_opt) { + /* Options are available only in the non-fallback cases; + * avoid updating rx path fields otherwise + */ + WRITE_ONCE(msk->snd_una, subflow->idsn + 1); + WRITE_ONCE(msk->wnd_end, subflow->idsn + 1 + tcp_sk(ssk)->snd_wnd); + subflow_set_remote_key(msk, subflow, mp_opt); + } + + if (!sock_owned_by_user(sk)) { + __mptcp_sync_state(sk, ssk->sk_state); + } else { + msk->pending_state = ssk->sk_state; +
__set_bit(MPTCP_SYNC_STATE, &msk->cb_flags); + } + mptcp_data_unlock(sk); +} + static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); @@ -499,10 +521,9 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) if (mp_opt.deny_join_id0) WRITE_ONCE(msk->pm.remote_deny_join_id0, true); subflow->mp_capable = 1; - subflow_set_remote_key(msk, subflow, &mp_opt); MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVEACK); mptcp_finish_connect(sk); - mptcp_propagate_state(parent, sk); + mptcp_propagate_state(parent, sk, subflow, &mp_opt); } else if (subflow->request_join) { u8 hmac[SHA256_DIGEST_SIZE]; @@ -514,7 +535,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) subflow->backup = mp_opt.backup; subflow->thmac = mp_opt.thmac; subflow->remote_nonce = mp_opt.nonce; - subflow->remote_id = mp_opt.join_id; + WRITE_ONCE(subflow->remote_id, mp_opt.join_id); pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u backup=%d", subflow, subflow->thmac, subflow->remote_nonce, subflow->backup); @@ -545,8 +566,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) } } else if (mptcp_check_fallback(sk)) { fallback: - mptcp_rcv_space_init(msk, sk); - mptcp_propagate_state(parent, sk); + mptcp_propagate_state(parent, sk, subflow, NULL); } return; @@ -557,8 +577,8 @@ do_reset: static void subflow_set_local_id(struct mptcp_subflow_context *subflow, int local_id) { - subflow->local_id = local_id; - subflow->local_id_valid = 1; + WARN_ON_ONCE(local_id < 0 || local_id > 255); + WRITE_ONCE(subflow->local_id, local_id); } static int subflow_chk_local_id(struct sock *sk) @@ -567,7 +587,7 @@ static int subflow_chk_local_id(struct sock *sk) struct mptcp_sock *msk = mptcp_sk(subflow->conn); int err; - if (likely(subflow->local_id_valid)) + if (likely(subflow->local_id >= 0)) return 0; err = mptcp_pm_get_local_id(msk, (struct sock_common *)sk); @@ -731,17 +751,16 @@ void mptcp_subflow_drop_ctx(struct sock *ssk) kfree_rcu(ctx, rcu); } -void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow, - const struct mptcp_options_received *mp_opt) +void __mptcp_subflow_fully_established(struct mptcp_sock *msk, + struct mptcp_subflow_context *subflow, + const struct mptcp_options_received *mp_opt) { - struct mptcp_sock *msk = mptcp_sk(subflow->conn); - subflow_set_remote_key(msk, subflow, mp_opt); subflow->fully_established = 1; WRITE_ONCE(msk->fully_established, true); if (subflow->is_mptfo) - mptcp_fastopen_gen_msk_ackseq(msk, subflow, mp_opt); + __mptcp_fastopen_gen_msk_ackseq(msk, subflow, mp_opt); } static struct sock *subflow_syn_recv_sock(const struct sock *sk, @@ -834,7 +853,6 @@ create_child: * mpc option */ if (mp_opt.suboptions & OPTION_MPTCP_MPC_ACK) { - mptcp_subflow_fully_established(ctx, &mp_opt); mptcp_pm_fully_established(owner, child); ctx->pm_notified = 1; } @@ -1244,7 +1262,7 @@ static bool subflow_check_data_avail(struct sock *ssk) struct sk_buff *skb; if (!skb_peek(&ssk->sk_receive_queue)) - WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA); + WRITE_ONCE(subflow->data_avail, false); if (subflow->data_avail) return true; @@ -1278,7 +1296,7 @@ static bool subflow_check_data_avail(struct sock *ssk) continue; } - WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_DATA_AVAIL); + WRITE_ONCE(subflow->data_avail, true); break; } return true; @@ -1300,7 +1318,7 @@ fallback: goto reset; } mptcp_subflow_fail(msk, ssk); - 
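subflow_set_local_id() and subflow_chk_local_id() above switch from a separate local_id_valid flag to a signed sentinel: local_id is initialized to -1 (see subflow_create_ctx() further down) and any non-negative value means "assigned". An illustrative model of the sentinel pattern, not the kernel structures:

    #include <stdio.h>

    struct subflow { int local_id; };   /* -1: not yet assigned */

    /* mirrors the helper at the top of this section: unassigned maps to id 0 */
    static int subflow_local_id(const struct subflow *s)
    {
        return s->local_id < 0 ? 0 : s->local_id;
    }

    int main(void)
    {
        struct subflow s = { .local_id = -1 };

        printf("%d\n", subflow_local_id(&s));   /* 0 */
        s.local_id = 3;
        printf("%d\n", subflow_local_id(&s));   /* 3 */
        return 0;
    }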
WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_DATA_AVAIL); + WRITE_ONCE(subflow->data_avail, true); return true; } @@ -1317,7 +1335,7 @@ reset: while ((skb = skb_peek(&ssk->sk_receive_queue))) sk_eat_skb(ssk, skb); tcp_send_active_reset(ssk, GFP_ATOMIC); - WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA); + WRITE_ONCE(subflow->data_avail, false); return false; } @@ -1329,7 +1347,7 @@ reset: subflow->map_seq = READ_ONCE(msk->ack_seq); subflow->map_data_len = skb->len; subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq - subflow->ssn_offset; - WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_DATA_AVAIL); + WRITE_ONCE(subflow->data_avail, true); return true; } @@ -1341,7 +1359,7 @@ bool mptcp_subflow_data_available(struct sock *sk) if (subflow->map_valid && mptcp_subflow_get_map_offset(subflow) >= subflow->map_data_len) { subflow->map_valid = 0; - WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA); + WRITE_ONCE(subflow->data_avail, false); pr_debug("Done with mapping: seq=%u data_len=%u", subflow->map_subflow_seq, @@ -1412,10 +1430,18 @@ static void subflow_data_ready(struct sock *sk) WARN_ON_ONCE(!__mptcp_check_fallback(msk) && !subflow->mp_capable && !subflow->mp_join && !(state & TCPF_CLOSE)); - if (mptcp_subflow_data_available(sk)) + if (mptcp_subflow_data_available(sk)) { mptcp_data_ready(parent, sk); - else if (unlikely(sk->sk_err)) + + /* subflow-level lowat test are not relevant. + * respect the msk-level threshold eventually mandating an immediate ack + */ + if (mptcp_data_avail(msk) < parent->sk_rcvlowat && + (tcp_sk(sk)->rcv_nxt - tcp_sk(sk)->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss) + inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW; + } else if (unlikely(sk->sk_err)) { subflow_error_report(sk); + } } static void subflow_write_space(struct sock *ssk) @@ -1532,8 +1558,6 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, if (addr.ss_family == AF_INET6) addrlen = sizeof(struct sockaddr_in6); #endif - mptcp_sockopt_sync(msk, ssk); - ssk->sk_bound_dev_if = ifindex; err = kernel_bind(sf, (struct sockaddr *)&addr, addrlen); if (err) @@ -1543,7 +1567,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, pr_debug("msk=%p remote_token=%u local_id=%d remote_id=%d", msk, remote_token, local_id, remote_id); subflow->remote_token = remote_token; - subflow->remote_id = remote_id; + WRITE_ONCE(subflow->remote_id, remote_id); subflow->request_join = 1; subflow->request_bkup = !!(flags & MPTCP_PM_ADDR_FLAG_BACKUP); subflow->subflow_id = msk->subflow_id++; @@ -1644,7 +1668,7 @@ int mptcp_subflow_create_socket(struct sock *sk, unsigned short family, err = security_mptcp_add_subflow(sk, sf->sk); if (err) - goto release_ssk; + goto err_free; /* the newly created socket has to be in the same cgroup as its parent */ mptcp_attach_cgroup(sk, sf->sk); @@ -1658,15 +1682,12 @@ int mptcp_subflow_create_socket(struct sock *sk, unsigned short family, get_net_track(net, &sf->sk->ns_tracker, GFP_KERNEL); sock_inuse_add(net, 1); err = tcp_set_ulp(sf->sk, "mptcp"); + if (err) + goto err_free; -release_ssk: + mptcp_sockopt_sync_locked(mptcp_sk(sk), sf->sk); release_sock(sf->sk); - if (err) { - sock_release(sf); - return err; - } - /* the newly created socket really belongs to the owning MPTCP master * socket, even if for additional subflows the allocation is performed * by a kernel workqueue. 
Adjust inode references, so that the @@ -1686,6 +1707,11 @@ release_ssk: mptcp_subflow_ops_override(sf->sk); return 0; + +err_free: + release_sock(sf->sk); + sock_release(sf); + return err; } static struct mptcp_subflow_context *subflow_create_ctx(struct sock *sk, @@ -1705,6 +1731,7 @@ static struct mptcp_subflow_context *subflow_create_ctx(struct sock *sk, pr_debug("subflow=%p", ctx); ctx->tcp_sock = sk; + WRITE_ONCE(ctx->local_id, -1); return ctx; } @@ -1736,10 +1763,9 @@ static void subflow_state_change(struct sock *sk) msk = mptcp_sk(parent); if (subflow_simultaneous_connect(sk)) { mptcp_do_fallback(sk); - mptcp_rcv_space_init(msk, sk); pr_fallback(msk); subflow->conn_finished = 1; - mptcp_propagate_state(parent, sk); + mptcp_propagate_state(parent, sk, subflow, NULL); } /* as recvmsg() does not acquire the subflow socket for ssk selection @@ -1941,14 +1967,14 @@ static void subflow_ulp_clone(const struct request_sock *req, new_ctx->idsn = subflow_req->idsn; /* this is the first subflow, id is always 0 */ - new_ctx->local_id_valid = 1; + subflow_set_local_id(new_ctx, 0); } else if (subflow_req->mp_join) { new_ctx->ssn_offset = subflow_req->ssn_offset; new_ctx->mp_join = 1; new_ctx->fully_established = 1; new_ctx->remote_key_valid = 1; new_ctx->backup = subflow_req->backup; - new_ctx->remote_id = subflow_req->remote_id; + WRITE_ONCE(new_ctx->remote_id, subflow_req->remote_id); new_ctx->token = subflow_req->token; new_ctx->thmac = subflow_req->thmac; @@ -2062,7 +2088,6 @@ void __init mptcp_subflow_init(void) subflow_v6m_specific.send_check = ipv4_specific.send_check; subflow_v6m_specific.net_header_len = ipv4_specific.net_header_len; subflow_v6m_specific.mtu_reduced = ipv4_specific.mtu_reduced; - subflow_v6m_specific.net_frag_header_len = 0; subflow_v6m_specific.rebuild_header = subflow_rebuild_header; tcpv6_prot_override = tcpv6_prot; diff --git a/net/mptcp/token_test.c b/net/mptcp/token_test.c index 0758865ab6..bfff53e668 100644 --- a/net/mptcp/token_test.c +++ b/net/mptcp/token_test.c @@ -143,3 +143,4 @@ static struct kunit_suite mptcp_token_suite = { kunit_test_suite(mptcp_token_suite); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("KUnit tests for MPTCP Token"); diff --git a/net/netfilter/core.c b/net/netfilter/core.c index ef4e76e5ae..3126911f50 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -639,10 +639,10 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state, if (ret == 1) continue; return ret; + case NF_STOLEN: + return NF_DROP_GETERR(verdict); default: - /* Implicit handling for NF_STOLEN, as well as any other - * non conventional verdicts. 
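The mptcp_subflow_create_socket() rework above replaces the mixed release/error path with the conventional goto-unwind ladder: every failure after the socket lock is taken jumps to err_free, which releases the lock and the socket exactly once. A generic, self-contained sketch of the idiom (the allocations are illustrative):

    #include <stdlib.h>

    static int create_object(void)
    {
        char *a, *b;

        a = malloc(32);
        if (!a)
            goto err_out;
        b = malloc(64);
        if (!b)
            goto err_free_a;

        /* ... use a and b, hand ownership to the caller, etc. ... */
        free(b);
        free(a);
        return 0;

    err_free_a:
        free(a);    /* unwind in reverse order of acquisition */
    err_out:
        return -1;
    }

    int main(void) { return create_object() ? 1 : 0; }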
- */ + WARN_ON_ONCE(1); return 0; } } diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h index 26ab0e9612..9523104a90 100644 --- a/net/netfilter/ipset/ip_set_bitmap_gen.h +++ b/net/netfilter/ipset/ip_set_bitmap_gen.h @@ -28,6 +28,7 @@ #define mtype_del IPSET_TOKEN(MTYPE, _del) #define mtype_list IPSET_TOKEN(MTYPE, _list) #define mtype_gc IPSET_TOKEN(MTYPE, _gc) +#define mtype_cancel_gc IPSET_TOKEN(MTYPE, _cancel_gc) #define mtype MTYPE #define get_ext(set, map, id) ((map)->extensions + ((set)->dsize * (id))) @@ -57,9 +58,6 @@ mtype_destroy(struct ip_set *set) { struct mtype *map = set->data; - if (SET_WITH_TIMEOUT(set)) - del_timer_sync(&map->gc); - if (set->dsize && set->extensions & IPSET_EXT_DESTROY) mtype_ext_cleanup(set); ip_set_free(map->members); @@ -288,6 +286,15 @@ mtype_gc(struct timer_list *t) add_timer(&map->gc); } +static void +mtype_cancel_gc(struct ip_set *set) +{ + struct mtype *map = set->data; + + if (SET_WITH_TIMEOUT(set)) + del_timer_sync(&map->gc); +} + static const struct ip_set_type_variant mtype = { .kadt = mtype_kadt, .uadt = mtype_uadt, @@ -301,6 +308,7 @@ static const struct ip_set_type_variant mtype = { .head = mtype_head, .list = mtype_list, .same_set = mtype_same_set, + .cancel_gc = mtype_cancel_gc, }; #endif /* __IP_SET_BITMAP_IP_GEN_H */ diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 4c133e06be..3184cc6be4 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -1154,6 +1154,7 @@ static int ip_set_create(struct sk_buff *skb, const struct nfnl_info *info, return ret; cleanup: + set->variant->cancel_gc(set); set->variant->destroy(set); put_out: module_put(set->type->me); @@ -1182,6 +1183,14 @@ ip_set_destroy_set(struct ip_set *set) kfree(set); } +static void +ip_set_destroy_set_rcu(struct rcu_head *head) +{ + struct ip_set *set = container_of(head, struct ip_set, rcu); + + ip_set_destroy_set(set); +} + static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const attr[]) { @@ -1193,8 +1202,6 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, if (unlikely(protocol_min_failed(attr))) return -IPSET_ERR_PROTOCOL; - /* Must wait for flush to be really finished in list:set */ - rcu_barrier(); /* Commands are serialized and references are * protected by the ip_set_ref_lock. @@ -1206,8 +1213,10 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, * counter, so if it's already zero, we can proceed * without holding the lock. 
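The mtype_cancel_gc() helper just added (and its hash/list counterparts below) splits garbage-collector teardown out of ->destroy(): the timer or delayed work is cancelled synchronously first, so a destroy that is later deferred via call_rcu() can no longer race with a running GC. A user-space analog of the same ordering, with a joined worker thread standing in for del_timer_sync() (all names illustrative):

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdlib.h>
    #include <unistd.h>

    struct set {
        pthread_t gc;
        bool stop;
        int *members;
    };

    static void *gc_worker(void *arg)
    {
        struct set *s = arg;

        while (!__atomic_load_n(&s->stop, __ATOMIC_ACQUIRE))
            usleep(1000);   /* the periodic expiry scan would run here */
        return NULL;
    }

    static void set_cancel_gc(struct set *s)
    {
        __atomic_store_n(&s->stop, true, __ATOMIC_RELEASE);
        pthread_join(s->gc, NULL);   /* like del_timer_sync()/cancel_delayed_work_sync() */
    }

    static void set_destroy(struct set *s)
    {
        free(s->members);            /* safe: the gc can no longer touch it */
        free(s);
    }

    int main(void)
    {
        struct set *s = calloc(1, sizeof(*s));

        s->members = calloc(16, sizeof(int));
        pthread_create(&s->gc, NULL, gc_worker, s);
        set_cancel_gc(s);            /* always cancel before destroy */
        set_destroy(s);
        return 0;
    }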
*/ - read_lock_bh(&ip_set_ref_lock); if (!attr[IPSET_ATTR_SETNAME]) { + /* Must wait for flush to be really finished in list:set */ + rcu_barrier(); + read_lock_bh(&ip_set_ref_lock); for (i = 0; i < inst->ip_set_max; i++) { s = ip_set(inst, i); if (s && (s->ref || s->ref_netlink)) { @@ -1221,6 +1230,8 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, s = ip_set(inst, i); if (s) { ip_set(inst, i) = NULL; + /* Must cancel garbage collectors */ + s->variant->cancel_gc(s); ip_set_destroy_set(s); } } @@ -1228,6 +1239,9 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, inst->is_destroyed = false; } else { u32 flags = flag_exist(info->nlh); + u16 features = 0; + + read_lock_bh(&ip_set_ref_lock); s = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]), &i); if (!s) { @@ -1238,10 +1252,16 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, ret = -IPSET_ERR_BUSY; goto out; } + features = s->type->features; ip_set(inst, i) = NULL; read_unlock_bh(&ip_set_ref_lock); - - ip_set_destroy_set(s); + if (features & IPSET_TYPE_NAME) { + /* Must wait for flush to be really finished */ + rcu_barrier(); + } + /* Must cancel garbage collectors */ + s->variant->cancel_gc(s); + call_rcu(&s->rcu, ip_set_destroy_set_rcu); } return 0; out: @@ -1394,9 +1414,6 @@ static int ip_set_swap(struct sk_buff *skb, const struct nfnl_info *info, ip_set(inst, to_id) = from; write_unlock_bh(&ip_set_ref_lock); - /* Make sure all readers of the old set pointers are completed. */ - synchronize_rcu(); - return 0; } @@ -2362,6 +2379,7 @@ ip_set_net_exit(struct net *net) set = ip_set(inst, i); if (set) { ip_set(inst, i) = NULL; + set->variant->cancel_gc(set); ip_set_destroy_set(set); } } @@ -2409,8 +2427,11 @@ ip_set_fini(void) { nf_unregister_sockopt(&so_set); nfnetlink_subsys_unregister(&ip_set_netlink_subsys); - unregister_pernet_subsys(&ip_set_net_ops); + + /* Wait for call_rcu() in destroy */ + rcu_barrier(); + pr_debug("these are the famous last words\n"); } diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 7c23995417..20aad81fca 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -221,6 +221,7 @@ static const union nf_inet_addr zeromask = {}; #undef mtype_gc_do #undef mtype_gc #undef mtype_gc_init +#undef mtype_cancel_gc #undef mtype_variant #undef mtype_data_match @@ -265,6 +266,7 @@ static const union nf_inet_addr zeromask = {}; #define mtype_gc_do IPSET_TOKEN(MTYPE, _gc_do) #define mtype_gc IPSET_TOKEN(MTYPE, _gc) #define mtype_gc_init IPSET_TOKEN(MTYPE, _gc_init) +#define mtype_cancel_gc IPSET_TOKEN(MTYPE, _cancel_gc) #define mtype_variant IPSET_TOKEN(MTYPE, _variant) #define mtype_data_match IPSET_TOKEN(MTYPE, _data_match) @@ -429,7 +431,7 @@ mtype_ahash_destroy(struct ip_set *set, struct htable *t, bool ext_destroy) u32 i; for (i = 0; i < jhash_size(t->htable_bits); i++) { - n = __ipset_dereference(hbucket(t, i)); + n = (__force struct hbucket *)hbucket(t, i); if (!n) continue; if (set->extensions & IPSET_EXT_DESTROY && ext_destroy) @@ -449,10 +451,7 @@ mtype_destroy(struct ip_set *set) struct htype *h = set->data; struct list_head *l, *lt; - if (SET_WITH_TIMEOUT(set)) - cancel_delayed_work_sync(&h->gc.dwork); - - mtype_ahash_destroy(set, ipset_dereference_nfnl(h->table), true); + mtype_ahash_destroy(set, (__force struct htable *)h->table, true); list_for_each_safe(l, lt, &h->ad) { list_del(l); kfree(l); @@ -598,6 +597,15 @@ 
mtype_gc_init(struct htable_gc *gc) queue_delayed_work(system_power_efficient_wq, &gc->dwork, HZ); } +static void +mtype_cancel_gc(struct ip_set *set) +{ + struct htype *h = set->data; + + if (SET_WITH_TIMEOUT(set)) + cancel_delayed_work_sync(&h->gc.dwork); +} + static int mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, struct ip_set_ext *mext, u32 flags); @@ -1440,6 +1448,7 @@ static const struct ip_set_type_variant mtype_variant = { .uref = mtype_uref, .resize = mtype_resize, .same_set = mtype_same_set, + .cancel_gc = mtype_cancel_gc, .region_lock = true, }; diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index e162636525..6c3f28bc59 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -426,9 +426,6 @@ list_set_destroy(struct ip_set *set) struct list_set *map = set->data; struct set_elem *e, *n; - if (SET_WITH_TIMEOUT(set)) - timer_shutdown_sync(&map->gc); - list_for_each_entry_safe(e, n, &map->members, list) { list_del(&e->list); ip_set_put_byindex(map->net, e->id); @@ -545,6 +542,15 @@ list_set_same_set(const struct ip_set *a, const struct ip_set *b) a->extensions == b->extensions; } +static void +list_set_cancel_gc(struct ip_set *set) +{ + struct list_set *map = set->data; + + if (SET_WITH_TIMEOUT(set)) + timer_shutdown_sync(&map->gc); +} + static const struct ip_set_type_variant set_variant = { .kadt = list_set_kadt, .uadt = list_set_uadt, @@ -558,6 +564,7 @@ static const struct ip_set_type_variant set_variant = { .head = list_set_head, .list = list_set_list, .same_set = list_set_same_set, + .cancel_gc = list_set_cancel_gc, }; static void diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 3230506ae3..a2c16b5010 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -2450,3 +2450,4 @@ static void __exit ip_vs_cleanup(void) module_init(ip_vs_init); module_exit(ip_vs_cleanup); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("IP Virtual Server"); diff --git a/net/netfilter/ipvs/ip_vs_dh.c b/net/netfilter/ipvs/ip_vs_dh.c index 5e6ec32aff..75f4c231f4 100644 --- a/net/netfilter/ipvs/ip_vs_dh.c +++ b/net/netfilter/ipvs/ip_vs_dh.c @@ -270,3 +270,4 @@ static void __exit ip_vs_dh_cleanup(void) module_init(ip_vs_dh_init); module_exit(ip_vs_dh_cleanup); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ipvs destination hashing scheduler"); diff --git a/net/netfilter/ipvs/ip_vs_fo.c b/net/netfilter/ipvs/ip_vs_fo.c index b846cc3852..ab117e5bc3 100644 --- a/net/netfilter/ipvs/ip_vs_fo.c +++ b/net/netfilter/ipvs/ip_vs_fo.c @@ -72,3 +72,4 @@ static void __exit ip_vs_fo_cleanup(void) module_init(ip_vs_fo_init); module_exit(ip_vs_fo_cleanup); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ipvs weighted failover scheduler"); diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index ef1f45e43b..f53899d124 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -635,3 +635,4 @@ static void __exit ip_vs_ftp_exit(void) module_init(ip_vs_ftp_init); module_exit(ip_vs_ftp_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ipvs ftp helper"); diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index cf78ba4ce5..8ceec7a2fa 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -632,3 +632,4 @@ static void __exit ip_vs_lblc_cleanup(void) module_init(ip_vs_lblc_init); module_exit(ip_vs_lblc_cleanup); MODULE_LICENSE("GPL"); 
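The long run of one-line hunks here and below adds a MODULE_DESCRIPTION() to each IPVS scheduler and helper, so modinfo reports a description and builds stop warning about its absence. The resulting trailer follows the usual module skeleton; a minimal sketch (builds only against kernel headers via kbuild, not standalone; the module name is illustrative):

    #include <linux/module.h>
    #include <linux/init.h>

    static int __init demo_init(void) { return 0; }
    static void __exit demo_exit(void) { }

    module_init(demo_init);
    module_exit(demo_exit);

    MODULE_LICENSE("GPL");
    MODULE_DESCRIPTION("demo module carrying a description for modinfo");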
+MODULE_DESCRIPTION("ipvs locality-based least-connection scheduler"); diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index 9eddf118b4..0fb6470721 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -817,3 +817,4 @@ static void __exit ip_vs_lblcr_cleanup(void) module_init(ip_vs_lblcr_init); module_exit(ip_vs_lblcr_cleanup); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ipvs locality-based least-connection with replication scheduler"); diff --git a/net/netfilter/ipvs/ip_vs_lc.c b/net/netfilter/ipvs/ip_vs_lc.c index 9d34d81fc6..c2764505e3 100644 --- a/net/netfilter/ipvs/ip_vs_lc.c +++ b/net/netfilter/ipvs/ip_vs_lc.c @@ -86,3 +86,4 @@ static void __exit ip_vs_lc_cleanup(void) module_init(ip_vs_lc_init); module_exit(ip_vs_lc_cleanup); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ipvs least connection scheduler"); diff --git a/net/netfilter/ipvs/ip_vs_nq.c b/net/netfilter/ipvs/ip_vs_nq.c index f56862a875..ed7f5c889b 100644 --- a/net/netfilter/ipvs/ip_vs_nq.c +++ b/net/netfilter/ipvs/ip_vs_nq.c @@ -136,3 +136,4 @@ static void __exit ip_vs_nq_cleanup(void) module_init(ip_vs_nq_init); module_exit(ip_vs_nq_cleanup); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ipvs never queue scheduler"); diff --git a/net/netfilter/ipvs/ip_vs_ovf.c b/net/netfilter/ipvs/ip_vs_ovf.c index c03066fdd5..c7708b8097 100644 --- a/net/netfilter/ipvs/ip_vs_ovf.c +++ b/net/netfilter/ipvs/ip_vs_ovf.c @@ -79,3 +79,4 @@ static void __exit ip_vs_ovf_cleanup(void) module_init(ip_vs_ovf_init); module_exit(ip_vs_ovf_cleanup); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ipvs overflow connection scheduler"); diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c index 0ac6705a61..e4ce1d9a63 100644 --- a/net/netfilter/ipvs/ip_vs_pe_sip.c +++ b/net/netfilter/ipvs/ip_vs_pe_sip.c @@ -185,3 +185,4 @@ static void __exit ip_vs_sip_cleanup(void) module_init(ip_vs_sip_init); module_exit(ip_vs_sip_cleanup); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ipvs sip helper"); diff --git a/net/netfilter/ipvs/ip_vs_rr.c b/net/netfilter/ipvs/ip_vs_rr.c index 38495c6f6c..6baa34dff9 100644 --- a/net/netfilter/ipvs/ip_vs_rr.c +++ b/net/netfilter/ipvs/ip_vs_rr.c @@ -122,4 +122,5 @@ static void __exit ip_vs_rr_cleanup(void) module_init(ip_vs_rr_init); module_exit(ip_vs_rr_cleanup); +MODULE_DESCRIPTION("ipvs round-robin scheduler"); MODULE_LICENSE("GPL"); diff --git a/net/netfilter/ipvs/ip_vs_sed.c b/net/netfilter/ipvs/ip_vs_sed.c index 7663288e53..a46f99a566 100644 --- a/net/netfilter/ipvs/ip_vs_sed.c +++ b/net/netfilter/ipvs/ip_vs_sed.c @@ -137,3 +137,4 @@ static void __exit ip_vs_sed_cleanup(void) module_init(ip_vs_sed_init); module_exit(ip_vs_sed_cleanup); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ipvs shortest expected delay scheduler"); diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c index c2028e4120..92e77d7a6b 100644 --- a/net/netfilter/ipvs/ip_vs_sh.c +++ b/net/netfilter/ipvs/ip_vs_sh.c @@ -376,3 +376,4 @@ static void __exit ip_vs_sh_cleanup(void) module_init(ip_vs_sh_init); module_exit(ip_vs_sh_cleanup); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ipvs source hashing scheduler"); diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 4174076c66..eaf9f2ed00 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -1298,17 +1298,13 @@ static void set_sock_size(struct sock *sk, int mode, int val) static void set_mcast_loop(struct sock *sk, u_char loop) { /* setsockopt(sock, 
SOL_IP, IP_MULTICAST_LOOP, &loop, sizeof(loop)); */ - lock_sock(sk); inet_assign_bit(MC_LOOP, sk, loop); #ifdef CONFIG_IP_VS_IPV6 - if (sk->sk_family == AF_INET6) { - struct ipv6_pinfo *np = inet6_sk(sk); - + if (READ_ONCE(sk->sk_family) == AF_INET6) { /* IPV6_MULTICAST_LOOP */ - np->mc_loop = loop ? 1 : 0; + inet6_assign_bit(MC6_LOOP, sk, loop); } #endif - release_sock(sk); } /* @@ -1320,13 +1316,13 @@ static void set_mcast_ttl(struct sock *sk, u_char ttl) /* setsockopt(sock, SOL_IP, IP_MULTICAST_TTL, &ttl, sizeof(ttl)); */ lock_sock(sk); - inet->mc_ttl = ttl; + WRITE_ONCE(inet->mc_ttl, ttl); #ifdef CONFIG_IP_VS_IPV6 if (sk->sk_family == AF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); /* IPV6_MULTICAST_HOPS */ - np->mcast_hops = ttl; + WRITE_ONCE(np->mcast_hops, ttl); } #endif release_sock(sk); @@ -1339,13 +1335,13 @@ static void set_mcast_pmtudisc(struct sock *sk, int val) /* setsockopt(sock, SOL_IP, IP_MTU_DISCOVER, &val, sizeof(val)); */ lock_sock(sk); - inet->pmtudisc = val; + WRITE_ONCE(inet->pmtudisc, val); #ifdef CONFIG_IP_VS_IPV6 if (sk->sk_family == AF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); /* IPV6_MTU_DISCOVER */ - np->pmtudisc = val; + WRITE_ONCE(np->pmtudisc, val); } #endif release_sock(sk); diff --git a/net/netfilter/ipvs/ip_vs_twos.c b/net/netfilter/ipvs/ip_vs_twos.c index 3308e4cc74..8d5419edde 100644 --- a/net/netfilter/ipvs/ip_vs_twos.c +++ b/net/netfilter/ipvs/ip_vs_twos.c @@ -137,3 +137,4 @@ static void __exit ip_vs_twos_cleanup(void) module_init(ip_vs_twos_init); module_exit(ip_vs_twos_cleanup); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ipvs power of twos choice scheduler"); diff --git a/net/netfilter/ipvs/ip_vs_wlc.c b/net/netfilter/ipvs/ip_vs_wlc.c index 09f584b564..9fa500927c 100644 --- a/net/netfilter/ipvs/ip_vs_wlc.c +++ b/net/netfilter/ipvs/ip_vs_wlc.c @@ -109,3 +109,4 @@ static void __exit ip_vs_wlc_cleanup(void) module_init(ip_vs_wlc_init); module_exit(ip_vs_wlc_cleanup); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ipvs weighted least connection scheduler"); diff --git a/net/netfilter/ipvs/ip_vs_wrr.c b/net/netfilter/ipvs/ip_vs_wrr.c index 1bc7a0789d..85ce0d04af 100644 --- a/net/netfilter/ipvs/ip_vs_wrr.c +++ b/net/netfilter/ipvs/ip_vs_wrr.c @@ -263,3 +263,4 @@ static void __exit ip_vs_wrr_cleanup(void) module_init(ip_vs_wrr_init); module_exit(ip_vs_wrr_cleanup); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ipvs weighted round-robin scheduler"); diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c index b21799d468..475358ec82 100644 --- a/net/netfilter/nf_conntrack_bpf.c +++ b/net/netfilter/nf_conntrack_bpf.c @@ -230,9 +230,7 @@ static int _nf_conntrack_btf_struct_access(struct bpf_verifier_log *log, return 0; } -__diag_push(); -__diag_ignore_all("-Wmissing-prototypes", - "Global functions as their definitions will be in nf_conntrack BTF"); +__bpf_kfunc_start_defs(); /* bpf_xdp_ct_alloc - Allocate a new CT entry * @@ -467,7 +465,7 @@ __bpf_kfunc int bpf_ct_change_status(struct nf_conn *nfct, u32 status) return nf_ct_change_status_common(nfct, status); } -__diag_pop() +__bpf_kfunc_end_defs(); BTF_SET8_START(nf_ct_kfunc_set) BTF_ID_FLAGS(func, bpf_xdp_ct_alloc, KF_ACQUIRE | KF_RET_NULL) diff --git a/net/netfilter/nf_conntrack_broadcast.c b/net/netfilter/nf_conntrack_broadcast.c index 9fb9b80312..cfa0fe0356 100644 --- a/net/netfilter/nf_conntrack_broadcast.c +++ b/net/netfilter/nf_conntrack_broadcast.c @@ -82,3 +82,4 @@ out: EXPORT_SYMBOL_GPL(nf_conntrack_broadcast_help); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Broadcast 
connection tracking helper"); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 9f6f2e6435..2e5f3864d3 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -2042,24 +2042,6 @@ out: } EXPORT_SYMBOL_GPL(nf_conntrack_in); -/* Alter reply tuple (maybe alter helper). This is for NAT, and is - implicitly racy: see __nf_conntrack_confirm */ -void nf_conntrack_alter_reply(struct nf_conn *ct, - const struct nf_conntrack_tuple *newreply) -{ - struct nf_conn_help *help = nfct_help(ct); - - /* Should be unconfirmed, so not in hash table yet */ - WARN_ON(nf_ct_is_confirmed(ct)); - - nf_ct_dump_tuple(newreply); - - ct->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply; - if (ct->master || (help && !hlist_empty(&help->expectations))) - return; -} -EXPORT_SYMBOL_GPL(nf_conntrack_alter_reply); - /* Refresh conntrack for this many jiffies and do accounting if do_acct is 1 */ void __nf_ct_refresh_acct(struct nf_conn *ct, enum ip_conntrack_info ctinfo, @@ -2187,11 +2169,11 @@ static int __nf_conntrack_update(struct net *net, struct sk_buff *skb, dataoff = get_l4proto(skb, skb_network_offset(skb), l3num, &l4num); if (dataoff <= 0) - return -1; + return NF_DROP; if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num, l4num, net, &tuple)) - return -1; + return NF_DROP; if (ct->status & IPS_SRC_NAT) { memcpy(tuple.src.u3.all, @@ -2211,7 +2193,7 @@ static int __nf_conntrack_update(struct net *net, struct sk_buff *skb, h = nf_conntrack_find_get(net, nf_ct_zone(ct), &tuple); if (!h) - return 0; + return NF_ACCEPT; /* Store status bits of the conntrack that is clashing to re-do NAT * mangling according to what it has been done already to this packet. @@ -2224,19 +2206,25 @@ static int __nf_conntrack_update(struct net *net, struct sk_buff *skb, nat_hook = rcu_dereference(nf_nat_hook); if (!nat_hook) - return 0; + return NF_ACCEPT; - if (status & IPS_SRC_NAT && - nat_hook->manip_pkt(skb, ct, NF_NAT_MANIP_SRC, - IP_CT_DIR_ORIGINAL) == NF_DROP) - return -1; + if (status & IPS_SRC_NAT) { + unsigned int verdict = nat_hook->manip_pkt(skb, ct, + NF_NAT_MANIP_SRC, + IP_CT_DIR_ORIGINAL); + if (verdict != NF_ACCEPT) + return verdict; + } - if (status & IPS_DST_NAT && - nat_hook->manip_pkt(skb, ct, NF_NAT_MANIP_DST, - IP_CT_DIR_ORIGINAL) == NF_DROP) - return -1; + if (status & IPS_DST_NAT) { + unsigned int verdict = nat_hook->manip_pkt(skb, ct, + NF_NAT_MANIP_DST, + IP_CT_DIR_ORIGINAL); + if (verdict != NF_ACCEPT) + return verdict; + } - return 0; + return NF_ACCEPT; } /* This packet is coming from userspace via nf_queue, complete the packet @@ -2251,14 +2239,14 @@ static int nf_confirm_cthelper(struct sk_buff *skb, struct nf_conn *ct, help = nfct_help(ct); if (!help) - return 0; + return NF_ACCEPT; helper = rcu_dereference(help->helper); if (!helper) - return 0; + return NF_ACCEPT; if (!(helper->flags & NF_CT_HELPER_F_USERSPACE)) - return 0; + return NF_ACCEPT; switch (nf_ct_l3num(ct)) { case NFPROTO_IPV4: @@ -2273,42 +2261,44 @@ static int nf_confirm_cthelper(struct sk_buff *skb, struct nf_conn *ct, protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum, &frag_off); if (protoff < 0 || (frag_off & htons(~0x7)) != 0) - return 0; + return NF_ACCEPT; break; } #endif default: - return 0; + return NF_ACCEPT; } if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) && !nf_is_loopback_packet(skb)) { if (!nf_ct_seq_adjust(skb, ct, ctinfo, protoff)) { NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop); - return -1; + return NF_DROP; } } /* We've seen it coming out 
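__nf_conntrack_update() and nf_confirm_cthelper() above stop returning 0/-1 and instead return netfilter verdicts directly, so callers can propagate NF_DROP, NF_ACCEPT, or whatever a NAT hook hands back without a lossy translation step. A small sketch of the convention (the enum values mirror the uapi constants; the helper is illustrative):

    #include <stdio.h>

    enum { NF_DROP = 0, NF_ACCEPT = 1, NF_STOLEN = 2 };

    static int update_packet(int tuple_ok)
    {
        if (!tuple_ok)
            return NF_DROP;     /* was: return -1 */
        return NF_ACCEPT;       /* was: return 0  */
    }

    int main(void)
    {
        printf("%d %d\n", update_packet(1), update_packet(0));
        return 0;
    }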
the other side: confirm it */ - return nf_conntrack_confirm(skb) == NF_DROP ? - 1 : 0; + return nf_conntrack_confirm(skb); } static int nf_conntrack_update(struct net *net, struct sk_buff *skb) { enum ip_conntrack_info ctinfo; struct nf_conn *ct; - int err; ct = nf_ct_get(skb, &ctinfo); if (!ct) - return 0; + return NF_ACCEPT; if (!nf_ct_is_confirmed(ct)) { - err = __nf_conntrack_update(net, skb, ct, ctinfo); - if (err < 0) - return err; + int ret = __nf_conntrack_update(net, skb, ct, ctinfo); + + if (ret != NF_ACCEPT) + return ret; ct = nf_ct_get(skb, &ctinfo); + if (!ct) + return NF_ACCEPT; } return nf_confirm_cthelper(skb, ct, ctinfo); diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index f22691f838..4ed5878cb2 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -194,12 +194,7 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl, struct nf_conntrack_helper *helper = NULL; struct nf_conn_help *help; - /* We already got a helper explicitly attached. The function - * nf_conntrack_alter_reply - in case NAT is in use - asks for looking - * the helper up again. Since now the user is in full control of - * making consistent helper configurations, skip this automatic - * re-lookup, otherwise we'll lose the helper. - */ + /* We already got a helper explicitly attached (e.g. nft_ct) */ if (test_bit(IPS_HELPER_BIT, &ct->status)) return 0; diff --git a/net/netfilter/nf_conntrack_labels.c b/net/netfilter/nf_conntrack_labels.c index 6e70e137a0..6c46aad233 100644 --- a/net/netfilter/nf_conntrack_labels.c +++ b/net/netfilter/nf_conntrack_labels.c @@ -11,8 +11,6 @@ #include <net/netfilter/nf_conntrack_ecache.h> #include <net/netfilter/nf_conntrack_labels.h> -static DEFINE_SPINLOCK(nf_connlabels_lock); - static int replace_u32(u32 *address, u32 mask, u32 new) { u32 old, tmp; @@ -60,23 +58,24 @@ EXPORT_SYMBOL_GPL(nf_connlabels_replace); int nf_connlabels_get(struct net *net, unsigned int bits) { + int v; + if (BIT_WORD(bits) >= NF_CT_LABELS_MAX_SIZE / sizeof(long)) return -ERANGE; - spin_lock(&nf_connlabels_lock); - net->ct.labels_used++; - spin_unlock(&nf_connlabels_lock); - BUILD_BUG_ON(NF_CT_LABELS_MAX_SIZE / sizeof(long) >= U8_MAX); + v = atomic_inc_return_relaxed(&net->ct.labels_used); + WARN_ON_ONCE(v <= 0); + return 0; } EXPORT_SYMBOL_GPL(nf_connlabels_get); void nf_connlabels_put(struct net *net) { - spin_lock(&nf_connlabels_lock); - net->ct.labels_used--; - spin_unlock(&nf_connlabels_lock); + int v = atomic_dec_return_relaxed(&net->ct.labels_used); + + WARN_ON_ONCE(v < 0); } EXPORT_SYMBOL_GPL(nf_connlabels_put); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 334db22199..fb0ae15e96 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -57,6 +57,7 @@ #include "nf_internals.h" MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("List and change connection tracking table"); struct ctnetlink_list_dump_ctx { struct nf_conn *last; diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index c928ff63b1..f36727ed91 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -699,3 +699,4 @@ MODULE_ALIAS("ip_conntrack"); MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET)); MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET6)); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("IPv4 and IPv6 connection tracking"); diff --git a/net/netfilter/nf_conntrack_proto_sctp.c 
b/net/netfilter/nf_conntrack_proto_sctp.c index c6bd533983..4cc97f9712 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -283,7 +283,7 @@ sctp_new(struct nf_conn *ct, const struct sk_buff *skb, pr_debug("Setting vtag %x for secondary conntrack\n", sh->vtag); ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL] = sh->vtag; - } else { + } else if (sch->type == SCTP_CID_SHUTDOWN_ACK) { /* If it is a shutdown ack OOTB packet, we expect a return shutdown complete, otherwise an ABORT Sec 8.4 (5) and (8) */ pr_debug("Setting vtag %x for new conn OOTB\n", diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 4018acb1d6..ae493599a3 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -457,7 +457,8 @@ static void tcp_init_sender(struct ip_ct_tcp_state *sender, const struct sk_buff *skb, unsigned int dataoff, const struct tcphdr *tcph, - u32 end, u32 win) + u32 end, u32 win, + enum ip_conntrack_dir dir) { /* SYN-ACK in reply to a SYN * or SYN from reply direction in simultaneous open. @@ -471,7 +472,8 @@ static void tcp_init_sender(struct ip_ct_tcp_state *sender, * Both sides must send the Window Scale option * to enable window scaling in either direction. */ - if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE && + if (dir == IP_CT_DIR_REPLY && + !(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE)) { sender->td_scale = 0; receiver->td_scale = 0; @@ -542,7 +544,7 @@ tcp_in_window(struct nf_conn *ct, enum ip_conntrack_dir dir, if (tcph->syn) { tcp_init_sender(sender, receiver, skb, dataoff, tcph, - end, win); + end, win, dir); if (!tcph->ack) /* Simultaneous open */ return NFCT_TCP_ACCEPT; @@ -585,7 +587,7 @@ tcp_in_window(struct nf_conn *ct, enum ip_conntrack_dir dir, */ tcp_init_sender(sender, receiver, skb, dataoff, tcph, - end, win); + end, win, dir); if (dir == IP_CT_DIR_REPLY && !tcph->ack) return NFCT_TCP_ACCEPT; @@ -835,7 +837,8 @@ static bool tcp_error(const struct tcphdr *th, static noinline bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, - const struct tcphdr *th) + const struct tcphdr *th, + const struct nf_hook_state *state) { enum tcp_conntrack new_state; struct net *net = nf_ct_net(ct); @@ -846,7 +849,7 @@ static noinline bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb, /* Invalid: delete conntrack */ if (new_state >= TCP_CONNTRACK_MAX) { - pr_debug("nf_ct_tcp: invalid new deleting.\n"); + tcp_error_log(skb, state, "invalid new"); return false; } @@ -980,7 +983,7 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct, if (tcp_error(th, skb, dataoff, state)) return -NF_ACCEPT; - if (!nf_ct_is_confirmed(ct) && !tcp_new(ct, skb, dataoff, th)) + if (!nf_ct_is_confirmed(ct) && !tcp_new(ct, skb, dataoff, th, state)) return -NF_ACCEPT; spin_lock_bh(&ct->lock); diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 920a5a29ae..a057133923 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -87,12 +87,22 @@ static u32 flow_offload_dst_cookie(struct flow_offload_tuple *flow_tuple) return 0; } +static struct dst_entry *nft_route_dst_fetch(struct nf_flow_route *route, + enum flow_offload_tuple_dir dir) +{ + struct dst_entry *dst = route->tuple[dir].dst; + + route->tuple[dir].dst = NULL; + + return dst; +} + static int flow_offload_fill_route(struct flow_offload *flow, - const struct nf_flow_route *route, + struct 
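nft_route_dst_fetch() above makes flow_offload_fill_route() take ownership of the route's dst reference: the pointer is fetched and cleared in one step, and the direct-xmit case, which does not cache the dst, releases it immediately, so no caller can drop the same reference twice. A self-contained sketch of the move semantics (types are illustrative; free() stands in for dst_release()):

    #include <stdlib.h>

    struct dst { int refcnt; };
    struct route { struct dst *dst; };

    static struct dst *route_dst_fetch(struct route *r)
    {
        struct dst *d = r->dst;

        r->dst = NULL;   /* transfer: the route no longer owns the reference */
        return d;
    }

    int main(void)
    {
        struct route r = { .dst = calloc(1, sizeof(struct dst)) };
        struct dst *d = route_dst_fetch(&r);

        free(d);                 /* single release; a double put is impossible */
        return r.dst != NULL;    /* 0: ownership was moved out */
    }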
nf_flow_route *route, enum flow_offload_tuple_dir dir) { struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple; - struct dst_entry *dst = route->tuple[dir].dst; + struct dst_entry *dst = nft_route_dst_fetch(route, dir); int i, j = 0; switch (flow_tuple->l3proto) { @@ -122,6 +132,7 @@ static int flow_offload_fill_route(struct flow_offload *flow, ETH_ALEN); flow_tuple->out.ifidx = route->tuple[dir].out.ifindex; flow_tuple->out.hw_ifidx = route->tuple[dir].out.hw_ifindex; + dst_release(dst); break; case FLOW_OFFLOAD_XMIT_XFRM: case FLOW_OFFLOAD_XMIT_NEIGH: @@ -146,7 +157,7 @@ static void nft_flow_dst_release(struct flow_offload *flow, } void flow_offload_route_init(struct flow_offload *flow, - const struct nf_flow_route *route) + struct nf_flow_route *route) { flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_ORIGINAL); flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_REPLY); diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 8cc52d2bd3..e16f158388 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -193,11 +193,12 @@ void nf_logger_put(int pf, enum nf_log_type type) return; } - BUG_ON(loggers[pf][type] == NULL); - rcu_read_lock(); logger = rcu_dereference(loggers[pf][type]); - module_put(logger->me); + if (!logger) + WARN_ON_ONCE(1); + else + module_put(logger->me); rcu_read_unlock(); } EXPORT_SYMBOL_GPL(nf_logger_put); diff --git a/net/netfilter/nf_nat_bpf.c b/net/netfilter/nf_nat_bpf.c index 141ee77832..6e3b2f5885 100644 --- a/net/netfilter/nf_nat_bpf.c +++ b/net/netfilter/nf_nat_bpf.c @@ -12,9 +12,7 @@ #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_nat.h> -__diag_push(); -__diag_ignore_all("-Wmissing-prototypes", - "Global functions as their definitions will be in nf_nat BTF"); +__bpf_kfunc_start_defs(); /* bpf_ct_set_nat_info - Set source or destination nat address * @@ -54,7 +52,7 @@ __bpf_kfunc int bpf_ct_set_nat_info(struct nf_conn___init *nfct, return nf_nat_setup_info(ct, &range, manip) == NF_DROP ? 
-ENOMEM : 0; } -__diag_pop() +__bpf_kfunc_end_defs(); BTF_SET8_START(nf_nat_kfunc_set) BTF_ID_FLAGS(func, bpf_ct_set_nat_info, KF_TRUSTED_ARGS) diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index c4e0516a8d..c3d7ecbc77 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -1263,6 +1263,7 @@ static void __exit nf_nat_cleanup(void) } MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Network address translation core"); module_init(nf_nat_init); module_exit(nf_nat_cleanup); diff --git a/net/netfilter/nf_nat_proto.c b/net/netfilter/nf_nat_proto.c index 48cc60084d..dc450cc812 100644 --- a/net/netfilter/nf_nat_proto.c +++ b/net/netfilter/nf_nat_proto.c @@ -668,7 +668,7 @@ static int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int struct flowi fl; int err; - err = xfrm_decode_session(skb, &fl, family); + err = xfrm_decode_session(net, skb, &fl, family); if (err < 0) return err; @@ -697,6 +697,31 @@ static int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int } #endif +static bool nf_nat_inet_port_was_mangled(const struct sk_buff *skb, __be16 sport) +{ + enum ip_conntrack_info ctinfo; + enum ip_conntrack_dir dir; + const struct nf_conn *ct; + + ct = nf_ct_get(skb, &ctinfo); + if (!ct) + return false; + + switch (nf_ct_protonum(ct)) { + case IPPROTO_TCP: + case IPPROTO_UDP: + break; + default: + return false; + } + + dir = CTINFO2DIR(ctinfo); + if (dir != IP_CT_DIR_ORIGINAL) + return false; + + return ct->tuplehash[!dir].tuple.dst.u.all != sport; +} + static unsigned int nf_nat_ipv4_local_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) @@ -707,8 +732,20 @@ nf_nat_ipv4_local_in(void *priv, struct sk_buff *skb, ret = nf_nat_ipv4_fn(priv, skb, state); - if (ret == NF_ACCEPT && sk && saddr != ip_hdr(skb)->saddr && - !inet_sk_transparent(sk)) + if (ret != NF_ACCEPT || !sk || inet_sk_transparent(sk)) + return ret; + + /* skb has a socket assigned via tcp edemux. We need to check + * if nf_nat_ipv4_fn() has mangled the packet in a way that + * edemux would not have found this socket. + * + * This includes both changes to the source address and changes + * to the source port, which are both handled by the + * nf_nat_ipv4_fn() call above -- long after tcp/udp early demux + * might have found a socket for the old (pre-snat) address. 
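nf_nat_inet_port_was_mangled() above extends the LOCAL_IN orphaning logic: a socket attached by TCP/UDP early demux must be dropped not only when SNAT rewrote the source address but also when only the source port changed, since either makes the cached socket lookup stale. A simplified, self-contained model of the check (fields and values are illustrative):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    struct pkt { uint32_t saddr; uint16_t sport; };

    /* orphan the early-demux socket if NAT changed the address *or* the port */
    static bool must_orphan(struct pkt before, struct pkt after)
    {
        return before.saddr != after.saddr || before.sport != after.sport;
    }

    int main(void)
    {
        struct pkt pre  = { 0x0a000001, 40000 };
        struct pkt post = { 0x0a000001, 40001 };   /* port-only rewrite */

        printf("%s\n", must_orphan(pre, post) ? "orphan" : "keep");
        return 0;
    }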
+ */ + if (saddr != ip_hdr(skb)->saddr || + nf_nat_inet_port_was_mangled(skb, sk->sk_dport)) skb_orphan(skb); /* TCP edemux obtained wrong socket */ return ret; @@ -938,14 +975,36 @@ nf_nat_ipv6_fn(void *priv, struct sk_buff *skb, } static unsigned int +nf_nat_ipv6_local_in(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) +{ + struct in6_addr saddr = ipv6_hdr(skb)->saddr; + struct sock *sk = skb->sk; + unsigned int ret; + + ret = nf_nat_ipv6_fn(priv, skb, state); + + if (ret != NF_ACCEPT || !sk || inet_sk_transparent(sk)) + return ret; + + /* see nf_nat_ipv4_local_in */ + if (ipv6_addr_cmp(&saddr, &ipv6_hdr(skb)->saddr) || + nf_nat_inet_port_was_mangled(skb, sk->sk_dport)) + skb_orphan(skb); + + return ret; +} + +static unsigned int nf_nat_ipv6_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - unsigned int ret; + unsigned int ret, verdict; struct in6_addr daddr = ipv6_hdr(skb)->daddr; ret = nf_nat_ipv6_fn(priv, skb, state); - if (ret != NF_DROP && ret != NF_STOLEN && + verdict = ret & NF_VERDICT_MASK; + if (verdict != NF_DROP && verdict != NF_STOLEN && ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr)) skb_dst_drop(skb); @@ -1051,7 +1110,7 @@ static const struct nf_hook_ops nf_nat_ipv6_ops[] = { }, /* After packet filtering, change source */ { - .hook = nf_nat_ipv6_fn, + .hook = nf_nat_ipv6_local_in, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP6_PRI_NAT_SRC, diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c index 16915f8eef..467671f2d4 100644 --- a/net/netfilter/nf_synproxy_core.c +++ b/net/netfilter/nf_synproxy_core.c @@ -153,7 +153,7 @@ void synproxy_init_timestamp_cookie(const struct nf_synproxy_info *info, struct synproxy_options *opts) { opts->tsecr = opts->tsval; - opts->tsval = tcp_time_stamp_raw() & ~0x3f; + opts->tsval = tcp_clock_ms() & ~0x3f; if (opts->options & NF_SYNPROXY_OPT_WSCALE) { opts->tsval |= opts->wscale; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 4fc8348dd7..79e088e6f1 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -592,9 +592,9 @@ static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type, static int nft_mapelem_deactivate(const struct nft_ctx *ctx, struct nft_set *set, const struct nft_set_iter *iter, - struct nft_set_elem *elem) + struct nft_elem_priv *elem_priv) { - nft_setelem_data_deactivate(ctx->net, set, elem); + nft_setelem_data_deactivate(ctx->net, set, elem_priv); return 0; } @@ -602,7 +602,7 @@ static int nft_mapelem_deactivate(const struct nft_ctx *ctx, struct nft_set_elem_catchall { struct list_head list; struct rcu_head rcu; - void *elem; + struct nft_elem_priv *elem; }; static void nft_map_catchall_deactivate(const struct nft_ctx *ctx, @@ -610,7 +610,6 @@ static void nft_map_catchall_deactivate(const struct nft_ctx *ctx, { u8 genmask = nft_genmask_next(ctx->net); struct nft_set_elem_catchall *catchall; - struct nft_set_elem elem; struct nft_set_ext *ext; list_for_each_entry(catchall, &set->catchall_list, list) { @@ -618,8 +617,7 @@ static void nft_map_catchall_deactivate(const struct nft_ctx *ctx, if (!nft_set_elem_active(ext, genmask)) continue; - elem.priv = catchall->elem; - nft_setelem_data_deactivate(ctx->net, set, &elem); + nft_setelem_data_deactivate(ctx->net, set, catchall->elem); break; } } @@ -686,15 +684,16 @@ static int nft_delobj(struct nft_ctx *ctx, struct nft_object *obj) return err; } -static int nft_trans_flowtable_add(struct nft_ctx *ctx, int msg_type, - 
struct nft_flowtable *flowtable) +static struct nft_trans * +nft_trans_flowtable_add(struct nft_ctx *ctx, int msg_type, + struct nft_flowtable *flowtable) { struct nft_trans *trans; trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_flowtable)); if (trans == NULL) - return -ENOMEM; + return ERR_PTR(-ENOMEM); if (msg_type == NFT_MSG_NEWFLOWTABLE) nft_activate_next(ctx->net, flowtable); @@ -703,22 +702,22 @@ static int nft_trans_flowtable_add(struct nft_ctx *ctx, int msg_type, nft_trans_flowtable(trans) = flowtable; nft_trans_commit_list_add_tail(ctx->net, trans); - return 0; + return trans; } static int nft_delflowtable(struct nft_ctx *ctx, struct nft_flowtable *flowtable) { - int err; + struct nft_trans *trans; - err = nft_trans_flowtable_add(ctx, NFT_MSG_DELFLOWTABLE, flowtable); - if (err < 0) - return err; + trans = nft_trans_flowtable_add(ctx, NFT_MSG_DELFLOWTABLE, flowtable); + if (IS_ERR(trans)) + return PTR_ERR(trans); nft_deactivate_next(ctx->net, flowtable); nft_use_dec(&ctx->table->use); - return err; + return 0; } static void __nft_reg_track_clobber(struct nft_regs_track *track, u8 dreg) @@ -1253,6 +1252,7 @@ static int nf_tables_updtable(struct nft_ctx *ctx) return 0; err_register_hooks: + ctx->table->flags |= NFT_TABLE_F_DORMANT; nft_trans_destroy(trans); return ret; } @@ -2082,7 +2082,7 @@ static struct nft_hook *nft_netdev_hook_alloc(struct net *net, struct nft_hook *hook; int err; - hook = kmalloc(sizeof(struct nft_hook), GFP_KERNEL_ACCOUNT); + hook = kzalloc(sizeof(struct nft_hook), GFP_KERNEL_ACCOUNT); if (!hook) { err = -ENOMEM; goto err_hook_alloc; @@ -2505,19 +2505,15 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, RCU_INIT_POINTER(chain->blob_gen_0, blob); RCU_INIT_POINTER(chain->blob_gen_1, blob); - err = nf_tables_register_hook(net, table, chain); - if (err < 0) - goto err_destroy_chain; - if (!nft_use_inc(&table->use)) { err = -EMFILE; - goto err_use; + goto err_destroy_chain; } trans = nft_trans_chain_add(ctx, NFT_MSG_NEWCHAIN); if (IS_ERR(trans)) { err = PTR_ERR(trans); - goto err_unregister_hook; + goto err_trans; } nft_trans_chain_policy(trans) = NFT_CHAIN_POLICY_UNSET; @@ -2525,17 +2521,22 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, nft_trans_chain_policy(trans) = policy; err = nft_chain_add(table, chain); - if (err < 0) { - nft_trans_destroy(trans); - goto err_unregister_hook; - } + if (err < 0) + goto err_chain_add; + + /* This must be LAST to ensure no packets are walking over this chain. 
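Reordering nf_tables_addchain() so that nf_tables_register_hook() runs last enforces the usual publish-after-init discipline: the hook is what makes the chain visible to packet traversal, so every other piece of state must already be in place, and the error path unwinds in exact reverse order. A user-space sketch of the same ordering using a release-store publication (structures are illustrative):

    #include <stdatomic.h>
    #include <stdio.h>

    struct chain { int policy; int nrules; };

    static _Atomic(struct chain *) live_chain;   /* what "packets" dereference */

    static void publish(struct chain *c)
    {
        c->policy = 1;
        c->nrules = 0;
        /* release: all initialization above is visible before the pointer is */
        atomic_store_explicit(&live_chain, c, memory_order_release);
    }

    int main(void)
    {
        static struct chain c;
        struct chain *seen;

        publish(&c);
        seen = atomic_load_explicit(&live_chain, memory_order_acquire);
        printf("%d\n", seen->policy);
        return 0;
    }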
*/ + err = nf_tables_register_hook(net, table, chain); + if (err < 0) + goto err_register_hook; return 0; -err_unregister_hook: +err_register_hook: + nft_chain_del(chain); +err_chain_add: + nft_trans_destroy(trans); +err_trans: nft_use_dec_restore(&table->use); -err_use: - nf_tables_unregister_hook(net, table, chain); err_destroy_chain: nf_tables_chain_destroy(ctx); @@ -3327,7 +3328,7 @@ static const struct nla_policy nft_rule_policy[NFTA_RULE_MAX + 1] = { [NFTA_RULE_CHAIN] = { .type = NLA_STRING, .len = NFT_CHAIN_MAXNAMELEN - 1 }, [NFTA_RULE_HANDLE] = { .type = NLA_U64 }, - [NFTA_RULE_EXPRESSIONS] = { .type = NLA_NESTED }, + [NFTA_RULE_EXPRESSIONS] = NLA_POLICY_NESTED_ARRAY(nft_expr_policy), [NFTA_RULE_COMPAT] = { .type = NLA_NESTED }, [NFTA_RULE_POSITION] = { .type = NLA_U64 }, [NFTA_RULE_USERDATA] = { .type = NLA_BINARY, @@ -3452,20 +3453,21 @@ static void audit_log_rule_reset(const struct nft_table *table, } struct nft_rule_dump_ctx { + unsigned int s_idx; char *table; char *chain; + bool reset; }; static int __nf_tables_dump_rules(struct sk_buff *skb, unsigned int *idx, struct netlink_callback *cb, const struct nft_table *table, - const struct nft_chain *chain, - bool reset) + const struct nft_chain *chain) { + struct nft_rule_dump_ctx *ctx = (void *)cb->ctx; struct net *net = sock_net(skb->sk); const struct nft_rule *rule, *prule; - unsigned int s_idx = cb->args[0]; unsigned int entries = 0; int ret = 0; u64 handle; @@ -3474,7 +3476,7 @@ static int __nf_tables_dump_rules(struct sk_buff *skb, list_for_each_entry_rcu(rule, &chain->rules, list) { if (!nft_is_active(net, rule)) goto cont_skip; - if (*idx < s_idx) + if (*idx < ctx->s_idx) goto cont; if (prule) handle = prule->handle; @@ -3486,7 +3488,7 @@ static int __nf_tables_dump_rules(struct sk_buff *skb, NFT_MSG_NEWRULE, NLM_F_MULTI | NLM_F_APPEND, table->family, - table, chain, rule, handle, reset) < 0) { + table, chain, rule, handle, ctx->reset) < 0) { ret = 1; break; } @@ -3498,7 +3500,7 @@ cont_skip: (*idx)++; } - if (reset && entries) + if (ctx->reset && entries) audit_log_rule_reset(table, cb->seq, entries); return ret; @@ -3508,17 +3510,13 @@ static int nf_tables_dump_rules(struct sk_buff *skb, struct netlink_callback *cb) { const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); - const struct nft_rule_dump_ctx *ctx = cb->data; + struct nft_rule_dump_ctx *ctx = (void *)cb->ctx; struct nft_table *table; const struct nft_chain *chain; unsigned int idx = 0; struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; struct nftables_pernet *nft_net; - bool reset = false; - - if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == NFT_MSG_GETRULE_RESET) - reset = true; rcu_read_lock(); nft_net = nft_pernet(net); @@ -3528,10 +3526,10 @@ static int nf_tables_dump_rules(struct sk_buff *skb, if (family != NFPROTO_UNSPEC && family != table->family) continue; - if (ctx && ctx->table && strcmp(ctx->table, table->name) != 0) + if (ctx->table && strcmp(ctx->table, table->name) != 0) continue; - if (ctx && ctx->table && ctx->chain) { + if (ctx->table && ctx->chain) { struct rhlist_head *list, *tmp; list = rhltable_lookup(&table->chains_ht, ctx->chain, @@ -3543,7 +3541,7 @@ static int nf_tables_dump_rules(struct sk_buff *skb, if (!nft_is_active(net, chain)) continue; __nf_tables_dump_rules(skb, &idx, - cb, table, chain, reset); + cb, table, chain); break; } goto done; @@ -3551,68 +3549,81 @@ static int nf_tables_dump_rules(struct sk_buff *skb, list_for_each_entry_rcu(chain, &table->chains, list) { if (__nf_tables_dump_rules(skb, &idx, - cb, table, 
chain, reset)) + cb, table, chain)) goto done; } - if (ctx && ctx->table) + if (ctx->table) break; } done: rcu_read_unlock(); - cb->args[0] = idx; + ctx->s_idx = idx; return skb->len; } +static int nf_tables_dumpreset_rules(struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct nftables_pernet *nft_net = nft_pernet(sock_net(skb->sk)); + int ret; + + /* Mutex is held is to prevent that two concurrent dump-and-reset calls + * do not underrun counters and quotas. The commit_mutex is used for + * the lack a better lock, this is not transaction path. + */ + mutex_lock(&nft_net->commit_mutex); + ret = nf_tables_dump_rules(skb, cb); + mutex_unlock(&nft_net->commit_mutex); + + return ret; +} + static int nf_tables_dump_rules_start(struct netlink_callback *cb) { + struct nft_rule_dump_ctx *ctx = (void *)cb->ctx; const struct nlattr * const *nla = cb->data; - struct nft_rule_dump_ctx *ctx = NULL; - if (nla[NFTA_RULE_TABLE] || nla[NFTA_RULE_CHAIN]) { - ctx = kzalloc(sizeof(*ctx), GFP_ATOMIC); - if (!ctx) - return -ENOMEM; + BUILD_BUG_ON(sizeof(*ctx) > sizeof(cb->ctx)); - if (nla[NFTA_RULE_TABLE]) { - ctx->table = nla_strdup(nla[NFTA_RULE_TABLE], - GFP_ATOMIC); - if (!ctx->table) { - kfree(ctx); - return -ENOMEM; - } - } - if (nla[NFTA_RULE_CHAIN]) { - ctx->chain = nla_strdup(nla[NFTA_RULE_CHAIN], - GFP_ATOMIC); - if (!ctx->chain) { - kfree(ctx->table); - kfree(ctx); - return -ENOMEM; - } + if (nla[NFTA_RULE_TABLE]) { + ctx->table = nla_strdup(nla[NFTA_RULE_TABLE], GFP_ATOMIC); + if (!ctx->table) + return -ENOMEM; + } + if (nla[NFTA_RULE_CHAIN]) { + ctx->chain = nla_strdup(nla[NFTA_RULE_CHAIN], GFP_ATOMIC); + if (!ctx->chain) { + kfree(ctx->table); + return -ENOMEM; } } - - cb->data = ctx; return 0; } +static int nf_tables_dumpreset_rules_start(struct netlink_callback *cb) +{ + struct nft_rule_dump_ctx *ctx = (void *)cb->ctx; + + ctx->reset = true; + + return nf_tables_dump_rules_start(cb); +} + static int nf_tables_dump_rules_done(struct netlink_callback *cb) { - struct nft_rule_dump_ctx *ctx = cb->data; + struct nft_rule_dump_ctx *ctx = (void *)cb->ctx; - if (ctx) { - kfree(ctx->table); - kfree(ctx->chain); - kfree(ctx); - } + kfree(ctx->table); + kfree(ctx->chain); return 0; } /* called with rcu_read_lock held */ -static int nf_tables_getrule(struct sk_buff *skb, const struct nfnl_info *info, - const struct nlattr * const nla[]) +static struct sk_buff * +nf_tables_getrule_single(u32 portid, const struct nfnl_info *info, + const struct nlattr * const nla[], bool reset) { struct netlink_ext_ack *extack = info->extack; u8 genmask = nft_genmask_cur(info->net); @@ -3622,60 +3633,110 @@ static int nf_tables_getrule(struct sk_buff *skb, const struct nfnl_info *info, struct net *net = info->net; struct nft_table *table; struct sk_buff *skb2; - bool reset = false; int err; - if (info->nlh->nlmsg_flags & NLM_F_DUMP) { - struct netlink_dump_control c = { - .start= nf_tables_dump_rules_start, - .dump = nf_tables_dump_rules, - .done = nf_tables_dump_rules_done, - .module = THIS_MODULE, - .data = (void *)nla, - }; - - return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c); - } - table = nft_table_lookup(net, nla[NFTA_RULE_TABLE], family, genmask, 0); if (IS_ERR(table)) { NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_TABLE]); - return PTR_ERR(table); + return ERR_CAST(table); } chain = nft_chain_lookup(net, table, nla[NFTA_RULE_CHAIN], genmask); if (IS_ERR(chain)) { NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN]); - return PTR_ERR(chain); + return ERR_CAST(chain); } rule = nft_rule_lookup(chain, 
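nf_tables_dumpreset_rules() above serializes every dump-and-reset behind the transaction mutex; as its comment notes, two concurrent resets could otherwise both sample a counter before either zeroes it, under-reporting the drained total. A minimal sketch of why read-and-reset must be one critical section (the counter is illustrative):

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static long counter = 42;

    static long read_and_reset(void)
    {
        long v;

        pthread_mutex_lock(&lock);
        v = counter;        /* sample ... */
        counter = 0;        /* ... and zero, atomically w.r.t. other resets */
        pthread_mutex_unlock(&lock);
        return v;
    }

    int main(void)
    {
        printf("%ld %ld\n", read_and_reset(), read_and_reset());   /* 42 0 */
        return 0;
    }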
nla[NFTA_RULE_HANDLE]); if (IS_ERR(rule)) { NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_HANDLE]); - return PTR_ERR(rule); + return ERR_CAST(rule); } skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); if (!skb2) - return -ENOMEM; - - if (NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_GETRULE_RESET) - reset = true; + return ERR_PTR(-ENOMEM); - err = nf_tables_fill_rule_info(skb2, net, NETLINK_CB(skb).portid, + err = nf_tables_fill_rule_info(skb2, net, portid, info->nlh->nlmsg_seq, NFT_MSG_NEWRULE, 0, family, table, chain, rule, 0, reset); - if (err < 0) - goto err_fill_rule_info; + if (err < 0) { + kfree_skb(skb2); + return ERR_PTR(err); + } - if (reset) - audit_log_rule_reset(table, nft_pernet(net)->base_seq, 1); + return skb2; +} - return nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid); +static int nf_tables_getrule(struct sk_buff *skb, const struct nfnl_info *info, + const struct nlattr * const nla[]) +{ + u32 portid = NETLINK_CB(skb).portid; + struct net *net = info->net; + struct sk_buff *skb2; -err_fill_rule_info: - kfree_skb(skb2); - return err; + if (info->nlh->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .start= nf_tables_dump_rules_start, + .dump = nf_tables_dump_rules, + .done = nf_tables_dump_rules_done, + .module = THIS_MODULE, + .data = (void *)nla, + }; + + return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c); + } + + skb2 = nf_tables_getrule_single(portid, info, nla, false); + if (IS_ERR(skb2)) + return PTR_ERR(skb2); + + return nfnetlink_unicast(skb2, net, portid); +} + +static int nf_tables_getrule_reset(struct sk_buff *skb, + const struct nfnl_info *info, + const struct nlattr * const nla[]) +{ + struct nftables_pernet *nft_net = nft_pernet(info->net); + u32 portid = NETLINK_CB(skb).portid; + struct net *net = info->net; + struct sk_buff *skb2; + char *buf; + + if (info->nlh->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .start= nf_tables_dumpreset_rules_start, + .dump = nf_tables_dumpreset_rules, + .done = nf_tables_dump_rules_done, + .module = THIS_MODULE, + .data = (void *)nla, + }; + + return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c); + } + + if (!try_module_get(THIS_MODULE)) + return -EINVAL; + rcu_read_unlock(); + mutex_lock(&nft_net->commit_mutex); + skb2 = nf_tables_getrule_single(portid, info, nla, true); + mutex_unlock(&nft_net->commit_mutex); + rcu_read_lock(); + module_put(THIS_MODULE); + + if (IS_ERR(skb2)) + return PTR_ERR(skb2); + + buf = kasprintf(GFP_ATOMIC, "%.*s:%u", + nla_len(nla[NFTA_RULE_TABLE]), + (char *)nla_data(nla[NFTA_RULE_TABLE]), + nft_net->base_seq); + audit_log_nfcfg(buf, info->nfmsg->nfgen_family, 1, + AUDIT_NFT_OP_RULE_RESET, GFP_ATOMIC); + kfree(buf); + + return nfnetlink_unicast(skb2, net, portid); } void nf_tables_rule_destroy(const struct nft_ctx *ctx, struct nft_rule *rule) @@ -3758,9 +3819,9 @@ static int nft_table_validate(struct net *net, const struct nft_table *table) int nft_setelem_validate(const struct nft_ctx *ctx, struct nft_set *set, const struct nft_set_iter *iter, - struct nft_set_elem *elem) + struct nft_elem_priv *elem_priv) { - const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); + const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv); struct nft_ctx *pctx = (struct nft_ctx *)ctx; const struct nft_data *data; int err; @@ -3790,7 +3851,6 @@ int nft_set_catchall_validate(const struct nft_ctx *ctx, struct nft_set *set) { u8 genmask = nft_genmask_next(ctx->net); struct nft_set_elem_catchall *catchall; - struct nft_set_elem elem; struct 
nft_set_ext *ext; int ret = 0; @@ -3799,8 +3859,7 @@ int nft_set_catchall_validate(const struct nft_ctx *ctx, struct nft_set *set) if (!nft_set_elem_active(ext, genmask)) continue; - elem.priv = catchall->elem; - ret = nft_setelem_validate(ctx, set, NULL, &elem); + ret = nft_setelem_validate(ctx, set, NULL, catchall->elem); if (ret < 0) return ret; } @@ -4261,12 +4320,16 @@ static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = { [NFTA_SET_OBJ_TYPE] = { .type = NLA_U32 }, [NFTA_SET_HANDLE] = { .type = NLA_U64 }, [NFTA_SET_EXPR] = { .type = NLA_NESTED }, - [NFTA_SET_EXPRESSIONS] = { .type = NLA_NESTED }, + [NFTA_SET_EXPRESSIONS] = NLA_POLICY_NESTED_ARRAY(nft_expr_policy), +}; + +static const struct nla_policy nft_concat_policy[NFTA_SET_FIELD_MAX + 1] = { + [NFTA_SET_FIELD_LEN] = { .type = NLA_U32 }, }; static const struct nla_policy nft_set_desc_policy[NFTA_SET_DESC_MAX + 1] = { [NFTA_SET_DESC_SIZE] = { .type = NLA_U32 }, - [NFTA_SET_DESC_CONCAT] = { .type = NLA_NESTED }, + [NFTA_SET_DESC_CONCAT] = NLA_POLICY_NESTED_ARRAY(nft_concat_policy), }; static struct nft_set *nft_set_lookup(const struct nft_table *table, @@ -4705,8 +4768,10 @@ static int nf_tables_getset(struct sk_buff *skb, const struct nfnl_info *info, return -EINVAL; set = nft_set_lookup(table, nla[NFTA_SET_NAME], genmask); - if (IS_ERR(set)) + if (IS_ERR(set)) { + NL_SET_BAD_ATTR(extack, nla[NFTA_SET_NAME]); return PTR_ERR(set); + } skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); if (skb2 == NULL) @@ -4723,10 +4788,6 @@ err_fill_set_info: return err; } -static const struct nla_policy nft_concat_policy[NFTA_SET_FIELD_MAX + 1] = { - [NFTA_SET_FIELD_LEN] = { .type = NLA_U32 }, -}; - static int nft_set_desc_concat_parse(const struct nlattr *attr, struct nft_set_desc *desc) { @@ -5269,9 +5330,9 @@ static int nft_validate_register_store(const struct nft_ctx *ctx, static int nft_setelem_data_validate(const struct nft_ctx *ctx, struct nft_set *set, - struct nft_set_elem *elem) + struct nft_elem_priv *elem_priv) { - const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); + const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv); enum nft_registers dreg; dreg = nft_type_to_reg(set->dtype); @@ -5284,9 +5345,9 @@ static int nft_setelem_data_validate(const struct nft_ctx *ctx, static int nf_tables_bind_check_setelem(const struct nft_ctx *ctx, struct nft_set *set, const struct nft_set_iter *iter, - struct nft_set_elem *elem) + struct nft_elem_priv *elem_priv) { - return nft_setelem_data_validate(ctx, set, elem); + return nft_setelem_data_validate(ctx, set, elem_priv); } static int nft_set_catchall_bind_check(const struct nft_ctx *ctx, @@ -5294,7 +5355,6 @@ static int nft_set_catchall_bind_check(const struct nft_ctx *ctx, { u8 genmask = nft_genmask_next(ctx->net); struct nft_set_elem_catchall *catchall; - struct nft_set_elem elem; struct nft_set_ext *ext; int ret = 0; @@ -5303,8 +5363,7 @@ static int nft_set_catchall_bind_check(const struct nft_ctx *ctx, if (!nft_set_elem_active(ext, genmask)) continue; - elem.priv = catchall->elem; - ret = nft_setelem_data_validate(ctx, set, &elem); + ret = nft_setelem_data_validate(ctx, set, catchall->elem); if (ret < 0) break; } @@ -5371,14 +5430,14 @@ static void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, static void nft_setelem_data_activate(const struct net *net, const struct nft_set *set, - struct nft_set_elem *elem); + struct nft_elem_priv *elem_priv); static int nft_mapelem_activate(const struct nft_ctx *ctx, struct nft_set *set, const struct 
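Several policies above are tightened from an opaque { .type = NLA_NESTED } to NLA_POLICY_NESTED_ARRAY, so the netlink core validates every member of the array against a nested policy before any nf_tables code runs. A sketch of the idiom with invented attribute names (the DEMO_* constants are not the real nft attributes):

#include <net/netlink.h>

/* Assumed, illustrative attribute set; only the policy idiom matters. */
enum { DEMO_FIELD_UNSPEC, DEMO_FIELD_LEN, __DEMO_FIELD_MAX };
#define DEMO_FIELD_MAX (__DEMO_FIELD_MAX - 1)

static const struct nla_policy demo_field_policy[DEMO_FIELD_MAX + 1] = {
	[DEMO_FIELD_LEN]	= { .type = NLA_U32 },
};

enum { DEMO_LIST_UNSPEC, DEMO_LIST_FIELDS, __DEMO_LIST_MAX };
#define DEMO_LIST_MAX (__DEMO_LIST_MAX - 1)

static const struct nla_policy demo_list_policy[DEMO_LIST_MAX + 1] = {
	/* Each element of the nested array is checked against
	 * demo_field_policy during parsing, not in handler code.
	 */
	[DEMO_LIST_FIELDS] = NLA_POLICY_NESTED_ARRAY(demo_field_policy),
};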
nft_set_iter *iter, - struct nft_set_elem *elem) + struct nft_elem_priv *elem_priv) { - nft_setelem_data_activate(ctx->net, set, elem); + nft_setelem_data_activate(ctx->net, set, elem_priv); return 0; } @@ -5388,7 +5447,6 @@ static void nft_map_catchall_activate(const struct nft_ctx *ctx, { u8 genmask = nft_genmask_next(ctx->net); struct nft_set_elem_catchall *catchall; - struct nft_set_elem elem; struct nft_set_ext *ext; list_for_each_entry(catchall, &set->catchall_list, list) { @@ -5396,8 +5454,7 @@ static void nft_map_catchall_activate(const struct nft_ctx *ctx, if (!nft_set_elem_active(ext, genmask)) continue; - elem.priv = catchall->elem; - nft_setelem_data_activate(ctx->net, set, &elem); + nft_setelem_data_activate(ctx->net, set, catchall->elem); break; } } @@ -5524,7 +5581,7 @@ static const struct nla_policy nft_set_elem_policy[NFTA_SET_ELEM_MAX + 1] = { [NFTA_SET_ELEM_OBJREF] = { .type = NLA_STRING, .len = NFT_OBJ_MAXNAMELEN - 1 }, [NFTA_SET_ELEM_KEY_END] = { .type = NLA_NESTED }, - [NFTA_SET_ELEM_EXPRESSIONS] = { .type = NLA_NESTED }, + [NFTA_SET_ELEM_EXPRESSIONS] = NLA_POLICY_NESTED_ARRAY(nft_expr_policy), }; static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX + 1] = { @@ -5532,7 +5589,7 @@ static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX + .len = NFT_TABLE_MAXNAMELEN - 1 }, [NFTA_SET_ELEM_LIST_SET] = { .type = NLA_STRING, .len = NFT_SET_MAXNAMELEN - 1 }, - [NFTA_SET_ELEM_LIST_ELEMENTS] = { .type = NLA_NESTED }, + [NFTA_SET_ELEM_LIST_ELEMENTS] = NLA_POLICY_NESTED_ARRAY(nft_set_elem_policy), [NFTA_SET_ELEM_LIST_SET_ID] = { .type = NLA_U32 }, }; @@ -5576,10 +5633,10 @@ nla_put_failure: static int nf_tables_fill_setelem(struct sk_buff *skb, const struct nft_set *set, - const struct nft_set_elem *elem, + const struct nft_elem_priv *elem_priv, bool reset) { - const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); + const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv); unsigned char *b = skb_tail_pointer(skb); struct nlattr *nest; @@ -5665,16 +5722,16 @@ struct nft_set_dump_args { static int nf_tables_dump_setelem(const struct nft_ctx *ctx, struct nft_set *set, const struct nft_set_iter *iter, - struct nft_set_elem *elem) + struct nft_elem_priv *elem_priv) { - const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); + const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv); struct nft_set_dump_args *args; if (nft_set_elem_expired(ext) || nft_set_elem_is_dead(ext)) return 0; args = container_of(iter, struct nft_set_dump_args, iter); - return nf_tables_fill_setelem(args->skb, set, elem, args->reset); + return nf_tables_fill_setelem(args->skb, set, elem_priv, args->reset); } static void audit_log_nft_set_reset(const struct nft_table *table, @@ -5691,6 +5748,7 @@ static void audit_log_nft_set_reset(const struct nft_table *table, struct nft_set_dump_ctx { const struct nft_set *set; struct nft_ctx ctx; + bool reset; }; static int nft_set_catchall_dump(struct net *net, struct sk_buff *skb, @@ -5699,7 +5757,6 @@ static int nft_set_catchall_dump(struct net *net, struct sk_buff *skb, { struct nft_set_elem_catchall *catchall; u8 genmask = nft_genmask_cur(net); - struct nft_set_elem elem; struct nft_set_ext *ext; int ret = 0; @@ -5709,8 +5766,7 @@ static int nft_set_catchall_dump(struct net *net, struct sk_buff *skb, nft_set_elem_expired(ext)) continue; - elem.priv = catchall->elem; - ret = nf_tables_fill_setelem(skb, set, &elem, reset); + ret = nf_tables_fill_setelem(skb, set, catchall->elem, 
reset); if (reset && !ret) audit_log_nft_set_reset(set->table, base_seq, 1); break; @@ -5730,7 +5786,6 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) bool set_found = false; struct nlmsghdr *nlh; struct nlattr *nest; - bool reset = false; u32 portid, seq; int event; @@ -5778,12 +5833,9 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) if (nest == NULL) goto nla_put_failure; - if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == NFT_MSG_GETSETELEM_RESET) - reset = true; - args.cb = cb; args.skb = skb; - args.reset = reset; + args.reset = dump_ctx->reset; args.iter.genmask = nft_genmask_cur(net); args.iter.skip = cb->args[0]; args.iter.count = 0; @@ -5793,11 +5845,11 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) if (!args.iter.err && args.iter.count == cb->args[0]) args.iter.err = nft_set_catchall_dump(net, skb, set, - reset, cb->seq); + dump_ctx->reset, cb->seq); nla_nest_end(skb, nest); nlmsg_end(skb, nlh); - if (reset && args.iter.count > args.iter.skip) + if (dump_ctx->reset && args.iter.count > args.iter.skip) audit_log_nft_set_reset(table, cb->seq, args.iter.count - args.iter.skip); @@ -5835,7 +5887,7 @@ static int nf_tables_fill_setelem_info(struct sk_buff *skb, const struct nft_ctx *ctx, u32 seq, u32 portid, int event, u16 flags, const struct nft_set *set, - const struct nft_set_elem *elem, + const struct nft_elem_priv *elem_priv, bool reset) { struct nlmsghdr *nlh; @@ -5857,7 +5909,7 @@ static int nf_tables_fill_setelem_info(struct sk_buff *skb, if (nest == NULL) goto nla_put_failure; - err = nf_tables_fill_setelem(skb, set, elem, reset); + err = nf_tables_fill_setelem(skb, set, elem_priv, reset); if (err < 0) goto nla_put_failure; @@ -6007,7 +6059,7 @@ static int nft_get_set_elem(struct nft_ctx *ctx, struct nft_set *set, return err; err = nf_tables_fill_setelem_info(skb, ctx, ctx->seq, ctx->portid, - NFT_MSG_NEWSETELEM, 0, set, &elem, + NFT_MSG_NEWSETELEM, 0, set, elem.priv, reset); if (err < 0) goto err_fill_setelem; @@ -6043,11 +6095,16 @@ static int nf_tables_getsetelem(struct sk_buff *skb, } set = nft_set_lookup(table, nla[NFTA_SET_ELEM_LIST_SET], genmask); - if (IS_ERR(set)) + if (IS_ERR(set)) { + NL_SET_BAD_ATTR(extack, nla[NFTA_SET_ELEM_LIST_SET]); return PTR_ERR(set); + } nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla); + if (NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_GETSETELEM_RESET) + reset = true; + if (info->nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { .start = nf_tables_dump_set_start, @@ -6058,6 +6115,7 @@ static int nf_tables_getsetelem(struct sk_buff *skb, struct nft_set_dump_ctx dump_ctx = { .set = set, .ctx = ctx, + .reset = reset, }; c.data = &dump_ctx; @@ -6067,9 +6125,6 @@ static int nf_tables_getsetelem(struct sk_buff *skb, if (!nla[NFTA_SET_ELEM_LIST_ELEMENTS]) return -EINVAL; - if (NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_GETSETELEM_RESET) - reset = true; - nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) { err = nft_get_set_elem(&ctx, set, attr, reset); if (err < 0) { @@ -6088,7 +6143,7 @@ static int nf_tables_getsetelem(struct sk_buff *skb, static void nf_tables_setelem_notify(const struct nft_ctx *ctx, const struct nft_set *set, - const struct nft_set_elem *elem, + const struct nft_elem_priv *elem_priv, int event) { struct nftables_pernet *nft_net; @@ -6109,7 +6164,7 @@ static void nf_tables_setelem_notify(const struct nft_ctx *ctx, flags |= ctx->flags & (NLM_F_CREATE | NLM_F_EXCL); err = 
nf_tables_fill_setelem_info(skb, ctx, 0, portid, event, flags, - set, elem, false); + set, elem_priv, false); if (err < 0) { kfree_skb(skb); goto err; @@ -6184,10 +6239,11 @@ static int nft_set_ext_memcpy(const struct nft_set_ext_tmpl *tmpl, u8 id, return 0; } -void *nft_set_elem_init(const struct nft_set *set, - const struct nft_set_ext_tmpl *tmpl, - const u32 *key, const u32 *key_end, - const u32 *data, u64 timeout, u64 expiration, gfp_t gfp) +struct nft_elem_priv *nft_set_elem_init(const struct nft_set *set, + const struct nft_set_ext_tmpl *tmpl, + const u32 *key, const u32 *key_end, + const u32 *data, + u64 timeout, u64 expiration, gfp_t gfp) { struct nft_set_ext *ext; void *elem; @@ -6252,10 +6308,11 @@ static void nft_set_elem_expr_destroy(const struct nft_ctx *ctx, } /* Drop references and destroy. Called from gc, dynset and abort path. */ -void nft_set_elem_destroy(const struct nft_set *set, void *elem, +void nft_set_elem_destroy(const struct nft_set *set, + const struct nft_elem_priv *elem_priv, bool destroy_expr) { - struct nft_set_ext *ext = nft_set_elem_ext(set, elem); + struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv); struct nft_ctx ctx = { .net = read_pnet(&set->net), .family = set->table->family, @@ -6266,10 +6323,10 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem, nft_data_release(nft_set_ext_data(ext), set->dtype); if (destroy_expr && nft_set_ext_exists(ext, NFT_SET_EXT_EXPRESSIONS)) nft_set_elem_expr_destroy(&ctx, nft_set_ext_expr(ext)); - if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF)) nft_use_dec(&(*nft_set_ext_obj(ext))->use); - kfree(elem); + + kfree(elem_priv); } EXPORT_SYMBOL_GPL(nft_set_elem_destroy); @@ -6277,14 +6334,15 @@ EXPORT_SYMBOL_GPL(nft_set_elem_destroy); * path via nft_setelem_data_deactivate(). 
*/ void nf_tables_set_elem_destroy(const struct nft_ctx *ctx, - const struct nft_set *set, void *elem) + const struct nft_set *set, + const struct nft_elem_priv *elem_priv) { - struct nft_set_ext *ext = nft_set_elem_ext(set, elem); + struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv); if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPRESSIONS)) nft_set_elem_expr_destroy(ctx, nft_set_ext_expr(ext)); - kfree(elem); + kfree(elem_priv); } int nft_set_elem_expr_clone(const struct nft_ctx *ctx, struct nft_set *set, @@ -6379,7 +6437,7 @@ EXPORT_SYMBOL_GPL(nft_set_catchall_lookup); static int nft_setelem_catchall_insert(const struct net *net, struct nft_set *set, const struct nft_set_elem *elem, - struct nft_set_ext **pext) + struct nft_elem_priv **priv) { struct nft_set_elem_catchall *catchall; u8 genmask = nft_genmask_next(net); @@ -6388,7 +6446,7 @@ static int nft_setelem_catchall_insert(const struct net *net, list_for_each_entry(catchall, &set->catchall_list, list) { ext = nft_set_elem_ext(set, catchall->elem); if (nft_set_elem_active(ext, genmask)) { - *pext = ext; + *priv = catchall->elem; return -EEXIST; } } @@ -6406,22 +6464,23 @@ static int nft_setelem_catchall_insert(const struct net *net, static int nft_setelem_insert(const struct net *net, struct nft_set *set, const struct nft_set_elem *elem, - struct nft_set_ext **ext, unsigned int flags) + struct nft_elem_priv **elem_priv, + unsigned int flags) { int ret; if (flags & NFT_SET_ELEM_CATCHALL) - ret = nft_setelem_catchall_insert(net, set, elem, ext); + ret = nft_setelem_catchall_insert(net, set, elem, elem_priv); else - ret = set->ops->insert(net, set, elem, ext); + ret = set->ops->insert(net, set, elem, elem_priv); return ret; } static bool nft_setelem_is_catchall(const struct nft_set *set, - const struct nft_set_elem *elem) + const struct nft_elem_priv *elem_priv) { - struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); + struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv); if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) && *nft_set_ext_flags(ext) & NFT_SET_ELEM_CATCHALL) @@ -6431,14 +6490,14 @@ static bool nft_setelem_is_catchall(const struct nft_set *set, } static void nft_setelem_activate(struct net *net, struct nft_set *set, - struct nft_set_elem *elem) + struct nft_elem_priv *elem_priv) { - struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); + struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv); - if (nft_setelem_is_catchall(set, elem)) { + if (nft_setelem_is_catchall(set, elem_priv)) { nft_set_elem_change_active(net, set, ext); } else { - set->ops->activate(net, set, elem); + set->ops->activate(net, set, elem_priv); } } @@ -6502,12 +6561,12 @@ static void nft_setelem_catchall_destroy(struct nft_set_elem_catchall *catchall) static void nft_setelem_catchall_remove(const struct net *net, const struct nft_set *set, - const struct nft_set_elem *elem) + struct nft_elem_priv *elem_priv) { struct nft_set_elem_catchall *catchall, *next; list_for_each_entry_safe(catchall, next, &set->catchall_list, list) { - if (catchall->elem == elem->priv) { + if (catchall->elem == elem_priv) { nft_setelem_catchall_destroy(catchall); break; } @@ -6516,12 +6575,12 @@ static void nft_setelem_catchall_remove(const struct net *net, static void nft_setelem_remove(const struct net *net, const struct nft_set *set, - const struct nft_set_elem *elem) + struct nft_elem_priv *elem_priv) { - if (nft_setelem_is_catchall(set, elem)) - nft_setelem_catchall_remove(net, set, elem); + if (nft_setelem_is_catchall(set, elem_priv)) + 
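From here on, the set backends trade the old void * element cookies (and the struct nft_set_elem wrapper) for struct nft_elem_priv *, an empty tag type embedded as the first member of each backend's element struct, so the cast back to the backend type is free. A minimal sketch of the pattern, with hypothetical demo_* names:

/* Empty tag type: gives the core a distinct pointer type to pass
 * around without knowing any backend layout.
 */
struct demo_priv {};

struct demo_elem {
	struct demo_priv	priv;	/* must stay the first member */
	struct list_head	head;
	/* backend-specific extension data follows */
};

static struct demo_elem *demo_elem_cast(struct demo_priv *priv)
{
	/* Valid only because offsetof(struct demo_elem, priv) == 0;
	 * the real backends assert this with BUILD_BUG_ON() at init
	 * time, as the later set_bitmap/set_hash hunks show.
	 */
	return (struct demo_elem *)priv;
}

Keeping the tag at offset zero is what lets the conversion stay a plain pointer cast while the core gains type-checked signatures.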
nft_setelem_catchall_remove(net, set, elem_priv); else - set->ops->remove(net, set, elem); + set->ops->remove(net, set, elem_priv); } static bool nft_setelem_valid_key_end(const struct nft_set *set, @@ -6554,13 +6613,14 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, struct nft_set_ext *ext, *ext2; struct nft_set_elem elem; struct nft_set_binding *binding; + struct nft_elem_priv *elem_priv; struct nft_object *obj = NULL; struct nft_userdata *udata; struct nft_data_desc desc; enum nft_registers dreg; struct nft_trans *trans; - u64 timeout; u64 expiration; + u64 timeout; int err, i; u8 ulen; @@ -6853,9 +6913,10 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, ext->genmask = nft_genmask_cur(ctx->net); - err = nft_setelem_insert(ctx->net, set, &elem, &ext2, flags); + err = nft_setelem_insert(ctx->net, set, &elem, &elem_priv, flags); if (err) { if (err == -EEXIST) { + ext2 = nft_set_elem_ext(set, elem_priv); if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA) ^ nft_set_ext_exists(ext2, NFT_SET_EXT_DATA) || nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF) ^ @@ -6889,12 +6950,12 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, } } - nft_trans_elem(trans) = elem; + nft_trans_elem_priv(trans) = elem.priv; nft_trans_commit_list_add_tail(ctx->net, trans); return 0; err_set_full: - nft_setelem_remove(ctx->net, set, &elem); + nft_setelem_remove(ctx->net, set, elem.priv); err_element_clash: kfree(trans); err_elem_free: @@ -6942,8 +7003,10 @@ static int nf_tables_newsetelem(struct sk_buff *skb, set = nft_set_lookup_global(net, table, nla[NFTA_SET_ELEM_LIST_SET], nla[NFTA_SET_ELEM_LIST_SET_ID], genmask); - if (IS_ERR(set)) + if (IS_ERR(set)) { + NL_SET_BAD_ATTR(extack, nla[NFTA_SET_ELEM_LIST_SET]); return PTR_ERR(set); + } if (!list_empty(&set->bindings) && (set->flags & (NFT_SET_CONSTANT | NFT_SET_ANONYMOUS))) @@ -6993,9 +7056,9 @@ void nft_data_hold(const struct nft_data *data, enum nft_data_types type) static void nft_setelem_data_activate(const struct net *net, const struct nft_set *set, - struct nft_set_elem *elem) + struct nft_elem_priv *elem_priv) { - const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); + const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv); if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA)) nft_data_hold(nft_set_ext_data(ext), set->dtype); @@ -7005,9 +7068,9 @@ static void nft_setelem_data_activate(const struct net *net, void nft_setelem_data_deactivate(const struct net *net, const struct nft_set *set, - struct nft_set_elem *elem) + struct nft_elem_priv *elem_priv) { - const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); + const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv); if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA)) nft_data_release(nft_set_ext_data(ext), set->dtype); @@ -7092,9 +7155,9 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, if (err < 0) goto fail_ops; - nft_setelem_data_deactivate(ctx->net, set, &elem); + nft_setelem_data_deactivate(ctx->net, set, elem.priv); - nft_trans_elem(trans) = elem; + nft_trans_elem_priv(trans) = elem.priv; nft_trans_commit_list_add_tail(ctx->net, trans); return 0; @@ -7112,36 +7175,29 @@ fail_elem: static int nft_setelem_flush(const struct nft_ctx *ctx, struct nft_set *set, const struct nft_set_iter *iter, - struct nft_set_elem *elem) + struct nft_elem_priv *elem_priv) { struct nft_trans *trans; - int err; trans = nft_trans_alloc_gfp(ctx, NFT_MSG_DELSETELEM, sizeof(struct nft_trans_elem), GFP_ATOMIC); if 
(!trans) return -ENOMEM; - if (!set->ops->flush(ctx->net, set, elem->priv)) { - err = -ENOENT; - goto err1; - } + set->ops->flush(ctx->net, set, elem_priv); set->ndeact++; - nft_setelem_data_deactivate(ctx->net, set, elem); + nft_setelem_data_deactivate(ctx->net, set, elem_priv); nft_trans_elem_set(trans) = set; - nft_trans_elem(trans) = *elem; + nft_trans_elem_priv(trans) = elem_priv; nft_trans_commit_list_add_tail(ctx->net, trans); return 0; -err1: - kfree(trans); - return err; } static int __nft_set_catchall_flush(const struct nft_ctx *ctx, struct nft_set *set, - struct nft_set_elem *elem) + struct nft_elem_priv *elem_priv) { struct nft_trans *trans; @@ -7150,9 +7206,9 @@ static int __nft_set_catchall_flush(const struct nft_ctx *ctx, if (!trans) return -ENOMEM; - nft_setelem_data_deactivate(ctx->net, set, elem); + nft_setelem_data_deactivate(ctx->net, set, elem_priv); nft_trans_elem_set(trans) = set; - nft_trans_elem(trans) = *elem; + nft_trans_elem_priv(trans) = elem_priv; nft_trans_commit_list_add_tail(ctx->net, trans); return 0; @@ -7163,7 +7219,6 @@ static int nft_set_catchall_flush(const struct nft_ctx *ctx, { u8 genmask = nft_genmask_next(ctx->net); struct nft_set_elem_catchall *catchall; - struct nft_set_elem elem; struct nft_set_ext *ext; int ret = 0; @@ -7172,8 +7227,7 @@ static int nft_set_catchall_flush(const struct nft_ctx *ctx, if (!nft_set_elem_active(ext, genmask)) continue; - elem.priv = catchall->elem; - ret = __nft_set_catchall_flush(ctx, set, &elem); + ret = __nft_set_catchall_flush(ctx, set, catchall->elem); if (ret < 0) break; nft_set_elem_change_active(ctx->net, set, ext); @@ -7218,8 +7272,10 @@ static int nf_tables_delsetelem(struct sk_buff *skb, } set = nft_set_lookup(table, nla[NFTA_SET_ELEM_LIST_SET], genmask); - if (IS_ERR(set)) + if (IS_ERR(set)) { + NL_SET_BAD_ATTR(extack, nla[NFTA_SET_ELEM_LIST_SET]); return PTR_ERR(set); + } if (nft_set_is_anonymous(set)) return -EOPNOTSUPP; @@ -7415,11 +7471,15 @@ nla_put_failure: return -1; } -static const struct nft_object_type *__nft_obj_type_get(u32 objtype) +static const struct nft_object_type *__nft_obj_type_get(u32 objtype, u8 family) { const struct nft_object_type *type; list_for_each_entry(type, &nf_tables_objects, list) { + if (type->family != NFPROTO_UNSPEC && + type->family != family) + continue; + if (objtype == type->type) return type; } @@ -7427,11 +7487,11 @@ static const struct nft_object_type *__nft_obj_type_get(u32 objtype) } static const struct nft_object_type * -nft_obj_type_get(struct net *net, u32 objtype) +nft_obj_type_get(struct net *net, u32 objtype, u8 family) { const struct nft_object_type *type; - type = __nft_obj_type_get(objtype); + type = __nft_obj_type_get(objtype, family); if (type != NULL && try_module_get(type->owner)) return type; @@ -7524,7 +7584,7 @@ static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info, if (info->nlh->nlmsg_flags & NLM_F_REPLACE) return -EOPNOTSUPP; - type = __nft_obj_type_get(objtype); + type = __nft_obj_type_get(objtype, family); if (WARN_ON_ONCE(!type)) return -ENOENT; @@ -7538,7 +7598,7 @@ static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info, if (!nft_use_inc(&table->use)) return -EMFILE; - type = nft_obj_type_get(net, objtype); + type = nft_obj_type_get(net, objtype, family); if (IS_ERR(type)) { err = PTR_ERR(type); goto err_type; @@ -7649,28 +7709,26 @@ static void audit_log_obj_reset(const struct nft_table *table, kfree(buf); } -struct nft_obj_filter { +struct nft_obj_dump_ctx { + unsigned int s_idx; char 
*table; u32 type; + bool reset; }; static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb) { const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); - const struct nft_table *table; - unsigned int idx = 0, s_idx = cb->args[0]; - struct nft_obj_filter *filter = cb->data; + struct nft_obj_dump_ctx *ctx = (void *)cb->ctx; struct net *net = sock_net(skb->sk); int family = nfmsg->nfgen_family; struct nftables_pernet *nft_net; + const struct nft_table *table; unsigned int entries = 0; struct nft_object *obj; - bool reset = false; + unsigned int idx = 0; int rc = 0; - if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET) - reset = true; - rcu_read_lock(); nft_net = nft_pernet(net); cb->seq = READ_ONCE(nft_net->base_seq); @@ -7683,17 +7741,12 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb) list_for_each_entry_rcu(obj, &table->objects, list) { if (!nft_is_active(net, obj)) goto cont; - if (idx < s_idx) + if (idx < ctx->s_idx) goto cont; - if (idx > s_idx) - memset(&cb->args[1], 0, - sizeof(cb->args) - sizeof(cb->args[0])); - if (filter && filter->table && - strcmp(filter->table, table->name)) + if (ctx->table && strcmp(ctx->table, table->name)) goto cont; - if (filter && - filter->type != NFT_OBJECT_UNSPEC && - obj->ops->type->type != filter->type) + if (ctx->type != NFT_OBJECT_UNSPEC && + obj->ops->type->type != ctx->type) goto cont; rc = nf_tables_fill_obj_info(skb, net, @@ -7702,7 +7755,7 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb) NFT_MSG_NEWOBJ, NLM_F_MULTI | NLM_F_APPEND, table->family, table, - obj, reset); + obj, ctx->reset); if (rc < 0) break; @@ -7711,51 +7764,44 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb) cont: idx++; } - if (reset && entries) + if (ctx->reset && entries) audit_log_obj_reset(table, nft_net->base_seq, entries); if (rc < 0) break; } rcu_read_unlock(); - cb->args[0] = idx; + ctx->s_idx = idx; return skb->len; } static int nf_tables_dump_obj_start(struct netlink_callback *cb) { + struct nft_obj_dump_ctx *ctx = (void *)cb->ctx; const struct nlattr * const *nla = cb->data; - struct nft_obj_filter *filter = NULL; - if (nla[NFTA_OBJ_TABLE] || nla[NFTA_OBJ_TYPE]) { - filter = kzalloc(sizeof(*filter), GFP_ATOMIC); - if (!filter) + BUILD_BUG_ON(sizeof(*ctx) > sizeof(cb->ctx)); + + if (nla[NFTA_OBJ_TABLE]) { + ctx->table = nla_strdup(nla[NFTA_OBJ_TABLE], GFP_ATOMIC); + if (!ctx->table) return -ENOMEM; + } - if (nla[NFTA_OBJ_TABLE]) { - filter->table = nla_strdup(nla[NFTA_OBJ_TABLE], GFP_ATOMIC); - if (!filter->table) { - kfree(filter); - return -ENOMEM; - } - } + if (nla[NFTA_OBJ_TYPE]) + ctx->type = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE])); - if (nla[NFTA_OBJ_TYPE]) - filter->type = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE])); - } + if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET) + ctx->reset = true; - cb->data = filter; return 0; } static int nf_tables_dump_obj_done(struct netlink_callback *cb) { - struct nft_obj_filter *filter = cb->data; + struct nft_obj_dump_ctx *ctx = (void *)cb->ctx; - if (filter) { - kfree(filter->table); - kfree(filter); - } + kfree(ctx->table); return 0; } @@ -8329,9 +8375,9 @@ static int nf_tables_newflowtable(struct sk_buff *skb, u8 family = info->nfmsg->nfgen_family; const struct nf_flowtable_type *type; struct nft_flowtable *flowtable; - struct nft_hook *hook, *next; struct net *net = info->net; struct nft_table *table; + struct nft_trans *trans; struct nft_ctx ctx; int err; @@ -8411,34 +8457,34 @@ 
static int nf_tables_newflowtable(struct sk_buff *skb, err = nft_flowtable_parse_hook(&ctx, nla, &flowtable_hook, flowtable, extack, true); if (err < 0) - goto err4; + goto err_flowtable_parse_hooks; list_splice(&flowtable_hook.list, &flowtable->hook_list); flowtable->data.priority = flowtable_hook.priority; flowtable->hooknum = flowtable_hook.num; + trans = nft_trans_flowtable_add(&ctx, NFT_MSG_NEWFLOWTABLE, flowtable); + if (IS_ERR(trans)) { + err = PTR_ERR(trans); + goto err_flowtable_trans; + } + + /* This must be LAST to ensure no packets are walking over this flowtable. */ err = nft_register_flowtable_net_hooks(ctx.net, table, &flowtable->hook_list, flowtable); - if (err < 0) { - nft_hooks_destroy(&flowtable->hook_list); - goto err4; - } - - err = nft_trans_flowtable_add(&ctx, NFT_MSG_NEWFLOWTABLE, flowtable); if (err < 0) - goto err5; + goto err_flowtable_hooks; list_add_tail_rcu(&flowtable->list, &table->flowtables); return 0; -err5: - list_for_each_entry_safe(hook, next, &flowtable->hook_list, list) { - nft_unregister_flowtable_hook(net, flowtable, hook); - list_del_rcu(&hook->list); - kfree_rcu(hook, rcu); - } -err4: + +err_flowtable_hooks: + nft_trans_destroy(trans); +err_flowtable_trans: + nft_hooks_destroy(&flowtable->hook_list); +err_flowtable_parse_hooks: flowtable->data.type->free(&flowtable->data); err3: module_put(type->owner); @@ -8722,6 +8768,7 @@ static int nf_tables_getflowtable(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const nla[]) { + struct netlink_ext_ack *extack = info->extack; u8 genmask = nft_genmask_cur(info->net); u8 family = info->nfmsg->nfgen_family; struct nft_flowtable *flowtable; @@ -8747,13 +8794,17 @@ static int nf_tables_getflowtable(struct sk_buff *skb, table = nft_table_lookup(net, nla[NFTA_FLOWTABLE_TABLE], family, genmask, 0); - if (IS_ERR(table)) + if (IS_ERR(table)) { + NL_SET_BAD_ATTR(extack, nla[NFTA_FLOWTABLE_TABLE]); return PTR_ERR(table); + } flowtable = nft_flowtable_lookup(table, nla[NFTA_FLOWTABLE_NAME], genmask); - if (IS_ERR(flowtable)) + if (IS_ERR(flowtable)) { + NL_SET_BAD_ATTR(extack, nla[NFTA_FLOWTABLE_NAME]); return PTR_ERR(flowtable); + } skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); if (!skb2) @@ -9009,7 +9060,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { .policy = nft_rule_policy, }, [NFT_MSG_GETRULE_RESET] = { - .call = nf_tables_getrule, + .call = nf_tables_getrule_reset, .type = NFNL_CB_RCU, .attr_count = NFTA_RULE_MAX, .policy = nft_rule_policy, @@ -9259,7 +9310,7 @@ static void nft_commit_release(struct nft_trans *trans) case NFT_MSG_DESTROYSETELEM: nf_tables_set_elem_destroy(&trans->ctx, nft_trans_elem_set(trans), - nft_trans_elem(trans).priv); + nft_trans_elem_priv(trans)); break; case NFT_MSG_DELOBJ: case NFT_MSG_DESTROYOBJ: @@ -9488,16 +9539,12 @@ void nft_chain_del(struct nft_chain *chain) static void nft_trans_gc_setelem_remove(struct nft_ctx *ctx, struct nft_trans_gc *trans) { - void **priv = trans->priv; + struct nft_elem_priv **priv = trans->priv; unsigned int i; for (i = 0; i < trans->count; i++) { - struct nft_set_elem elem = { - .priv = priv[i], - }; - - nft_setelem_data_deactivate(ctx->net, trans->set, &elem); - nft_setelem_remove(ctx->net, trans->set, &elem); + nft_setelem_data_deactivate(ctx->net, trans->set, priv[i]); + nft_setelem_remove(ctx->net, trans->set, priv[i]); } } @@ -9510,7 +9557,7 @@ void nft_trans_gc_destroy(struct nft_trans_gc *trans) static void nft_trans_gc_trans_free(struct rcu_head *rcu) { - struct nft_set_elem elem = {}; + struct 
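The rewritten flowtable error path above encodes two rules: unwind labels mirror setup order in reverse, and the step that makes the object visible to the datapath (hook registration) comes last, so nothing has to be torn down while packets may already be traversing it. The same shape in isolation; demo_obj and all demo_* helpers are assumed for illustration:

struct demo_obj;	/* opaque; helpers below are hypothetical */

static int demo_setup(struct demo_obj *obj)
{
	int err;

	err = demo_parse_config(obj);		/* no side effects yet */
	if (err)
		goto err_parse;

	err = demo_alloc_transaction(obj);	/* reversible bookkeeping */
	if (err)
		goto err_trans;

	/* Must be LAST: after this, the datapath can see the object. */
	err = demo_register_hooks(obj);
	if (err)
		goto err_hooks;

	return 0;

err_hooks:
	demo_free_transaction(obj);	/* unwind in reverse order */
err_trans:
	demo_free_config(obj);
err_parse:
	return err;
}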
nft_elem_priv *elem_priv; struct nft_trans_gc *trans; struct nft_ctx ctx = {}; unsigned int i; @@ -9519,11 +9566,11 @@ static void nft_trans_gc_trans_free(struct rcu_head *rcu) ctx.net = read_pnet(&trans->set->net); for (i = 0; i < trans->count; i++) { - elem.priv = trans->priv[i]; - if (!nft_setelem_is_catchall(trans->set, &elem)) + elem_priv = trans->priv[i]; + if (!nft_setelem_is_catchall(trans->set, elem_priv)) atomic_dec(&trans->set->nelems); - nf_tables_set_elem_destroy(&ctx, trans->set, elem.priv); + nf_tables_set_elem_destroy(&ctx, trans->set, elem_priv); } nft_trans_gc_destroy(trans); @@ -9700,8 +9747,9 @@ dead_elem: struct nft_trans_gc *nft_trans_gc_catchall_sync(struct nft_trans_gc *gc) { struct nft_set_elem_catchall *catchall, *next; + u64 tstamp = nft_net_tstamp(gc->net); const struct nft_set *set = gc->set; - struct nft_set_elem elem; + struct nft_elem_priv *elem_priv; struct nft_set_ext *ext; WARN_ON_ONCE(!lockdep_commit_lock_is_held(gc->net)); @@ -9709,19 +9757,17 @@ struct nft_trans_gc *nft_trans_gc_catchall_sync(struct nft_trans_gc *gc) list_for_each_entry_safe(catchall, next, &set->catchall_list, list) { ext = nft_set_elem_ext(set, catchall->elem); - if (!nft_set_elem_expired(ext)) + if (!__nft_set_elem_expired(ext, tstamp)) continue; gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL); if (!gc) return NULL; - memset(&elem, 0, sizeof(elem)); - elem.priv = catchall->elem; - - nft_setelem_data_deactivate(gc->net, gc->set, &elem); + elem_priv = catchall->elem; + nft_setelem_data_deactivate(gc->net, gc->set, elem_priv); nft_setelem_catchall_destroy(catchall); - nft_trans_gc_elem_add(gc, elem.priv); + nft_trans_gc_elem_add(gc, elem_priv); } return gc; @@ -10105,9 +10151,9 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) case NFT_MSG_NEWSETELEM: te = (struct nft_trans_elem *)trans->data; - nft_setelem_activate(net, te->set, &te->elem); + nft_setelem_activate(net, te->set, te->elem_priv); nf_tables_setelem_notify(&trans->ctx, te->set, - &te->elem, + te->elem_priv, NFT_MSG_NEWSETELEM); if (te->set->ops->commit && list_empty(&te->set->pending_update)) { @@ -10121,10 +10167,10 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) te = (struct nft_trans_elem *)trans->data; nf_tables_setelem_notify(&trans->ctx, te->set, - &te->elem, + te->elem_priv, trans->msg_type); - nft_setelem_remove(net, te->set, &te->elem); - if (!nft_setelem_is_catchall(te->set, &te->elem)) { + nft_setelem_remove(net, te->set, te->elem_priv); + if (!nft_setelem_is_catchall(te->set, te->elem_priv)) { atomic_dec(&te->set->nelems); te->set->ndeact--; } @@ -10244,7 +10290,7 @@ static void nf_tables_abort_release(struct nft_trans *trans) break; case NFT_MSG_NEWSETELEM: nft_set_elem_destroy(nft_trans_elem_set(trans), - nft_trans_elem(trans).priv, true); + nft_trans_elem_priv(trans), true); break; case NFT_MSG_NEWOBJ: nft_obj_destroy(&trans->ctx, nft_trans_obj(trans)); @@ -10392,8 +10438,8 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) break; } te = (struct nft_trans_elem *)trans->data; - nft_setelem_remove(net, te->set, &te->elem); - if (!nft_setelem_is_catchall(te->set, &te->elem)) + nft_setelem_remove(net, te->set, te->elem_priv); + if (!nft_setelem_is_catchall(te->set, te->elem_priv)) atomic_dec(&te->set->nelems); if (te->set->ops->abort && @@ -10406,9 +10452,9 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) case NFT_MSG_DESTROYSETELEM: te = (struct nft_trans_elem *)trans->data; - nft_setelem_data_activate(net, te->set, 
&te->elem); - nft_setelem_activate(net, te->set, &te->elem); - if (!nft_setelem_is_catchall(te->set, &te->elem)) + nft_setelem_data_activate(net, te->set, te->elem_priv); + nft_setelem_activate(net, te->set, te->elem_priv); + if (!nft_setelem_is_catchall(te->set, te->elem_priv)) te->set->ndeact--; if (te->set->ops->abort && @@ -10497,6 +10543,7 @@ static bool nf_tables_valid_genid(struct net *net, u32 genid) bool genid_ok; mutex_lock(&nft_net->commit_mutex); + nft_net->tstamp = get_jiffies_64(); genid_ok = genid == 0 || nft_net->base_seq == genid; if (!genid_ok) @@ -10584,9 +10631,9 @@ static int nft_check_loops(const struct nft_ctx *ctx, static int nf_tables_loop_check_setelem(const struct nft_ctx *ctx, struct nft_set *set, const struct nft_set_iter *iter, - struct nft_set_elem *elem) + struct nft_elem_priv *elem_priv) { - const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); + const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv); if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) && *nft_set_ext_flags(ext) & NFT_SET_ELEM_INTERVAL_END) @@ -11393,4 +11440,5 @@ module_exit(nf_tables_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); +MODULE_DESCRIPTION("Framework for packet filtering and classification"); MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_NFTABLES); diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 711c22ab70..c3e6353647 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -115,7 +115,7 @@ static noinline void __nft_trace_verdict(const struct nft_pktinfo *pkt, { enum nft_trace_types type; - switch (regs->verdict.code) { + switch (regs->verdict.code & NF_VERDICT_MASK) { case NFT_CONTINUE: case NFT_RETURN: type = NFT_TRACETYPE_RETURN; @@ -308,10 +308,11 @@ next_rule: switch (regs.verdict.code & NF_VERDICT_MASK) { case NF_ACCEPT: - case NF_DROP: case NF_QUEUE: case NF_STOLEN: return regs.verdict.code; + case NF_DROP: + return NF_DROP_REASON(pkt->skb, SKB_DROP_REASON_NETFILTER_DROP, EPERM); } switch (regs.verdict.code) { @@ -342,6 +343,9 @@ next_rule: if (static_branch_unlikely(&nft_counters_enabled)) nft_update_chain_stats(basechain, pkt); + if (nft_base_chain(basechain)->policy == NF_DROP) + return NF_DROP_REASON(pkt->skb, SKB_DROP_REASON_NETFILTER_DROP, EPERM); + return nft_base_chain(basechain)->policy; } EXPORT_SYMBOL_GPL(nft_do_chain); diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c index 6d41c0bd3d..a83637e3f4 100644 --- a/net/netfilter/nf_tables_trace.c +++ b/net/netfilter/nf_tables_trace.c @@ -258,17 +258,21 @@ void nft_trace_notify(const struct nft_pktinfo *pkt, case __NFT_TRACETYPE_MAX: break; case NFT_TRACETYPE_RETURN: - case NFT_TRACETYPE_RULE: + case NFT_TRACETYPE_RULE: { + unsigned int v; + if (nft_verdict_dump(skb, NFTA_TRACE_VERDICT, verdict)) goto nla_put_failure; /* pkt->skb undefined iff NF_STOLEN, disable dump */ - if (verdict->code == NF_STOLEN) + v = verdict->code & NF_VERDICT_MASK; + if (v == NF_STOLEN) info->packet_dumped = true; else mark = pkt->skb->mark; break; + } case NFT_TRACETYPE_POLICY: mark = pkt->skb->mark; diff --git a/net/netfilter/nfnetlink_osf.c b/net/netfilter/nfnetlink_osf.c index 50723ba082..c0fc431991 100644 --- a/net/netfilter/nfnetlink_osf.c +++ b/net/netfilter/nfnetlink_osf.c @@ -447,4 +447,5 @@ module_init(nfnl_osf_init); module_exit(nfnl_osf_fini); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Passive OS fingerprint matching"); MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_OSF); diff --git 
a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 556bc902af..5cf38fc0a3 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -228,19 +228,29 @@ find_dequeue_entry(struct nfqnl_instance *queue, unsigned int id) static void nfqnl_reinject(struct nf_queue_entry *entry, unsigned int verdict) { const struct nf_ct_hook *ct_hook; - int err; if (verdict == NF_ACCEPT || verdict == NF_REPEAT || verdict == NF_STOP) { + unsigned int ct_verdict = verdict; + rcu_read_lock(); ct_hook = rcu_dereference(nf_ct_hook); - if (ct_hook) { - err = ct_hook->update(entry->state.net, entry->skb); - if (err < 0) - verdict = NF_DROP; - } + if (ct_hook) + ct_verdict = ct_hook->update(entry->state.net, entry->skb); rcu_read_unlock(); + + switch (ct_verdict & NF_VERDICT_MASK) { + case NF_ACCEPT: + /* follow userspace verdict, could be REPEAT */ + break; + case NF_STOLEN: + nf_queue_entry_free(entry); + return; + default: + verdict = ct_verdict & NF_VERDICT_MASK; + break; + } } nf_reinject(entry, verdict); } diff --git a/net/netfilter/nft_chain_nat.c b/net/netfilter/nft_chain_nat.c index 98e4946100..40e230d8b7 100644 --- a/net/netfilter/nft_chain_nat.c +++ b/net/netfilter/nft_chain_nat.c @@ -137,6 +137,7 @@ module_init(nft_chain_nat_init); module_exit(nft_chain_nat_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("nftables network address translation support"); #ifdef CONFIG_NF_TABLES_IPV4 MODULE_ALIAS_NFT_CHAIN(AF_INET, "nat"); #endif diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index f0eeda97bf..1f9474fefe 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -135,7 +135,7 @@ static void nft_target_eval_bridge(const struct nft_expr *expr, static const struct nla_policy nft_target_policy[NFTA_TARGET_MAX + 1] = { [NFTA_TARGET_NAME] = { .type = NLA_NUL_STRING }, - [NFTA_TARGET_REV] = { .type = NLA_U32 }, + [NFTA_TARGET_REV] = NLA_POLICY_MAX(NLA_BE32, 255), [NFTA_TARGET_INFO] = { .type = NLA_BINARY }, }; @@ -200,6 +200,7 @@ static const struct nla_policy nft_rule_compat_policy[NFTA_RULE_COMPAT_MAX + 1] static int nft_parse_compat(const struct nlattr *attr, u16 *proto, bool *inv) { struct nlattr *tb[NFTA_RULE_COMPAT_MAX+1]; + u32 l4proto; u32 flags; int err; @@ -212,12 +213,18 @@ static int nft_parse_compat(const struct nlattr *attr, u16 *proto, bool *inv) return -EINVAL; flags = ntohl(nla_get_be32(tb[NFTA_RULE_COMPAT_FLAGS])); - if (flags & ~NFT_RULE_COMPAT_F_MASK) + if (flags & NFT_RULE_COMPAT_F_UNUSED || + flags & ~NFT_RULE_COMPAT_F_MASK) return -EINVAL; if (flags & NFT_RULE_COMPAT_F_INV) *inv = true; - *proto = ntohl(nla_get_be32(tb[NFTA_RULE_COMPAT_PROTO])); + l4proto = ntohl(nla_get_be32(tb[NFTA_RULE_COMPAT_PROTO])); + if (l4proto > U16_MAX) + return -EINVAL; + + *proto = l4proto; + return 0; } @@ -419,7 +426,7 @@ static void nft_match_eval(const struct nft_expr *expr, static const struct nla_policy nft_match_policy[NFTA_MATCH_MAX + 1] = { [NFTA_MATCH_NAME] = { .type = NLA_NUL_STRING }, - [NFTA_MATCH_REV] = { .type = NLA_U32 }, + [NFTA_MATCH_REV] = NLA_POLICY_MAX(NLA_BE32, 255), [NFTA_MATCH_INFO] = { .type = NLA_BINARY }, }; @@ -724,7 +731,7 @@ out_put: static const struct nla_policy nfnl_compat_policy_get[NFTA_COMPAT_MAX+1] = { [NFTA_COMPAT_NAME] = { .type = NLA_NUL_STRING, .len = NFT_COMPAT_NAME_MAX-1 }, - [NFTA_COMPAT_REV] = { .type = NLA_U32 }, + [NFTA_COMPAT_REV] = NLA_POLICY_MAX(NLA_BE32, 255), [NFTA_COMPAT_TYPE] = { .type = NLA_U32 }, }; diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 
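The nfnetlink_queue and trace hunks above both lean on the layout of the verdict word: the base action lives in the low byte, while the upper bits carry per-action payload (the queue number for NF_QUEUE, and with this series an encoded errno for drops), so comparisons must mask first. A small hedged sketch:

#include <linux/netfilter.h>

/* Sketch: decide whether a hook verdict means the callee took
 * ownership of the skb. Comparing the raw word against NF_STOLEN
 * would misfire whenever the upper bits carry a payload, hence
 * the NF_VERDICT_MASK.
 */
static bool demo_verdict_consumes_skb(unsigned int verdict)
{
	switch (verdict & NF_VERDICT_MASK) {
	case NF_STOLEN:
		return true;
	default:
		return false;	/* NF_ACCEPT, NF_DROP, NF_QUEUE, ... */
	}
}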
86bb9d7797..bfd3e5a14d 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -476,6 +476,9 @@ static int nft_ct_get_init(const struct nft_ctx *ctx, break; #endif case NFT_CT_ID: + if (tb[NFTA_CT_DIRECTION]) + return -EINVAL; + len = sizeof(u32); break; default: @@ -1250,7 +1253,31 @@ static int nft_ct_expect_obj_init(const struct nft_ctx *ctx, if (tb[NFTA_CT_EXPECT_L3PROTO]) priv->l3num = ntohs(nla_get_be16(tb[NFTA_CT_EXPECT_L3PROTO])); + switch (priv->l3num) { + case NFPROTO_IPV4: + case NFPROTO_IPV6: + if (priv->l3num != ctx->family) + return -EINVAL; + + fallthrough; + case NFPROTO_INET: + break; + default: + return -EOPNOTSUPP; + } + priv->l4proto = nla_get_u8(tb[NFTA_CT_EXPECT_L4PROTO]); + switch (priv->l4proto) { + case IPPROTO_TCP: + case IPPROTO_UDP: + case IPPROTO_UDPLITE: + case IPPROTO_DCCP: + case IPPROTO_SCTP: + break; + default: + return -EOPNOTSUPP; + } + priv->dport = nla_get_be16(tb[NFTA_CT_EXPECT_DPORT]); priv->timeout = nla_get_u32(tb[NFTA_CT_EXPECT_TIMEOUT]); priv->size = nla_get_u8(tb[NFTA_CT_EXPECT_SIZE]); diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index 629a91a8c6..c09dba5735 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -44,33 +44,34 @@ static int nft_dynset_expr_setup(const struct nft_dynset *priv, return 0; } -static void *nft_dynset_new(struct nft_set *set, const struct nft_expr *expr, - struct nft_regs *regs) +static struct nft_elem_priv *nft_dynset_new(struct nft_set *set, + const struct nft_expr *expr, + struct nft_regs *regs) { const struct nft_dynset *priv = nft_expr_priv(expr); struct nft_set_ext *ext; + void *elem_priv; u64 timeout; - void *elem; if (!atomic_add_unless(&set->nelems, 1, set->size)) return NULL; timeout = priv->timeout ? : set->timeout; - elem = nft_set_elem_init(set, &priv->tmpl, - &regs->data[priv->sreg_key], NULL, - &regs->data[priv->sreg_data], - timeout, 0, GFP_ATOMIC); - if (IS_ERR(elem)) + elem_priv = nft_set_elem_init(set, &priv->tmpl, + &regs->data[priv->sreg_key], NULL, + &regs->data[priv->sreg_data], + timeout, 0, GFP_ATOMIC); + if (IS_ERR(elem_priv)) goto err1; - ext = nft_set_elem_ext(set, elem); + ext = nft_set_elem_ext(set, elem_priv); if (priv->num_exprs && nft_dynset_expr_setup(priv, ext) < 0) goto err2; - return elem; + return elem_priv; err2: - nft_set_elem_destroy(set, elem, false); + nft_set_elem_destroy(set, elem_priv, false); err1: if (set->size) atomic_dec(&set->nelems); diff --git a/net/netfilter/nft_fib.c b/net/netfilter/nft_fib.c index ca905aa822..37cfe6dd71 100644 --- a/net/netfilter/nft_fib.c +++ b/net/netfilter/nft_fib.c @@ -208,4 +208,5 @@ bool nft_fib_reduce(struct nft_regs_track *track, EXPORT_SYMBOL_GPL(nft_fib_reduce); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Query routing table from nftables"); MODULE_AUTHOR("Florian Westphal <fw@strlen.de>"); diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c index a5268e6dd3..358e742afa 100644 --- a/net/netfilter/nft_fwd_netdev.c +++ b/net/netfilter/nft_fwd_netdev.c @@ -270,4 +270,5 @@ module_exit(nft_fwd_netdev_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); +MODULE_DESCRIPTION("nftables netdev packet forwarding support"); MODULE_ALIAS_NFT_AF_EXPR(5, "fwd"); diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c index 1e5e7a181e..32df7a1683 100644 --- a/net/netfilter/nft_set_bitmap.c +++ b/net/netfilter/nft_set_bitmap.c @@ -13,6 +13,7 @@ #include <net/netfilter/nf_tables_core.h> struct nft_bitmap_elem { + struct 
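nft_dynset_new() above keeps its reserve-then-build shape through the type change: the bounded nelems counter is bumped before the element is constructed and decremented again on any failure, so concurrent packet-path insertions can never overshoot the set size. A standalone sketch of that accounting (demo_new_entry and its limit handling are hypothetical simplifications):

#include <linux/atomic.h>
#include <linux/slab.h>

/* Sketch: take a slot in the bounded counter first, give it back on
 * any later failure. atomic_add_unless() refuses the increment once
 * the counter has reached max, so the cap is enforced atomically.
 */
static void *demo_new_entry(atomic_t *nelems, int max)
{
	void *entry;

	if (!atomic_add_unless(nelems, 1, max))
		return NULL;		/* set already full */

	entry = kzalloc(sizeof(unsigned long), GFP_ATOMIC);
	if (!entry)
		goto err_undo;

	return entry;

err_undo:
	atomic_dec(nelems);		/* release the reserved slot */
	return NULL;
}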
nft_elem_priv priv; struct list_head head; struct nft_set_ext ext; }; @@ -104,8 +105,9 @@ nft_bitmap_elem_find(const struct nft_set *set, struct nft_bitmap_elem *this, return NULL; } -static void *nft_bitmap_get(const struct net *net, const struct nft_set *set, - const struct nft_set_elem *elem, unsigned int flags) +static struct nft_elem_priv * +nft_bitmap_get(const struct net *net, const struct nft_set *set, + const struct nft_set_elem *elem, unsigned int flags) { const struct nft_bitmap *priv = nft_set_priv(set); u8 genmask = nft_genmask_cur(net); @@ -116,23 +118,23 @@ static void *nft_bitmap_get(const struct net *net, const struct nft_set *set, !nft_set_elem_active(&be->ext, genmask)) continue; - return be; + return &be->priv; } return ERR_PTR(-ENOENT); } static int nft_bitmap_insert(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem, - struct nft_set_ext **ext) + struct nft_elem_priv **elem_priv) { + struct nft_bitmap_elem *new = nft_elem_priv_cast(elem->priv), *be; struct nft_bitmap *priv = nft_set_priv(set); - struct nft_bitmap_elem *new = elem->priv, *be; u8 genmask = nft_genmask_next(net); u32 idx, off; be = nft_bitmap_elem_find(set, new, genmask); if (be) { - *ext = &be->ext; + *elem_priv = &be->priv; return -EEXIST; } @@ -144,12 +146,11 @@ static int nft_bitmap_insert(const struct net *net, const struct nft_set *set, return 0; } -static void nft_bitmap_remove(const struct net *net, - const struct nft_set *set, - const struct nft_set_elem *elem) +static void nft_bitmap_remove(const struct net *net, const struct nft_set *set, + struct nft_elem_priv *elem_priv) { + struct nft_bitmap_elem *be = nft_elem_priv_cast(elem_priv); struct nft_bitmap *priv = nft_set_priv(set); - struct nft_bitmap_elem *be = elem->priv; u8 genmask = nft_genmask_next(net); u32 idx, off; @@ -161,10 +162,10 @@ static void nft_bitmap_remove(const struct net *net, static void nft_bitmap_activate(const struct net *net, const struct nft_set *set, - const struct nft_set_elem *elem) + struct nft_elem_priv *elem_priv) { + struct nft_bitmap_elem *be = nft_elem_priv_cast(elem_priv); struct nft_bitmap *priv = nft_set_priv(set); - struct nft_bitmap_elem *be = elem->priv; u8 genmask = nft_genmask_next(net); u32 idx, off; @@ -174,28 +175,27 @@ static void nft_bitmap_activate(const struct net *net, nft_set_elem_change_active(net, set, &be->ext); } -static bool nft_bitmap_flush(const struct net *net, - const struct nft_set *set, void *_be) +static void nft_bitmap_flush(const struct net *net, + const struct nft_set *set, + struct nft_elem_priv *elem_priv) { + struct nft_bitmap_elem *be = nft_elem_priv_cast(elem_priv); struct nft_bitmap *priv = nft_set_priv(set); u8 genmask = nft_genmask_next(net); - struct nft_bitmap_elem *be = _be; u32 idx, off; nft_bitmap_location(set, nft_set_ext_key(&be->ext), &idx, &off); /* Enter 10 state, similar to deactivation. 
*/ priv->bitmap[idx] &= ~(genmask << off); nft_set_elem_change_active(net, set, &be->ext); - - return true; } -static void *nft_bitmap_deactivate(const struct net *net, - const struct nft_set *set, - const struct nft_set_elem *elem) +static struct nft_elem_priv * +nft_bitmap_deactivate(const struct net *net, const struct nft_set *set, + const struct nft_set_elem *elem) { + struct nft_bitmap_elem *this = nft_elem_priv_cast(elem->priv), *be; struct nft_bitmap *priv = nft_set_priv(set); - struct nft_bitmap_elem *this = elem->priv, *be; u8 genmask = nft_genmask_next(net); u32 idx, off; @@ -209,7 +209,7 @@ static void *nft_bitmap_deactivate(const struct net *net, priv->bitmap[idx] &= ~(genmask << off); nft_set_elem_change_active(net, set, &be->ext); - return be; + return &be->priv; } static void nft_bitmap_walk(const struct nft_ctx *ctx, @@ -218,7 +218,6 @@ static void nft_bitmap_walk(const struct nft_ctx *ctx, { const struct nft_bitmap *priv = nft_set_priv(set); struct nft_bitmap_elem *be; - struct nft_set_elem elem; list_for_each_entry_rcu(be, &priv->list, head) { if (iter->count < iter->skip) @@ -226,9 +225,7 @@ static void nft_bitmap_walk(const struct nft_ctx *ctx, if (!nft_set_elem_active(&be->ext, iter->genmask)) goto cont; - elem.priv = be; - - iter->err = iter->fn(ctx, set, iter, &elem); + iter->err = iter->fn(ctx, set, iter, &be->priv); if (iter->err < 0) return; @@ -265,6 +262,8 @@ static int nft_bitmap_init(const struct nft_set *set, { struct nft_bitmap *priv = nft_set_priv(set); + BUILD_BUG_ON(offsetof(struct nft_bitmap_elem, priv) != 0); + INIT_LIST_HEAD(&priv->list); priv->bitmap_size = nft_bitmap_size(set->klen); @@ -278,7 +277,7 @@ static void nft_bitmap_destroy(const struct nft_ctx *ctx, struct nft_bitmap_elem *be, *n; list_for_each_entry_safe(be, n, &priv->list, head) - nf_tables_set_elem_destroy(ctx, set, be); + nf_tables_set_elem_destroy(ctx, set, &be->priv); } static bool nft_bitmap_estimate(const struct nft_set_desc *desc, u32 features, diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index 2013de934c..6968a3b342 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -27,6 +27,7 @@ struct nft_rhash { }; struct nft_rhash_elem { + struct nft_elem_priv priv; struct rhash_head node; struct nft_set_ext ext; }; @@ -35,6 +36,7 @@ struct nft_rhash_cmp_arg { const struct nft_set *set; const u32 *key; u8 genmask; + u64 tstamp; }; static inline u32 nft_rhash_key(const void *data, u32 len, u32 seed) @@ -61,7 +63,7 @@ static inline int nft_rhash_cmp(struct rhashtable_compare_arg *arg, return 1; if (nft_set_elem_is_dead(&he->ext)) return 1; - if (nft_set_elem_expired(&he->ext)) + if (__nft_set_elem_expired(&he->ext, x->tstamp)) return 1; if (!nft_set_elem_active(&he->ext, x->genmask)) return 1; @@ -86,6 +88,7 @@ bool nft_rhash_lookup(const struct net *net, const struct nft_set *set, .genmask = nft_genmask_cur(net), .set = set, .key = key, + .tstamp = get_jiffies_64(), }; he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params); @@ -95,8 +98,9 @@ bool nft_rhash_lookup(const struct net *net, const struct nft_set *set, return !!he; } -static void *nft_rhash_get(const struct net *net, const struct nft_set *set, - const struct nft_set_elem *elem, unsigned int flags) +static struct nft_elem_priv * +nft_rhash_get(const struct net *net, const struct nft_set *set, + const struct nft_set_elem *elem, unsigned int flags) { struct nft_rhash *priv = nft_set_priv(set); struct nft_rhash_elem *he; @@ -104,17 +108,19 @@ static void *nft_rhash_get(const 
struct net *net, const struct nft_set *set, .genmask = nft_genmask_cur(net), .set = set, .key = elem->key.val.data, + .tstamp = get_jiffies_64(), }; he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params); if (he != NULL) - return he; + return &he->priv; return ERR_PTR(-ENOENT); } static bool nft_rhash_update(struct nft_set *set, const u32 *key, - void *(*new)(struct nft_set *, + struct nft_elem_priv * + (*new)(struct nft_set *, const struct nft_expr *, struct nft_regs *regs), const struct nft_expr *expr, @@ -123,20 +129,23 @@ static bool nft_rhash_update(struct nft_set *set, const u32 *key, { struct nft_rhash *priv = nft_set_priv(set); struct nft_rhash_elem *he, *prev; + struct nft_elem_priv *elem_priv; struct nft_rhash_cmp_arg arg = { .genmask = NFT_GENMASK_ANY, .set = set, .key = key, + .tstamp = get_jiffies_64(), }; he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params); if (he != NULL) goto out; - he = new(set, expr, regs); - if (he == NULL) + elem_priv = new(set, expr, regs); + if (!elem_priv) goto err1; + he = nft_elem_priv_cast(elem_priv); prev = rhashtable_lookup_get_insert_key(&priv->ht, &arg, &he->node, nft_rhash_params); if (IS_ERR(prev)) @@ -144,7 +153,7 @@ static bool nft_rhash_update(struct nft_set *set, const u32 *key, /* Another cpu may race to insert the element with the same key */ if (prev) { - nft_set_elem_destroy(set, he, true); + nft_set_elem_destroy(set, &he->priv, true); atomic_dec(&set->nelems); he = prev; } @@ -154,7 +163,7 @@ out: return true; err2: - nft_set_elem_destroy(set, he, true); + nft_set_elem_destroy(set, &he->priv, true); atomic_dec(&set->nelems); err1: return false; @@ -162,14 +171,15 @@ err1: static int nft_rhash_insert(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem, - struct nft_set_ext **ext) + struct nft_elem_priv **elem_priv) { + struct nft_rhash_elem *he = nft_elem_priv_cast(elem->priv); struct nft_rhash *priv = nft_set_priv(set); - struct nft_rhash_elem *he = elem->priv; struct nft_rhash_cmp_arg arg = { .genmask = nft_genmask_next(net), .set = set, .key = elem->key.val.data, + .tstamp = nft_net_tstamp(net), }; struct nft_rhash_elem *prev; @@ -178,33 +188,32 @@ static int nft_rhash_insert(const struct net *net, const struct nft_set *set, if (IS_ERR(prev)) return PTR_ERR(prev); if (prev) { - *ext = &prev->ext; + *elem_priv = &prev->priv; return -EEXIST; } return 0; } static void nft_rhash_activate(const struct net *net, const struct nft_set *set, - const struct nft_set_elem *elem) + struct nft_elem_priv *elem_priv) { - struct nft_rhash_elem *he = elem->priv; + struct nft_rhash_elem *he = nft_elem_priv_cast(elem_priv); nft_set_elem_change_active(net, set, &he->ext); } -static bool nft_rhash_flush(const struct net *net, - const struct nft_set *set, void *priv) +static void nft_rhash_flush(const struct net *net, + const struct nft_set *set, + struct nft_elem_priv *elem_priv) { - struct nft_rhash_elem *he = priv; + struct nft_rhash_elem *he = nft_elem_priv_cast(elem_priv); nft_set_elem_change_active(net, set, &he->ext); - - return true; } -static void *nft_rhash_deactivate(const struct net *net, - const struct nft_set *set, - const struct nft_set_elem *elem) +static struct nft_elem_priv * +nft_rhash_deactivate(const struct net *net, const struct nft_set *set, + const struct nft_set_elem *elem) { struct nft_rhash *priv = nft_set_priv(set); struct nft_rhash_elem *he; @@ -212,6 +221,7 @@ static void *nft_rhash_deactivate(const struct net *net, .genmask = nft_genmask_next(net), .set = set, .key = 
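The new .tstamp member in the compare argument means every expiry decision within one lookup or one transaction is made against a single timestamp rather than re-reading jiffies per element: the packet path samples get_jiffies_64() once, while control-plane paths reuse the timestamp latched into the pernet area when the batch began (nft_net_tstamp()). A sketch of why that matters; the demo_* names stand in for the real nft_set_ext helpers:

#include <linux/jiffies.h>

/* Sketch: expiry checks against a caller-supplied timestamp. If each
 * element re-read jiffies, an element could be "alive" for the EEXIST
 * check yet "expired" a moment later for the insert proper, splitting
 * the transaction's view of the set.
 */
struct demo_ext { u64 expiration; };	/* stand-in for nft_set_ext */

static bool demo_elem_expired(const struct demo_ext *ext, u64 tstamp)
{
	return time_after_eq64(tstamp, ext->expiration);
}

static int demo_walk(const struct demo_ext *a, const struct demo_ext *b)
{
	/* One timestamp per operation, shared by every element visited. */
	u64 tstamp = get_jiffies_64();

	return demo_elem_expired(a, tstamp) + demo_elem_expired(b, tstamp);
}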
elem->key.val.data, + .tstamp = nft_net_tstamp(net), }; rcu_read_lock(); @@ -221,15 +231,15 @@ static void *nft_rhash_deactivate(const struct net *net, rcu_read_unlock(); - return he; + return &he->priv; } static void nft_rhash_remove(const struct net *net, const struct nft_set *set, - const struct nft_set_elem *elem) + struct nft_elem_priv *elem_priv) { + struct nft_rhash_elem *he = nft_elem_priv_cast(elem_priv); struct nft_rhash *priv = nft_set_priv(set); - struct nft_rhash_elem *he = elem->priv; rhashtable_remove_fast(&priv->ht, &he->node, nft_rhash_params); } @@ -260,7 +270,6 @@ static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set, struct nft_rhash *priv = nft_set_priv(set); struct nft_rhash_elem *he; struct rhashtable_iter hti; - struct nft_set_elem elem; rhashtable_walk_enter(&priv->ht, &hti); rhashtable_walk_start(&hti); @@ -280,9 +289,7 @@ static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set, if (!nft_set_elem_active(&he->ext, iter->genmask)) goto cont; - elem.priv = he; - - iter->err = iter->fn(ctx, set, iter, &elem); + iter->err = iter->fn(ctx, set, iter, &he->priv); if (iter->err < 0) break; @@ -406,6 +413,8 @@ static int nft_rhash_init(const struct nft_set *set, struct rhashtable_params params = nft_rhash_params; int err; + BUILD_BUG_ON(offsetof(struct nft_rhash_elem, priv) != 0); + params.nelem_hint = desc->size ?: NFT_RHASH_ELEMENT_HINT; params.key_len = set->klen; @@ -428,8 +437,9 @@ struct nft_rhash_ctx { static void nft_rhash_elem_destroy(void *ptr, void *arg) { struct nft_rhash_ctx *rhash_ctx = arg; + struct nft_rhash_elem *he = ptr; - nf_tables_set_elem_destroy(&rhash_ctx->ctx, rhash_ctx->set, ptr); + nf_tables_set_elem_destroy(&rhash_ctx->ctx, rhash_ctx->set, &he->priv); } static void nft_rhash_destroy(const struct nft_ctx *ctx, @@ -476,6 +486,7 @@ struct nft_hash { }; struct nft_hash_elem { + struct nft_elem_priv priv; struct hlist_node node; struct nft_set_ext ext; }; @@ -501,8 +512,9 @@ bool nft_hash_lookup(const struct net *net, const struct nft_set *set, return false; } -static void *nft_hash_get(const struct net *net, const struct nft_set *set, - const struct nft_set_elem *elem, unsigned int flags) +static struct nft_elem_priv * +nft_hash_get(const struct net *net, const struct nft_set *set, + const struct nft_set_elem *elem, unsigned int flags) { struct nft_hash *priv = nft_set_priv(set); u8 genmask = nft_genmask_cur(net); @@ -514,7 +526,7 @@ static void *nft_hash_get(const struct net *net, const struct nft_set *set, hlist_for_each_entry_rcu(he, &priv->table[hash], node) { if (!memcmp(nft_set_ext_key(&he->ext), elem->key.val.data, set->klen) && nft_set_elem_active(&he->ext, genmask)) - return he; + return &he->priv; } return ERR_PTR(-ENOENT); } @@ -562,9 +574,9 @@ static u32 nft_jhash(const struct nft_set *set, const struct nft_hash *priv, static int nft_hash_insert(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem, - struct nft_set_ext **ext) + struct nft_elem_priv **elem_priv) { - struct nft_hash_elem *this = elem->priv, *he; + struct nft_hash_elem *this = nft_elem_priv_cast(elem->priv), *he; struct nft_hash *priv = nft_set_priv(set); u8 genmask = nft_genmask_next(net); u32 hash; @@ -574,7 +586,7 @@ static int nft_hash_insert(const struct net *net, const struct nft_set *set, if (!memcmp(nft_set_ext_key(&this->ext), nft_set_ext_key(&he->ext), set->klen) && nft_set_elem_active(&he->ext, genmask)) { - *ext = &he->ext; + *elem_priv = &he->priv; return -EEXIST; } } @@ -583,28 +595,28 @@ static 
int nft_hash_insert(const struct net *net, const struct nft_set *set, } static void nft_hash_activate(const struct net *net, const struct nft_set *set, - const struct nft_set_elem *elem) + struct nft_elem_priv *elem_priv) { - struct nft_hash_elem *he = elem->priv; + struct nft_hash_elem *he = nft_elem_priv_cast(elem_priv); nft_set_elem_change_active(net, set, &he->ext); } -static bool nft_hash_flush(const struct net *net, - const struct nft_set *set, void *priv) +static void nft_hash_flush(const struct net *net, + const struct nft_set *set, + struct nft_elem_priv *elem_priv) { - struct nft_hash_elem *he = priv; + struct nft_hash_elem *he = nft_elem_priv_cast(elem_priv); nft_set_elem_change_active(net, set, &he->ext); - return true; } -static void *nft_hash_deactivate(const struct net *net, - const struct nft_set *set, - const struct nft_set_elem *elem) +static struct nft_elem_priv * +nft_hash_deactivate(const struct net *net, const struct nft_set *set, + const struct nft_set_elem *elem) { + struct nft_hash_elem *this = nft_elem_priv_cast(elem->priv), *he; struct nft_hash *priv = nft_set_priv(set); - struct nft_hash_elem *this = elem->priv, *he; u8 genmask = nft_genmask_next(net); u32 hash; @@ -614,7 +626,7 @@ static void *nft_hash_deactivate(const struct net *net, set->klen) && nft_set_elem_active(&he->ext, genmask)) { nft_set_elem_change_active(net, set, &he->ext); - return he; + return &he->priv; } } return NULL; @@ -622,9 +634,9 @@ static void *nft_hash_deactivate(const struct net *net, static void nft_hash_remove(const struct net *net, const struct nft_set *set, - const struct nft_set_elem *elem) + struct nft_elem_priv *elem_priv) { - struct nft_hash_elem *he = elem->priv; + struct nft_hash_elem *he = nft_elem_priv_cast(elem_priv); hlist_del_rcu(&he->node); } @@ -634,7 +646,6 @@ static void nft_hash_walk(const struct nft_ctx *ctx, struct nft_set *set, { struct nft_hash *priv = nft_set_priv(set); struct nft_hash_elem *he; - struct nft_set_elem elem; int i; for (i = 0; i < priv->buckets; i++) { @@ -644,9 +655,7 @@ static void nft_hash_walk(const struct nft_ctx *ctx, struct nft_set *set, if (!nft_set_elem_active(&he->ext, iter->genmask)) goto cont; - elem.priv = he; - - iter->err = iter->fn(ctx, set, iter, &elem); + iter->err = iter->fn(ctx, set, iter, &he->priv); if (iter->err < 0) return; cont: @@ -685,7 +694,7 @@ static void nft_hash_destroy(const struct nft_ctx *ctx, for (i = 0; i < priv->buckets; i++) { hlist_for_each_entry_safe(he, next, &priv->table[i], node) { hlist_del_rcu(&he->node); - nf_tables_set_elem_destroy(ctx, set, he); + nf_tables_set_elem_destroy(ctx, set, &he->priv); } } } diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 3ff31043f7..3089c4ca8f 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -342,9 +342,6 @@ #include "nft_set_pipapo_avx2.h" #include "nft_set_pipapo.h" -/* Current working bitmap index, toggled between field matches */ -static DEFINE_PER_CPU(bool, nft_pipapo_scratch_index); - /** * pipapo_refill() - For each set bit, set bits from selected mapping table item * @map: Bitmap to be scanned for set bits @@ -412,6 +409,7 @@ bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set, const u32 *key, const struct nft_set_ext **ext) { struct nft_pipapo *priv = nft_set_priv(set); + struct nft_pipapo_scratch *scratch; unsigned long *res_map, *fill_map; u8 genmask = nft_genmask_cur(net); const u8 *rp = (const u8 *)key; @@ -422,15 +420,17 @@ bool nft_pipapo_lookup(const struct net 
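For the pipapo scratch rework that follows: each lookup keeps two bitmaps inside a single per-CPU buffer and toggles which half is the result map and which is the fill map as it moves from field to field; this series moves that toggle (map_index) out of a global per-CPU flag and into the scratch area itself. A compilable sketch of the double-buffer toggle, with sizes and names assumed:

#define BSIZE 4         /* words per bitmap half; stands in for m->bsize_max */

struct scratch {
        unsigned char map_index;        /* selects which half holds results */
        unsigned long map[BSIZE * 2];   /* both bitmaps, one allocation */
};

static void match_one_field(struct scratch *s)
{
        unsigned long *res  = s->map + (s->map_index ? BSIZE : 0);
        unsigned long *fill = s->map + (s->map_index ? 0 : BSIZE);

        /* ... AND lookup-table buckets into res, refill fill for the next field ... */
        (void)res; (void)fill;

        s->map_index = !s->map_index;   /* fill becomes the next result map */
}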
*net, const struct nft_set *set, local_bh_disable(); - map_index = raw_cpu_read(nft_pipapo_scratch_index); - m = rcu_dereference(priv->match); if (unlikely(!m || !*raw_cpu_ptr(m->scratch))) goto out; - res_map = *raw_cpu_ptr(m->scratch) + (map_index ? m->bsize_max : 0); - fill_map = *raw_cpu_ptr(m->scratch) + (map_index ? 0 : m->bsize_max); + scratch = *raw_cpu_ptr(m->scratch); + + map_index = scratch->map_index; + + res_map = scratch->map + (map_index ? m->bsize_max : 0); + fill_map = scratch->map + (map_index ? 0 : m->bsize_max); memset(res_map, 0xff, m->bsize_max * sizeof(*res_map)); @@ -460,7 +460,7 @@ next_match: b = pipapo_refill(res_map, f->bsize, f->rules, fill_map, f->mt, last); if (b < 0) { - raw_cpu_write(nft_pipapo_scratch_index, map_index); + scratch->map_index = map_index; local_bh_enable(); return false; @@ -477,7 +477,7 @@ next_match: * current inactive bitmap is clean and can be reused as * *next* bitmap (not initial) for the next packet. */ - raw_cpu_write(nft_pipapo_scratch_index, map_index); + scratch->map_index = map_index; local_bh_enable(); return true; @@ -504,6 +504,7 @@ out: * @set: nftables API set representation * @data: Key data to be matched against existing elements * @genmask: If set, check that element is active in given genmask + * @tstamp: timestamp to check for expired elements * * This is essentially the same as the lookup function, except that it matches * key data against the uncommitted copy and doesn't use preallocated maps for @@ -513,7 +514,8 @@ out: */ static struct nft_pipapo_elem *pipapo_get(const struct net *net, const struct nft_set *set, - const u8 *data, u8 genmask) + const u8 *data, u8 genmask, + u64 tstamp) { struct nft_pipapo_elem *ret = ERR_PTR(-ENOENT); struct nft_pipapo *priv = nft_set_priv(set); @@ -566,7 +568,7 @@ next_match: goto out; if (last) { - if (nft_set_elem_expired(&f->mt[b].e->ext)) + if (__nft_set_elem_expired(&f->mt[b].e->ext, tstamp)) goto next_match; if ((genmask && !nft_set_elem_active(&f->mt[b].e->ext, genmask))) @@ -599,11 +601,18 @@ out: * @elem: nftables API element representation containing key data * @flags: Unused */ -static void *nft_pipapo_get(const struct net *net, const struct nft_set *set, - const struct nft_set_elem *elem, unsigned int flags) +static struct nft_elem_priv * +nft_pipapo_get(const struct net *net, const struct nft_set *set, + const struct nft_set_elem *elem, unsigned int flags) { - return pipapo_get(net, set, (const u8 *)elem->key.val.data, - nft_genmask_cur(net)); + struct nft_pipapo_elem *e; + + e = pipapo_get(net, set, (const u8 *)elem->key.val.data, + nft_genmask_cur(net), get_jiffies_64()); + if (IS_ERR(e)) + return ERR_CAST(e); + + return &e->priv; } /** @@ -1102,6 +1111,25 @@ static void pipapo_map(struct nft_pipapo_match *m, } /** + * pipapo_free_scratch() - Free per-CPU map at original (not aligned) address + * @m: Matching data + * @cpu: CPU number + */ +static void pipapo_free_scratch(const struct nft_pipapo_match *m, unsigned int cpu) +{ + struct nft_pipapo_scratch *s; + void *mem; + + s = *per_cpu_ptr(m->scratch, cpu); + if (!s) + return; + + mem = s; + mem -= s->align_off; + kfree(mem); +} + +/** * pipapo_realloc_scratch() - Reallocate scratch maps for partial match results * @clone: Copy of matching data with pending insertions and deletions * @bsize_max: Maximum bucket size, scratch maps cover two buckets @@ -1114,12 +1142,13 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone, int i; for_each_possible_cpu(i) { - unsigned long *scratch; + struct 
nft_pipapo_scratch *scratch; #ifdef NFT_PIPAPO_ALIGN - unsigned long *scratch_aligned; + void *scratch_aligned; + u32 align_off; #endif - - scratch = kzalloc_node(bsize_max * sizeof(*scratch) * 2 + + scratch = kzalloc_node(struct_size(scratch, map, + bsize_max * 2) + NFT_PIPAPO_ALIGN_HEADROOM, GFP_KERNEL, cpu_to_node(i)); if (!scratch) { @@ -1133,14 +1162,25 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone, return -ENOMEM; } - kfree(*per_cpu_ptr(clone->scratch, i)); - - *per_cpu_ptr(clone->scratch, i) = scratch; + pipapo_free_scratch(clone, i); #ifdef NFT_PIPAPO_ALIGN - scratch_aligned = NFT_PIPAPO_LT_ALIGN(scratch); - *per_cpu_ptr(clone->scratch_aligned, i) = scratch_aligned; + /* Align &scratch->map (not the struct itself): the extra + * %NFT_PIPAPO_ALIGN_HEADROOM bytes passed to kzalloc_node() + * above guarantee we can waste up to those bytes in order + * to align the map field regardless of its offset within + * the struct. + */ + BUILD_BUG_ON(offsetof(struct nft_pipapo_scratch, map) > NFT_PIPAPO_ALIGN_HEADROOM); + + scratch_aligned = NFT_PIPAPO_LT_ALIGN(&scratch->map); + scratch_aligned -= offsetof(struct nft_pipapo_scratch, map); + align_off = scratch_aligned - (void *)scratch; + + scratch = scratch_aligned; + scratch->align_off = align_off; #endif + *per_cpu_ptr(clone->scratch, i) = scratch; } return 0; @@ -1151,21 +1191,22 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone, * @net: Network namespace * @set: nftables API set representation * @elem: nftables API element representation containing key data - * @ext2: Filled with pointer to &struct nft_set_ext in inserted element + * @elem_priv: Filled with pointer to &struct nft_set_ext in inserted element * * Return: 0 on success, error pointer on failure. */ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem, - struct nft_set_ext **ext2) + struct nft_elem_priv **elem_priv) { const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); union nft_pipapo_map_bucket rulemap[NFT_PIPAPO_MAX_FIELDS]; const u8 *start = (const u8 *)elem->key.val.data, *end; - struct nft_pipapo_elem *e = elem->priv, *dup; struct nft_pipapo *priv = nft_set_priv(set); struct nft_pipapo_match *m = priv->clone; u8 genmask = nft_genmask_next(net); + struct nft_pipapo_elem *e, *dup; + u64 tstamp = nft_net_tstamp(net); struct nft_pipapo_field *f; const u8 *start_p, *end_p; int i, bsize_max, err = 0; @@ -1175,7 +1216,7 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set, else end = start; - dup = pipapo_get(net, set, start, genmask); + dup = pipapo_get(net, set, start, genmask, tstamp); if (!IS_ERR(dup)) { /* Check if we already have the same exact entry */ const struct nft_data *dup_key, *dup_end; @@ -1188,7 +1229,7 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set, if (!memcmp(start, dup_key->data, sizeof(*dup_key->data)) && !memcmp(end, dup_end->data, sizeof(*dup_end->data))) { - *ext2 = &dup->ext; + *elem_priv = &dup->priv; return -EEXIST; } @@ -1197,13 +1238,13 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set, if (PTR_ERR(dup) == -ENOENT) { /* Look for partially overlapping entries */ - dup = pipapo_get(net, set, end, nft_genmask_next(net)); + dup = pipapo_get(net, set, end, nft_genmask_next(net), tstamp); } if (PTR_ERR(dup) != -ENOENT) { if (IS_ERR(dup)) return PTR_ERR(dup); - *ext2 = &dup->ext; + *elem_priv = &dup->priv; return -ENOTEMPTY; } @@ -1263,7 +1304,8 @@ static 
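The headroom arithmetic above is self-contained enough to model: the allocation is padded by NFT_PIPAPO_ALIGN_HEADROOM, the map[] member (not the struct) is aligned, the struct pointer slides forward accordingly, and the slide is stored in align_off so the free path can recover the address the allocator actually returned. A userspace model of the same bookkeeping (constants are stand-ins):

#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>

#define MAP_ALIGN 64                    /* stands in for NFT_PIPAPO_ALIGN */
#define HEADROOM  (MAP_ALIGN - 1)       /* stands in for NFT_PIPAPO_ALIGN_HEADROOM */

struct scratch {
        uint8_t  map_index;
        uint32_t align_off;             /* distance back to the malloc'd address */
        unsigned long map[];
};

static struct scratch *scratch_alloc(size_t words)
{
        void *mem = calloc(1, sizeof(struct scratch) +
                              words * sizeof(unsigned long) + HEADROOM);
        uintptr_t map;
        struct scratch *s;

        if (!mem)
                return NULL;
        /* align &s->map, then step back to where the struct must start */
        map = ((uintptr_t)mem + offsetof(struct scratch, map) + MAP_ALIGN - 1)
              & ~(uintptr_t)(MAP_ALIGN - 1);
        s = (struct scratch *)(map - offsetof(struct scratch, map));
        s->align_off = (uint32_t)((uintptr_t)s - (uintptr_t)mem);
        return s;
}

static void scratch_free(struct scratch *s)     /* cf. pipapo_free_scratch() */
{
        if (s)
                free((char *)s - s->align_off);
}

int main(void)
{
        struct scratch *s = scratch_alloc(8);

        assert(s && ((uintptr_t)s->map % MAP_ALIGN) == 0);
        scratch_free(s);
        return 0;
}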
int nft_pipapo_insert(const struct net *net, const struct nft_set *set, put_cpu_ptr(m->scratch); } - *ext2 = &e->ext; + e = nft_elem_priv_cast(elem->priv); + *elem_priv = &e->priv; pipapo_map(m, rulemap, e); @@ -1293,11 +1335,6 @@ static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old) if (!new->scratch) goto out_scratch; -#ifdef NFT_PIPAPO_ALIGN - new->scratch_aligned = alloc_percpu(*new->scratch_aligned); - if (!new->scratch_aligned) - goto out_scratch; -#endif for_each_possible_cpu(i) *per_cpu_ptr(new->scratch, i) = NULL; @@ -1349,10 +1386,7 @@ out_lt: } out_scratch_realloc: for_each_possible_cpu(i) - kfree(*per_cpu_ptr(new->scratch, i)); -#ifdef NFT_PIPAPO_ALIGN - free_percpu(new->scratch_aligned); -#endif + pipapo_free_scratch(new, i); out_scratch: free_percpu(new->scratch); kfree(new); @@ -1540,23 +1574,19 @@ static void nft_pipapo_gc_deactivate(struct net *net, struct nft_set *set, struct nft_pipapo_elem *e) { - struct nft_set_elem elem = { - .priv = e, - }; - - nft_setelem_data_deactivate(net, set, &elem); + nft_setelem_data_deactivate(net, set, &e->priv); } /** * pipapo_gc() - Drop expired entries from set, destroy start and end elements - * @_set: nftables API set representation + * @set: nftables API set representation * @m: Matching data */ -static void pipapo_gc(const struct nft_set *_set, struct nft_pipapo_match *m) +static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m) { - struct nft_set *set = (struct nft_set *) _set; struct nft_pipapo *priv = nft_set_priv(set); struct net *net = read_pnet(&set->net); + u64 tstamp = nft_net_tstamp(net); int rules_f0, first_rule = 0; struct nft_pipapo_elem *e; struct nft_trans_gc *gc; @@ -1591,7 +1621,7 @@ static void pipapo_gc(const struct nft_set *_set, struct nft_pipapo_match *m) /* synchronous gc never fails, there is no need to set on * NFT_SET_ELEM_DEAD_BIT. */ - if (nft_set_elem_expired(&e->ext)) { + if (__nft_set_elem_expired(&e->ext, tstamp)) { priv->dirty = true; gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC); @@ -1637,13 +1667,9 @@ static void pipapo_free_match(struct nft_pipapo_match *m) int i; for_each_possible_cpu(i) - kfree(*per_cpu_ptr(m->scratch, i)); + pipapo_free_scratch(m, i); -#ifdef NFT_PIPAPO_ALIGN - free_percpu(m->scratch_aligned); -#endif free_percpu(m->scratch); - pipapo_free_fields(m); kfree(m); @@ -1672,7 +1698,7 @@ static void pipapo_reclaim_match(struct rcu_head *rcu) * We also need to create a new working copy for subsequent insertions and * deletions. */ -static void nft_pipapo_commit(const struct nft_set *set) +static void nft_pipapo_commit(struct nft_set *set) { struct nft_pipapo *priv = nft_set_priv(set); struct nft_pipapo_match *new_clone, *old; @@ -1732,7 +1758,7 @@ static void nft_pipapo_abort(const struct nft_set *set) * nft_pipapo_activate() - Mark element reference as active given key, commit * @net: Network namespace * @set: nftables API set representation - * @elem: nftables API element representation containing key data + * @elem_priv: nftables API element representation containing key data * * On insertion, elements are added to a copy of the matching data currently * in use for lookups, and not directly inserted into current lookup data. 
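The tstamp argument threaded through pipapo_get() and the expiry checks exists so that one transaction makes all of its expiry decisions against a single time snapshot instead of re-reading the clock midway. A hedged sketch of the idea; the names below stand in for __nft_set_elem_expired() and nft_net_tstamp() and are not the kernel's:

#include <stdbool.h>
#include <stdint.h>

/* wrap-safe "a >= b" on a free-running 64-bit clock */
static bool time_after_eq64(uint64_t a, uint64_t b)
{
        return (int64_t)(a - b) >= 0;
}

struct elem { uint64_t expires; };

static bool elem_expired(const struct elem *e, uint64_t tstamp)
{
        return time_after_eq64(tstamp, e->expires);
}

/* one snapshot taken at transaction start governs every check in it */
static int count_live(const struct elem *elems, int n, uint64_t tstamp)
{
        int live = 0;

        for (int i = 0; i < n; i++)
                if (!elem_expired(&elems[i], tstamp))
                        live++;
        return live;
}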
Both @@ -1741,9 +1767,9 @@ static void nft_pipapo_abort(const struct nft_set *set) */ static void nft_pipapo_activate(const struct net *net, const struct nft_set *set, - const struct nft_set_elem *elem) + struct nft_elem_priv *elem_priv) { - struct nft_pipapo_elem *e = elem->priv; + struct nft_pipapo_elem *e = nft_elem_priv_cast(elem_priv); nft_set_elem_change_active(net, set, &e->ext); } @@ -1766,7 +1792,7 @@ static void *pipapo_deactivate(const struct net *net, const struct nft_set *set, { struct nft_pipapo_elem *e; - e = pipapo_get(net, set, data, nft_genmask_next(net)); + e = pipapo_get(net, set, data, nft_genmask_next(net), nft_net_tstamp(net)); if (IS_ERR(e)) return NULL; @@ -1783,9 +1809,9 @@ static void *pipapo_deactivate(const struct net *net, const struct nft_set *set, * * Return: deactivated element if found, NULL otherwise. */ -static void *nft_pipapo_deactivate(const struct net *net, - const struct nft_set *set, - const struct nft_set_elem *elem) +static struct nft_elem_priv * +nft_pipapo_deactivate(const struct net *net, const struct nft_set *set, + const struct nft_set_elem *elem) { const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); @@ -1796,7 +1822,7 @@ static void *nft_pipapo_deactivate(const struct net *net, * nft_pipapo_flush() - Call pipapo_deactivate() to make element inactive * @net: Network namespace * @set: nftables API set representation - * @elem: nftables API element representation containing key data + * @elem_priv: nftables API element representation containing key data * * This is functionally the same as nft_pipapo_deactivate(), with a slightly * different interface, and it's also called once for each element in a set @@ -1810,13 +1836,12 @@ static void *nft_pipapo_deactivate(const struct net *net, * * Return: true if element was found and deactivated. */ -static bool nft_pipapo_flush(const struct net *net, const struct nft_set *set, - void *elem) +static void nft_pipapo_flush(const struct net *net, const struct nft_set *set, + struct nft_elem_priv *elem_priv) { - struct nft_pipapo_elem *e = elem; + struct nft_pipapo_elem *e = nft_elem_priv_cast(elem_priv); - return pipapo_deactivate(net, set, (const u8 *)nft_set_ext_key(&e->ext), - &e->ext); + nft_set_elem_change_active(net, set, &e->ext); } /** @@ -1939,7 +1964,7 @@ static bool pipapo_match_field(struct nft_pipapo_field *f, * nft_pipapo_remove() - Remove element given key, commit * @net: Network namespace * @set: nftables API set representation - * @elem: nftables API element representation containing key data + * @elem_priv: nftables API element representation containing key data * * Similarly to nft_pipapo_activate(), this is used as commit operation by the * API, but it's called once per element in the pending transaction, so we can't @@ -1947,14 +1972,15 @@ static bool pipapo_match_field(struct nft_pipapo_field *f, * the matched element here, if any, and commit the updated matching data. 
*/ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set, - const struct nft_set_elem *elem) + struct nft_elem_priv *elem_priv) { struct nft_pipapo *priv = nft_set_priv(set); struct nft_pipapo_match *m = priv->clone; - struct nft_pipapo_elem *e = elem->priv; int rules_f0, first_rule = 0; + struct nft_pipapo_elem *e; const u8 *data; + e = nft_elem_priv_cast(elem_priv); data = (const u8 *)nft_set_ext_key(&e->ext); while ((rules_f0 = pipapo_rules_same_key(m->f, first_rule))) { @@ -2031,7 +2057,6 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set, for (r = 0; r < f->rules; r++) { struct nft_pipapo_elem *e; - struct nft_set_elem elem; if (r < f->rules - 1 && f->mt[r + 1].e == f->mt[r].e) continue; @@ -2044,9 +2069,7 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set, if (!nft_set_elem_active(&e->ext, iter->genmask)) goto cont; - elem.priv = e; - - iter->err = iter->fn(ctx, set, iter, &elem); + iter->err = iter->fn(ctx, set, iter, &e->priv); if (iter->err < 0) goto out; @@ -2118,6 +2141,8 @@ static int nft_pipapo_init(const struct nft_set *set, struct nft_pipapo_field *f; int err, i, field_count; + BUILD_BUG_ON(offsetof(struct nft_pipapo_elem, priv) != 0); + field_count = desc->field_count ? : 1; if (field_count > NFT_PIPAPO_MAX_FIELDS) @@ -2130,7 +2155,7 @@ static int nft_pipapo_init(const struct nft_set *set, m->field_count = field_count; m->bsize_max = 0; - m->scratch = alloc_percpu(unsigned long *); + m->scratch = alloc_percpu(struct nft_pipapo_scratch *); if (!m->scratch) { err = -ENOMEM; goto out_scratch; @@ -2138,16 +2163,6 @@ static int nft_pipapo_init(const struct nft_set *set, for_each_possible_cpu(i) *per_cpu_ptr(m->scratch, i) = NULL; -#ifdef NFT_PIPAPO_ALIGN - m->scratch_aligned = alloc_percpu(unsigned long *); - if (!m->scratch_aligned) { - err = -ENOMEM; - goto out_free; - } - for_each_possible_cpu(i) - *per_cpu_ptr(m->scratch_aligned, i) = NULL; -#endif - rcu_head_init(&m->rcu); nft_pipapo_for_each_field(f, i, m) { @@ -2178,9 +2193,6 @@ static int nft_pipapo_init(const struct nft_set *set, return 0; out_free: -#ifdef NFT_PIPAPO_ALIGN - free_percpu(m->scratch_aligned); -#endif free_percpu(m->scratch); out_scratch: kfree(m); @@ -2212,7 +2224,7 @@ static void nft_set_pipapo_match_destroy(const struct nft_ctx *ctx, e = f->mt[r].e; - nf_tables_set_elem_destroy(ctx, set, e); + nf_tables_set_elem_destroy(ctx, set, &e->priv); } } @@ -2234,11 +2246,8 @@ static void nft_pipapo_destroy(const struct nft_ctx *ctx, nft_set_pipapo_match_destroy(ctx, set, m); -#ifdef NFT_PIPAPO_ALIGN - free_percpu(m->scratch_aligned); -#endif for_each_possible_cpu(cpu) - kfree(*per_cpu_ptr(m->scratch, cpu)); + pipapo_free_scratch(m, cpu); free_percpu(m->scratch); pipapo_free_fields(m); kfree(m); @@ -2251,11 +2260,8 @@ static void nft_pipapo_destroy(const struct nft_ctx *ctx, if (priv->dirty) nft_set_pipapo_match_destroy(ctx, set, m); -#ifdef NFT_PIPAPO_ALIGN - free_percpu(priv->clone->scratch_aligned); -#endif for_each_possible_cpu(cpu) - kfree(*per_cpu_ptr(priv->clone->scratch, cpu)); + pipapo_free_scratch(priv->clone, cpu); free_percpu(priv->clone->scratch); pipapo_free_fields(priv->clone); diff --git a/net/netfilter/nft_set_pipapo.h b/net/netfilter/nft_set_pipapo.h index 2e164a3199..f59a0cd811 100644 --- a/net/netfilter/nft_set_pipapo.h +++ b/net/netfilter/nft_set_pipapo.h @@ -131,20 +131,28 @@ struct nft_pipapo_field { }; /** + * struct nft_pipapo_scratch - percpu data used for lookup and matching + * @map_index: Current working 
bitmap index, toggled between field matches + * @align_off: Offset to get the originally allocated address + * @map: store partial matching results during lookup + */ +struct nft_pipapo_scratch { + u8 map_index; + u32 align_off; + unsigned long map[]; +}; + +/** * struct nft_pipapo_match - Data used for lookup and matching * @field_count Amount of fields in set * @scratch: Preallocated per-CPU maps for partial matching results - * @scratch_aligned: Version of @scratch aligned to NFT_PIPAPO_ALIGN bytes * @bsize_max: Maximum lookup table bucket size of all fields, in longs * @rcu Matching data is swapped on commits * @f: Fields, with lookup and mapping tables */ struct nft_pipapo_match { int field_count; -#ifdef NFT_PIPAPO_ALIGN - unsigned long * __percpu *scratch_aligned; -#endif - unsigned long * __percpu *scratch; + struct nft_pipapo_scratch * __percpu *scratch; size_t bsize_max; struct rcu_head rcu; struct nft_pipapo_field f[] __counted_by(field_count); @@ -170,10 +178,12 @@ struct nft_pipapo_elem; /** * struct nft_pipapo_elem - API-facing representation of single set element + * @priv: element placeholder * @ext: nftables API extensions */ struct nft_pipapo_elem { - struct nft_set_ext ext; + struct nft_elem_priv priv; + struct nft_set_ext ext; }; int pipapo_refill(unsigned long *map, int len, int rules, unsigned long *dst, diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c index 52e0d026d3..a3a8ddca99 100644 --- a/net/netfilter/nft_set_pipapo_avx2.c +++ b/net/netfilter/nft_set_pipapo_avx2.c @@ -57,7 +57,7 @@ /* Jump to label if @reg is zero */ #define NFT_PIPAPO_AVX2_NOMATCH_GOTO(reg, label) \ - asm_volatile_goto("vptest %%ymm" #reg ", %%ymm" #reg ";" \ + asm goto("vptest %%ymm" #reg ", %%ymm" #reg ";" \ "je %l[" #label "]" : : : : label) /* Store 256 bits from YMM register into memory. Contrary to bucket load @@ -71,9 +71,6 @@ #define NFT_PIPAPO_AVX2_ZERO(reg) \ asm volatile("vpxor %ymm" #reg ", %ymm" #reg ", %ymm" #reg) -/* Current working bitmap index, toggled between field matches */ -static DEFINE_PER_CPU(bool, nft_pipapo_avx2_scratch_index); - /** * nft_pipapo_avx2_prepare() - Prepare before main algorithm body * @@ -1120,11 +1117,12 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set, const u32 *key, const struct nft_set_ext **ext) { struct nft_pipapo *priv = nft_set_priv(set); - unsigned long *res, *fill, *scratch; + struct nft_pipapo_scratch *scratch; u8 genmask = nft_genmask_cur(net); const u8 *rp = (const u8 *)key; struct nft_pipapo_match *m; struct nft_pipapo_field *f; + unsigned long *res, *fill; bool map_index; int i, ret = 0; @@ -1141,15 +1139,16 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set, */ kernel_fpu_begin_mask(0); - scratch = *raw_cpu_ptr(m->scratch_aligned); + scratch = *raw_cpu_ptr(m->scratch); if (unlikely(!scratch)) { kernel_fpu_end(); return false; } - map_index = raw_cpu_read(nft_pipapo_avx2_scratch_index); - res = scratch + (map_index ? m->bsize_max : 0); - fill = scratch + (map_index ? 0 : m->bsize_max); + map_index = scratch->map_index; + + res = scratch->map + (map_index ? m->bsize_max : 0); + fill = scratch->map + (map_index ? 
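The asm_volatile_goto → asm goto change above is spelling only: the construct lets inline assembly branch straight to a C label, which is how the NOMATCH macro bails out of the AVX2 match loop. A minimal standalone example of the construct (x86-64, GCC/Clang; this is not the kernel macro itself):

#include <stdio.h>

static int nonzero(unsigned long x)
{
        asm goto("test %0, %0\n\t"
                 "je %l[was_zero]"
                 : /* no outputs */
                 : "r"(x)
                 : "cc"
                 : was_zero);
        return 1;
was_zero:
        return 0;
}

int main(void)
{
        printf("%d %d\n", nonzero(7), nonzero(0));      /* prints "1 0" */
        return 0;
}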
0 : m->bsize_max); /* Starting map doesn't need to be set for this implementation */ @@ -1221,7 +1220,7 @@ next_match: out: if (i % 2) - raw_cpu_write(nft_pipapo_avx2_scratch_index, !map_index); + scratch->map_index = !map_index; kernel_fpu_end(); return ret >= 0; diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index e34662f4a7..9944fe479e 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -19,10 +19,11 @@ struct nft_rbtree { struct rb_root root; rwlock_t lock; seqcount_rwlock_t count; - struct delayed_work gc_work; + unsigned long last_gc; }; struct nft_rbtree_elem { + struct nft_elem_priv priv; struct rb_node node; struct nft_set_ext ext; }; @@ -48,8 +49,7 @@ static int nft_rbtree_cmp(const struct nft_set *set, static bool nft_rbtree_elem_expired(const struct nft_rbtree_elem *rbe) { - return nft_set_elem_expired(&rbe->ext) || - nft_set_elem_is_dead(&rbe->ext); + return nft_set_elem_expired(&rbe->ext); } static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set, @@ -197,8 +197,9 @@ static bool __nft_rbtree_get(const struct net *net, const struct nft_set *set, return false; } -static void *nft_rbtree_get(const struct net *net, const struct nft_set *set, - const struct nft_set_elem *elem, unsigned int flags) +static struct nft_elem_priv * +nft_rbtree_get(const struct net *net, const struct nft_set *set, + const struct nft_set_elem *elem, unsigned int flags) { struct nft_rbtree *priv = nft_set_priv(set); unsigned int seq = read_seqcount_begin(&priv->count); @@ -209,33 +210,31 @@ static void *nft_rbtree_get(const struct net *net, const struct nft_set *set, ret = __nft_rbtree_get(net, set, key, &rbe, seq, flags, genmask); if (ret || !read_seqcount_retry(&priv->count, seq)) - return rbe; + return &rbe->priv; read_lock_bh(&priv->lock); seq = read_seqcount_begin(&priv->count); ret = __nft_rbtree_get(net, set, key, &rbe, seq, flags, genmask); - if (!ret) - rbe = ERR_PTR(-ENOENT); read_unlock_bh(&priv->lock); - return rbe; + if (!ret) + return ERR_PTR(-ENOENT); + + return &rbe->priv; } -static void nft_rbtree_gc_remove(struct net *net, struct nft_set *set, - struct nft_rbtree *priv, - struct nft_rbtree_elem *rbe) +static void nft_rbtree_gc_elem_remove(struct net *net, struct nft_set *set, + struct nft_rbtree *priv, + struct nft_rbtree_elem *rbe) { - struct nft_set_elem elem = { - .priv = rbe, - }; - - nft_setelem_data_deactivate(net, set, &elem); + lockdep_assert_held_write(&priv->lock); + nft_setelem_data_deactivate(net, set, &rbe->priv); rb_erase(&rbe->node, &priv->root); } static const struct nft_rbtree_elem * nft_rbtree_gc_elem(const struct nft_set *__set, struct nft_rbtree *priv, - struct nft_rbtree_elem *rbe, u8 genmask) + struct nft_rbtree_elem *rbe) { struct nft_set *set = (struct nft_set *)__set; struct rb_node *prev = rb_prev(&rbe->node); @@ -254,7 +253,7 @@ nft_rbtree_gc_elem(const struct nft_set *__set, struct nft_rbtree *priv, while (prev) { rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node); if (nft_rbtree_interval_end(rbe_prev) && - nft_set_elem_active(&rbe_prev->ext, genmask)) + nft_set_elem_active(&rbe_prev->ext, NFT_GENMASK_ANY)) break; prev = rb_prev(prev); @@ -263,7 +262,7 @@ nft_rbtree_gc_elem(const struct nft_set *__set, struct nft_rbtree *priv, rbe_prev = NULL; if (prev) { rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node); - nft_rbtree_gc_remove(net, set, priv, rbe_prev); + nft_rbtree_gc_elem_remove(net, set, priv, rbe_prev); /* There is always room in this trans gc for this 
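nft_rbtree_get() above is the usual seqcount shape: attempt a lockless read, and only take the read lock when the sequence count says a writer interfered. A much-simplified userspace sketch of sequence-validated optimistic reads (single writer assumed; the memory ordering here is cruder than a real seqlock's):

#include <stdatomic.h>

static _Atomic unsigned int seq;        /* odd while a writer is mid-update */
static int shared_val;

static void writer_update(int v)
{
        atomic_fetch_add_explicit(&seq, 1, memory_order_release);       /* -> odd */
        shared_val = v;
        atomic_fetch_add_explicit(&seq, 1, memory_order_release);       /* -> even */
}

static int reader_get(void)
{
        unsigned int s;
        int v;

        do {
                s = atomic_load_explicit(&seq, memory_order_acquire);
                v = shared_val;
        } while ((s & 1) ||
                 atomic_load_explicit(&seq, memory_order_seq_cst) != s);
        return v;
}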
element, * memory allocation never actually happens, hence, the warning @@ -277,7 +276,7 @@ nft_rbtree_gc_elem(const struct nft_set *__set, struct nft_rbtree *priv, nft_trans_gc_elem_add(gc, rbe_prev); } - nft_rbtree_gc_remove(net, set, priv, rbe); + nft_rbtree_gc_elem_remove(net, set, priv, rbe); gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC); if (WARN_ON_ONCE(!gc)) return ERR_PTR(-ENOMEM); @@ -307,13 +306,14 @@ static bool nft_rbtree_update_first(const struct nft_set *set, static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, struct nft_rbtree_elem *new, - struct nft_set_ext **ext) + struct nft_elem_priv **elem_priv) { struct nft_rbtree_elem *rbe, *rbe_le = NULL, *rbe_ge = NULL; struct rb_node *node, *next, *parent, **p, *first = NULL; struct nft_rbtree *priv = nft_set_priv(set); u8 cur_genmask = nft_genmask_cur(net); u8 genmask = nft_genmask_next(net); + u64 tstamp = nft_net_tstamp(net); int d; /* Descend the tree to search for an existing element greater than the @@ -361,11 +361,11 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, /* perform garbage collection to avoid bogus overlap reports * but skip new elements in this transaction. */ - if (nft_set_elem_expired(&rbe->ext) && + if (__nft_set_elem_expired(&rbe->ext, tstamp) && nft_set_elem_active(&rbe->ext, cur_genmask)) { const struct nft_rbtree_elem *removed_end; - removed_end = nft_rbtree_gc_elem(set, priv, rbe, genmask); + removed_end = nft_rbtree_gc_elem(set, priv, rbe); if (IS_ERR(removed_end)) return PTR_ERR(removed_end); @@ -424,7 +424,7 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, */ if (rbe_ge && !nft_rbtree_cmp(set, new, rbe_ge) && nft_rbtree_interval_start(rbe_ge) == nft_rbtree_interval_start(new)) { - *ext = &rbe_ge->ext; + *elem_priv = &rbe_ge->priv; return -EEXIST; } @@ -433,7 +433,7 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, */ if (rbe_le && !nft_rbtree_cmp(set, new, rbe_le) && nft_rbtree_interval_end(rbe_le) == nft_rbtree_interval_end(new)) { - *ext = &rbe_le->ext; + *elem_priv = &rbe_le->priv; return -EEXIST; } @@ -485,10 +485,10 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, static int nft_rbtree_insert(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem, - struct nft_set_ext **ext) + struct nft_elem_priv **elem_priv) { + struct nft_rbtree_elem *rbe = nft_elem_priv_cast(elem->priv); struct nft_rbtree *priv = nft_set_priv(set); - struct nft_rbtree_elem *rbe = elem->priv; int err; do { @@ -499,7 +499,7 @@ static int nft_rbtree_insert(const struct net *net, const struct nft_set *set, write_lock_bh(&priv->lock); write_seqcount_begin(&priv->count); - err = __nft_rbtree_insert(net, set, rbe, ext); + err = __nft_rbtree_insert(net, set, rbe, elem_priv); write_seqcount_end(&priv->count); write_unlock_bh(&priv->lock); } while (err == -EAGAIN); @@ -507,13 +507,8 @@ static int nft_rbtree_insert(const struct net *net, const struct nft_set *set, return err; } -static void nft_rbtree_remove(const struct net *net, - const struct nft_set *set, - const struct nft_set_elem *elem) +static void nft_rbtree_erase(struct nft_rbtree *priv, struct nft_rbtree_elem *rbe) { - struct nft_rbtree *priv = nft_set_priv(set); - struct nft_rbtree_elem *rbe = elem->priv; - write_lock_bh(&priv->lock); write_seqcount_begin(&priv->count); rb_erase(&rbe->node, &priv->root); @@ -521,33 +516,43 @@ static void nft_rbtree_remove(const struct net *net, 
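nft_rbtree_insert() above loops on -EAGAIN: when the descent had to prune expired overlapping nodes, the whole insert restarts rather than continuing against a tree that changed underfoot. The generic shape of that retry, as a fragment (types and the retry trigger are invented):

#include <errno.h>
#include <pthread.h>

struct tree { pthread_rwlock_t lock; int pruned; };

static int try_insert(struct tree *t)
{
        if (t->pruned) {                /* e.g. expired overlaps were removed */
                t->pruned = 0;
                return -EAGAIN;         /* redo the descent from scratch */
        }
        /* ... normal insertion ... */
        return 0;
}

static int insert(struct tree *t)
{
        int err;

        do {
                pthread_rwlock_wrlock(&t->lock);
                err = try_insert(t);
                pthread_rwlock_unlock(&t->lock);
        } while (err == -EAGAIN);
        return err;
}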
write_unlock_bh(&priv->lock); } +static void nft_rbtree_remove(const struct net *net, + const struct nft_set *set, + struct nft_elem_priv *elem_priv) +{ + struct nft_rbtree_elem *rbe = nft_elem_priv_cast(elem_priv); + struct nft_rbtree *priv = nft_set_priv(set); + + nft_rbtree_erase(priv, rbe); +} + static void nft_rbtree_activate(const struct net *net, const struct nft_set *set, - const struct nft_set_elem *elem) + struct nft_elem_priv *elem_priv) { - struct nft_rbtree_elem *rbe = elem->priv; + struct nft_rbtree_elem *rbe = nft_elem_priv_cast(elem_priv); nft_set_elem_change_active(net, set, &rbe->ext); } -static bool nft_rbtree_flush(const struct net *net, - const struct nft_set *set, void *priv) +static void nft_rbtree_flush(const struct net *net, + const struct nft_set *set, + struct nft_elem_priv *elem_priv) { - struct nft_rbtree_elem *rbe = priv; + struct nft_rbtree_elem *rbe = nft_elem_priv_cast(elem_priv); nft_set_elem_change_active(net, set, &rbe->ext); - - return true; } -static void *nft_rbtree_deactivate(const struct net *net, - const struct nft_set *set, - const struct nft_set_elem *elem) +static struct nft_elem_priv * +nft_rbtree_deactivate(const struct net *net, const struct nft_set *set, + const struct nft_set_elem *elem) { + struct nft_rbtree_elem *rbe, *this = nft_elem_priv_cast(elem->priv); const struct nft_rbtree *priv = nft_set_priv(set); const struct rb_node *parent = priv->root.rb_node; - struct nft_rbtree_elem *rbe, *this = elem->priv; u8 genmask = nft_genmask_next(net); + u64 tstamp = nft_net_tstamp(net); int d; while (parent != NULL) { @@ -568,14 +573,14 @@ static void *nft_rbtree_deactivate(const struct net *net, nft_rbtree_interval_end(this)) { parent = parent->rb_right; continue; - } else if (nft_set_elem_expired(&rbe->ext)) { + } else if (__nft_set_elem_expired(&rbe->ext, tstamp)) { break; } else if (!nft_set_elem_active(&rbe->ext, genmask)) { parent = parent->rb_left; continue; } - nft_rbtree_flush(net, set, rbe); - return rbe; + nft_rbtree_flush(net, set, &rbe->priv); + return &rbe->priv; } } return NULL; @@ -587,7 +592,6 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx, { struct nft_rbtree *priv = nft_set_priv(set); struct nft_rbtree_elem *rbe; - struct nft_set_elem elem; struct rb_node *node; read_lock_bh(&priv->lock); @@ -599,9 +603,7 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx, if (!nft_set_elem_active(&rbe->ext, iter->genmask)) goto cont; - elem.priv = rbe; - - iter->err = iter->fn(ctx, set, iter, &elem); + iter->err = iter->fn(ctx, set, iter, &rbe->priv); if (iter->err < 0) { read_unlock_bh(&priv->lock); return; @@ -612,45 +614,35 @@ cont: read_unlock_bh(&priv->lock); } -static void nft_rbtree_gc(struct work_struct *work) +static void nft_rbtree_gc_remove(struct net *net, struct nft_set *set, + struct nft_rbtree *priv, + struct nft_rbtree_elem *rbe) +{ + nft_setelem_data_deactivate(net, set, &rbe->priv); + nft_rbtree_erase(priv, rbe); +} + +static void nft_rbtree_gc(struct nft_set *set) { + struct nft_rbtree *priv = nft_set_priv(set); struct nft_rbtree_elem *rbe, *rbe_end = NULL; - struct nftables_pernet *nft_net; - struct nft_rbtree *priv; + struct net *net = read_pnet(&set->net); + u64 tstamp = nft_net_tstamp(net); + struct rb_node *node, *next; struct nft_trans_gc *gc; - struct rb_node *node; - struct nft_set *set; - unsigned int gc_seq; - struct net *net; - priv = container_of(work, struct nft_rbtree, gc_work.work); set = nft_set_container_of(priv); net = read_pnet(&set->net); - nft_net = nft_pernet(net); - gc_seq = 
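The synchronous nft_rbtree_gc() that replaces the worker walks with next = rb_next(node) captured before each visit, precisely so the visit may erase the current node. The same shape on a plain list, for illustration:

#include <stdlib.h>

struct node { struct node *next; int expired; };

static void gc(struct node **head)
{
        struct node **prev = head, *n, *next;

        for (n = *head; n; n = next) {
                next = n->next;                 /* grab successor before freeing */
                if (n->expired) {
                        *prev = next;           /* unlink */
                        free(n);
                        continue;
                }
                prev = &n->next;
        }
}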
READ_ONCE(nft_net->gc_seq); - if (nft_set_gc_is_pending(set)) - goto done; - - gc = nft_trans_gc_alloc(set, gc_seq, GFP_KERNEL); + gc = nft_trans_gc_alloc(set, 0, GFP_KERNEL); if (!gc) - goto done; - - read_lock_bh(&priv->lock); - for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) { + return; - /* Ruleset has been updated, try later. */ - if (READ_ONCE(nft_net->gc_seq) != gc_seq) { - nft_trans_gc_destroy(gc); - gc = NULL; - goto try_later; - } + for (node = rb_first(&priv->root); node ; node = next) { + next = rb_next(node); rbe = rb_entry(node, struct nft_rbtree_elem, node); - if (nft_set_elem_is_dead(&rbe->ext)) - goto dead_elem; - /* elements are reversed in the rbtree for historical reasons, * from highest to lowest value, that is why end element is * always visited before the start element. @@ -659,40 +651,37 @@ static void nft_rbtree_gc(struct work_struct *work) rbe_end = rbe; continue; } - if (!nft_set_elem_expired(&rbe->ext)) + if (!__nft_set_elem_expired(&rbe->ext, tstamp)) continue; - nft_set_elem_dead(&rbe->ext); - - if (!rbe_end) - continue; - - nft_set_elem_dead(&rbe_end->ext); - - gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC); + gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL); if (!gc) goto try_later; - nft_trans_gc_elem_add(gc, rbe_end); - rbe_end = NULL; -dead_elem: - gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC); + /* end element needs to be removed first, it has + * no timeout extension. + */ + if (rbe_end) { + nft_rbtree_gc_remove(net, set, priv, rbe_end); + nft_trans_gc_elem_add(gc, rbe_end); + rbe_end = NULL; + } + + gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL); if (!gc) goto try_later; + nft_rbtree_gc_remove(net, set, priv, rbe); nft_trans_gc_elem_add(gc, rbe); } - gc = nft_trans_gc_catchall_async(gc, gc_seq); - try_later: - read_unlock_bh(&priv->lock); - if (gc) - nft_trans_gc_queue_async_done(gc); -done: - queue_delayed_work(system_power_efficient_wq, &priv->gc_work, - nft_set_gc_interval(set)); + if (gc) { + gc = nft_trans_gc_catchall_sync(gc); + nft_trans_gc_queue_sync_done(gc); + priv->last_gc = jiffies; + } } static u64 nft_rbtree_privsize(const struct nlattr * const nla[], @@ -707,15 +696,12 @@ static int nft_rbtree_init(const struct nft_set *set, { struct nft_rbtree *priv = nft_set_priv(set); + BUILD_BUG_ON(offsetof(struct nft_rbtree_elem, priv) != 0); + rwlock_init(&priv->lock); seqcount_rwlock_init(&priv->count, &priv->lock); priv->root = RB_ROOT; - INIT_DEFERRABLE_WORK(&priv->gc_work, nft_rbtree_gc); - if (set->flags & NFT_SET_TIMEOUT) - queue_delayed_work(system_power_efficient_wq, &priv->gc_work, - nft_set_gc_interval(set)); - return 0; } @@ -726,12 +712,10 @@ static void nft_rbtree_destroy(const struct nft_ctx *ctx, struct nft_rbtree_elem *rbe; struct rb_node *node; - cancel_delayed_work_sync(&priv->gc_work); - rcu_barrier(); while ((node = priv->root.rb_node) != NULL) { rb_erase(node, &priv->root); rbe = rb_entry(node, struct nft_rbtree_elem, node); - nf_tables_set_elem_destroy(ctx, set, rbe); + nf_tables_set_elem_destroy(ctx, set, &rbe->priv); } } @@ -753,6 +737,21 @@ static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features, return true; } +static void nft_rbtree_commit(struct nft_set *set) +{ + struct nft_rbtree *priv = nft_set_priv(set); + + if (time_after_eq(jiffies, priv->last_gc + nft_set_gc_interval(set))) + nft_rbtree_gc(set); +} + +static void nft_rbtree_gc_init(const struct nft_set *set) +{ + struct nft_rbtree *priv = nft_set_priv(set); + + priv->last_gc = jiffies; +} + const struct 
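nft_rbtree_commit() rate-limits that synchronous GC: it only runs once jiffies has advanced past last_gc by the set's GC interval, compared with time_after_eq(), which subtracts before comparing and so stays correct across counter wrap. Sketch:

#include <stdbool.h>

/* wrap-safe jiffies comparison, as in include/linux/jiffies.h */
#define time_after_eq(a, b)     ((long)((a) - (b)) >= 0)

struct gc_state { unsigned long last_gc; };

static bool gc_due(struct gc_state *s, unsigned long now, unsigned long interval)
{
        if (!time_after_eq(now, s->last_gc + interval))
                return false;
        s->last_gc = now;       /* the kernel updates this after a successful run */
        return true;
}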
nft_set_type nft_set_rbtree_type = { .features = NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT | NFT_SET_TIMEOUT, .ops = { @@ -766,6 +765,8 @@ const struct nft_set_type nft_set_rbtree_type = { .deactivate = nft_rbtree_deactivate, .flush = nft_rbtree_flush, .activate = nft_rbtree_activate, + .commit = nft_rbtree_commit, + .gc_init = nft_rbtree_gc_init, .lookup = nft_rbtree_lookup, .walk = nft_rbtree_walk, .get = nft_rbtree_get, diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c index 9f21953c74..f735d79d8b 100644 --- a/net/netfilter/nft_tunnel.c +++ b/net/netfilter/nft_tunnel.c @@ -713,6 +713,7 @@ static const struct nft_object_ops nft_tunnel_obj_ops = { static struct nft_object_type nft_tunnel_obj_type __read_mostly = { .type = NFT_OBJECT_TUNNEL, + .family = NFPROTO_NETDEV, .ops = &nft_tunnel_obj_ops, .maxattr = NFTA_TUNNEL_KEY_MAX, .policy = nft_tunnel_key_policy, diff --git a/net/netlink/diag.c b/net/netlink/diag.c index 9c4f231be2..1eeff94228 100644 --- a/net/netlink/diag.c +++ b/net/netlink/diag.c @@ -257,5 +257,6 @@ static void __exit netlink_diag_exit(void) module_init(netlink_diag_init); module_exit(netlink_diag_exit); +MODULE_DESCRIPTION("Netlink-based socket monitoring/diagnostic interface (sock_diag)"); MODULE_LICENSE("GPL"); MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 16 /* AF_NETLINK */); diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index d41c4a936a..9c7ffd10df 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -225,7 +225,8 @@ static void genl_op_from_split(struct genl_op_iter *iter) } if (i + cnt < family->n_split_ops && - family->split_ops[i + cnt].flags & GENL_CMD_CAP_DUMP) { + family->split_ops[i + cnt].flags & GENL_CMD_CAP_DUMP && + (!cnt || family->split_ops[i + cnt].cmd == iter->doit.cmd)) { iter->dumpit = family->split_ops[i + cnt]; genl_op_fill_in_reject_policy_split(family, &iter->dumpit); cnt++; diff --git a/net/netlink/policy.c b/net/netlink/policy.c index 87e3de0fde..1f8909c16f 100644 --- a/net/netlink/policy.c +++ b/net/netlink/policy.c @@ -21,7 +21,7 @@ struct netlink_policy_dump_state { struct { const struct nla_policy *policy; unsigned int maxtype; - } policies[]; + } policies[] __counted_by(n_alloc); }; static int add_policy(struct netlink_policy_dump_state **statep, @@ -29,7 +29,7 @@ static int add_policy(struct netlink_policy_dump_state **statep, unsigned int maxtype) { struct netlink_policy_dump_state *state = *statep; - unsigned int n_alloc, i; + unsigned int old_n_alloc, n_alloc, i; if (!policy || !maxtype) return 0; @@ -52,12 +52,13 @@ static int add_policy(struct netlink_policy_dump_state **statep, if (!state) return -ENOMEM; - memset(&state->policies[state->n_alloc], 0, - flex_array_size(state, policies, n_alloc - state->n_alloc)); - - state->policies[state->n_alloc].policy = policy; - state->policies[state->n_alloc].maxtype = maxtype; + old_n_alloc = state->n_alloc; state->n_alloc = n_alloc; + memset(&state->policies[old_n_alloc], 0, + flex_array_size(state, policies, n_alloc - old_n_alloc)); + + state->policies[old_n_alloc].policy = policy; + state->policies[old_n_alloc].maxtype = maxtype; *statep = state; return 0; @@ -229,6 +230,8 @@ int netlink_policy_dump_attr_size_estimate(const struct nla_policy *pt) case NLA_S16: case NLA_S32: case NLA_S64: + case NLA_SINT: + case NLA_UINT: /* maximum is common, u64 min/max with padding */ return common + 2 * (nla_attr_size(0) + nla_attr_size(sizeof(u64))); @@ -287,6 +290,7 @@ __netlink_policy_dump_write_attr(struct 
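The add_policy() reordering above goes hand in hand with the new __counted_by(n_alloc) annotation: once the flexible array is counted by n_alloc, bounds instrumentation (FORTIFY, UBSAN) traps any access at an index at or past the recorded count, so the counter has to be raised before the freshly grown slots are touched. A userspace sketch; the attribute needs a recent GCC/Clang and is stubbed out elsewhere:

#include <stdlib.h>
#include <string.h>

#ifdef __has_attribute
# if __has_attribute(counted_by)
#  define __counted_by(m) __attribute__((counted_by(m)))
# endif
#endif
#ifndef __counted_by
# define __counted_by(m)
#endif

struct state {
        unsigned int n_alloc;
        int slots[] __counted_by(n_alloc);
};

static struct state *grow(struct state *s, unsigned int n_alloc)
{
        unsigned int old = s ? s->n_alloc : 0;

        s = realloc(s, sizeof(*s) + n_alloc * sizeof(int));
        if (!s)
                return NULL;
        s->n_alloc = n_alloc;                   /* raise the counter first ... */
        memset(&s->slots[old], 0,               /* ... then touch the new slots */
               (n_alloc - old) * sizeof(int));
        return s;
}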
netlink_policy_dump_state *state, case NLA_U16: case NLA_U32: case NLA_U64: + case NLA_UINT: case NLA_MSECS: { struct netlink_range_validation range; @@ -296,8 +300,10 @@ __netlink_policy_dump_write_attr(struct netlink_policy_dump_state *state, type = NL_ATTR_TYPE_U16; else if (pt->type == NLA_U32) type = NL_ATTR_TYPE_U32; - else + else if (pt->type == NLA_U64) type = NL_ATTR_TYPE_U64; + else + type = NL_ATTR_TYPE_UINT; if (pt->validation_type == NLA_VALIDATE_MASK) { if (nla_put_u64_64bit(skb, NL_POLICY_TYPE_ATTR_MASK, @@ -319,7 +325,8 @@ __netlink_policy_dump_write_attr(struct netlink_policy_dump_state *state, case NLA_S8: case NLA_S16: case NLA_S32: - case NLA_S64: { + case NLA_S64: + case NLA_SINT: { struct netlink_range_validation_signed range; if (pt->type == NLA_S8) @@ -328,8 +335,10 @@ __netlink_policy_dump_write_attr(struct netlink_policy_dump_state *state, type = NL_ATTR_TYPE_S16; else if (pt->type == NLA_S32) type = NL_ATTR_TYPE_S32; - else + else if (pt->type == NLA_S64) type = NL_ATTR_TYPE_S64; + else + type = NL_ATTR_TYPE_SINT; nla_get_range_signed(pt, &range); diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 96e91ab715..0eed00184a 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -487,7 +487,7 @@ static struct sock *nr_make_new(struct sock *osk) sock_init_data(NULL, sk); sk->sk_type = osk->sk_type; - sk->sk_priority = osk->sk_priority; + sk->sk_priority = READ_ONCE(osk->sk_priority); sk->sk_protocol = osk->sk_protocol; sk->sk_rcvbuf = osk->sk_rcvbuf; sk->sk_sndbuf = osk->sk_sndbuf; diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 6c9592d051..12684d835c 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -1208,6 +1208,10 @@ void nci_free_device(struct nci_dev *ndev) { nfc_free_device(ndev->nfc_dev); nci_hci_deallocate(ndev); + + /* drop partial rx data packet if present */ + if (ndev->rx_data_reassembly) + kfree_skb(ndev->rx_data_reassembly); kfree(ndev); } EXPORT_SYMBOL(nci_free_device); diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index fd66014d8a..6fcd7e2ca8 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -311,11 +311,18 @@ static int push_eth(struct sk_buff *skb, struct sw_flow_key *key, return 0; } -static int push_nsh(struct sk_buff *skb, struct sw_flow_key *key, - const struct nshhdr *nh) +static noinline_for_stack int push_nsh(struct sk_buff *skb, + struct sw_flow_key *key, + const struct nlattr *a) { + u8 buffer[NSH_HDR_MAX_LEN]; + struct nshhdr *nh = (struct nshhdr *)buffer; int err; + err = nsh_hdr_from_nlattr(a, nh, NSH_HDR_MAX_LEN); + if (err) + return err; + err = nsh_push(skb, nh); if (err) return err; @@ -873,7 +880,7 @@ static void ovs_fragment(struct net *net, struct vport *vport, prepare_frag(vport, skb, orig_network_offset, ovs_key_mac_proto(key)); - dst_init(&ovs_rt.dst, &ovs_dst_ops, NULL, 1, + dst_init(&ovs_rt.dst, &ovs_dst_ops, NULL, DST_OBSOLETE_NONE, DST_NOCOUNT); ovs_rt.dst.dev = vport->dev; @@ -890,7 +897,7 @@ static void ovs_fragment(struct net *net, struct vport *vport, prepare_frag(vport, skb, orig_network_offset, ovs_key_mac_proto(key)); memset(&ovs_rt, 0, sizeof(ovs_rt)); - dst_init(&ovs_rt.dst, &ovs_dst_ops, NULL, 1, + dst_init(&ovs_rt.dst, &ovs_dst_ops, NULL, DST_OBSOLETE_NONE, DST_NOCOUNT); ovs_rt.dst.dev = vport->dev; @@ -1439,17 +1446,9 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, err = pop_eth(skb, key); break; - case OVS_ACTION_ATTR_PUSH_NSH: { - u8 buffer[NSH_HDR_MAX_LEN]; - struct nshhdr *nh = (struct 
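push_nsh() above now owns its NSH_HDR_MAX_LEN temporary and is marked noinline_for_stack, which keeps the large buffer out of the frame of do_execute_actions(), a function that can recurse. The shape of the fix, hedged (a plain noinline attribute stands in for the kernel's noinline_for_stack, and the length is assumed):

#include <string.h>

#define HDR_MAX_LEN 256         /* stand-in for NSH_HDR_MAX_LEN */

/* keep the big temporary in this short-lived frame, not the recursive caller's */
__attribute__((noinline))
static int push_hdr(const void *attr, size_t len)
{
        unsigned char buffer[HDR_MAX_LEN];

        if (len > sizeof(buffer))
                return -1;
        memcpy(buffer, attr, len);
        /* ... validate and push the header built in buffer ... */
        return 0;
}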
nshhdr *)buffer; - - err = nsh_hdr_from_nlattr(nla_data(a), nh, - NSH_HDR_MAX_LEN); - if (unlikely(err)) - break; - err = push_nsh(skb, key, nh); + case OVS_ACTION_ATTR_PUSH_NSH: + err = push_nsh(skb, key, nla_data(a)); break; - } case OVS_ACTION_ATTR_POP_NSH: err = pop_nsh(skb, key); diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 88965e2068..ebc5728aab 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -48,6 +48,7 @@ struct ovs_len_tbl { #define OVS_ATTR_NESTED -1 #define OVS_ATTR_VARIABLE -2 +#define OVS_COPY_ACTIONS_MAX_DEPTH 16 static bool actions_may_change_flow(const struct nlattr *actions) { @@ -2545,13 +2546,15 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, const struct sw_flow_key *key, struct sw_flow_actions **sfa, __be16 eth_type, __be16 vlan_tci, - u32 mpls_label_count, bool log); + u32 mpls_label_count, bool log, + u32 depth); static int validate_and_copy_sample(struct net *net, const struct nlattr *attr, const struct sw_flow_key *key, struct sw_flow_actions **sfa, __be16 eth_type, __be16 vlan_tci, - u32 mpls_label_count, bool log, bool last) + u32 mpls_label_count, bool log, bool last, + u32 depth) { const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1]; const struct nlattr *probability, *actions; @@ -2602,7 +2605,8 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr, return err; err = __ovs_nla_copy_actions(net, actions, key, sfa, - eth_type, vlan_tci, mpls_label_count, log); + eth_type, vlan_tci, mpls_label_count, log, + depth + 1); if (err) return err; @@ -2617,7 +2621,8 @@ static int validate_and_copy_dec_ttl(struct net *net, const struct sw_flow_key *key, struct sw_flow_actions **sfa, __be16 eth_type, __be16 vlan_tci, - u32 mpls_label_count, bool log) + u32 mpls_label_count, bool log, + u32 depth) { const struct nlattr *attrs[OVS_DEC_TTL_ATTR_MAX + 1]; int start, action_start, err, rem; @@ -2660,7 +2665,8 @@ static int validate_and_copy_dec_ttl(struct net *net, return action_start; err = __ovs_nla_copy_actions(net, actions, key, sfa, eth_type, - vlan_tci, mpls_label_count, log); + vlan_tci, mpls_label_count, log, + depth + 1); if (err) return err; @@ -2674,7 +2680,8 @@ static int validate_and_copy_clone(struct net *net, const struct sw_flow_key *key, struct sw_flow_actions **sfa, __be16 eth_type, __be16 vlan_tci, - u32 mpls_label_count, bool log, bool last) + u32 mpls_label_count, bool log, bool last, + u32 depth) { int start, err; u32 exec; @@ -2694,7 +2701,8 @@ static int validate_and_copy_clone(struct net *net, return err; err = __ovs_nla_copy_actions(net, attr, key, sfa, - eth_type, vlan_tci, mpls_label_count, log); + eth_type, vlan_tci, mpls_label_count, log, + depth + 1); if (err) return err; @@ -3063,7 +3071,7 @@ static int validate_and_copy_check_pkt_len(struct net *net, struct sw_flow_actions **sfa, __be16 eth_type, __be16 vlan_tci, u32 mpls_label_count, - bool log, bool last) + bool log, bool last, u32 depth) { const struct nlattr *acts_if_greater, *acts_if_lesser_eq; struct nlattr *a[OVS_CHECK_PKT_LEN_ATTR_MAX + 1]; @@ -3111,7 +3119,8 @@ static int validate_and_copy_check_pkt_len(struct net *net, return nested_acts_start; err = __ovs_nla_copy_actions(net, acts_if_lesser_eq, key, sfa, - eth_type, vlan_tci, mpls_label_count, log); + eth_type, vlan_tci, mpls_label_count, log, + depth + 1); if (err) return err; @@ -3124,7 +3133,8 @@ static int validate_and_copy_check_pkt_len(struct net *net, return nested_acts_start; err = 
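The depth parameter added through __ovs_nla_copy_actions() and its helpers bounds recursion on crafted action lists: past OVS_COPY_ACTIONS_MAX_DEPTH, validation fails with -EOVERFLOW instead of consuming kernel stack. Minimal model of the same guard:

#define MAX_DEPTH 16    /* mirrors OVS_COPY_ACTIONS_MAX_DEPTH */

struct action { const struct action *next, *nested; };

static int copy_actions(const struct action *a, unsigned int depth)
{
        if (depth > MAX_DEPTH)
                return -75;     /* -EOVERFLOW */

        for (; a; a = a->next) {
                if (a->nested) {
                        int err = copy_actions(a->nested, depth + 1);

                        if (err)
                                return err;
                }
                /* ... validate and copy this flat action ... */
        }
        return 0;
}

/* the top level starts at depth 0, as ovs_nla_copy_actions() does */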
__ovs_nla_copy_actions(net, acts_if_greater, key, sfa, - eth_type, vlan_tci, mpls_label_count, log); + eth_type, vlan_tci, mpls_label_count, log, + depth + 1); if (err) return err; @@ -3152,12 +3162,16 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, const struct sw_flow_key *key, struct sw_flow_actions **sfa, __be16 eth_type, __be16 vlan_tci, - u32 mpls_label_count, bool log) + u32 mpls_label_count, bool log, + u32 depth) { u8 mac_proto = ovs_key_mac_proto(key); const struct nlattr *a; int rem, err; + if (depth > OVS_COPY_ACTIONS_MAX_DEPTH) + return -EOVERFLOW; + nla_for_each_nested(a, attr, rem) { /* Expected argument lengths, (u32)-1 for variable length. */ static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = { @@ -3355,7 +3369,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, err = validate_and_copy_sample(net, a, key, sfa, eth_type, vlan_tci, mpls_label_count, - log, last); + log, last, depth); if (err) return err; skip_copy = true; @@ -3426,7 +3440,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, err = validate_and_copy_clone(net, a, key, sfa, eth_type, vlan_tci, mpls_label_count, - log, last); + log, last, depth); if (err) return err; skip_copy = true; @@ -3440,7 +3454,8 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, eth_type, vlan_tci, mpls_label_count, - log, last); + log, last, + depth); if (err) return err; skip_copy = true; @@ -3450,7 +3465,8 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, case OVS_ACTION_ATTR_DEC_TTL: err = validate_and_copy_dec_ttl(net, a, key, sfa, eth_type, vlan_tci, - mpls_label_count, log); + mpls_label_count, log, + depth); if (err) return err; skip_copy = true; @@ -3495,7 +3511,8 @@ int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, (*sfa)->orig_len = nla_len(attr); err = __ovs_nla_copy_actions(net, attr, key, sfa, key->eth.type, - key->eth.vlan.tci, mpls_label_count, log); + key->eth.vlan.tci, mpls_label_count, log, + 0); if (err) ovs_nla_free_flow_actions(*sfa); diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index 4f3b1798e0..d108ae0bd0 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -220,16 +220,13 @@ static struct mask_array *tbl_mask_array_alloc(int size) struct mask_array *new; size = max(MASK_ARRAY_SIZE_MIN, size); - new = kzalloc(sizeof(struct mask_array) + - sizeof(struct sw_flow_mask *) * size + + new = kzalloc(struct_size(new, masks, size) + sizeof(u64) * size, GFP_KERNEL); if (!new) return NULL; new->masks_usage_zero_cntr = (u64 *)((u8 *)new + - sizeof(struct mask_array) + - sizeof(struct sw_flow_mask *) * - size); + struct_size(new, masks, size)); new->masks_usage_stats = __alloc_percpu(sizeof(struct mask_array_stats) + sizeof(u64) * size, diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h index 9e659db78c..f524dc3e48 100644 --- a/net/openvswitch/flow_table.h +++ b/net/openvswitch/flow_table.h @@ -48,7 +48,7 @@ struct mask_array { int count, max; struct mask_array_stats __percpu *masks_usage_stats; u64 *masks_usage_zero_cntr; - struct sw_flow_mask __rcu *masks[]; + struct sw_flow_mask __rcu *masks[] __counted_by(max); }; struct table_instance { diff --git a/net/openvswitch/meter.h b/net/openvswitch/meter.h index 0c33889a85..ed11cd12b5 100644 --- a/net/openvswitch/meter.h +++ b/net/openvswitch/meter.h @@ -39,13 +39,13 @@ struct dp_meter { u32 max_delta_t; u64 used; struct 
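tbl_mask_array_alloc() switching to struct_size() trades open-coded sizeof arithmetic for an overflow-checked header-plus-flexible-array size. Roughly what that helper computes, reconstructed in plain C (the kernel's version lives in overflow.h and saturates the same way):

#include <stddef.h>
#include <stdint.h>

struct mask_array_model {
        int count, max;
        void *masks[];          /* flexible array member */
};

static size_t mask_array_bytes(size_t n)
{
        size_t bytes;

        if (__builtin_mul_overflow(n, sizeof(void *), &bytes) ||
            __builtin_add_overflow(bytes, offsetof(struct mask_array_model, masks),
                                   &bytes))
                return SIZE_MAX;        /* saturate so the allocation fails cleanly */
        return bytes;
}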
ovs_flow_stats stats; - struct dp_meter_band bands[]; + struct dp_meter_band bands[] __counted_by(n_bands); }; struct dp_meter_instance { struct rcu_head rcu; u32 n_meters; - struct dp_meter __rcu *dp_meters[]; + struct dp_meter __rcu *dp_meters[] __counted_by(n_meters); }; struct dp_meter_table { diff --git a/net/packet/diag.c b/net/packet/diag.c index f6b200cb3c..9a7980e330 100644 --- a/net/packet/diag.c +++ b/net/packet/diag.c @@ -262,4 +262,5 @@ static void __exit packet_diag_exit(void) module_init(packet_diag_init); module_exit(packet_diag_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("PACKET socket monitoring via SOCK_DIAG"); MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 17 /* AF_PACKET */); diff --git a/net/packet/internal.h b/net/packet/internal.h index 11ba8a7867..d5d7071200 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -94,7 +94,7 @@ struct packet_fanout { spinlock_t lock; refcount_t sk_ref; struct packet_type prot_hook ____cacheline_aligned_in_smp; - struct sock __rcu *arr[]; + struct sock __rcu *arr[] __counted_by(max_num_members); }; struct packet_rollover { diff --git a/net/phonet/datagram.c b/net/phonet/datagram.c index 3aa50dc753..976fe250b5 100644 --- a/net/phonet/datagram.c +++ b/net/phonet/datagram.c @@ -34,10 +34,10 @@ static int pn_ioctl(struct sock *sk, int cmd, int *karg) switch (cmd) { case SIOCINQ: - lock_sock(sk); + spin_lock_bh(&sk->sk_receive_queue.lock); skb = skb_peek(&sk->sk_receive_queue); *karg = skb ? skb->len : 0; - release_sock(sk); + spin_unlock_bh(&sk->sk_receive_queue.lock); return 0; case SIOCPNADDRESOURCE: diff --git a/net/phonet/pep.c b/net/phonet/pep.c index faba31f2ef..3dd5f52bc1 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -917,6 +917,37 @@ static int pep_sock_enable(struct sock *sk, struct sockaddr *addr, int len) return 0; } +static unsigned int pep_first_packet_length(struct sock *sk) +{ + struct pep_sock *pn = pep_sk(sk); + struct sk_buff_head *q; + struct sk_buff *skb; + unsigned int len = 0; + bool found = false; + + if (sock_flag(sk, SOCK_URGINLINE)) { + q = &pn->ctrlreq_queue; + spin_lock_bh(&q->lock); + skb = skb_peek(q); + if (skb) { + len = skb->len; + found = true; + } + spin_unlock_bh(&q->lock); + } + + if (likely(!found)) { + q = &sk->sk_receive_queue; + spin_lock_bh(&q->lock); + skb = skb_peek(q); + if (skb) + len = skb->len; + spin_unlock_bh(&q->lock); + } + + return len; +} + static int pep_ioctl(struct sock *sk, int cmd, int *karg) { struct pep_sock *pn = pep_sk(sk); @@ -929,15 +960,7 @@ static int pep_ioctl(struct sock *sk, int cmd, int *karg) break; } - lock_sock(sk); - if (sock_flag(sk, SOCK_URGINLINE) && - !skb_queue_empty(&pn->ctrlreq_queue)) - *karg = skb_peek(&pn->ctrlreq_queue)->len; - else if (!skb_queue_empty(&sk->sk_receive_queue)) - *karg = skb_peek(&sk->sk_receive_queue)->len; - else - *karg = 0; - release_sock(sk); + *karg = pep_first_packet_length(sk); ret = 0; break; diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 42e8b9e375..ef81d019b2 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -609,7 +609,7 @@ static struct sock *rose_make_new(struct sock *osk) #endif sk->sk_type = osk->sk_type; - sk->sk_priority = osk->sk_priority; + sk->sk_priority = READ_ONCE(osk->sk_priority); sk->sk_protocol = osk->sk_protocol; sk->sk_rcvbuf = osk->sk_rcvbuf; sk->sk_sndbuf = osk->sk_sndbuf; diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index fa8aec78f6..465bfe5eb0 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -259,15 +259,61 @@ 
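The phonet SIOCINQ rework swaps lock_sock() for the receive queue's own spinlock: reading the head packet's length only needs the queue held stable, not the whole socket serialized. A userspace analogue, with a mutex standing in for the bh-safe spinlock:

#include <pthread.h>
#include <stddef.h>

struct pkt { struct pkt *next; size_t len; };

struct queue {
        pthread_mutex_t lock;
        struct pkt *head;
};

/* SIOCINQ-style peek: returns 0 when the queue is empty */
static size_t first_packet_length(struct queue *q)
{
        size_t len = 0;

        pthread_mutex_lock(&q->lock);
        if (q->head)
                len = q->head->len;
        pthread_mutex_unlock(&q->lock);
        return len;
}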
static int rxrpc_listen(struct socket *sock, int backlog) } /** + * rxrpc_kernel_lookup_peer - Obtain remote transport endpoint for an address + * @sock: The socket through which it will be accessed + * @srx: The network address + * @gfp: Allocation flags + * + * Lookup or create a remote transport endpoint record for the specified + * address and return it with a ref held. + */ +struct rxrpc_peer *rxrpc_kernel_lookup_peer(struct socket *sock, + struct sockaddr_rxrpc *srx, gfp_t gfp) +{ + struct rxrpc_sock *rx = rxrpc_sk(sock->sk); + int ret; + + ret = rxrpc_validate_address(rx, srx, sizeof(*srx)); + if (ret < 0) + return ERR_PTR(ret); + + return rxrpc_lookup_peer(rx->local, srx, gfp); +} +EXPORT_SYMBOL(rxrpc_kernel_lookup_peer); + +/** + * rxrpc_kernel_get_peer - Get a reference on a peer + * @peer: The peer to get a reference on. + * + * Get a record for the remote peer in a call. + */ +struct rxrpc_peer *rxrpc_kernel_get_peer(struct rxrpc_peer *peer) +{ + return peer ? rxrpc_get_peer(peer, rxrpc_peer_get_application) : NULL; +} +EXPORT_SYMBOL(rxrpc_kernel_get_peer); + +/** + * rxrpc_kernel_put_peer - Allow a kernel app to drop a peer reference + * @peer: The peer to drop a ref on + */ +void rxrpc_kernel_put_peer(struct rxrpc_peer *peer) +{ + rxrpc_put_peer(peer, rxrpc_peer_put_application); +} +EXPORT_SYMBOL(rxrpc_kernel_put_peer); + +/** * rxrpc_kernel_begin_call - Allow a kernel service to begin a call * @sock: The socket on which to make the call - * @srx: The address of the peer to contact + * @peer: The peer to contact * @key: The security context to use (defaults to socket setting) * @user_call_ID: The ID to use * @tx_total_len: Total length of data to transmit during the call (or -1) * @hard_timeout: The maximum lifespan of the call in sec * @gfp: The allocation constraints * @notify_rx: Where to send notifications instead of socket queue + * @service_id: The ID of the service to contact * @upgrade: Request service upgrade for call * @interruptibility: The call is interruptible, or can be canceled. * @debug_id: The debug ID for tracing to be assigned to the call @@ -280,13 +326,14 @@ static int rxrpc_listen(struct socket *sock, int backlog) * supplying @srx and @key. */ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock, - struct sockaddr_rxrpc *srx, + struct rxrpc_peer *peer, struct key *key, unsigned long user_call_ID, s64 tx_total_len, u32 hard_timeout, gfp_t gfp, rxrpc_notify_rx_t notify_rx, + u16 service_id, bool upgrade, enum rxrpc_interruptibility interruptibility, unsigned int debug_id) @@ -295,13 +342,11 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock, struct rxrpc_call_params p; struct rxrpc_call *call; struct rxrpc_sock *rx = rxrpc_sk(sock->sk); - int ret; _enter(",,%x,%lx", key_serial(key), user_call_ID); - ret = rxrpc_validate_address(rx, srx, sizeof(*srx)); - if (ret < 0) - return ERR_PTR(ret); + if (WARN_ON_ONCE(peer->local != rx->local)) + return ERR_PTR(-EIO); lock_sock(&rx->sk); @@ -319,12 +364,13 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock, memset(&cp, 0, sizeof(cp)); cp.local = rx->local; + cp.peer = peer; cp.key = key; cp.security_level = rx->min_sec_level; cp.exclusive = false; cp.upgrade = upgrade; - cp.service_id = srx->srx_service; - call = rxrpc_new_client_call(rx, &cp, srx, &p, gfp, debug_id); + cp.service_id = service_id; + call = rxrpc_new_client_call(rx, &cp, &p, gfp, debug_id); /* The socket has been unlocked. 
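The new rxrpc peer interface above is a classic lookup/get/put refcount trio: lookup returns with a reference held, get is NULL-safe, put may be the final release. A toy model of the contract (free() stands in for the kernel's deferred teardown, and the names are illustrative):

#include <stdatomic.h>
#include <stdlib.h>

struct peer {
        _Atomic int ref;
        /* ... transport address, RTT state ... */
};

static struct peer *peer_lookup(void)           /* returns with a ref held */
{
        struct peer *p = calloc(1, sizeof(*p));

        if (p)
                atomic_store(&p->ref, 1);
        return p;
}

static struct peer *peer_get(struct peer *p)    /* NULL-safe, like the kernel's */
{
        if (p)
                atomic_fetch_add(&p->ref, 1);
        return p;
}

static void peer_put(struct peer *p)
{
        if (p && atomic_fetch_sub(&p->ref, 1) == 1)
                free(p);        /* last reference dropped */
}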
*/ if (!IS_ERR(call)) { call->notify_rx = notify_rx; diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index e8b4340813..027414dafe 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -198,11 +198,19 @@ struct rxrpc_host_header { */ struct rxrpc_skb_priv { struct rxrpc_connection *conn; /* Connection referred to (poke packet) */ - u16 offset; /* Offset of data */ - u16 len; /* Length of data */ - u8 flags; + union { + struct { + u16 offset; /* Offset of data */ + u16 len; /* Length of data */ + u8 flags; #define RXRPC_RX_VERIFIED 0x01 - + }; + struct { + rxrpc_seq_t first_ack; /* First packet in acks table */ + u8 nr_acks; /* Number of acks+nacks */ + u8 nr_nacks; /* Number of nacks */ + }; + }; struct rxrpc_host_header hdr; /* RxRPC packet header from this packet */ }; @@ -364,6 +372,7 @@ struct rxrpc_conn_proto { struct rxrpc_conn_parameters { struct rxrpc_local *local; /* Representation of local endpoint */ + struct rxrpc_peer *peer; /* Representation of remote endpoint */ struct key *key; /* Security details */ bool exclusive; /* T if conn is exclusive */ bool upgrade; /* T if service ID can be upgraded */ @@ -506,7 +515,7 @@ struct rxrpc_connection { enum rxrpc_call_completion completion; /* Completion condition */ s32 abort_code; /* Abort code of connection abort */ int debug_id; /* debug ID for printks */ - atomic_t serial; /* packet serial number counter */ + rxrpc_serial_t tx_serial; /* Outgoing packet serial number counter */ unsigned int hi_serial; /* highest serial number received */ u32 service_id; /* Service ID, possibly upgraded */ u32 security_level; /* Security level selected */ @@ -688,11 +697,11 @@ struct rxrpc_call { u8 cong_dup_acks; /* Count of ACKs showing missing packets */ u8 cong_cumul_acks; /* Cumulative ACK count */ ktime_t cong_tstamp; /* Last time cwnd was changed */ + struct sk_buff *cong_last_nack; /* Last ACK with nacks received */ /* Receive-phase ACK management (ACKs we send). */ u8 ackr_reason; /* reason to ACK */ u16 ackr_sack_base; /* Starting slot in SACK table ring */ - rxrpc_serial_t ackr_serial; /* serial of packet being ACK'd */ rxrpc_seq_t ackr_window; /* Base of SACK window */ rxrpc_seq_t ackr_wtop; /* Base of SACK window */ unsigned int ackr_nr_unacked; /* Number of unacked packets */ @@ -726,7 +735,8 @@ struct rxrpc_call { struct rxrpc_ack_summary { u16 nr_acks; /* Number of ACKs in packet */ u16 nr_new_acks; /* Number of new ACKs in packet */ - u16 nr_rot_new_acks; /* Number of rotated new ACKs */ + u16 nr_new_nacks; /* Number of new nacks in packet */ + u16 nr_retained_nacks; /* Number of nacks retained between ACKs */ u8 ack_reason; bool saw_nacks; /* Saw NACKs in packet */ bool new_low_nack; /* T if new low NACK found */ @@ -819,6 +829,20 @@ static inline bool rxrpc_sending_to_client(const struct rxrpc_txbuf *txb) #include <trace/events/rxrpc.h> /* + * Allocate the next serial number on a connection. 0 must be skipped. 
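
[editor's note] A user-space model of the helper defined just below, showing why the zero check matters once the 32-bit counter wraps; next_serial() is a hypothetical stand-in for rxrpc_get_next_serial():

#include <assert.h>
#include <stdint.h>

/* Hand out 1, 2, ... and never 0, even after the counter wraps. */
static uint32_t next_serial(uint32_t *tx_serial)
{
	uint32_t serial = *tx_serial;

	if (serial == 0)	/* 0 is reserved on the wire */
		serial = 1;
	*tx_serial = serial + 1;
	return serial;
}

int main(void)
{
	uint32_t ctr = UINT32_MAX;

	assert(next_serial(&ctr) == UINT32_MAX);	/* last pre-wrap value */
	assert(ctr == 0);				/* counter has wrapped */
	assert(next_serial(&ctr) == 1);			/* 0 is skipped */
	return 0;
}
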
+ */ +static inline rxrpc_serial_t rxrpc_get_next_serial(struct rxrpc_connection *conn) +{ + rxrpc_serial_t serial; + + serial = conn->tx_serial; + if (serial == 0) + serial = 1; + conn->tx_serial = serial + 1; + return serial; +} + +/* * af_rxrpc.c */ extern atomic_t rxrpc_n_rx_skbs; @@ -867,7 +891,6 @@ struct rxrpc_call *rxrpc_find_call_by_user_ID(struct rxrpc_sock *, unsigned long struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *, gfp_t, unsigned int); struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *, struct rxrpc_conn_parameters *, - struct sockaddr_rxrpc *, struct rxrpc_call_params *, gfp_t, unsigned int); void rxrpc_start_call_timer(struct rxrpc_call *call); diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index e363f21a20..0f78544d04 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -43,8 +43,6 @@ void rxrpc_propose_delay_ACK(struct rxrpc_call *call, rxrpc_serial_t serial, unsigned long expiry = rxrpc_soft_ack_delay; unsigned long now = jiffies, ack_at; - call->ackr_serial = serial; - if (rxrpc_soft_ack_delay < expiry) expiry = rxrpc_soft_ack_delay; if (call->peer->srtt_us != 0) @@ -114,6 +112,7 @@ static void rxrpc_congestion_timeout(struct rxrpc_call *call) void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb) { struct rxrpc_ackpacket *ack = NULL; + struct rxrpc_skb_priv *sp; struct rxrpc_txbuf *txb; unsigned long resend_at; rxrpc_seq_t transmitted = READ_ONCE(call->tx_transmitted); @@ -141,14 +140,15 @@ void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb) * explicitly NAK'd packets. */ if (ack_skb) { + sp = rxrpc_skb(ack_skb); ack = (void *)ack_skb->data + sizeof(struct rxrpc_wire_header); - for (i = 0; i < ack->nAcks; i++) { + for (i = 0; i < sp->nr_acks; i++) { rxrpc_seq_t seq; if (ack->acks[i] & 1) continue; - seq = ntohl(ack->firstPacket) + i; + seq = sp->first_ack + i; if (after(txb->seq, transmitted)) break; if (after(txb->seq, seq)) @@ -373,7 +373,6 @@ static void rxrpc_send_initial_ping(struct rxrpc_call *call) bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb) { unsigned long now, next, t; - rxrpc_serial_t ackr_serial; bool resend = false, expired = false; s32 abort_code; @@ -423,8 +422,7 @@ bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb) if (time_after_eq(now, t)) { trace_rxrpc_timer(call, rxrpc_timer_exp_ack, now); cmpxchg(&call->delay_ack_at, t, now + MAX_JIFFY_OFFSET); - ackr_serial = xchg(&call->ackr_serial, 0); - rxrpc_send_ACK(call, RXRPC_ACK_DELAY, ackr_serial, + rxrpc_send_ACK(call, RXRPC_ACK_DELAY, 0, rxrpc_propose_ack_ping_for_lost_ack); } diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index f10b37c147..9fc9a6c3f6 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -193,7 +193,6 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp, * Allocate a new client call. 
*/ static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx, - struct sockaddr_rxrpc *srx, struct rxrpc_conn_parameters *cp, struct rxrpc_call_params *p, gfp_t gfp, @@ -211,10 +210,12 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx, now = ktime_get_real(); call->acks_latest_ts = now; call->cong_tstamp = now; - call->dest_srx = *srx; + call->dest_srx = cp->peer->srx; + call->dest_srx.srx_service = cp->service_id; call->interruptibility = p->interruptibility; call->tx_total_len = p->tx_total_len; call->key = key_get(cp->key); + call->peer = rxrpc_get_peer(cp->peer, rxrpc_peer_get_call); call->local = rxrpc_get_local(cp->local, rxrpc_local_get_call); call->security_level = cp->security_level; if (p->kernel) @@ -306,10 +307,6 @@ static int rxrpc_connect_call(struct rxrpc_call *call, gfp_t gfp) _enter("{%d,%lx},", call->debug_id, call->user_call_ID); - call->peer = rxrpc_lookup_peer(local, &call->dest_srx, gfp); - if (!call->peer) - goto error; - ret = rxrpc_look_up_bundle(call, gfp); if (ret < 0) goto error; @@ -334,7 +331,6 @@ error: */ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, struct rxrpc_conn_parameters *cp, - struct sockaddr_rxrpc *srx, struct rxrpc_call_params *p, gfp_t gfp, unsigned int debug_id) @@ -349,13 +345,18 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, _enter("%p,%lx", rx, p->user_call_ID); + if (WARN_ON_ONCE(!cp->peer)) { + release_sock(&rx->sk); + return ERR_PTR(-EIO); + } + limiter = rxrpc_get_call_slot(p, gfp); if (!limiter) { release_sock(&rx->sk); return ERR_PTR(-ERESTARTSYS); } - call = rxrpc_alloc_client_call(rx, srx, cp, p, gfp, debug_id); + call = rxrpc_alloc_client_call(rx, cp, p, gfp, debug_id); if (IS_ERR(call)) { release_sock(&rx->sk); up(limiter); @@ -685,6 +686,7 @@ static void rxrpc_destroy_call(struct work_struct *work) del_timer_sync(&call->timer); + rxrpc_free_skb(call->cong_last_nack, rxrpc_skb_put_last_nack); rxrpc_cleanup_ring(call); while ((txb = list_first_entry_or_null(&call->tx_sendmsg, struct rxrpc_txbuf, call_link))) { diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 95f4bc206b..1f251d758c 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -95,6 +95,14 @@ void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, _enter("%d", conn->debug_id); + if (sp && sp->hdr.type == RXRPC_PACKET_TYPE_ACK) { + if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), + &pkt.ack, sizeof(pkt.ack)) < 0) + return; + if (pkt.ack.reason == RXRPC_ACK_PING_RESPONSE) + return; + } + chan = &conn->channels[channel]; /* If the last call got moved on whilst we were waiting to run, just @@ -117,7 +125,7 @@ void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, iov[2].iov_base = &ack_info; iov[2].iov_len = sizeof(ack_info); - serial = atomic_inc_return(&conn->serial); + serial = rxrpc_get_next_serial(conn); pkt.whdr.epoch = htonl(conn->proto.epoch); pkt.whdr.cid = htonl(conn->proto.cid | channel); diff --git a/net/rxrpc/conn_service.c b/net/rxrpc/conn_service.c index 89ac05a711..39c908a3ca 100644 --- a/net/rxrpc/conn_service.c +++ b/net/rxrpc/conn_service.c @@ -25,7 +25,7 @@ struct rxrpc_connection *rxrpc_find_service_conn_rcu(struct rxrpc_peer *peer, struct rxrpc_conn_proto k; struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct rb_node *p; - unsigned int seq = 0; + unsigned int seq = 1; k.epoch = sp->hdr.epoch; k.cid = sp->hdr.cid & RXRPC_CIDMASK; @@ -35,6 +35,7 @@ struct rxrpc_connection *rxrpc_find_service_conn_rcu(struct rxrpc_peer *peer, * under 
just the RCU read lock, so we have to check for * changes. */ + seq++; /* 2 on the 1st/lockless path, otherwise odd */ read_seqbegin_or_lock(&peer->service_conn_lock, &seq); p = rcu_dereference_raw(peer->service_conns.rb_node); diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 92495e73b8..9691de00ad 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -45,11 +45,9 @@ static void rxrpc_congestion_management(struct rxrpc_call *call, } cumulative_acks += summary->nr_new_acks; - cumulative_acks += summary->nr_rot_new_acks; if (cumulative_acks > 255) cumulative_acks = 255; - summary->mode = call->cong_mode; summary->cwnd = call->cong_cwnd; summary->ssthresh = call->cong_ssthresh; summary->cumulative_acks = cumulative_acks; @@ -151,6 +149,7 @@ out_no_clear_ca: cwnd = RXRPC_TX_MAX_WINDOW; call->cong_cwnd = cwnd; call->cong_cumul_acks = cumulative_acks; + summary->mode = call->cong_mode; trace_rxrpc_congest(call, summary, acked_serial, change); if (resend) rxrpc_resend(call, skb); @@ -213,7 +212,6 @@ static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, list_for_each_entry_rcu(txb, &call->tx_buffer, call_link, false) { if (before_eq(txb->seq, call->acks_hard_ack)) continue; - summary->nr_rot_new_acks++; if (test_bit(RXRPC_TXBUF_LAST, &txb->flags)) { set_bit(RXRPC_CALL_TX_LAST, &call->flags); rot_last = true; @@ -254,6 +252,11 @@ static void rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun, { ASSERT(test_bit(RXRPC_CALL_TX_LAST, &call->flags)); + if (unlikely(call->cong_last_nack)) { + rxrpc_free_skb(call->cong_last_nack, rxrpc_skb_put_last_nack); + call->cong_last_nack = NULL; + } + switch (__rxrpc_call_state(call)) { case RXRPC_CALL_CLIENT_SEND_REQUEST: case RXRPC_CALL_CLIENT_AWAIT_REPLY: @@ -703,6 +706,43 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb, } /* + * Determine how many nacks from the previous ACK have now been satisfied. + */ +static rxrpc_seq_t rxrpc_input_check_prev_ack(struct rxrpc_call *call, + struct rxrpc_ack_summary *summary, + rxrpc_seq_t seq) +{ + struct sk_buff *skb = call->cong_last_nack; + struct rxrpc_ackpacket ack; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + unsigned int i, new_acks = 0, retained_nacks = 0; + rxrpc_seq_t old_seq = sp->first_ack; + u8 *acks = skb->data + sizeof(struct rxrpc_wire_header) + sizeof(ack); + + if (after_eq(seq, old_seq + sp->nr_acks)) { + summary->nr_new_acks += sp->nr_nacks; + summary->nr_new_acks += seq - (old_seq + sp->nr_acks); + summary->nr_retained_nacks = 0; + } else if (seq == old_seq) { + summary->nr_retained_nacks = sp->nr_nacks; + } else { + for (i = 0; i < sp->nr_acks; i++) { + if (acks[i] == RXRPC_ACK_TYPE_NACK) { + if (before(old_seq + i, seq)) + new_acks++; + else + retained_nacks++; + } + } + + summary->nr_new_acks += new_acks; + summary->nr_retained_nacks = retained_nacks; + } + + return old_seq + sp->nr_acks; +} + +/* * Process individual soft ACKs. * * Each ACK in the array corresponds to one packet and can be either an ACK or @@ -711,25 +751,51 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb, * the timer on the basis that the peer might just not have processed them at * the time the ACK was sent. 
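
[editor's note] Before the function itself, a simplified model of the walk rxrpc_input_soft_acks() performs below: slots at or above the cut-off 'since' (derived from the previous ACK, if any) count as new, earlier slots only as retained overlap. Sequence-number wrap and the real wire encoding are ignored; all names and values are illustrative:

#include <stdio.h>

#define ACK_TYPE_ACK  1	/* illustrative values, not the wire encoding */
#define ACK_TYPE_NACK 2

struct summary {
	unsigned int nr_acks, nr_new_acks, nr_new_nacks, old_nacks;
};

/* Walk one SACK table; seq numbers below 'since' were already covered
 * by the previous ACK, so never count them as new. */
static void classify(const unsigned char *acks, unsigned int n,
		     unsigned int seq, unsigned int since,
		     struct summary *s)
{
	for (unsigned int i = 0; i < n; i++, seq++) {
		if (acks[i] == ACK_TYPE_ACK) {
			s->nr_acks++;
			if (seq >= since)
				s->nr_new_acks++;
		} else if (seq < since) {
			s->old_nacks++;	/* overlap with previous ACK */
		} else {
			s->nr_new_nacks++;
		}
	}
}

int main(void)
{
	unsigned char acks[] = { ACK_TYPE_ACK, ACK_TYPE_NACK, ACK_TYPE_ACK };
	struct summary s = { 0 };

	classify(acks, 3, 10, 11, &s);	/* seq 10 overlaps the previous ACK */
	printf("acks=%u new_acks=%u new_nacks=%u old_nacks=%u\n",
	       s.nr_acks, s.nr_new_acks, s.nr_new_nacks, s.old_nacks);
	return 0;
}
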
*/ -static void rxrpc_input_soft_acks(struct rxrpc_call *call, u8 *acks, - rxrpc_seq_t seq, int nr_acks, - struct rxrpc_ack_summary *summary) +static void rxrpc_input_soft_acks(struct rxrpc_call *call, + struct rxrpc_ack_summary *summary, + struct sk_buff *skb, + rxrpc_seq_t seq, + rxrpc_seq_t since) { - unsigned int i; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + unsigned int i, old_nacks = 0; + rxrpc_seq_t lowest_nak = seq + sp->nr_acks; + u8 *acks = skb->data + sizeof(struct rxrpc_wire_header) + sizeof(struct rxrpc_ackpacket); - for (i = 0; i < nr_acks; i++) { + for (i = 0; i < sp->nr_acks; i++) { if (acks[i] == RXRPC_ACK_TYPE_ACK) { summary->nr_acks++; - summary->nr_new_acks++; + if (after_eq(seq, since)) + summary->nr_new_acks++; } else { - if (!summary->saw_nacks && - call->acks_lowest_nak != seq + i) { - call->acks_lowest_nak = seq + i; - summary->new_low_nack = true; - } summary->saw_nacks = true; + if (before(seq, since)) { + /* Overlap with previous ACK */ + old_nacks++; + } else { + summary->nr_new_nacks++; + sp->nr_nacks++; + } + + if (before(seq, lowest_nak)) + lowest_nak = seq; } + seq++; + } + + if (lowest_nak != call->acks_lowest_nak) { + call->acks_lowest_nak = lowest_nak; + summary->new_low_nack = true; } + + /* We *can* have more nacks than we did - the peer is permitted to drop + * packets it has soft-acked and re-request them. Further, it is + * possible for the nack distribution to change whilst the number of + * nacks stays the same or goes down. + */ + if (old_nacks < summary->nr_retained_nacks) + summary->nr_new_acks += summary->nr_retained_nacks - old_nacks; + summary->nr_retained_nacks = old_nacks; } /* @@ -773,7 +839,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct rxrpc_ackinfo info; rxrpc_serial_t ack_serial, acked_serial; - rxrpc_seq_t first_soft_ack, hard_ack, prev_pkt; + rxrpc_seq_t first_soft_ack, hard_ack, prev_pkt, since; int nr_acks, offset, ioffset; _enter(""); @@ -789,6 +855,8 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) prev_pkt = ntohl(ack.previousPacket); hard_ack = first_soft_ack - 1; nr_acks = ack.nAcks; + sp->first_ack = first_soft_ack; + sp->nr_acks = nr_acks; summary.ack_reason = (ack.reason < RXRPC_ACK__INVALID ? 
ack.reason : RXRPC_ACK__INVALID); @@ -858,6 +926,16 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) if (nr_acks > 0) skb_condense(skb); + if (call->cong_last_nack) { + since = rxrpc_input_check_prev_ack(call, &summary, first_soft_ack); + rxrpc_free_skb(call->cong_last_nack, rxrpc_skb_put_last_nack); + call->cong_last_nack = NULL; + } else { + summary.nr_new_acks = first_soft_ack - call->acks_first_seq; + call->acks_lowest_nak = first_soft_ack + nr_acks; + since = first_soft_ack; + } + call->acks_latest_ts = skb->tstamp; call->acks_first_seq = first_soft_ack; call->acks_prev_seq = prev_pkt; @@ -866,7 +944,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) case RXRPC_ACK_PING: break; default: - if (after(acked_serial, call->acks_highest_serial)) + if (acked_serial && after(acked_serial, call->acks_highest_serial)) call->acks_highest_serial = acked_serial; break; } @@ -905,8 +983,9 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) if (nr_acks > 0) { if (offset > (int)skb->len - nr_acks) return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_short_sack); - rxrpc_input_soft_acks(call, skb->data + offset, first_soft_ack, - nr_acks, &summary); + rxrpc_input_soft_acks(call, &summary, skb, first_soft_ack, since); + rxrpc_get_skb(skb, rxrpc_skb_get_last_nack); + call->cong_last_nack = skb; } if (test_bit(RXRPC_CALL_TX_LAST, &call->flags) && diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index a0906145e8..4a292f860a 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -216,7 +216,7 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb) iov[0].iov_len = sizeof(txb->wire) + sizeof(txb->ack) + n; len = iov[0].iov_len; - serial = atomic_inc_return(&conn->serial); + serial = rxrpc_get_next_serial(conn); txb->wire.serial = htonl(serial); trace_rxrpc_tx_ack(call->debug_id, serial, ntohl(txb->ack.firstPacket), @@ -302,7 +302,7 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call) iov[0].iov_base = &pkt; iov[0].iov_len = sizeof(pkt); - serial = atomic_inc_return(&conn->serial); + serial = rxrpc_get_next_serial(conn); pkt.whdr.serial = htonl(serial); iov_iter_kvec(&msg.msg_iter, WRITE, iov, 1, sizeof(pkt)); @@ -334,7 +334,7 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb) _enter("%x,{%d}", txb->seq, txb->len); /* Each transmission of a Tx packet needs a new serial number */ - serial = atomic_inc_return(&conn->serial); + serial = rxrpc_get_next_serial(conn); txb->wire.serial = htonl(serial); if (test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags) && @@ -558,7 +558,7 @@ void rxrpc_send_conn_abort(struct rxrpc_connection *conn) len = iov[0].iov_len + iov[1].iov_len; - serial = atomic_inc_return(&conn->serial); + serial = rxrpc_get_next_serial(conn); whdr.serial = htonl(serial); iov_iter_kvec(&msg.msg_iter, WRITE, iov, 2, len); diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c index 8d7a715a0b..49dcda67a0 100644 --- a/net/rxrpc/peer_object.c +++ b/net/rxrpc/peer_object.c @@ -22,6 +22,8 @@ #include <net/ip6_route.h> #include "ar-internal.h" +static const struct sockaddr_rxrpc rxrpc_null_addr; + /* * Hash a peer key. */ @@ -457,39 +459,53 @@ void rxrpc_destroy_all_peers(struct rxrpc_net *rxnet) } /** - * rxrpc_kernel_get_peer - Get the peer address of a call + * rxrpc_kernel_get_call_peer - Get the peer address of a call * @sock: The socket on which the call is in progress. 
* @call: The call to query - * @_srx: Where to place the result * - * Get the address of the remote peer in a call. + * Get a record for the remote peer in a call. */ -void rxrpc_kernel_get_peer(struct socket *sock, struct rxrpc_call *call, - struct sockaddr_rxrpc *_srx) +struct rxrpc_peer *rxrpc_kernel_get_call_peer(struct socket *sock, struct rxrpc_call *call) { - *_srx = call->peer->srx; + return call->peer; } -EXPORT_SYMBOL(rxrpc_kernel_get_peer); +EXPORT_SYMBOL(rxrpc_kernel_get_call_peer); /** * rxrpc_kernel_get_srtt - Get a call's peer smoothed RTT - * @sock: The socket on which the call is in progress. - * @call: The call to query - * @_srtt: Where to store the SRTT value. + * @peer: The peer to query * - * Get the call's peer smoothed RTT in uS. + * Get the call's peer smoothed RTT in uS or UINT_MAX if we have no samples. */ -bool rxrpc_kernel_get_srtt(struct socket *sock, struct rxrpc_call *call, - u32 *_srtt) +unsigned int rxrpc_kernel_get_srtt(const struct rxrpc_peer *peer) { - struct rxrpc_peer *peer = call->peer; + return peer->rtt_count > 0 ? peer->srtt_us >> 3 : UINT_MAX; +} +EXPORT_SYMBOL(rxrpc_kernel_get_srtt); - if (peer->rtt_count == 0) { - *_srtt = 1000000; /* 1S */ - return false; - } +/** + * rxrpc_kernel_remote_srx - Get the address of a peer + * @peer: The peer to query + * + * Get a pointer to the address from a peer record. The caller is responsible + * for making sure that the address is not deallocated. + */ +const struct sockaddr_rxrpc *rxrpc_kernel_remote_srx(const struct rxrpc_peer *peer) +{ + return peer ? &peer->srx : &rxrpc_null_addr; +} +EXPORT_SYMBOL(rxrpc_kernel_remote_srx); - *_srtt = call->peer->srtt_us >> 3; - return true; +/** + * rxrpc_kernel_remote_addr - Get the peer transport address of a call + * @peer: The peer to query + * + * Get a pointer to the transport address from a peer record. The caller is + * responsible for making sure that the address is not deallocated. + */ +const struct sockaddr *rxrpc_kernel_remote_addr(const struct rxrpc_peer *peer) +{ + return (const struct sockaddr *) + (peer ? 
&peer->srx.transport : &rxrpc_null_addr.transport); } -EXPORT_SYMBOL(rxrpc_kernel_get_srtt); +EXPORT_SYMBOL(rxrpc_kernel_remote_addr); diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c index 682636d3b0..208312c244 100644 --- a/net/rxrpc/proc.c +++ b/net/rxrpc/proc.c @@ -181,7 +181,7 @@ print: atomic_read(&conn->active), state, key_serial(conn->key), - atomic_read(&conn->serial), + conn->tx_serial, conn->hi_serial, conn->channels[0].call_id, conn->channels[1].call_id, diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index b52dedcebc..6b32d61d4c 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -664,7 +664,7 @@ static int rxkad_issue_challenge(struct rxrpc_connection *conn) len = iov[0].iov_len + iov[1].iov_len; - serial = atomic_inc_return(&conn->serial); + serial = rxrpc_get_next_serial(conn); whdr.serial = htonl(serial); ret = kernel_sendmsg(conn->local->socket, &msg, iov, 2, len); @@ -721,7 +721,7 @@ static int rxkad_send_response(struct rxrpc_connection *conn, len = iov[0].iov_len + iov[1].iov_len + iov[2].iov_len; - serial = atomic_inc_return(&conn->serial); + serial = rxrpc_get_next_serial(conn); whdr.serial = htonl(serial); rxrpc_local_dont_fragment(conn->local, false); diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 8e0b94714e..5677d5690a 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -572,6 +572,7 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, __acquires(&call->user_mutex) { struct rxrpc_conn_parameters cp; + struct rxrpc_peer *peer; struct rxrpc_call *call; struct key *key; @@ -584,21 +585,29 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, return ERR_PTR(-EDESTADDRREQ); } + peer = rxrpc_lookup_peer(rx->local, srx, GFP_KERNEL); + if (!peer) { + release_sock(&rx->sk); + return ERR_PTR(-ENOMEM); + } + key = rx->key; if (key && !rx->key->payload.data[0]) key = NULL; memset(&cp, 0, sizeof(cp)); cp.local = rx->local; + cp.peer = peer; cp.key = rx->key; cp.security_level = rx->min_sec_level; cp.exclusive = rx->exclusive | p->exclusive; cp.upgrade = p->upgrade; cp.service_id = srx->srx_service; - call = rxrpc_new_client_call(rx, &cp, srx, &p->call, GFP_KERNEL, + call = rxrpc_new_client_call(rx, &cp, &p->call, GFP_KERNEL, atomic_inc_return(&rxrpc_debug_id)); /* The socket is now unlocked */ + rxrpc_put_peer(peer, rxrpc_peer_put_application); _leave(" = %p\n", call); return call; } diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 9d3f26bf04..c39252d61e 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -1098,7 +1098,7 @@ repeat: } } else if (TC_ACT_EXT_CMP(ret, TC_ACT_GOTO_CHAIN)) { if (unlikely(!rcu_access_pointer(a->goto_chain))) { - net_warn_ratelimited("can't go to NULL chain!\n"); + tcf_set_drop_reason(res, SKB_DROP_REASON_TC_ERROR); return TC_ACT_SHOT; } tcf_action_goto_chain_exec(a, res); diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index a7b3f60dd0..3d50215985 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -734,7 +734,6 @@ static struct tc_action_ops act_ct_ops; struct tc_ct_action_net { struct tc_action_net tn; /* Must be first */ - bool labels; }; /* Determine whether skb->_nfct is equal to the result of conntrack lookup. 
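
[editor's note] Taken together, the rxrpc changes above replace pass-the-address call setup with an explicit peer object: look the peer up once, start calls against it, drop the reference when done. A hypothetical kernel-context caller might chain the new exports like this; the service ID, timeout, and IDs are placeholders and error handling is abbreviated:

#include <linux/err.h>
#include <net/af_rxrpc.h>

static struct rxrpc_call *example_begin(struct socket *sock,
					struct sockaddr_rxrpc *srx,
					struct key *key,
					rxrpc_notify_rx_t notify_rx)
{
	struct rxrpc_peer *peer;
	struct rxrpc_call *call;

	peer = rxrpc_kernel_lookup_peer(sock, srx, GFP_KERNEL);
	if (IS_ERR_OR_NULL(peer))
		return ERR_PTR(peer ? PTR_ERR(peer) : -ENOMEM);

	call = rxrpc_kernel_begin_call(sock, peer, key,
				       1,	/* user_call_ID */
				       -1,	/* tx_total_len: open-ended */
				       30,	/* hard timeout, seconds */
				       GFP_KERNEL, notify_rx,
				       123,	/* service_id (placeholder) */
				       false,	/* upgrade */
				       RXRPC_INTERRUPTIBLE,
				       1);	/* debug_id (placeholder) */

	rxrpc_kernel_put_peer(peer);	/* the call holds its own peer ref */
	return call;
}

Dropping our reference immediately is safe because, per the call_object.c hunk above, rxrpc_alloc_client_call() takes its own ref on cp->peer.
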
*/ @@ -872,8 +871,13 @@ static void tcf_ct_params_free(struct tcf_ct_params *params) } if (params->ct_ft) tcf_ct_flow_table_put(params->ct_ft); - if (params->tmpl) + if (params->tmpl) { + if (params->put_labels) + nf_connlabels_put(nf_ct_net(params->tmpl)); + nf_ct_put(params->tmpl); + } + kfree(params); } @@ -1198,9 +1202,9 @@ static int tcf_ct_fill_params(struct net *net, struct nlattr **tb, struct netlink_ext_ack *extack) { - struct tc_ct_action_net *tn = net_generic(net, act_ct_ops.net_id); struct nf_conntrack_zone zone; int err, family, proto, len; + bool put_labels = false; struct nf_conn *tmpl; char *name; @@ -1230,15 +1234,20 @@ static int tcf_ct_fill_params(struct net *net, } if (tb[TCA_CT_LABELS]) { + unsigned int n_bits = sizeof_field(struct tcf_ct_params, labels) * 8; + if (!IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS)) { NL_SET_ERR_MSG_MOD(extack, "Conntrack labels isn't enabled."); return -EOPNOTSUPP; } - if (!tn->labels) { + if (nf_connlabels_get(net, n_bits - 1)) { NL_SET_ERR_MSG_MOD(extack, "Failed to set connlabel length"); return -EOPNOTSUPP; + } else { + put_labels = true; } + tcf_ct_set_key_val(tb, p->labels, TCA_CT_LABELS, p->labels_mask, TCA_CT_LABELS_MASK, @@ -1282,10 +1291,15 @@ static int tcf_ct_fill_params(struct net *net, } } + p->put_labels = put_labels; + if (p->ct_action & TCA_CT_ACT_COMMIT) __set_bit(IPS_CONFIRMED_BIT, &tmpl->status); return 0; err: + if (put_labels) + nf_connlabels_put(net); + nf_ct_put(p->tmpl); p->tmpl = NULL; return err; @@ -1589,32 +1603,13 @@ static struct tc_action_ops act_ct_ops = { static __net_init int ct_init_net(struct net *net) { - unsigned int n_bits = sizeof_field(struct tcf_ct_params, labels) * 8; struct tc_ct_action_net *tn = net_generic(net, act_ct_ops.net_id); - if (nf_connlabels_get(net, n_bits - 1)) { - tn->labels = false; - pr_err("act_ct: Failed to set connlabels length"); - } else { - tn->labels = true; - } - return tc_action_net_init(net, &tn->tn, &act_ct_ops); } static void __net_exit ct_exit_net(struct list_head *net_list) { - struct net *net; - - rtnl_lock(); - list_for_each_entry(net, net_list, exit_list) { - struct tc_ct_action_net *tn = net_generic(net, act_ct_ops.net_id); - - if (tn->labels) - nf_connlabels_put(net); - } - rtnl_unlock(); - tc_action_net_exit(net_list, act_ct_ops.net_id); } diff --git a/net/sched/act_gate.c b/net/sched/act_gate.c index c9a811f4c7..393b787292 100644 --- a/net/sched/act_gate.c +++ b/net/sched/act_gate.c @@ -677,4 +677,5 @@ static void __exit gate_cleanup_module(void) module_init(gate_init_module); module_exit(gate_cleanup_module); +MODULE_DESCRIPTION("TC gate action"); MODULE_LICENSE("GPL v2"); diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 0a711c184c..674f7ae356 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -206,18 +206,14 @@ release_idr: return err; } -static bool is_mirred_nested(void) -{ - return unlikely(__this_cpu_read(mirred_nest_level) > 1); -} - -static int tcf_mirred_forward(bool want_ingress, struct sk_buff *skb) +static int +tcf_mirred_forward(bool at_ingress, bool want_ingress, struct sk_buff *skb) { int err; if (!want_ingress) err = tcf_dev_queue_xmit(skb, dev_queue_xmit); - else if (is_mirred_nested()) + else if (!at_ingress) err = netif_rx(skb); else err = netif_receive_skb(skb); @@ -225,110 +221,123 @@ static int tcf_mirred_forward(bool want_ingress, struct sk_buff *skb) return err; } -TC_INDIRECT_SCOPE int tcf_mirred_act(struct sk_buff *skb, - const struct tc_action *a, - struct tcf_result *res) +static int 
tcf_mirred_to_dev(struct sk_buff *skb, struct tcf_mirred *m, + struct net_device *dev, + const bool m_mac_header_xmit, int m_eaction, + int retval) { - struct tcf_mirred *m = to_mirred(a); - struct sk_buff *skb2 = skb; - bool m_mac_header_xmit; - struct net_device *dev; - unsigned int nest_level; - int retval, err = 0; - bool use_reinsert; + struct sk_buff *skb_to_send = skb; bool want_ingress; bool is_redirect; bool expects_nh; bool at_ingress; - int m_eaction; + bool dont_clone; int mac_len; bool at_nh; + int err; - nest_level = __this_cpu_inc_return(mirred_nest_level); - if (unlikely(nest_level > MIRRED_NEST_LIMIT)) { - net_warn_ratelimited("Packet exceeded mirred recursion limit on dev %s\n", - netdev_name(skb->dev)); - __this_cpu_dec(mirred_nest_level); - return TC_ACT_SHOT; - } - - tcf_lastuse_update(&m->tcf_tm); - tcf_action_update_bstats(&m->common, skb); - - m_mac_header_xmit = READ_ONCE(m->tcfm_mac_header_xmit); - m_eaction = READ_ONCE(m->tcfm_eaction); - retval = READ_ONCE(m->tcf_action); - dev = rcu_dereference_bh(m->tcfm_dev); - if (unlikely(!dev)) { - pr_notice_once("tc mirred: target device is gone\n"); - goto out; - } - + is_redirect = tcf_mirred_is_act_redirect(m_eaction); if (unlikely(!(dev->flags & IFF_UP)) || !netif_carrier_ok(dev)) { net_notice_ratelimited("tc mirred to Houston: device %s is down\n", dev->name); - goto out; + goto err_cant_do; } /* we could easily avoid the clone only if called by ingress and clsact; * since we can't easily detect the clsact caller, skip clone only for * ingress - that covers the TC S/W datapath. */ - is_redirect = tcf_mirred_is_act_redirect(m_eaction); at_ingress = skb_at_tc_ingress(skb); - use_reinsert = at_ingress && is_redirect && - tcf_mirred_can_reinsert(retval); - if (!use_reinsert) { - skb2 = skb_clone(skb, GFP_ATOMIC); - if (!skb2) - goto out; + dont_clone = skb_at_tc_ingress(skb) && is_redirect && + tcf_mirred_can_reinsert(retval); + if (!dont_clone) { + skb_to_send = skb_clone(skb, GFP_ATOMIC); + if (!skb_to_send) + goto err_cant_do; } want_ingress = tcf_mirred_act_wants_ingress(m_eaction); /* All mirred/redirected skbs should clear previous ct info */ - nf_reset_ct(skb2); + nf_reset_ct(skb_to_send); if (want_ingress && !at_ingress) /* drop dst for egress -> ingress */ - skb_dst_drop(skb2); + skb_dst_drop(skb_to_send); expects_nh = want_ingress || !m_mac_header_xmit; at_nh = skb->data == skb_network_header(skb); if (at_nh != expects_nh) { - mac_len = skb_at_tc_ingress(skb) ? skb->mac_len : + mac_len = at_ingress ? 
skb->mac_len : skb_network_offset(skb); if (expects_nh) { /* target device/action expect data at nh */ - skb_pull_rcsum(skb2, mac_len); + skb_pull_rcsum(skb_to_send, mac_len); } else { /* target device/action expect data at mac */ - skb_push_rcsum(skb2, mac_len); + skb_push_rcsum(skb_to_send, mac_len); } } - skb2->skb_iif = skb->dev->ifindex; - skb2->dev = dev; + skb_to_send->skb_iif = skb->dev->ifindex; + skb_to_send->dev = dev; - /* mirror is always swallowed */ if (is_redirect) { - skb_set_redirected(skb2, skb2->tc_at_ingress); - - /* let's the caller reinsert the packet, if possible */ - if (use_reinsert) { - err = tcf_mirred_forward(want_ingress, skb); - if (err) - tcf_action_inc_overlimit_qstats(&m->common); - __this_cpu_dec(mirred_nest_level); - return TC_ACT_CONSUMED; - } + if (skb == skb_to_send) + retval = TC_ACT_CONSUMED; + + skb_set_redirected(skb_to_send, skb_to_send->tc_at_ingress); + + err = tcf_mirred_forward(at_ingress, want_ingress, skb_to_send); + } else { + err = tcf_mirred_forward(at_ingress, want_ingress, skb_to_send); } + if (err) + tcf_action_inc_overlimit_qstats(&m->common); + + return retval; + +err_cant_do: + if (is_redirect) + retval = TC_ACT_SHOT; + tcf_action_inc_overlimit_qstats(&m->common); + return retval; +} + +TC_INDIRECT_SCOPE int tcf_mirred_act(struct sk_buff *skb, + const struct tc_action *a, + struct tcf_result *res) +{ + struct tcf_mirred *m = to_mirred(a); + int retval = READ_ONCE(m->tcf_action); + unsigned int nest_level; + bool m_mac_header_xmit; + struct net_device *dev; + int m_eaction; - err = tcf_mirred_forward(want_ingress, skb2); - if (err) { -out: + nest_level = __this_cpu_inc_return(mirred_nest_level); + if (unlikely(nest_level > MIRRED_NEST_LIMIT)) { + net_warn_ratelimited("Packet exceeded mirred recursion limit on dev %s\n", + netdev_name(skb->dev)); + retval = TC_ACT_SHOT; + goto dec_nest_level; + } + + tcf_lastuse_update(&m->tcf_tm); + tcf_action_update_bstats(&m->common, skb); + + dev = rcu_dereference_bh(m->tcfm_dev); + if (unlikely(!dev)) { + pr_notice_once("tc mirred: target device is gone\n"); tcf_action_inc_overlimit_qstats(&m->common); - if (tcf_mirred_is_act_redirect(m_eaction)) - retval = TC_ACT_SHOT; + goto dec_nest_level; } + + m_mac_header_xmit = READ_ONCE(m->tcfm_mac_header_xmit); + m_eaction = READ_ONCE(m->tcfm_eaction); + + retval = tcf_mirred_to_dev(skb, m, dev, m_mac_header_xmit, m_eaction, + retval); + +dec_nest_level: __this_cpu_dec(mirred_nest_level); return retval; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 84e18b5f72..02c594baa1 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1664,6 +1664,7 @@ static inline int __tcf_classify(struct sk_buff *skb, int act_index, u32 *last_executed_chain) { + u32 orig_reason = res->drop_reason; #ifdef CONFIG_NET_CLS_ACT const int max_reclassify_loop = 16; const struct tcf_proto *first_tp; @@ -1687,12 +1688,16 @@ reclassify: * time we got here with a cookie from hardware. 
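
[editor's note] The recurring pattern in this cls_api hunk: every internal TC_ACT_SHOT path now records SKB_DROP_REASON_TC_ERROR, and __tcf_classify() saves res->drop_reason on entry so a classifier that shoots a packet without setting a reason gets the original value restored rather than a bogus 0. A toy model of that save-and-restore, with enum values invented for illustration:

#include <stdio.h>

enum drop_reason { NOT_DROPPED_YET = 0, TC_ERROR, TC_POLICY };
enum { ACT_OK, ACT_SHOT };

static int classify(enum drop_reason *reason)
{
	enum drop_reason orig = *reason;	/* remember caller's reason */
	int err = ACT_SHOT;	/* pretend an action dropped the skb */

	*reason = NOT_DROPPED_YET;	/* action clobbered the field */
	if (err == ACT_SHOT && *reason == NOT_DROPPED_YET)
		*reason = orig;		/* restore rather than report 0 */
	return err;
}

int main(void)
{
	enum drop_reason r = TC_POLICY;

	classify(&r);
	printf("%s\n", r == TC_POLICY ? "reason preserved" : "reason lost");
	return 0;
}
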
*/ if (unlikely(n->tp != tp || n->tp->chain != n->chain || - !tp->ops->get_exts)) + !tp->ops->get_exts)) { + tcf_set_drop_reason(res, SKB_DROP_REASON_TC_ERROR); return TC_ACT_SHOT; + } exts = tp->ops->get_exts(tp, n->handle); - if (unlikely(!exts || n->exts != exts)) + if (unlikely(!exts || n->exts != exts)) { + tcf_set_drop_reason(res, SKB_DROP_REASON_TC_ERROR); return TC_ACT_SHOT; + } n = NULL; err = tcf_exts_exec_ex(skb, exts, act_index, res); @@ -1714,12 +1719,20 @@ reclassify: goto reset; } #endif - if (err >= 0) + if (err >= 0) { + /* Policy drop or drop reason is over-written by + * classifiers with a bogus value(0) */ + if (err == TC_ACT_SHOT && + res->drop_reason == SKB_NOT_DROPPED_YET) + tcf_set_drop_reason(res, orig_reason); return err; + } } - if (unlikely(n)) + if (unlikely(n)) { + tcf_set_drop_reason(res, SKB_DROP_REASON_TC_ERROR); return TC_ACT_SHOT; + } return TC_ACT_UNSPEC; /* signal: continue lookup */ #ifdef CONFIG_NET_CLS_ACT @@ -1729,6 +1742,7 @@ reset: tp->chain->block->index, tp->prio & 0xffff, ntohs(tp->protocol)); + tcf_set_drop_reason(res, SKB_DROP_REASON_TC_ERROR); return TC_ACT_SHOT; } @@ -1765,8 +1779,10 @@ int tcf_classify(struct sk_buff *skb, if (ext->act_miss) { n = tcf_exts_miss_cookie_lookup(ext->act_miss_cookie, &act_index); - if (!n) + if (!n) { + tcf_set_drop_reason(res, SKB_DROP_REASON_TC_ERROR); return TC_ACT_SHOT; + } chain = n->chain_index; } else { @@ -1774,8 +1790,10 @@ int tcf_classify(struct sk_buff *skb, } fchain = tcf_chain_lookup_rcu(block, chain); - if (!fchain) + if (!fchain) { + tcf_set_drop_reason(res, SKB_DROP_REASON_TC_ERROR); return TC_ACT_SHOT; + } /* Consume, so cloned/redirect skbs won't inherit ext */ skb_ext_del(skb, TC_SKB_EXT); @@ -1794,8 +1812,11 @@ int tcf_classify(struct sk_buff *skb, struct tc_skb_cb *cb = tc_skb_cb(skb); ext = tc_skb_ext_alloc(skb); - if (WARN_ON_ONCE(!ext)) + if (WARN_ON_ONCE(!ext)) { + tcf_set_drop_reason(res, SKB_DROP_REASON_TC_ERROR); return TC_ACT_SHOT; + } + ext->chain = last_executed_chain; ext->mru = cb->mru; ext->post_ct = cb->post_ct; diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index 1b92c33b5f..a1f5693133 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -341,4 +341,5 @@ static void __exit exit_basic(void) module_init(init_basic) module_exit(exit_basic) +MODULE_DESCRIPTION("TC basic classifier"); MODULE_LICENSE("GPL"); diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index bd9322d719..7ee8dbf49e 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -222,4 +222,5 @@ static void __exit exit_cgroup_cls(void) module_init(init_cgroup_cls); module_exit(exit_cgroup_cls); +MODULE_DESCRIPTION("TC cgroup classifier"); MODULE_LICENSE("GPL"); diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index efb9d2811b..6ee7064c82 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -2460,8 +2460,11 @@ unbind_filter: } errout_idr: - if (!fold) + if (!fold) { + spin_lock(&tp->lock); idr_remove(&head->handle_idr, fnew->handle); + spin_unlock(&tp->lock); + } __fl_put(fnew); errout_tb: kfree(tb); diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index c49d6af0e0..afc534ee0a 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -446,4 +446,5 @@ static void __exit exit_fw(void) module_init(init_fw) module_exit(exit_fw) +MODULE_DESCRIPTION("SKB mark based TC classifier"); MODULE_LICENSE("GPL"); diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index 1e20bbd687..12a505db41 100644 --- a/net/sched/cls_route.c +++ 
b/net/sched/cls_route.c @@ -375,9 +375,9 @@ out: static const struct nla_policy route4_policy[TCA_ROUTE4_MAX + 1] = { [TCA_ROUTE4_CLASSID] = { .type = NLA_U32 }, - [TCA_ROUTE4_TO] = { .type = NLA_U32 }, - [TCA_ROUTE4_FROM] = { .type = NLA_U32 }, - [TCA_ROUTE4_IIF] = { .type = NLA_U32 }, + [TCA_ROUTE4_TO] = NLA_POLICY_MAX(NLA_U32, 0xFF), + [TCA_ROUTE4_FROM] = NLA_POLICY_MAX(NLA_U32, 0xFF), + [TCA_ROUTE4_IIF] = NLA_POLICY_MAX(NLA_U32, 0x7FFF), }; static int route4_set_parms(struct net *net, struct tcf_proto *tp, @@ -397,33 +397,37 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp, return err; if (tb[TCA_ROUTE4_TO]) { - if (new && handle & 0x8000) + if (new && handle & 0x8000) { + NL_SET_ERR_MSG(extack, "Invalid handle"); return -EINVAL; + } to = nla_get_u32(tb[TCA_ROUTE4_TO]); - if (to > 0xFF) - return -EINVAL; nhandle = to; } + if (tb[TCA_ROUTE4_FROM] && tb[TCA_ROUTE4_IIF]) { + NL_SET_ERR_MSG_ATTR(extack, tb[TCA_ROUTE4_FROM], + "'from' and 'fromif' are mutually exclusive"); + return -EINVAL; + } + if (tb[TCA_ROUTE4_FROM]) { - if (tb[TCA_ROUTE4_IIF]) - return -EINVAL; id = nla_get_u32(tb[TCA_ROUTE4_FROM]); - if (id > 0xFF) - return -EINVAL; nhandle |= id << 16; } else if (tb[TCA_ROUTE4_IIF]) { id = nla_get_u32(tb[TCA_ROUTE4_IIF]); - if (id > 0x7FFF) - return -EINVAL; nhandle |= (id | 0x8000) << 16; } else nhandle |= 0xFFFF << 16; if (handle && new) { nhandle |= handle & 0x7F00; - if (nhandle != handle) + if (nhandle != handle) { + NL_SET_ERR_MSG_FMT(extack, + "Handle mismatch constructed: %x (expected: %x)", + handle, nhandle); return -EINVAL; + } } if (!nhandle) { @@ -478,7 +482,6 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, struct route4_filter __rcu **fp; struct route4_filter *fold, *f1, *pfp, *f = NULL; struct route4_bucket *b; - struct nlattr *opt = tca[TCA_OPTIONS]; struct nlattr *tb[TCA_ROUTE4_MAX + 1]; unsigned int h, th; int err; @@ -489,10 +492,12 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, return -EINVAL; } - if (opt == NULL) + if (NL_REQ_ATTR_CHECK(extack, NULL, tca, TCA_OPTIONS)) { + NL_SET_ERR_MSG_MOD(extack, "Missing options"); return -EINVAL; + } - err = nla_parse_nested_deprecated(tb, TCA_ROUTE4_MAX, opt, + err = nla_parse_nested_deprecated(tb, TCA_ROUTE4_MAX, tca[TCA_OPTIONS], route4_policy, NULL); if (err < 0) return err; @@ -679,4 +684,5 @@ static void __exit exit_route4(void) module_init(init_route4) module_exit(exit_route4) +MODULE_DESCRIPTION("Routing table realm based TC classifier"); MODULE_LICENSE("GPL"); diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 6663e971a1..d5bdfd4a76 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -1489,4 +1489,5 @@ static void __exit exit_u32(void) module_init(init_u32) module_exit(exit_u32) +MODULE_DESCRIPTION("Universal 32bit based TC Classifier"); MODULE_LICENSE("GPL"); diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index da34fd4c92..09d8afd04a 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -546,7 +546,7 @@ META_COLLECTOR(int_sk_prio) *err = -1; return; } - dst->value = sk->sk_priority; + dst->value = READ_ONCE(sk->sk_priority); } META_COLLECTOR(int_sk_rcvlowat) diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c index cac870eb78..9a0b85190a 100644 --- a/net/sched/sch_cbs.c +++ b/net/sched/sch_cbs.c @@ -574,3 +574,4 @@ static void __exit cbs_module_exit(void) module_init(cbs_module_init) module_exit(cbs_module_exit) MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Credit Based shaper"); diff --git a/net/sched/sch_choke.c 
b/net/sched/sch_choke.c index 19c8511259..ae1da08e26 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -513,3 +513,4 @@ module_init(choke_module_init) module_exit(choke_module_exit) MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Choose and keep responsive flows scheduler"); diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index 19901e77cd..097740a9af 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -495,3 +495,4 @@ static void __exit drr_exit(void) module_init(drr_init); module_exit(drr_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Deficit Round Robin scheduler"); diff --git a/net/sched/sch_etf.c b/net/sched/sch_etf.c index 61d1f0e32c..4808159a54 100644 --- a/net/sched/sch_etf.c +++ b/net/sched/sch_etf.c @@ -513,3 +513,4 @@ static void __exit etf_module_exit(void) module_init(etf_module_init) module_exit(etf_module_exit) MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Earliest TxTime First (ETF) qdisc"); diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c index b10efeaf06..f7c8849594 100644 --- a/net/sched/sch_ets.c +++ b/net/sched/sch_ets.c @@ -826,3 +826,4 @@ static void __exit ets_exit(void) module_init(ets_init); module_exit(ets_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Enhanced Transmission Selection(ETS) scheduler"); diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index e1040421b7..450f5c67ac 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -269,3 +269,4 @@ struct Qdisc *fifo_create_dflt(struct Qdisc *sch, struct Qdisc_ops *ops, return q ? : ERR_PTR(err); } EXPORT_SYMBOL(fifo_create_dflt); +MODULE_DESCRIPTION("Single queue packet and byte based First In First Out(P/BFIFO) scheduler"); diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index f59a2cb2c8..3a31c47fea 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -2,7 +2,7 @@ /* * net/sched/sch_fq.c Fair Queue Packet Scheduler (per flow pacing) * - * Copyright (C) 2013-2015 Eric Dumazet <edumazet@google.com> + * Copyright (C) 2013-2023 Eric Dumazet <edumazet@google.com> * * Meant to be mostly used for locally generated traffic : * Fast classification depends on skb->sk being set before reaching us. 
@@ -51,7 +51,8 @@ #include <net/tcp.h> struct fq_skb_cb { - u64 time_to_send; + u64 time_to_send; + u8 band; }; static inline struct fq_skb_cb *fq_skb_cb(struct sk_buff *skb) @@ -73,37 +74,41 @@ struct fq_flow { struct sk_buff *tail; /* last skb in the list */ unsigned long age; /* (jiffies | 1UL) when flow was emptied, for gc */ }; - struct rb_node fq_node; /* anchor in fq_root[] trees */ + union { + struct rb_node fq_node; /* anchor in fq_root[] trees */ + /* Following field is only used for q->internal, + * because q->internal is not hashed in fq_root[] + */ + u64 stat_fastpath_packets; + }; struct sock *sk; u32 socket_hash; /* sk_hash */ int qlen; /* number of packets in flow queue */ -/* Second cache line, used in fq_dequeue() */ +/* Second cache line */ int credit; - /* 32bit hole on 64bit arches */ - + int band; struct fq_flow *next; /* next pointer in RR lists */ struct rb_node rate_node; /* anchor in q->delayed tree */ u64 time_next_packet; -} ____cacheline_aligned_in_smp; +}; struct fq_flow_head { struct fq_flow *first; struct fq_flow *last; }; -struct fq_sched_data { +struct fq_perband_flows { struct fq_flow_head new_flows; - struct fq_flow_head old_flows; + int credit; + int quantum; /* based on band nr : 576KB, 192KB, 64KB */ +}; - struct rb_root delayed; /* for rate limited flows */ - u64 time_next_delayed_flow; - u64 ktime_cache; /* copy of last ktime_get_ns() */ - unsigned long unthrottle_latency_ns; +struct fq_sched_data { +/* Read mostly cache line */ - struct fq_flow internal; /* for non classified or high prio packets */ u32 quantum; u32 initial_quantum; u32 flow_refill_delay; @@ -117,24 +122,46 @@ struct fq_sched_data { u8 rate_enable; u8 fq_trees_log; u8 horizon_drop; + u8 prio2band[(TC_PRIO_MAX + 1) >> 2]; + u32 timer_slack; /* hrtimer slack in ns */ + +/* Read/Write fields. */ + + unsigned int band_nr; /* band being serviced in fq_dequeue() */ + + struct fq_perband_flows band_flows[FQ_BANDS]; + + struct fq_flow internal; /* fastpath queue. */ + struct rb_root delayed; /* for rate limited flows */ + u64 time_next_delayed_flow; + unsigned long unthrottle_latency_ns; + + u32 band_pkt_count[FQ_BANDS]; u32 flows; - u32 inactive_flows; + u32 inactive_flows; /* Flows with no packet to send. */ u32 throttled_flows; - u64 stat_gc_flows; - u64 stat_internal_packets; u64 stat_throttled; + struct qdisc_watchdog watchdog; + u64 stat_gc_flows; + +/* Seldom used fields. */ + + u64 stat_band_drops[FQ_BANDS]; u64 stat_ce_mark; u64 stat_horizon_drops; u64 stat_horizon_caps; u64 stat_flows_plimit; u64 stat_pkts_too_long; u64 stat_allocation_errors; - - u32 timer_slack; /* hrtimer slack in ns */ - struct qdisc_watchdog watchdog; }; +/* return the i-th 2-bit value ("crumb") */ +static u8 fq_prio2band(const u8 *prio2band, unsigned int prio) +{ + return (prio2band[prio / 4] >> (2 * (prio & 0x3))) & 0x3; +} + /* * f->tail and f->age share the same location. * We can use the low order bit to differentiate if this location points @@ -159,8 +186,19 @@ static bool fq_flow_is_throttled(const struct fq_flow *f) return f->next == &throttled; } -static void fq_flow_add_tail(struct fq_flow_head *head, struct fq_flow *flow) +enum new_flow { + NEW_FLOW, + OLD_FLOW +}; + +static void fq_flow_add_tail(struct fq_sched_data *q, struct fq_flow *flow, + enum new_flow list_sel) { + struct fq_perband_flows *pband = &q->band_flows[flow->band]; + struct fq_flow_head *head = (list_sel == NEW_FLOW) ? 
+ &pband->new_flows : + &pband->old_flows; + if (head->first) head->last->next = flow; else @@ -173,7 +211,7 @@ static void fq_flow_unset_throttled(struct fq_sched_data *q, struct fq_flow *f) { rb_erase(&f->rate_node, &q->delayed); q->throttled_flows--; - fq_flow_add_tail(&q->old_flows, f); + fq_flow_add_tail(q, f, OLD_FLOW); } static void fq_flow_set_throttled(struct fq_sched_data *q, struct fq_flow *f) @@ -258,17 +296,61 @@ static void fq_gc(struct fq_sched_data *q, kmem_cache_free_bulk(fq_flow_cachep, fcnt, tofree); } -static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q) +/* Fast path can be used if : + * 1) Packet tstamp is in the past. + * 2) FQ qlen == 0 OR + * (no flow is currently eligible for transmit, + * AND fast path queue has less than 8 packets) + * 3) No SO_MAX_PACING_RATE on the socket (if any). + * 4) No @maxrate attribute on this qdisc, + * + * FQ can not use generic TCQ_F_CAN_BYPASS infrastructure. + */ +static bool fq_fastpath_check(const struct Qdisc *sch, struct sk_buff *skb, + u64 now) { + const struct fq_sched_data *q = qdisc_priv(sch); + const struct sock *sk; + + if (fq_skb_cb(skb)->time_to_send > now) + return false; + + if (sch->q.qlen != 0) { + /* Even if some packets are stored in this qdisc, + * we can still enable fast path if all of them are + * scheduled in the future (ie no flows are eligible) + * or in the fast path queue. + */ + if (q->flows != q->inactive_flows + q->throttled_flows) + return false; + + /* Do not allow fast path queue to explode, we want Fair Queue mode + * under pressure. + */ + if (q->internal.qlen >= 8) + return false; + } + + sk = skb->sk; + if (sk && sk_fullsock(sk) && !sk_is_tcp(sk) && + sk->sk_max_pacing_rate != ~0UL) + return false; + + if (q->flow_max_rate != ~0UL) + return false; + + return true; +} + +static struct fq_flow *fq_classify(struct Qdisc *sch, struct sk_buff *skb, + u64 now) +{ + struct fq_sched_data *q = qdisc_priv(sch); struct rb_node **p, *parent; struct sock *sk = skb->sk; struct rb_root *root; struct fq_flow *f; - /* warning: no starvation prevention... 
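
[editor's note] Restating fq_fastpath_check() above in user-space form. The socket-side test is simplified here: the real code only applies the pacing-rate check to full non-TCP sockets, since FQ paces TCP itself. The parameters mirror the qdisc fields and the function name is a stand-in:

#include <stdbool.h>
#include <stdint.h>

#define UNLIMITED (~0UL)

/* A packet may bypass flow classification only if it is due now, the
 * qdisc is effectively idle (or only throttled/empty flows remain and
 * the fastpath queue is short), and neither the socket nor the qdisc
 * sets a pacing cap. */
static bool fastpath_ok(uint64_t time_to_send, uint64_t now,
			unsigned int qlen, unsigned int flows,
			unsigned int inactive, unsigned int throttled,
			unsigned int internal_qlen,
			unsigned long sk_max_pacing_rate,
			unsigned long flow_max_rate)
{
	if (time_to_send > now)
		return false;			/* scheduled in the future */
	if (qlen != 0) {
		if (flows != inactive + throttled)
			return false;		/* some flow could transmit */
		if (internal_qlen >= 8)
			return false;		/* keep fastpath queue short */
	}
	if (sk_max_pacing_rate != UNLIMITED)
		return false;			/* SO_MAX_PACING_RATE set */
	if (flow_max_rate != UNLIMITED)
		return false;			/* qdisc maxrate set */
	return true;
}

int main(void)
{
	return fastpath_ok(0, 1, 0, 0, 0, 0, 0, UNLIMITED, UNLIMITED) ? 0 : 1;
}
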
*/ - if (unlikely((skb->priority & TC_PRIO_MAX) == TC_PRIO_CONTROL)) - return &q->internal; - /* SYNACK messages are attached to a TCP_NEW_SYN_RECV request socket * or a listener (SYNCOOKIE mode) * 1) request sockets are not full blown, @@ -299,11 +381,18 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q) sk = (struct sock *)((hash << 1) | 1UL); } + if (fq_fastpath_check(sch, skb, now)) { + q->internal.stat_fastpath_packets++; + if (skb->sk == sk && q->rate_enable && + READ_ONCE(sk->sk_pacing_status) != SK_PACING_FQ) + smp_store_release(&sk->sk_pacing_status, + SK_PACING_FQ); + return &q->internal; + } + root = &q->fq_root[hash_ptr(sk, q->fq_trees_log)]; - if (q->flows >= (2U << q->fq_trees_log) && - q->inactive_flows > q->flows/2) - fq_gc(q, root, sk); + fq_gc(q, root, sk); p = &root->rb_node; parent = NULL; @@ -396,7 +485,6 @@ static void fq_dequeue_skb(struct Qdisc *sch, struct fq_flow *flow, { fq_erase_head(sch, flow, skb); skb_mark_not_on_list(skb); - flow->qlen--; qdisc_qstats_backlog_dec(sch, skb); sch->q.qlen--; } @@ -434,9 +522,9 @@ static void flow_queue_add(struct fq_flow *flow, struct sk_buff *skb) } static bool fq_packet_beyond_horizon(const struct sk_buff *skb, - const struct fq_sched_data *q) + const struct fq_sched_data *q, u64 now) { - return unlikely((s64)skb->tstamp > (s64)(q->ktime_cache + q->horizon)); + return unlikely((s64)skb->tstamp > (s64)(now + q->horizon)); } static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch, @@ -444,53 +532,57 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch, { struct fq_sched_data *q = qdisc_priv(sch); struct fq_flow *f; + u64 now; + u8 band; - if (unlikely(sch->q.qlen >= sch->limit)) + band = fq_prio2band(q->prio2band, skb->priority & TC_PRIO_MAX); + if (unlikely(q->band_pkt_count[band] >= sch->limit)) { + q->stat_band_drops[band]++; return qdisc_drop(skb, sch, to_free); + } + now = ktime_get_ns(); if (!skb->tstamp) { - fq_skb_cb(skb)->time_to_send = q->ktime_cache = ktime_get_ns(); + fq_skb_cb(skb)->time_to_send = now; } else { - /* Check if packet timestamp is too far in the future. - * Try first if our cached value, to avoid ktime_get_ns() - * cost in most cases. - */ - if (fq_packet_beyond_horizon(skb, q)) { - /* Refresh our cache and check another time */ - q->ktime_cache = ktime_get_ns(); - if (fq_packet_beyond_horizon(skb, q)) { - if (q->horizon_drop) { + /* Check if packet timestamp is too far in the future. 
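
[editor's note] fq_enqueue() above derives the band with fq_prio2band(), reading a 2-bit "crumb" out of the packed priomap. A self-contained sketch of the pack/unpack pair, mirroring fq_prio2band_compress_crumb() further down; the map contents are arbitrary:

#include <assert.h>
#include <stdint.h>

#define PRIO_MAX 15	/* TC_PRIO_MAX in the kernel */

/* Pack 16 values, each 0..3, into 4 bytes of 2-bit crumbs. The caller
 * must pass a zeroed output buffer, as the kernel version does. */
static void pack(const uint8_t *in, uint8_t *out)
{
	for (int i = 0; i <= PRIO_MAX; i++)
		out[i / 4] |= in[i] << (2 * (i & 0x3));
}

/* Read the i-th crumb back, as fq_prio2band() does. */
static uint8_t unpack(const uint8_t *packed, unsigned int prio)
{
	return (packed[prio / 4] >> (2 * (prio & 0x3))) & 0x3;
}

int main(void)
{
	uint8_t map[PRIO_MAX + 1] = { [0] = 1, [7] = 2, [15] = 2 };
	uint8_t packed[4] = { 0 };

	pack(map, packed);
	for (int i = 0; i <= PRIO_MAX; i++)
		assert(unpack(packed, (unsigned int)i) == map[i]);
	return 0;
}

This shrinks the 16-byte prio-to-band map to 4 bytes, which matters because it sits in the read-mostly cache line of fq_sched_data consulted on every enqueue.
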
*/ + if (fq_packet_beyond_horizon(skb, q, now)) { + if (q->horizon_drop) { q->stat_horizon_drops++; return qdisc_drop(skb, sch, to_free); - } - q->stat_horizon_caps++; - skb->tstamp = q->ktime_cache + q->horizon; } + q->stat_horizon_caps++; + skb->tstamp = now + q->horizon; } fq_skb_cb(skb)->time_to_send = skb->tstamp; } - f = fq_classify(skb, q); - if (unlikely(f->qlen >= q->flow_plimit && f != &q->internal)) { - q->stat_flows_plimit++; - return qdisc_drop(skb, sch, to_free); - } + f = fq_classify(sch, skb, now); - f->qlen++; - qdisc_qstats_backlog_inc(sch, skb); - if (fq_flow_is_detached(f)) { - fq_flow_add_tail(&q->new_flows, f); - if (time_after(jiffies, f->age + q->flow_refill_delay)) - f->credit = max_t(u32, f->credit, q->quantum); - q->inactive_flows--; + if (f != &q->internal) { + if (unlikely(f->qlen >= q->flow_plimit)) { + q->stat_flows_plimit++; + return qdisc_drop(skb, sch, to_free); + } + + if (fq_flow_is_detached(f)) { + fq_flow_add_tail(q, f, NEW_FLOW); + if (time_after(jiffies, f->age + q->flow_refill_delay)) + f->credit = max_t(u32, f->credit, q->quantum); + } + + f->band = band; + q->band_pkt_count[band]++; + fq_skb_cb(skb)->band = band; + if (f->qlen == 0) + q->inactive_flows--; } + f->qlen++; /* Note: this overwrites f->age */ flow_queue_add(f, skb); - if (unlikely(f == &q->internal)) { - q->stat_internal_packets++; - } + qdisc_qstats_backlog_inc(sch, skb); sch->q.qlen++; return NET_XMIT_SUCCESS; @@ -523,13 +615,26 @@ static void fq_check_throttled(struct fq_sched_data *q, u64 now) } } +static struct fq_flow_head *fq_pband_head_select(struct fq_perband_flows *pband) +{ + if (pband->credit <= 0) + return NULL; + + if (pband->new_flows.first) + return &pband->new_flows; + + return pband->old_flows.first ? &pband->old_flows : NULL; +} + static struct sk_buff *fq_dequeue(struct Qdisc *sch) { struct fq_sched_data *q = qdisc_priv(sch); + struct fq_perband_flows *pband; struct fq_flow_head *head; struct sk_buff *skb; struct fq_flow *f; unsigned long rate; + int retry; u32 plen; u64 now; @@ -538,30 +643,38 @@ static struct sk_buff *fq_dequeue(struct Qdisc *sch) skb = fq_peek(&q->internal); if (unlikely(skb)) { + q->internal.qlen--; fq_dequeue_skb(sch, &q->internal, skb); goto out; } - q->ktime_cache = now = ktime_get_ns(); + now = ktime_get_ns(); fq_check_throttled(q, now); + retry = 0; + pband = &q->band_flows[q->band_nr]; begin: - head = &q->new_flows; - if (!head->first) { - head = &q->old_flows; - if (!head->first) { - if (q->time_next_delayed_flow != ~0ULL) - qdisc_watchdog_schedule_range_ns(&q->watchdog, + head = fq_pband_head_select(pband); + if (!head) { + while (++retry <= FQ_BANDS) { + if (++q->band_nr == FQ_BANDS) + q->band_nr = 0; + pband = &q->band_flows[q->band_nr]; + pband->credit = min(pband->credit + pband->quantum, + pband->quantum); + goto begin; + } + if (q->time_next_delayed_flow != ~0ULL) + qdisc_watchdog_schedule_range_ns(&q->watchdog, q->time_next_delayed_flow, q->timer_slack); - return NULL; - } + return NULL; } f = head->first; - + retry = 0; if (f->credit <= 0) { f->credit += q->quantum; head->first = f->next; - fq_flow_add_tail(&q->old_flows, f); + fq_flow_add_tail(q, f, OLD_FLOW); goto begin; } @@ -581,20 +694,23 @@ begin: INET_ECN_set_ce(skb); q->stat_ce_mark++; } + if (--f->qlen == 0) + q->inactive_flows++; + q->band_pkt_count[fq_skb_cb(skb)->band]--; fq_dequeue_skb(sch, f, skb); } else { head->first = f->next; /* force a pass through old_flows to prevent starvation */ - if ((head == &q->new_flows) && q->old_flows.first) { - 
fq_flow_add_tail(&q->old_flows, f); + if (head == &pband->new_flows) { + fq_flow_add_tail(q, f, OLD_FLOW); } else { fq_flow_set_detached(f); - q->inactive_flows++; } goto begin; } plen = qdisc_pkt_len(skb); f->credit -= plen; + pband->credit -= plen; if (!q->rate_enable) goto out; @@ -607,7 +723,7 @@ begin: */ if (!skb->tstamp) { if (skb->sk) - rate = min(skb->sk->sk_pacing_rate, rate); + rate = min(READ_ONCE(skb->sk->sk_pacing_rate), rate); if (rate <= q->low_rate_threshold) { f->credit = 0; @@ -686,8 +802,10 @@ static void fq_reset(struct Qdisc *sch) kmem_cache_free(fq_flow_cachep, f); } } - q->new_flows.first = NULL; - q->old_flows.first = NULL; + for (idx = 0; idx < FQ_BANDS; idx++) { + q->band_flows[idx].new_flows.first = NULL; + q->band_flows[idx].old_flows.first = NULL; + } q->delayed = RB_ROOT; q->flows = 0; q->inactive_flows = 0; @@ -779,7 +897,7 @@ static int fq_resize(struct Qdisc *sch, u32 log) return 0; } -static struct netlink_range_validation iq_range = { +static const struct netlink_range_validation iq_range = { .max = INT_MAX, }; @@ -801,8 +919,71 @@ static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = { [TCA_FQ_TIMER_SLACK] = { .type = NLA_U32 }, [TCA_FQ_HORIZON] = { .type = NLA_U32 }, [TCA_FQ_HORIZON_DROP] = { .type = NLA_U8 }, + [TCA_FQ_PRIOMAP] = NLA_POLICY_EXACT_LEN(sizeof(struct tc_prio_qopt)), + [TCA_FQ_WEIGHTS] = NLA_POLICY_EXACT_LEN(FQ_BANDS * sizeof(s32)), }; +/* compress a u8 array with all elems <= 3 to an array of 2-bit fields */ +static void fq_prio2band_compress_crumb(const u8 *in, u8 *out) +{ + const int num_elems = TC_PRIO_MAX + 1; + int i; + + memset(out, 0, num_elems / 4); + for (i = 0; i < num_elems; i++) + out[i / 4] |= in[i] << (2 * (i & 0x3)); +} + +static void fq_prio2band_decompress_crumb(const u8 *in, u8 *out) +{ + const int num_elems = TC_PRIO_MAX + 1; + int i; + + for (i = 0; i < num_elems; i++) + out[i] = fq_prio2band(in, i); +} + +static int fq_load_weights(struct fq_sched_data *q, + const struct nlattr *attr, + struct netlink_ext_ack *extack) +{ + s32 *weights = nla_data(attr); + int i; + + for (i = 0; i < FQ_BANDS; i++) { + if (weights[i] < FQ_MIN_WEIGHT) { + NL_SET_ERR_MSG_FMT_MOD(extack, "Weight %d less that minimum allowed %d", + weights[i], FQ_MIN_WEIGHT); + return -EINVAL; + } + } + for (i = 0; i < FQ_BANDS; i++) + q->band_flows[i].quantum = weights[i]; + return 0; +} + +static int fq_load_priomap(struct fq_sched_data *q, + const struct nlattr *attr, + struct netlink_ext_ack *extack) +{ + const struct tc_prio_qopt *map = nla_data(attr); + int i; + + if (map->bands != FQ_BANDS) { + NL_SET_ERR_MSG_MOD(extack, "FQ only supports 3 bands"); + return -EINVAL; + } + for (i = 0; i < TC_PRIO_MAX + 1; i++) { + if (map->priomap[i] >= FQ_BANDS) { + NL_SET_ERR_MSG_FMT_MOD(extack, "FQ priomap field %d maps to a too high band %d", + i, map->priomap[i]); + return -EINVAL; + } + } + fq_prio2band_compress_crumb(map->priomap, q->prio2band); + return 0; +} + static int fq_change(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { @@ -877,6 +1058,12 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt, q->flow_refill_delay = usecs_to_jiffies(usecs_delay); } + if (!err && tb[TCA_FQ_PRIOMAP]) + err = fq_load_priomap(q, tb[TCA_FQ_PRIOMAP], extack); + + if (!err && tb[TCA_FQ_WEIGHTS]) + err = fq_load_weights(q, tb[TCA_FQ_WEIGHTS], extack); + if (tb[TCA_FQ_ORPHAN_MASK]) q->orphan_mask = nla_get_u32(tb[TCA_FQ_ORPHAN_MASK]); @@ -928,7 +1115,7 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack 
*extack) { struct fq_sched_data *q = qdisc_priv(sch); - int err; + int i, err; sch->limit = 10000; q->flow_plimit = 100; @@ -938,8 +1125,13 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt, q->flow_max_rate = ~0UL; q->time_next_delayed_flow = ~0ULL; q->rate_enable = 1; - q->new_flows.first = NULL; - q->old_flows.first = NULL; + for (i = 0; i < FQ_BANDS; i++) { + q->band_flows[i].new_flows.first = NULL; + q->band_flows[i].old_flows.first = NULL; + } + q->band_flows[0].quantum = 9 << 16; + q->band_flows[1].quantum = 3 << 16; + q->band_flows[2].quantum = 1 << 16; q->delayed = RB_ROOT; q->fq_root = NULL; q->fq_trees_log = ilog2(1024); @@ -954,6 +1146,7 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt, /* Default ce_threshold of 4294 seconds */ q->ce_threshold = (u64)NSEC_PER_USEC * ~0U; + fq_prio2band_compress_crumb(sch_default_prio2band, q->prio2band); qdisc_watchdog_init_clockid(&q->watchdog, sch, CLOCK_MONOTONIC); if (opt) @@ -968,8 +1161,12 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb) { struct fq_sched_data *q = qdisc_priv(sch); u64 ce_threshold = q->ce_threshold; + struct tc_prio_qopt prio = { + .bands = FQ_BANDS, + }; u64 horizon = q->horizon; struct nlattr *opts; + s32 weights[3]; opts = nla_nest_start_noflag(skb, TCA_OPTIONS); if (opts == NULL) @@ -999,6 +1196,16 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb) nla_put_u8(skb, TCA_FQ_HORIZON_DROP, q->horizon_drop)) goto nla_put_failure; + fq_prio2band_decompress_crumb(q->prio2band, prio.priomap); + if (nla_put(skb, TCA_FQ_PRIOMAP, sizeof(prio), &prio)) + goto nla_put_failure; + + weights[0] = q->band_flows[0].quantum; + weights[1] = q->band_flows[1].quantum; + weights[2] = q->band_flows[2].quantum; + if (nla_put(skb, TCA_FQ_WEIGHTS, sizeof(weights), &weights)) + goto nla_put_failure; + return nla_nest_end(skb, opts); nla_put_failure: @@ -1009,11 +1216,15 @@ static int fq_dump_stats(struct Qdisc *sch, struct gnet_dump *d) { struct fq_sched_data *q = qdisc_priv(sch); struct tc_fq_qd_stats st; + int i; + + st.pad = 0; sch_tree_lock(sch); st.gc_flows = q->stat_gc_flows; - st.highprio_packets = q->stat_internal_packets; + st.highprio_packets = 0; + st.fastpath_packets = q->internal.stat_fastpath_packets; st.tcp_retrans = 0; st.throttled = q->stat_throttled; st.flows_plimit = q->stat_flows_plimit; @@ -1029,6 +1240,10 @@ static int fq_dump_stats(struct Qdisc *sch, struct gnet_dump *d) st.ce_mark = q->stat_ce_mark; st.horizon_drops = q->stat_horizon_drops; st.horizon_caps = q->stat_horizon_caps; + for (i = 0; i < FQ_BANDS; i++) { + st.band_drops[i] = q->stat_band_drops[i]; + st.band_pkt_count[i] = q->band_pkt_count[i]; + } sch_tree_unlock(sch); return gnet_stats_copy_app(d, &st, sizeof(st)); @@ -1056,7 +1271,7 @@ static int __init fq_module_init(void) fq_flow_cachep = kmem_cache_create("fq_flow_cache", sizeof(struct fq_flow), - 0, 0, NULL); + 0, SLAB_HWCACHE_ALIGN, NULL); if (!fq_flow_cachep) return -ENOMEM; diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c index 68e6acd0f1..5b595773e5 100644 --- a/net/sched/sch_fq_pie.c +++ b/net/sched/sch_fq_pie.c @@ -202,7 +202,7 @@ out: return NET_XMIT_CN; } -static struct netlink_range_validation fq_pie_q_range = { +static const struct netlink_range_validation fq_pie_q_range = { .min = 1, .max = 1 << 20, }; diff --git a/net/sched/sch_frag.c b/net/sched/sch_frag.c index a9bd0a2358..ce63414185 100644 --- a/net/sched/sch_frag.c +++ b/net/sched/sch_frag.c @@ -96,7 +96,7 @@ static int sch_fragment(struct net *net, struct sk_buff *skb, unsigned long 
orig_dst; sch_frag_prepare_frag(skb, xmit); - dst_init(&sch_frag_rt.dst, &sch_frag_dst_ops, NULL, 1, + dst_init(&sch_frag_rt.dst, &sch_frag_dst_ops, NULL, DST_OBSOLETE_NONE, DST_NOCOUNT); sch_frag_rt.dst.dev = skb->dev; @@ -112,7 +112,7 @@ static int sch_fragment(struct net *net, struct sk_buff *skb, sch_frag_prepare_frag(skb, xmit); memset(&sch_frag_rt, 0, sizeof(sch_frag_rt)); - dst_init(&sch_frag_rt.dst, &sch_frag_dst_ops, NULL, 1, + dst_init(&sch_frag_rt.dst, &sch_frag_dst_ops, NULL, DST_OBSOLETE_NONE, DST_NOCOUNT); sch_frag_rt.dst.dev = skb->dev; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 5d7e23f4cc..4195a4bc26 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -694,9 +694,10 @@ struct Qdisc_ops noqueue_qdisc_ops __read_mostly = { .owner = THIS_MODULE, }; -static const u8 prio2band[TC_PRIO_MAX + 1] = { - 1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1 +const u8 sch_default_prio2band[TC_PRIO_MAX + 1] = { + 1, 2, 2, 2, 1, 2, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 }; +EXPORT_SYMBOL(sch_default_prio2band); /* 3-band FIFO queue: old style, but should be a bit faster than generic prio+fifo combination. @@ -721,7 +722,7 @@ static inline struct skb_array *band2list(struct pfifo_fast_priv *priv, static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc, struct sk_buff **to_free) { - int band = prio2band[skb->priority & TC_PRIO_MAX]; + int band = sch_default_prio2band[skb->priority & TC_PRIO_MAX]; struct pfifo_fast_priv *priv = qdisc_priv(qdisc); struct skb_array *q = band2list(priv, band); unsigned int pkt_len = qdisc_pkt_len(skb); @@ -830,7 +831,7 @@ static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb) { struct tc_prio_qopt opt = { .bands = PFIFO_FAST_BANDS }; - memcpy(&opt.priomap, prio2band, TC_PRIO_MAX + 1); + memcpy(&opt.priomap, sch_default_prio2band, TC_PRIO_MAX + 1); if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt)) goto nla_put_failure; return skb->len; diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index 872d127c9d..8c61eb3dc9 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -945,3 +945,4 @@ module_init(gred_module_init) module_exit(gred_module_exit) MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Generic Random Early Detection qdisc"); diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 880c5f16b2..16c45da403 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1693,5 +1693,6 @@ hfsc_cleanup(void) } MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Hierarchical Fair Service Curve scheduler"); module_init(hfsc_init); module_exit(hfsc_cleanup); diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 0d947414e6..7349233eaa 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -2179,3 +2179,4 @@ static void __exit htb_module_exit(void) module_init(htb_module_init) module_exit(htb_module_exit) MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Hierarchical Token Bucket scheduler"); diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index a463a63192..5fa9eaa79b 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -370,3 +370,4 @@ module_exit(ingress_module_exit); MODULE_ALIAS("sch_clsact"); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Ingress and clsact based ingress and egress qdiscs"); diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index 793009f445..43e53ee00a 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -789,3 +789,4 @@ module_init(mqprio_module_init); module_exit(mqprio_module_exit); MODULE_LICENSE("GPL"); 
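
The pfifo_fast and fq hunks above both consume the newly exported sch_default_prio2band[] table (skb->priority & TC_PRIO_MAX indexes it, TC_PRIO_MAX being 15), and the fq changes additionally store the map packed four 2-bit entries per byte via fq_prio2band_compress_crumb(). Below is a standalone userspace C sketch, not kernel code, that mirrors those two helpers to show how the 16-entry map fits in 4 bytes; all names in the example are local to it.

#include <stdio.h>
#include <string.h>

#define TC_PRIO_MAX 15

/* default priority-to-band map, as exported by sch_generic.c above */
static const unsigned char default_prio2band[TC_PRIO_MAX + 1] = {
	1, 2, 2, 2, 1, 2, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1
};

/* pack 16 values, each <= 3, into 16 * 2 bits = 4 bytes,
 * mirroring fq_prio2band_compress_crumb() from the patch */
static void compress_crumb(const unsigned char *in, unsigned char *out)
{
	int i;

	memset(out, 0, (TC_PRIO_MAX + 1) / 4);
	for (i = 0; i <= TC_PRIO_MAX; i++)
		out[i / 4] |= in[i] << (2 * (i & 0x3));
}

/* fetch one 2-bit field back out, mirroring fq_prio2band() */
static unsigned char crumb_to_band(const unsigned char *crumbs,
				   unsigned int prio)
{
	return (crumbs[prio / 4] >> (2 * (prio & 0x3))) & 0x3;
}

int main(void)
{
	unsigned char crumbs[4];
	unsigned int prio;

	compress_crumb(default_prio2band, crumbs);
	for (prio = 0; prio <= TC_PRIO_MAX; prio++)
		printf("prio %2u -> band %u\n", prio,
		       crumb_to_band(crumbs, prio));
	return 0;
}

Round-tripping every priority through compress_crumb() and crumb_to_band() reproduces the original table, which is exactly the property fq_prio2band_decompress_crumb() relies on when dumping the qdisc configuration.
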
+MODULE_DESCRIPTION("Classful multiqueue prio qdisc"); diff --git a/net/sched/sch_mqprio_lib.c b/net/sched/sch_mqprio_lib.c index 83b3793c40..b3a5572c16 100644 --- a/net/sched/sch_mqprio_lib.c +++ b/net/sched/sch_mqprio_lib.c @@ -129,3 +129,4 @@ void mqprio_fp_to_offload(u32 fp[TC_QOPT_MAX_QUEUE], EXPORT_SYMBOL_GPL(mqprio_fp_to_offload); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Shared mqprio qdisc code currently between taprio and mqprio"); diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index 75c9c86018..d66d5f0ec0 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -410,3 +410,4 @@ module_init(multiq_module_init) module_exit(multiq_module_exit) MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Multi queue to hardware queue mapping qdisc"); diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 4ad39a4a3c..fa678eb885 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -67,7 +67,7 @@ struct disttable { u32 size; - s16 table[]; + s16 table[] __counted_by(size); }; struct netem_sched_data { @@ -1307,3 +1307,4 @@ static void __exit netem_module_exit(void) module_init(netem_module_init) module_exit(netem_module_exit) MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Network characteristics emulator qdisc"); diff --git a/net/sched/sch_plug.c b/net/sched/sch_plug.c index 35f49edf63..992f0c8d79 100644 --- a/net/sched/sch_plug.c +++ b/net/sched/sch_plug.c @@ -226,3 +226,4 @@ static void __exit plug_module_exit(void) module_init(plug_module_init) module_exit(plug_module_exit) MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Qdisc to plug and unplug traffic via netlink control"); diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index fdc5ef52c3..8ecdd3ef6f 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -433,3 +433,4 @@ module_init(prio_module_init) module_exit(prio_module_exit) MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Simple 3-band priority qdisc"); diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 546c10adca..48a604c320 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -213,7 +213,7 @@ static struct qfq_class *qfq_find_class(struct Qdisc *sch, u32 classid) return container_of(clc, struct qfq_class, common); } -static struct netlink_range_validation lmax_range = { +static const struct netlink_range_validation lmax_range = { .min = QFQ_MIN_LMAX, .max = QFQ_MAX_LMAX, }; @@ -1003,7 +1003,7 @@ static inline struct sk_buff *qfq_peek_skb(struct qfq_aggregate *agg, *cl = list_first_entry(&agg->active, struct qfq_class, alist); skb = (*cl)->qdisc->ops->peek((*cl)->qdisc); if (skb == NULL) - WARN_ONCE(1, "qfq_dequeue: non-workconserving leaf\n"); + qdisc_warn_nonwc("qfq_dequeue", (*cl)->qdisc); else *len = qdisc_pkt_len(skb); @@ -1535,3 +1535,4 @@ static void __exit qfq_exit(void) module_init(qfq_init); module_exit(qfq_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Quick Fair Queueing Plus qdisc"); diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 16277b6a02..607b6c8b3a 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -563,3 +563,4 @@ module_init(red_module_init) module_exit(red_module_exit) MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Random Early Detection qdisc"); diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 66dcb18638..eb77558fa3 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -937,3 +937,4 @@ static void __exit sfq_module_exit(void) module_init(sfq_module_init) module_exit(sfq_module_exit) MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Stochastic Fairness 
qdisc"); diff --git a/net/sched/sch_skbprio.c b/net/sched/sch_skbprio.c index 5df2dacb7b..28beb11762 100644 --- a/net/sched/sch_skbprio.c +++ b/net/sched/sch_skbprio.c @@ -307,3 +307,4 @@ module_init(skbprio_module_init) module_exit(skbprio_module_exit) MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("SKB priority based scheduling qdisc"); diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 1cb5e41c0e..31a8252bd0 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -1015,7 +1015,7 @@ static const struct nla_policy taprio_tc_policy[TCA_TAPRIO_TC_ENTRY_MAX + 1] = { TC_FP_PREEMPTIBLE), }; -static struct netlink_range_validation_signed taprio_cycle_time_range = { +static const struct netlink_range_validation_signed taprio_cycle_time_range = { .min = 0, .max = INT_MAX, }; @@ -2572,3 +2572,4 @@ static void __exit taprio_module_exit(void) module_init(taprio_module_init); module_exit(taprio_module_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Time Aware Priority qdisc"); diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 17d2d00ddb..dd6b1a723b 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -621,3 +621,4 @@ static void __exit tbf_module_exit(void) module_init(tbf_module_init) module_exit(tbf_module_exit) MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Token Bucket Filter qdisc"); diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 7721239c18..59304611dc 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -523,3 +523,4 @@ module_init(teql_init); module_exit(teql_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("True (or trivial) link equalizer qdisc"); diff --git a/net/sctp/diag.c b/net/sctp/diag.c index c3d6b92dd3..eb05131ff1 100644 --- a/net/sctp/diag.c +++ b/net/sctp/diag.c @@ -527,4 +527,5 @@ static void __exit sctp_diag_exit(void) module_init(sctp_diag_init); module_exit(sctp_diag_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("SCTP socket monitoring via SOCK_DIAG"); MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-132); diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 43f2731bf5..24368f755a 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -128,7 +128,6 @@ static void sctp_v6_err_handle(struct sctp_transport *t, struct sk_buff *skb, { struct sctp_association *asoc = t->asoc; struct sock *sk = asoc->base.sk; - struct ipv6_pinfo *np; int err = 0; switch (type) { @@ -149,9 +148,8 @@ static void sctp_v6_err_handle(struct sctp_transport *t, struct sk_buff *skb, break; } - np = inet6_sk(sk); icmpv6_err_convert(type, code, &err); - if (!sock_owned_by_user(sk) && np->recverr) { + if (!sock_owned_by_user(sk) && inet6_test_bit(RECVERR6, sk)) { sk->sk_err = err; sk_error_report(sk); } else { @@ -249,7 +247,7 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *t) rcu_read_lock(); res = ip6_xmit(sk, skb, fl6, sk->sk_mark, rcu_dereference(np->opt), - tclass, sk->sk_priority); + tclass, READ_ONCE(sk->sk_priority)); rcu_read_unlock(); return res; } @@ -298,7 +296,8 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr, if (t->flowlabel & SCTP_FLOWLABEL_SET_MASK) fl6->flowlabel = htonl(t->flowlabel & SCTP_FLOWLABEL_VAL_MASK); - if (np->sndflow && (fl6->flowlabel & IPV6_FLOWLABEL_MASK)) { + if (inet6_test_bit(SNDFLOW, sk) && + (fl6->flowlabel & IPV6_FLOWLABEL_MASK)) { struct ip6_flowlabel *flowlabel; flowlabel = fl6_sock_lookup(sk, fl6->flowlabel); diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 2185f44198..94c6dd53cd 100644 --- a/net/sctp/protocol.c +++ 
b/net/sctp/protocol.c @@ -426,7 +426,7 @@ static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr, struct dst_entry *dst = NULL; union sctp_addr *daddr = &t->ipaddr; union sctp_addr dst_saddr; - __u8 tos = inet_sk(sk)->tos; + u8 tos = READ_ONCE(inet_sk(sk)->tos); if (t->dscp & SCTP_DSCP_SET_MASK) tos = t->dscp & SCTP_DSCP_VAL_MASK; @@ -1057,7 +1057,7 @@ static inline int sctp_v4_xmit(struct sk_buff *skb, struct sctp_transport *t) struct flowi4 *fl4 = &t->fl.u.ip4; struct sock *sk = skb->sk; struct inet_sock *inet = inet_sk(sk); - __u8 dscp = inet->tos; + __u8 dscp = READ_ONCE(inet->tos); __be16 df = 0; pr_debug("%s: skb:%p, len:%d, src:%pI4, dst:%pI4\n", __func__, skb, diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 08527d882e..f80208edd6 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -3303,7 +3303,7 @@ struct sctp_chunk *sctp_process_asconf(struct sctp_association *asoc, /* Process the TLVs contained within the ASCONF chunk. */ sctp_walk_params(param, addip) { - /* Skip preceeding address parameters. */ + /* Skip preceding address parameters. */ if (param.p->type == SCTP_PARAM_IPV4_ADDRESS || param.p->type == SCTP_PARAM_IPV6_ADDRESS) continue; diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index ef5b5d498e..73eebddbbf 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -493,7 +493,7 @@ static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk, nsk->sk_sndtimeo = osk->sk_sndtimeo; nsk->sk_rcvtimeo = osk->sk_rcvtimeo; nsk->sk_mark = READ_ONCE(osk->sk_mark); - nsk->sk_priority = osk->sk_priority; + nsk->sk_priority = READ_ONCE(osk->sk_priority); nsk->sk_rcvlowat = osk->sk_rcvlowat; nsk->sk_bound_dev_if = osk->sk_bound_dev_if; nsk->sk_err = osk->sk_err; diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index 72f4d81a3f..1489a8421d 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -155,10 +155,12 @@ static int smc_clc_ueid_remove(char *ueid) rc = 0; } } +#if IS_ENABLED(CONFIG_S390) if (!rc && !smc_clc_eid_table.ueid_cnt) { smc_clc_eid_table.seid_enabled = 1; rc = -EAGAIN; /* indicate success and enabling of seid */ } +#endif write_unlock(&smc_clc_eid_table.lock); return rc; } @@ -273,22 +275,30 @@ err: int smc_nl_enable_seid(struct sk_buff *skb, struct genl_info *info) { +#if IS_ENABLED(CONFIG_S390) write_lock(&smc_clc_eid_table.lock); smc_clc_eid_table.seid_enabled = 1; write_unlock(&smc_clc_eid_table.lock); return 0; +#else + return -EOPNOTSUPP; +#endif } int smc_nl_disable_seid(struct sk_buff *skb, struct genl_info *info) { int rc = 0; +#if IS_ENABLED(CONFIG_S390) write_lock(&smc_clc_eid_table.lock); if (!smc_clc_eid_table.ueid_cnt) rc = -ENOENT; else smc_clc_eid_table.seid_enabled = 0; write_unlock(&smc_clc_eid_table.lock); +#else + rc = -EOPNOTSUPP; +#endif return rc; } @@ -1269,7 +1279,11 @@ void __init smc_clc_init(void) INIT_LIST_HEAD(&smc_clc_eid_table.list); rwlock_init(&smc_clc_eid_table.lock); smc_clc_eid_table.ueid_cnt = 0; +#if IS_ENABLED(CONFIG_S390) smc_clc_eid_table.seid_enabled = 1; +#else + smc_clc_eid_table.seid_enabled = 0; +#endif } void smc_clc_exit(void) diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c index 37833b96b5..fb9e5cc128 100644 --- a/net/smc/smc_diag.c +++ b/net/smc/smc_diag.c @@ -267,5 +267,6 @@ static void __exit smc_diag_exit(void) module_init(smc_diag_init); module_exit(smc_diag_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("SMC socket monitoring via SOCK_DIAG"); MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 43 /* AF_SMC */); 
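
Several hunks in this area (sk_priority in af_smc, inet->tos and sk_priority in sctp) wrap reads in READ_ONCE() because another thread may update those fields without holding the socket lock. A minimal userspace sketch of the idiom follows, assuming the common one-line volatile-cast definitions of READ_ONCE/WRITE_ONCE; the kernel's real macros also handle access sizes and other checks, so this is a simplification.

#include <stdio.h>

#define READ_ONCE(x)	 (*(const volatile __typeof__(x) *)&(x))
#define WRITE_ONCE(x, v) (*(volatile __typeof__(x) *)&(x) = (v))

struct fake_sock {
	unsigned int sk_priority;	/* updated locklessly, e.g. SO_PRIORITY */
};

/* writer side: setsockopt-style path, no socket lock held */
static void set_priority(struct fake_sock *sk, unsigned int prio)
{
	WRITE_ONCE(sk->sk_priority, prio);
}

/* reader side: transmit path copying the value exactly once */
static unsigned int get_priority(const struct fake_sock *sk)
{
	return READ_ONCE(sk->sk_priority);
}

int main(void)
{
	struct fake_sock sk = { 0 };

	set_priority(&sk, 6);
	printf("priority %u\n", get_priority(&sk));
	return 0;
}

The point is the pairing: once a writer uses WRITE_ONCE(), every lockless reader must use READ_ONCE() so the compiler can neither tear the load nor refetch the field mid-function.
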
MODULE_ALIAS_GENL_FAMILY(SMCR_GENL_FAMILY_NAME); diff --git a/net/socket.c b/net/socket.c index 8d83c4bb16..89d79205bf 100644 --- a/net/socket.c +++ b/net/socket.c @@ -403,7 +403,7 @@ static const struct xattr_handler sockfs_security_xattr_handler = { .set = sockfs_security_xattr_set, }; -static const struct xattr_handler *sockfs_xattr_handlers[] = { +static const struct xattr_handler * const sockfs_xattr_handlers[] = { &sockfs_xattr_handler, &sockfs_security_xattr_handler, NULL @@ -1687,20 +1687,16 @@ struct file *__sys_socket_file(int family, int type, int protocol) * Therefore, __weak is needed to ensure that the call is still * emitted, by telling the compiler that we don't know what the * function might eventually be. - * - * __diag_* below are needed to dismiss the missing prototype warning. */ -__diag_push(); -__diag_ignore_all("-Wmissing-prototypes", - "A fmod_ret entry point for BPF programs"); +__bpf_hook_start(); __weak noinline int update_socket_protocol(int family, int type, int protocol) { return protocol; } -__diag_pop(); +__bpf_hook_end(); int __sys_socket(int family, int type, int protocol) { @@ -2281,33 +2277,23 @@ static bool sock_use_custom_sol_socket(const struct socket *sock) return test_bit(SOCK_CUSTOM_SOCKOPT, &sock->flags); } -/* - * Set a socket option. Because we don't know the option lengths we have - * to pass the user mode parameter for the protocols to sort out. - */ -int __sys_setsockopt(int fd, int level, int optname, char __user *user_optval, - int optlen) +int do_sock_setsockopt(struct socket *sock, bool compat, int level, + int optname, sockptr_t optval, int optlen) { - sockptr_t optval = USER_SOCKPTR(user_optval); const struct proto_ops *ops; char *kernel_optval = NULL; - int err, fput_needed; - struct socket *sock; + int err; if (optlen < 0) return -EINVAL; - sock = sockfd_lookup_light(fd, &err, &fput_needed); - if (!sock) - return err; - err = security_socket_setsockopt(sock, level, optname); if (err) goto out_put; - if (!in_compat_syscall()) + if (!compat) err = BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock->sk, &level, &optname, - user_optval, &optlen, + optval, &optlen, &kernel_optval); if (err < 0) goto out_put; @@ -2328,6 +2314,27 @@ int __sys_setsockopt(int fd, int level, int optname, char __user *user_optval, optlen); kfree(kernel_optval); out_put: + return err; +} +EXPORT_SYMBOL(do_sock_setsockopt); + +/* Set a socket option. Because we don't know the option lengths we have + * to pass the user mode parameter for the protocols to sort out. + */ +int __sys_setsockopt(int fd, int level, int optname, char __user *user_optval, + int optlen) +{ + sockptr_t optval = USER_SOCKPTR(user_optval); + bool compat = in_compat_syscall(); + int err, fput_needed; + struct socket *sock; + + sock = sockfd_lookup_light(fd, &err, &fput_needed); + if (!sock) + return err; + + err = do_sock_setsockopt(sock, compat, level, optname, optval, optlen); + fput_light(sock->file, fput_needed); return err; } @@ -2341,43 +2348,62 @@ SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname, INDIRECT_CALLABLE_DECLARE(bool tcp_bpf_bypass_getsockopt(int level, int optname)); -/* - * Get a socket option. Because we don't know the option lengths we have - * to pass a user mode parameter for the protocols to sort out. 
- */ -int __sys_getsockopt(int fd, int level, int optname, char __user *optval, - int __user *optlen) +int do_sock_getsockopt(struct socket *sock, bool compat, int level, + int optname, sockptr_t optval, sockptr_t optlen) { int max_optlen __maybe_unused; const struct proto_ops *ops; - int err, fput_needed; - struct socket *sock; - - sock = sockfd_lookup_light(fd, &err, &fput_needed); - if (!sock) - return err; + int err; err = security_socket_getsockopt(sock, level, optname); if (err) - goto out_put; + return err; - if (!in_compat_syscall()) + if (!compat) max_optlen = BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen); ops = READ_ONCE(sock->ops); - if (level == SOL_SOCKET) - err = sock_getsockopt(sock, level, optname, optval, optlen); - else if (unlikely(!ops->getsockopt)) + if (level == SOL_SOCKET) { + err = sk_getsockopt(sock->sk, level, optname, optval, optlen); + } else if (unlikely(!ops->getsockopt)) { err = -EOPNOTSUPP; - else - err = ops->getsockopt(sock, level, optname, optval, - optlen); + } else { + if (WARN_ONCE(optval.is_kernel || optlen.is_kernel, + "Invalid argument type")) + return -EOPNOTSUPP; + + err = ops->getsockopt(sock, level, optname, optval.user, + optlen.user); + } - if (!in_compat_syscall()) + if (!compat) err = BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock->sk, level, optname, optval, optlen, max_optlen, err); -out_put: + + return err; +} +EXPORT_SYMBOL(do_sock_getsockopt); + +/* + * Get a socket option. Because we don't know the option lengths we have + * to pass a user mode parameter for the protocols to sort out. + */ +int __sys_getsockopt(int fd, int level, int optname, char __user *optval, + int __user *optlen) +{ + int err, fput_needed; + struct socket *sock; + bool compat; + + sock = sockfd_lookup_light(fd, &err, &fput_needed); + if (!sock) + return err; + + compat = in_compat_syscall(); + err = do_sock_getsockopt(sock, compat, level, optname, + USER_SOCKPTR(optval), USER_SOCKPTR(optlen)); + fput_light(sock->file, fput_needed); return err; } diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index ec41b26af7..04534ea537 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -863,11 +863,7 @@ rpcauth_uptodatecred(struct rpc_task *task) test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) != 0; } -static struct shrinker rpc_cred_shrinker = { - .count_objects = rpcauth_cache_shrink_count, - .scan_objects = rpcauth_cache_shrink_scan, - .seeks = DEFAULT_SEEKS, -}; +static struct shrinker *rpc_cred_shrinker; int __init rpcauth_init_module(void) { @@ -876,9 +872,17 @@ int __init rpcauth_init_module(void) err = rpc_init_authunix(); if (err < 0) goto out1; - err = register_shrinker(&rpc_cred_shrinker, "sunrpc_cred"); - if (err < 0) + rpc_cred_shrinker = shrinker_alloc(0, "sunrpc_cred"); + if (!rpc_cred_shrinker) { + err = -ENOMEM; goto out2; + } + + rpc_cred_shrinker->count_objects = rpcauth_cache_shrink_count; + rpc_cred_shrinker->scan_objects = rpcauth_cache_shrink_scan; + + shrinker_register(rpc_cred_shrinker); + return 0; out2: rpc_destroy_authunix(); @@ -889,5 +893,5 @@ out1: void rpcauth_remove_module(void) { rpc_destroy_authunix(); - unregister_shrinker(&rpc_cred_shrinker); + shrinker_free(rpc_cred_shrinker); } diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index 9734e1d9f9..d2b02710ab 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -34,9 +34,9 @@ * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
*/ -#include <crypto/algapi.h> #include <crypto/hash.h> #include <crypto/skcipher.h> +#include <crypto/utils.h> #include <linux/err.h> #include <linux/types.h> #include <linux/mm.h> diff --git a/net/sunrpc/auth_gss/gss_krb5_unseal.c b/net/sunrpc/auth_gss/gss_krb5_unseal.c index 4fbc50a0a2..ef0e6af9fc 100644 --- a/net/sunrpc/auth_gss/gss_krb5_unseal.c +++ b/net/sunrpc/auth_gss/gss_krb5_unseal.c @@ -57,11 +57,9 @@ * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. */ -#include <crypto/algapi.h> #include <linux/types.h> #include <linux/jiffies.h> #include <linux/sunrpc/gss_krb5.h> -#include <linux/crypto.h> #include "gss_krb5_internal.h" diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c index 65a6c6429a..caa94cf571 100644 --- a/net/sunrpc/backchannel_rqst.c +++ b/net/sunrpc/backchannel_rqst.c @@ -83,7 +83,6 @@ static struct rpc_rqst *xprt_alloc_bc_req(struct rpc_xprt *xprt) return NULL; req->rq_xprt = xprt; - INIT_LIST_HEAD(&req->rq_bc_list); /* Preallocate one XDR receive buffer */ if (xprt_alloc_xdr_buf(&req->rq_rcv_buf, gfp_flags) < 0) { @@ -349,10 +348,8 @@ found: } /* - * Add callback request to callback list. The callback - * service sleeps on the sv_cb_waitq waiting for new - * requests. Wake it up after adding enqueing the - * request. + * Add callback request to callback list. Wake a thread + * on the first pool (usually the only pool) to handle it. */ void xprt_complete_bc_request(struct rpc_rqst *req, uint32_t copied) { @@ -369,8 +366,6 @@ void xprt_complete_bc_request(struct rpc_rqst *req, uint32_t copied) dprintk("RPC: add callback request to list\n"); xprt_get(xprt); - spin_lock(&bc_serv->sv_cb_lock); - list_add(&req->rq_bc_list, &bc_serv->sv_cb_list); - wake_up(&bc_serv->sv_cb_waitq); - spin_unlock(&bc_serv->sv_cb_lock); + lwq_enqueue(&req->rq_bc_list, &bc_serv->sv_cb_list); + svc_pool_wake_idle_thread(&bc_serv->sv_pools[0]); } diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 339dfc5b92..daa9582ec8 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2223,7 +2223,7 @@ call_connect_status(struct rpc_task *task) } xprt_switch_put(xps); if (!task->tk_xprt) - return; + goto out; } goto out_retry; case -ENOBUFS: @@ -2238,6 +2238,7 @@ out_next: out_retry: /* Check for timeouts before looping back to call_bind */ task->tk_action = call_bind; +out: rpc_check_timeout(task); } diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index f420d84573..dcc2b4f49e 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -472,7 +472,7 @@ rpc_get_inode(struct super_block *sb, umode_t mode) return NULL; inode->i_ino = get_next_ino(); inode->i_mode = mode; - inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode); + simple_inode_init_ts(inode); switch (mode & S_IFMT) { case S_IFDIR: inode->i_fop = &simple_dir_operations; diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 812fda9d45..3f2ea7a049 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -438,9 +438,7 @@ EXPORT_SYMBOL_GPL(svc_bind); static void __svc_init_bc(struct svc_serv *serv) { - INIT_LIST_HEAD(&serv->sv_cb_list); - spin_lock_init(&serv->sv_cb_lock); - init_waitqueue_head(&serv->sv_cb_waitq); + lwq_init(&serv->sv_cb_list); } #else static void @@ -509,9 +507,9 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, i, serv->sv_name); pool->sp_id = i; - INIT_LIST_HEAD(&pool->sp_sockets); + lwq_init(&pool->sp_xprts); INIT_LIST_HEAD(&pool->sp_all_threads); - spin_lock_init(&pool->sp_lock); + init_llist_head(&pool->sp_idle_threads); 
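
The sunrpc changes around here replace the sp_lock-protected lists with llist/lwq primitives: an idle thread pushes its own rq_idle node onto pool->sp_idle_threads, and a waker peeks the first entry. What follows is a rough single-producer-per-node sketch using C11 atomics; it deliberately glosses over the ABA and multi-consumer caveats that the kernel's llist documentation spells out, which is why the patch has each thread remove only its own node via llist_del_first_this().

#include <stdio.h>
#include <stdatomic.h>

struct llist_node {
	struct llist_node *next;
};

struct llist_head {
	_Atomic(struct llist_node *) first;
};

/* lock-free LIFO push, analogous to llist_add() */
static void llist_add(struct llist_node *node, struct llist_head *head)
{
	struct llist_node *first = atomic_load(&head->first);

	do {
		node->next = first;
	} while (!atomic_compare_exchange_weak(&head->first, &first, node));
}

/* pop the most recently added node; safe here only because the
 * demo is single-threaded, unlike the kernel's stricter rules */
static struct llist_node *llist_del_first(struct llist_head *head)
{
	struct llist_node *first = atomic_load(&head->first);

	do {
		if (!first)
			return NULL;
	} while (!atomic_compare_exchange_weak(&head->first, &first,
					       first->next));
	return first;
}

int main(void)
{
	struct llist_head idle = { NULL };
	struct llist_node workers[3];
	struct llist_node *n;
	int i;

	for (i = 0; i < 3; i++)
		llist_add(&workers[i], &idle);	/* threads going idle */

	n = llist_del_first(&idle);		/* waker picks one (LIFO) */
	printf("woke worker %ld\n", (long)(n - workers));
	return 0;
}

LIFO order is a feature for this use case: the most recently idled thread is the one most likely to still be cache-hot, which is the same reason the patch's svc_pool_wake_idle_thread() wakes sp_idle_threads.first.
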
percpu_counter_init(&pool->sp_messages_arrived, 0, GFP_KERNEL); percpu_counter_init(&pool->sp_sockets_queued, 0, GFP_KERNEL); @@ -575,11 +573,12 @@ svc_destroy(struct kref *ref) timer_shutdown_sync(&serv->sv_temptimer); /* - * The last user is gone and thus all sockets have to be destroyed to - * the point. Check this. + * Remaining transports at this point are not expected. */ - BUG_ON(!list_empty(&serv->sv_permsocks)); - BUG_ON(!list_empty(&serv->sv_tempsocks)); + WARN_ONCE(!list_empty(&serv->sv_permsocks), + "SVC: permsocks remain for %s\n", serv->sv_program->pg_name); + WARN_ONCE(!list_empty(&serv->sv_tempsocks), + "SVC: tempsocks remain for %s\n", serv->sv_program->pg_name); cache_clean_deferred(serv); @@ -642,7 +641,6 @@ svc_rqst_alloc(struct svc_serv *serv, struct svc_pool *pool, int node) folio_batch_init(&rqstp->rq_fbatch); - __set_bit(RQ_BUSY, &rqstp->rq_flags); rqstp->rq_server = serv; rqstp->rq_pool = pool; @@ -682,10 +680,13 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node) serv->sv_nrthreads += 1; spin_unlock_bh(&serv->sv_lock); - spin_lock_bh(&pool->sp_lock); - pool->sp_nrthreads++; + atomic_inc(&pool->sp_nrthreads); + + /* Protected by whatever lock the service uses when calling + * svc_set_num_threads() + */ list_add_rcu(&rqstp->rq_all, &pool->sp_all_threads); - spin_unlock_bh(&pool->sp_lock); + return rqstp; } @@ -701,23 +702,25 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node) void svc_pool_wake_idle_thread(struct svc_pool *pool) { struct svc_rqst *rqstp; + struct llist_node *ln; rcu_read_lock(); - list_for_each_entry_rcu(rqstp, &pool->sp_all_threads, rq_all) { - if (test_and_set_bit(RQ_BUSY, &rqstp->rq_flags)) - continue; - + ln = READ_ONCE(pool->sp_idle_threads.first); + if (ln) { + rqstp = llist_entry(ln, struct svc_rqst, rq_idle); WRITE_ONCE(rqstp->rq_qtime, ktime_get()); - wake_up_process(rqstp->rq_task); + if (!task_is_running(rqstp->rq_task)) { + wake_up_process(rqstp->rq_task); + trace_svc_wake_up(rqstp->rq_task->pid); + percpu_counter_inc(&pool->sp_threads_woken); + } rcu_read_unlock(); - percpu_counter_inc(&pool->sp_threads_woken); - trace_svc_wake_up(rqstp->rq_task->pid); return; } rcu_read_unlock(); - set_bit(SP_CONGESTED, &pool->sp_flags); } +EXPORT_SYMBOL_GPL(svc_pool_wake_idle_thread); static struct svc_pool * svc_pool_next(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state) @@ -725,36 +728,38 @@ svc_pool_next(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state) return pool ? 
pool : &serv->sv_pools[(*state)++ % serv->sv_nrpools]; } -static struct task_struct * -svc_pool_victim(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state) +static struct svc_pool * +svc_pool_victim(struct svc_serv *serv, struct svc_pool *target_pool, + unsigned int *state) { + struct svc_pool *pool; unsigned int i; - struct task_struct *task = NULL; + +retry: + pool = target_pool; if (pool != NULL) { - spin_lock_bh(&pool->sp_lock); + if (atomic_inc_not_zero(&pool->sp_nrthreads)) + goto found_pool; + return NULL; } else { for (i = 0; i < serv->sv_nrpools; i++) { pool = &serv->sv_pools[--(*state) % serv->sv_nrpools]; - spin_lock_bh(&pool->sp_lock); - if (!list_empty(&pool->sp_all_threads)) + if (atomic_inc_not_zero(&pool->sp_nrthreads)) goto found_pool; - spin_unlock_bh(&pool->sp_lock); } return NULL; } found_pool: - if (!list_empty(&pool->sp_all_threads)) { - struct svc_rqst *rqstp; - - rqstp = list_entry(pool->sp_all_threads.next, struct svc_rqst, rq_all); - set_bit(RQ_VICTIM, &rqstp->rq_flags); - list_del_rcu(&rqstp->rq_all); - task = rqstp->rq_task; - } - spin_unlock_bh(&pool->sp_lock); - return task; + set_bit(SP_VICTIM_REMAINS, &pool->sp_flags); + set_bit(SP_NEED_VICTIM, &pool->sp_flags); + if (!atomic_dec_and_test(&pool->sp_nrthreads)) + return pool; + /* Nothing left in this pool any more */ + clear_bit(SP_NEED_VICTIM, &pool->sp_flags); + clear_bit(SP_VICTIM_REMAINS, &pool->sp_flags); + goto retry; } static int @@ -795,18 +800,16 @@ svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) static int svc_stop_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) { - struct svc_rqst *rqstp; - struct task_struct *task; unsigned int state = serv->sv_nrthreads-1; + struct svc_pool *victim; do { - task = svc_pool_victim(serv, pool, &state); - if (task == NULL) + victim = svc_pool_victim(serv, pool, &state); + if (!victim) break; - rqstp = kthread_data(task); - /* Did we lose a race to svo_function threadfn? */ - if (kthread_stop(task) == -EINTR) - svc_exit_thread(rqstp); + svc_pool_wake_idle_thread(victim); + wait_on_bit(&victim->sp_flags, SP_VICTIM_REMAINS, + TASK_IDLE); nrservs++; } while (nrservs < 0); return 0; @@ -832,13 +835,10 @@ svc_stop_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) int svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) { - if (pool == NULL) { + if (!pool) nrservs -= serv->sv_nrthreads; - } else { - spin_lock_bh(&pool->sp_lock); - nrservs -= pool->sp_nrthreads; - spin_unlock_bh(&pool->sp_lock); - } + else + nrservs -= atomic_read(&pool->sp_nrthreads); if (nrservs > 0) return svc_start_kthreads(serv, pool, nrservs); @@ -924,11 +924,9 @@ svc_exit_thread(struct svc_rqst *rqstp) struct svc_serv *serv = rqstp->rq_server; struct svc_pool *pool = rqstp->rq_pool; - spin_lock_bh(&pool->sp_lock); - pool->sp_nrthreads--; - if (!test_and_set_bit(RQ_VICTIM, &rqstp->rq_flags)) - list_del_rcu(&rqstp->rq_all); - spin_unlock_bh(&pool->sp_lock); + list_del_rcu(&rqstp->rq_all); + + atomic_dec(&pool->sp_nrthreads); spin_lock_bh(&serv->sv_lock); serv->sv_nrthreads -= 1; @@ -938,6 +936,11 @@ svc_exit_thread(struct svc_rqst *rqstp) svc_rqst_free(rqstp); svc_put(serv); + /* That svc_put() cannot be the last, because the thread + * waiting for SP_VICTIM_REMAINS to clear must hold + * a reference. So it is still safe to access pool. 
+ */ + clear_and_wake_up_bit(SP_VICTIM_REMAINS, &pool->sp_flags); } EXPORT_SYMBOL_GPL(svc_exit_thread); @@ -1544,24 +1547,20 @@ out_drop: } #if defined(CONFIG_SUNRPC_BACKCHANNEL) -/* - * Process a backchannel RPC request that arrived over an existing - * outbound connection +/** + * svc_process_bc - process a reverse-direction RPC request + * @req: RPC request to be used for client-side processing + * @rqstp: server-side execution context + * */ -int -bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req, - struct svc_rqst *rqstp) +void svc_process_bc(struct rpc_rqst *req, struct svc_rqst *rqstp) { struct rpc_task *task; int proc_error; - int error; - - dprintk("svc: %s(%p)\n", __func__, req); /* Build the svc_rqst used by the common processing routine */ rqstp->rq_xid = req->rq_xid; rqstp->rq_prot = req->rq_xprt->prot; - rqstp->rq_server = serv; rqstp->rq_bc_net = req->rq_xprt->xprt_net; rqstp->rq_addrlen = sizeof(req->rq_xprt->addr); @@ -1590,10 +1589,8 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req, * been processed by the caller. */ svcxdr_init_decode(rqstp); - if (!xdr_inline_decode(&rqstp->rq_arg_stream, XDR_UNIT * 2)) { - error = -EINVAL; - goto out; - } + if (!xdr_inline_decode(&rqstp->rq_arg_stream, XDR_UNIT * 2)) + return; /* Parse and execute the bc call */ proc_error = svc_process_common(rqstp); @@ -1602,26 +1599,18 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req, if (!proc_error) { /* Processing error: drop the request */ xprt_free_bc_request(req); - error = -EINVAL; - goto out; + return; } /* Finally, send the reply synchronously */ memcpy(&req->rq_snd_buf, &rqstp->rq_res, sizeof(req->rq_snd_buf)); task = rpc_run_bc_task(req); - if (IS_ERR(task)) { - error = PTR_ERR(task); - goto out; - } + if (IS_ERR(task)) + return; WARN_ON_ONCE(atomic_read(&task->tk_count) != 1); - error = task->tk_status; rpc_put_task(task); - -out: - dprintk("svc: %s(), error=%d\n", __func__, error); - return error; } -EXPORT_SYMBOL_GPL(bc_svc_process); +EXPORT_SYMBOL_GPL(svc_process_bc); #endif /* CONFIG_SUNRPC_BACKCHANNEL */ /** diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 5cfe5c7408..1b71055fc3 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -9,7 +9,6 @@ #include <linux/sched/mm.h> #include <linux/errno.h> #include <linux/freezer.h> -#include <linux/kthread.h> #include <linux/slab.h> #include <net/sock.h> #include <linux/sunrpc/addr.h> @@ -17,6 +16,7 @@ #include <linux/sunrpc/svc_xprt.h> #include <linux/sunrpc/svcsock.h> #include <linux/sunrpc/xprt.h> +#include <linux/sunrpc/bc_xprt.h> #include <linux/module.h> #include <linux/netdevice.h> #include <trace/events/sunrpc.h> @@ -201,7 +201,6 @@ void svc_xprt_init(struct net *net, struct svc_xprt_class *xcl, kref_init(&xprt->xpt_ref); xprt->xpt_server = serv; INIT_LIST_HEAD(&xprt->xpt_list); - INIT_LIST_HEAD(&xprt->xpt_ready); INIT_LIST_HEAD(&xprt->xpt_deferred); INIT_LIST_HEAD(&xprt->xpt_users); mutex_init(&xprt->xpt_mutex); @@ -472,9 +471,7 @@ void svc_xprt_enqueue(struct svc_xprt *xprt) pool = svc_pool_for_cpu(xprt->xpt_server); percpu_counter_inc(&pool->sp_sockets_queued); - spin_lock_bh(&pool->sp_lock); - list_add_tail(&xprt->xpt_ready, &pool->sp_sockets); - spin_unlock_bh(&pool->sp_lock); + lwq_enqueue(&xprt->xpt_ready, &pool->sp_xprts); svc_pool_wake_idle_thread(pool); } @@ -487,18 +484,9 @@ static struct svc_xprt *svc_xprt_dequeue(struct svc_pool *pool) { struct svc_xprt *xprt = NULL; - if (list_empty(&pool->sp_sockets)) - goto out; - - spin_lock_bh(&pool->sp_lock); - if 
(likely(!list_empty(&pool->sp_sockets))) { - xprt = list_first_entry(&pool->sp_sockets, - struct svc_xprt, xpt_ready); - list_del_init(&xprt->xpt_ready); + xprt = lwq_dequeue(&pool->sp_xprts, struct svc_xprt, xpt_ready); + if (xprt) svc_xprt_get(xprt); - } - spin_unlock_bh(&pool->sp_lock); -out: return xprt; } @@ -673,7 +661,7 @@ static bool svc_alloc_arg(struct svc_rqst *rqstp) continue; set_current_state(TASK_IDLE); - if (kthread_should_stop()) { + if (svc_thread_should_stop(rqstp)) { set_current_state(TASK_RUNNING); return false; } @@ -698,7 +686,7 @@ static bool svc_alloc_arg(struct svc_rqst *rqstp) } static bool -rqst_should_sleep(struct svc_rqst *rqstp) +svc_thread_should_sleep(struct svc_rqst *rqstp) { struct svc_pool *pool = rqstp->rq_pool; @@ -707,65 +695,51 @@ rqst_should_sleep(struct svc_rqst *rqstp) return false; /* was a socket queued? */ - if (!list_empty(&pool->sp_sockets)) + if (!lwq_empty(&pool->sp_xprts)) return false; /* are we shutting down? */ - if (kthread_should_stop()) + if (svc_thread_should_stop(rqstp)) return false; - /* are we freezing? */ - if (freezing(current)) - return false; +#if defined(CONFIG_SUNRPC_BACKCHANNEL) + if (svc_is_backchannel(rqstp)) { + if (!lwq_empty(&rqstp->rq_server->sv_cb_list)) + return false; + } +#endif return true; } -static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp) +static void svc_thread_wait_for_work(struct svc_rqst *rqstp) { - struct svc_pool *pool = rqstp->rq_pool; - - /* rq_xprt should be clear on entry */ - WARN_ON_ONCE(rqstp->rq_xprt); - - rqstp->rq_xprt = svc_xprt_dequeue(pool); - if (rqstp->rq_xprt) - goto out_found; - - set_current_state(TASK_IDLE); - smp_mb__before_atomic(); - clear_bit(SP_CONGESTED, &pool->sp_flags); - clear_bit(RQ_BUSY, &rqstp->rq_flags); - smp_mb__after_atomic(); - - if (likely(rqst_should_sleep(rqstp))) - schedule(); - else + struct svc_pool *pool = rqstp->rq_pool; + + if (svc_thread_should_sleep(rqstp)) { + set_current_state(TASK_IDLE | TASK_FREEZABLE); + llist_add(&rqstp->rq_idle, &pool->sp_idle_threads); + if (likely(svc_thread_should_sleep(rqstp))) + schedule(); + + while (!llist_del_first_this(&pool->sp_idle_threads, + &rqstp->rq_idle)) { + /* Work just became available. This thread can only + * handle it after removing rqstp from the idle + * list. If that attempt failed, some other thread + * must have queued itself after finding no + * work to do, so that thread has taken responsibly + * for this new work. This thread can safely sleep + * until woken again. + */ + schedule(); + set_current_state(TASK_IDLE | TASK_FREEZABLE); + } __set_current_state(TASK_RUNNING); - + } else { + cond_resched(); + } try_to_freeze(); - - set_bit(RQ_BUSY, &rqstp->rq_flags); - smp_mb__after_atomic(); - clear_bit(SP_TASK_PENDING, &pool->sp_flags); - rqstp->rq_xprt = svc_xprt_dequeue(pool); - if (rqstp->rq_xprt) - goto out_found; - - if (kthread_should_stop()) - return NULL; - return NULL; -out_found: - clear_bit(SP_TASK_PENDING, &pool->sp_flags); - /* Normally we will wait up to 5 seconds for any required - * cache information to be provided. 
- */ - if (!test_bit(SP_CONGESTED, &pool->sp_flags)) - rqstp->rq_chandle.thread_wait = 5*HZ; - else - rqstp->rq_chandle.thread_wait = 1*HZ; - trace_svc_xprt_dequeue(rqstp); - return rqstp->rq_xprt; } static void svc_add_new_temp_xprt(struct svc_serv *serv, struct svc_xprt *newxpt) @@ -784,7 +758,7 @@ static void svc_add_new_temp_xprt(struct svc_serv *serv, struct svc_xprt *newxpt svc_xprt_received(newxpt); } -static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt) +static void svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt) { struct svc_serv *serv = rqstp->rq_server; int len = 0; @@ -825,11 +799,35 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt) len = xprt->xpt_ops->xpo_recvfrom(rqstp); rqstp->rq_reserved = serv->sv_max_mesg; atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); + if (len <= 0) + goto out; + + trace_svc_xdr_recvfrom(&rqstp->rq_arg); + + clear_bit(XPT_OLD, &xprt->xpt_flags); + + rqstp->rq_chandle.defer = svc_defer; + + if (serv->sv_stats) + serv->sv_stats->netcnt++; + percpu_counter_inc(&rqstp->rq_pool->sp_messages_arrived); + rqstp->rq_stime = ktime_get(); + svc_process(rqstp); } else svc_xprt_received(xprt); out: - return len; + rqstp->rq_res.len = 0; + svc_xprt_release(rqstp); +} + +static void svc_thread_wake_next(struct svc_rqst *rqstp) +{ + if (!svc_thread_should_sleep(rqstp)) + /* More work pending after I dequeued some, + * wake another worker + */ + svc_pool_wake_idle_thread(rqstp->rq_pool); } /** @@ -842,44 +840,51 @@ out: */ void svc_recv(struct svc_rqst *rqstp) { - struct svc_xprt *xprt = NULL; - struct svc_serv *serv = rqstp->rq_server; - int len; + struct svc_pool *pool = rqstp->rq_pool; if (!svc_alloc_arg(rqstp)) - goto out; + return; - try_to_freeze(); - cond_resched(); - if (kthread_should_stop()) - goto out; + svc_thread_wait_for_work(rqstp); - xprt = svc_get_next_xprt(rqstp); - if (!xprt) - goto out; + clear_bit(SP_TASK_PENDING, &pool->sp_flags); - len = svc_handle_xprt(rqstp, xprt); + if (svc_thread_should_stop(rqstp)) { + svc_thread_wake_next(rqstp); + return; + } - /* No data, incomplete (TCP) read, or accept() */ - if (len <= 0) - goto out_release; + rqstp->rq_xprt = svc_xprt_dequeue(pool); + if (rqstp->rq_xprt) { + struct svc_xprt *xprt = rqstp->rq_xprt; - trace_svc_xdr_recvfrom(&rqstp->rq_arg); + svc_thread_wake_next(rqstp); + /* Normally we will wait up to 5 seconds for any required + * cache information to be provided. When there are no + * idle threads, we reduce the wait time. 
+ */ + if (pool->sp_idle_threads.first) + rqstp->rq_chandle.thread_wait = 5 * HZ; + else + rqstp->rq_chandle.thread_wait = 1 * HZ; - clear_bit(XPT_OLD, &xprt->xpt_flags); + trace_svc_xprt_dequeue(rqstp); + svc_handle_xprt(rqstp, xprt); + } - rqstp->rq_chandle.defer = svc_defer; +#if defined(CONFIG_SUNRPC_BACKCHANNEL) + if (svc_is_backchannel(rqstp)) { + struct svc_serv *serv = rqstp->rq_server; + struct rpc_rqst *req; - if (serv->sv_stats) - serv->sv_stats->netcnt++; - percpu_counter_inc(&rqstp->rq_pool->sp_messages_arrived); - rqstp->rq_stime = ktime_get(); - svc_process(rqstp); -out: - return; -out_release: - rqstp->rq_res.len = 0; - svc_xprt_release(rqstp); + req = lwq_dequeue(&serv->sv_cb_list, + struct rpc_rqst, rq_bc_list); + if (req) { + svc_thread_wake_next(rqstp); + svc_process_bc(req, rqstp); + } + } +#endif } EXPORT_SYMBOL_GPL(svc_recv); @@ -889,7 +894,6 @@ EXPORT_SYMBOL_GPL(svc_recv); void svc_drop(struct svc_rqst *rqstp) { trace_svc_drop(rqstp); - svc_xprt_release(rqstp); } EXPORT_SYMBOL_GPL(svc_drop); @@ -905,8 +909,6 @@ void svc_send(struct svc_rqst *rqstp) int status; xprt = rqstp->rq_xprt; - if (!xprt) - return; /* calculate over-all length */ xb = &rqstp->rq_res; @@ -919,7 +921,6 @@ void svc_send(struct svc_rqst *rqstp) status = xprt->xpt_ops->xpo_sendto(rqstp); trace_svc_send(rqstp, status); - svc_xprt_release(rqstp); } /* @@ -1030,7 +1031,6 @@ static void svc_delete_xprt(struct svc_xprt *xprt) spin_lock_bh(&serv->sv_lock); list_del_init(&xprt->xpt_list); - WARN_ON_ONCE(!list_empty(&xprt->xpt_ready)); if (test_bit(XPT_TEMP, &xprt->xpt_flags)) serv->sv_tmpcnt--; spin_unlock_bh(&serv->sv_lock); @@ -1081,36 +1081,26 @@ static int svc_close_list(struct svc_serv *serv, struct list_head *xprt_list, st return ret; } -static struct svc_xprt *svc_dequeue_net(struct svc_serv *serv, struct net *net) +static void svc_clean_up_xprts(struct svc_serv *serv, struct net *net) { - struct svc_pool *pool; struct svc_xprt *xprt; - struct svc_xprt *tmp; int i; for (i = 0; i < serv->sv_nrpools; i++) { - pool = &serv->sv_pools[i]; - - spin_lock_bh(&pool->sp_lock); - list_for_each_entry_safe(xprt, tmp, &pool->sp_sockets, xpt_ready) { - if (xprt->xpt_net != net) - continue; - list_del_init(&xprt->xpt_ready); - spin_unlock_bh(&pool->sp_lock); - return xprt; + struct svc_pool *pool = &serv->sv_pools[i]; + struct llist_node *q, **t1, *t2; + + q = lwq_dequeue_all(&pool->sp_xprts); + lwq_for_each_safe(xprt, t1, t2, &q, xpt_ready) { + if (xprt->xpt_net == net) { + set_bit(XPT_CLOSE, &xprt->xpt_flags); + svc_delete_xprt(xprt); + xprt = NULL; + } } - spin_unlock_bh(&pool->sp_lock); - } - return NULL; -} - -static void svc_clean_up_xprts(struct svc_serv *serv, struct net *net) -{ - struct svc_xprt *xprt; - while ((xprt = svc_dequeue_net(serv, net))) { - set_bit(XPT_CLOSE, &xprt->xpt_flags); - svc_delete_xprt(xprt); + if (q) + lwq_enqueue_batch(q, &pool->sp_xprts); } } diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index ab453ede54..6cc9ffac96 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -283,7 +283,7 @@ out_unlock: xprt_clear_locked(xprt); out_sleep: task->tk_status = -EAGAIN; - if (RPC_IS_SOFT(task)) + if (RPC_IS_SOFT(task) || RPC_IS_SOFTCONN(task)) rpc_sleep_on_timeout(&xprt->sending, task, NULL, xprt_request_timeout(req)); else @@ -349,7 +349,7 @@ out_unlock: xprt_clear_locked(xprt); out_sleep: task->tk_status = -EAGAIN; - if (RPC_IS_SOFT(task)) + if (RPC_IS_SOFT(task) || RPC_IS_SOFTCONN(task)) rpc_sleep_on_timeout(&xprt->sending, task, NULL, xprt_request_timeout(req)); else @@ -651,9 
+651,9 @@ static unsigned long xprt_abs_ktime_to_jiffies(ktime_t abstime) jiffies + nsecs_to_jiffies(-delta); } -static unsigned long xprt_calc_majortimeo(struct rpc_rqst *req) +static unsigned long xprt_calc_majortimeo(struct rpc_rqst *req, + const struct rpc_timeout *to) { - const struct rpc_timeout *to = req->rq_task->tk_client->cl_timeout; unsigned long majortimeo = req->rq_timeout; if (to->to_exponential) @@ -665,9 +665,10 @@ static unsigned long xprt_calc_majortimeo(struct rpc_rqst *req) return majortimeo; } -static void xprt_reset_majortimeo(struct rpc_rqst *req) +static void xprt_reset_majortimeo(struct rpc_rqst *req, + const struct rpc_timeout *to) { - req->rq_majortimeo += xprt_calc_majortimeo(req); + req->rq_majortimeo += xprt_calc_majortimeo(req, to); } static void xprt_reset_minortimeo(struct rpc_rqst *req) @@ -675,7 +676,8 @@ static void xprt_reset_minortimeo(struct rpc_rqst *req) req->rq_minortimeo += req->rq_timeout; } -static void xprt_init_majortimeo(struct rpc_task *task, struct rpc_rqst *req) +static void xprt_init_majortimeo(struct rpc_task *task, struct rpc_rqst *req, + const struct rpc_timeout *to) { unsigned long time_init; struct rpc_xprt *xprt = req->rq_xprt; @@ -684,8 +686,9 @@ static void xprt_init_majortimeo(struct rpc_task *task, struct rpc_rqst *req) time_init = jiffies; else time_init = xprt_abs_ktime_to_jiffies(task->tk_start); - req->rq_timeout = task->tk_client->cl_timeout->to_initval; - req->rq_majortimeo = time_init + xprt_calc_majortimeo(req); + + req->rq_timeout = to->to_initval; + req->rq_majortimeo = time_init + xprt_calc_majortimeo(req, to); req->rq_minortimeo = time_init + req->rq_timeout; } @@ -713,7 +716,7 @@ int xprt_adjust_timeout(struct rpc_rqst *req) } else { req->rq_timeout = to->to_initval; req->rq_retries = 0; - xprt_reset_majortimeo(req); + xprt_reset_majortimeo(req, to); /* Reset the RTT counters == "slow start" */ spin_lock(&xprt->transport_lock); rpc_init_rtt(req->rq_task->tk_client->cl_rtt, to->to_initval); @@ -1886,7 +1889,7 @@ xprt_request_init(struct rpc_task *task) req->rq_snd_buf.bvec = NULL; req->rq_rcv_buf.bvec = NULL; req->rq_release_snd_buf = NULL; - xprt_init_majortimeo(task, req); + xprt_init_majortimeo(task, req, task->tk_client->cl_timeout); trace_xprt_reserve(req); } @@ -1996,6 +1999,8 @@ xprt_init_bc_request(struct rpc_rqst *req, struct rpc_task *task) */ xbufp->len = xbufp->head[0].iov_len + xbufp->page_len + xbufp->tail[0].iov_len; + + xprt_init_majortimeo(task, req, req->rq_xprt->timeout); } #endif diff --git a/net/sunrpc/xprtmultipath.c b/net/sunrpc/xprtmultipath.c index 74ee227125..720d3ba742 100644 --- a/net/sunrpc/xprtmultipath.c +++ b/net/sunrpc/xprtmultipath.c @@ -336,8 +336,9 @@ struct rpc_xprt *xprt_iter_current_entry_offline(struct rpc_xprt_iter *xpi) xprt_switch_find_current_entry_offline); } -bool rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps, - const struct sockaddr *sap) +static +bool __rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps, + const struct sockaddr *sap) { struct list_head *head; struct rpc_xprt *pos; @@ -356,6 +357,18 @@ bool rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps, return false; } +bool rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps, + const struct sockaddr *sap) +{ + bool res; + + rcu_read_lock(); + res = __rpc_xprt_switch_has_addr(xps, sap); + rcu_read_unlock(); + + return res; +} + static struct rpc_xprt *xprt_switch_find_next_entry(struct list_head *head, const struct rpc_xprt *cur, bool check_active) diff --git a/net/sunrpc/xprtrdma/backchannel.c 
b/net/sunrpc/xprtrdma/backchannel.c index e4d84a13c5..8c817e7552 100644 --- a/net/sunrpc/xprtrdma/backchannel.c +++ b/net/sunrpc/xprtrdma/backchannel.c @@ -263,11 +263,9 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt, /* Queue rqst for ULP's callback service */ bc_serv = xprt->bc_serv; xprt_get(xprt); - spin_lock(&bc_serv->sv_cb_lock); - list_add(&rqst->rq_bc_list, &bc_serv->sv_cb_list); - spin_unlock(&bc_serv->sv_cb_lock); + lwq_enqueue(&rqst->rq_bc_list, &bc_serv->sv_cb_list); - wake_up(&bc_serv->sv_cb_waitq); + svc_pool_wake_idle_thread(&bc_serv->sv_pools[0]); r_xprt->rx_stats.bcall_count++; return; diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index a15bf2ede8..58f3dc8d0d 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1181,6 +1181,7 @@ static void xs_sock_reset_state_flags(struct rpc_xprt *xprt) { struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); + transport->xprt_err = 0; clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state); clear_bit(XPRT_SOCK_WAKE_ERROR, &transport->sock_state); clear_bit(XPRT_SOCK_WAKE_WRITE, &transport->sock_state); @@ -2772,18 +2773,13 @@ static void xs_wake_error(struct sock_xprt *transport) { int sockerr; - if (!test_bit(XPRT_SOCK_WAKE_ERROR, &transport->sock_state)) - return; - mutex_lock(&transport->recv_mutex); - if (transport->sock == NULL) - goto out; if (!test_and_clear_bit(XPRT_SOCK_WAKE_ERROR, &transport->sock_state)) - goto out; + return; sockerr = xchg(&transport->xprt_err, 0); - if (sockerr < 0) + if (sockerr < 0) { xprt_wake_pending_tasks(&transport->xprt, sockerr); -out: - mutex_unlock(&transport->recv_mutex); + xs_tcp_force_close(&transport->xprt); + } } static void xs_wake_pending(struct sock_xprt *transport) diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 5b04528484..c9189a970e 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -19,6 +19,35 @@ #include <linux/rtnetlink.h> #include <net/switchdev.h> +static bool switchdev_obj_eq(const struct switchdev_obj *a, + const struct switchdev_obj *b) +{ + const struct switchdev_obj_port_vlan *va, *vb; + const struct switchdev_obj_port_mdb *ma, *mb; + + if (a->id != b->id || a->orig_dev != b->orig_dev) + return false; + + switch (a->id) { + case SWITCHDEV_OBJ_ID_PORT_VLAN: + va = SWITCHDEV_OBJ_PORT_VLAN(a); + vb = SWITCHDEV_OBJ_PORT_VLAN(b); + return va->flags == vb->flags && + va->vid == vb->vid && + va->changed == vb->changed; + case SWITCHDEV_OBJ_ID_PORT_MDB: + case SWITCHDEV_OBJ_ID_HOST_MDB: + ma = SWITCHDEV_OBJ_PORT_MDB(a); + mb = SWITCHDEV_OBJ_PORT_MDB(b); + return ma->vid == mb->vid && + ether_addr_equal(ma->addr, mb->addr); + default: + break; + } + + BUG(); +} + static LIST_HEAD(deferred); static DEFINE_SPINLOCK(deferred_lock); @@ -307,6 +336,50 @@ int switchdev_port_obj_del(struct net_device *dev, } EXPORT_SYMBOL_GPL(switchdev_port_obj_del); +/** + * switchdev_port_obj_act_is_deferred - Is object action pending? + * + * @dev: port device + * @nt: type of action; add or delete + * @obj: object to test + * + * Returns true if a deferred item is pending, which is + * equivalent to the action @nt on an object @obj. + * + * rtnl_lock must be held. 
+ */ +bool switchdev_port_obj_act_is_deferred(struct net_device *dev, + enum switchdev_notifier_type nt, + const struct switchdev_obj *obj) +{ + struct switchdev_deferred_item *dfitem; + bool found = false; + + ASSERT_RTNL(); + + spin_lock_bh(&deferred_lock); + + list_for_each_entry(dfitem, &deferred, list) { + if (dfitem->dev != dev) + continue; + + if ((dfitem->func == switchdev_port_obj_add_deferred && + nt == SWITCHDEV_PORT_OBJ_ADD) || + (dfitem->func == switchdev_port_obj_del_deferred && + nt == SWITCHDEV_PORT_OBJ_DEL)) { + if (switchdev_obj_eq((const void *)dfitem->data, obj)) { + found = true; + break; + } + } + } + + spin_unlock_bh(&deferred_lock); + + return found; +} +EXPORT_SYMBOL_GPL(switchdev_port_obj_act_is_deferred); + static ATOMIC_NOTIFIER_HEAD(switchdev_notif_chain); static BLOCKING_NOTIFIER_HEAD(switchdev_blocking_notif_chain); diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 2cde375477..878415c435 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -1086,6 +1086,12 @@ int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info) #ifdef CONFIG_TIPC_MEDIA_UDP if (attrs[TIPC_NLA_BEARER_UDP_OPTS]) { + if (b->media->type_id != TIPC_MEDIA_TYPE_UDP) { + rtnl_unlock(); + NL_SET_ERR_MSG(info->extack, "UDP option is unsupported"); + return -EINVAL; + } + err = tipc_udp_nl_bearer_add(b, attrs[TIPC_NLA_BEARER_UDP_OPTS]); if (err) { diff --git a/net/tipc/diag.c b/net/tipc/diag.c index 73137f4aeb..18733451c9 100644 --- a/net/tipc/diag.c +++ b/net/tipc/diag.c @@ -113,4 +113,5 @@ module_init(tipc_diag_init); module_exit(tipc_diag_exit); MODULE_LICENSE("Dual BSD/GPL"); +MODULE_DESCRIPTION("TIPC socket monitoring via SOCK_DIAG"); MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, AF_TIPC); diff --git a/net/tls/tls.h b/net/tls/tls.h index 28a8c0e80e..762f424ff2 100644 --- a/net/tls/tls.h +++ b/net/tls/tls.h @@ -127,7 +127,7 @@ struct tls_rec { struct sock *sk; char aad_space[TLS_AAD_SPACE_SIZE]; - u8 iv_data[MAX_IV_SIZE]; + u8 iv_data[TLS_MAX_IV_SIZE]; struct aead_request aead_req; u8 aead_req_ctx[]; }; @@ -142,7 +142,10 @@ void update_sk_prot(struct sock *sk, struct tls_context *ctx); int wait_on_pending_writer(struct sock *sk, long *timeo); void tls_err_abort(struct sock *sk, int err); -int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx); +int init_prot_info(struct tls_prot_info *prot, + const struct tls_crypto_info *crypto_info, + const struct tls_cipher_desc *cipher_desc); +int tls_set_sw_offload(struct sock *sk, int tx); void tls_update_rx_zc_capable(struct tls_context *tls_ctx); void tls_sw_strparser_arm(struct sock *sk, struct tls_context *ctx); void tls_sw_strparser_done(struct tls_context *tls_ctx); @@ -223,7 +226,7 @@ static inline bool tls_strp_msg_mixed_decrypted(struct tls_sw_context_rx *ctx) #ifdef CONFIG_TLS_DEVICE int tls_device_init(void); void tls_device_cleanup(void); -int tls_set_device_offload(struct sock *sk, struct tls_context *ctx); +int tls_set_device_offload(struct sock *sk); void tls_device_free_resources_tx(struct sock *sk); int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx); void tls_device_offload_cleanup_rx(struct sock *sk); @@ -234,7 +237,7 @@ static inline int tls_device_init(void) { return 0; } static inline void tls_device_cleanup(void) {} static inline int -tls_set_device_offload(struct sock *sk, struct tls_context *ctx) +tls_set_device_offload(struct sock *sk) { return -EOPNOTSUPP; } diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 8c94c92660..bf8ed36b1a 100644 --- 
a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -56,11 +56,8 @@ static struct page *dummy_page; static void tls_device_free_ctx(struct tls_context *ctx) { - if (ctx->tx_conf == TLS_HW) { + if (ctx->tx_conf == TLS_HW) kfree(tls_offload_ctx_tx(ctx)); - kfree(ctx->tx.rec_seq); - kfree(ctx->tx.iv); - } if (ctx->rx_conf == TLS_HW) kfree(tls_offload_ctx_rx(ctx)); @@ -891,14 +888,8 @@ tls_device_reencrypt(struct sock *sk, struct tls_context *tls_ctx) struct strp_msg *rxm; char *orig_buf, *buf; - switch (tls_ctx->crypto_recv.info.cipher_type) { - case TLS_CIPHER_AES_GCM_128: - case TLS_CIPHER_AES_GCM_256: - break; - default: - return -EINVAL; - } cipher_desc = get_cipher_desc(tls_ctx->crypto_recv.info.cipher_type); + DEBUG_NET_WARN_ON_ONCE(!cipher_desc || !cipher_desc->offloadable); rxm = strp_msg(tls_strp_msg(sw_ctx)); orig_buf = kmalloc(rxm->full_len + TLS_HEADER_SIZE + cipher_desc->iv, @@ -1042,22 +1033,45 @@ static void tls_device_attach(struct tls_context *ctx, struct sock *sk, } } -int tls_set_device_offload(struct sock *sk, struct tls_context *ctx) +static struct tls_offload_context_tx *alloc_offload_ctx_tx(struct tls_context *ctx) +{ + struct tls_offload_context_tx *offload_ctx; + __be64 rcd_sn; + + offload_ctx = kzalloc(sizeof(*offload_ctx), GFP_KERNEL); + if (!offload_ctx) + return NULL; + + INIT_WORK(&offload_ctx->destruct_work, tls_device_tx_del_task); + INIT_LIST_HEAD(&offload_ctx->records_list); + spin_lock_init(&offload_ctx->lock); + sg_init_table(offload_ctx->sg_tx_data, + ARRAY_SIZE(offload_ctx->sg_tx_data)); + + /* start at rec_seq - 1 to account for the start marker record */ + memcpy(&rcd_sn, ctx->tx.rec_seq, sizeof(rcd_sn)); + offload_ctx->unacked_record_sn = be64_to_cpu(rcd_sn) - 1; + + offload_ctx->ctx = ctx; + + return offload_ctx; +} + +int tls_set_device_offload(struct sock *sk) { - struct tls_context *tls_ctx = tls_get_ctx(sk); - struct tls_prot_info *prot = &tls_ctx->prot_info; - const struct tls_cipher_desc *cipher_desc; struct tls_record_info *start_marker_record; struct tls_offload_context_tx *offload_ctx; + const struct tls_cipher_desc *cipher_desc; struct tls_crypto_info *crypto_info; + struct tls_prot_info *prot; struct net_device *netdev; - char *iv, *rec_seq; + struct tls_context *ctx; struct sk_buff *skb; - __be64 rcd_sn; + char *iv, *rec_seq; int rc; - if (!ctx) - return -EINVAL; + ctx = tls_get_ctx(sk); + prot = &ctx->prot_info; if (ctx->priv_ctx_tx) return -EEXIST; @@ -1085,38 +1099,23 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx) goto release_netdev; } + rc = init_prot_info(prot, crypto_info, cipher_desc); + if (rc) + goto release_netdev; + iv = crypto_info_iv(crypto_info, cipher_desc); rec_seq = crypto_info_rec_seq(crypto_info, cipher_desc); - prot->version = crypto_info->version; - prot->cipher_type = crypto_info->cipher_type; - prot->prepend_size = TLS_HEADER_SIZE + cipher_desc->iv; - prot->tag_size = cipher_desc->tag; - prot->overhead_size = prot->prepend_size + prot->tag_size; - prot->iv_size = cipher_desc->iv; - prot->salt_size = cipher_desc->salt; - ctx->tx.iv = kmalloc(cipher_desc->iv + cipher_desc->salt, GFP_KERNEL); - if (!ctx->tx.iv) { - rc = -ENOMEM; - goto release_netdev; - } - memcpy(ctx->tx.iv + cipher_desc->salt, iv, cipher_desc->iv); - - prot->rec_seq_size = cipher_desc->rec_seq; - ctx->tx.rec_seq = kmemdup(rec_seq, cipher_desc->rec_seq, GFP_KERNEL); - if (!ctx->tx.rec_seq) { - rc = -ENOMEM; - goto free_iv; - } + memcpy(ctx->tx.rec_seq, rec_seq, cipher_desc->rec_seq); start_marker_record = 
kmalloc(sizeof(*start_marker_record), GFP_KERNEL); if (!start_marker_record) { rc = -ENOMEM; - goto free_rec_seq; + goto release_netdev; } - offload_ctx = kzalloc(TLS_OFFLOAD_CONTEXT_SIZE_TX, GFP_KERNEL); + offload_ctx = alloc_offload_ctx_tx(ctx); if (!offload_ctx) { rc = -ENOMEM; goto free_marker_record; @@ -1126,22 +1125,10 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx) if (rc) goto free_offload_ctx; - /* start at rec_seq - 1 to account for the start marker record */ - memcpy(&rcd_sn, ctx->tx.rec_seq, sizeof(rcd_sn)); - offload_ctx->unacked_record_sn = be64_to_cpu(rcd_sn) - 1; - start_marker_record->end_seq = tcp_sk(sk)->write_seq; start_marker_record->len = 0; start_marker_record->num_frags = 0; - - INIT_WORK(&offload_ctx->destruct_work, tls_device_tx_del_task); - offload_ctx->ctx = ctx; - - INIT_LIST_HEAD(&offload_ctx->records_list); list_add_tail(&start_marker_record->list, &offload_ctx->records_list); - spin_lock_init(&offload_ctx->lock); - sg_init_table(offload_ctx->sg_tx_data, - ARRAY_SIZE(offload_ctx->sg_tx_data)); clean_acked_data_enable(inet_csk(sk), &tls_icsk_clean_acked); ctx->push_pending_record = tls_device_push_pending_record; @@ -1198,10 +1185,6 @@ free_offload_ctx: ctx->priv_ctx_tx = NULL; free_marker_record: kfree(start_marker_record); -free_rec_seq: - kfree(ctx->tx.rec_seq); -free_iv: - kfree(ctx->tx.iv); release_netdev: dev_put(netdev); return rc; @@ -1242,7 +1225,7 @@ int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx) goto release_lock; } - context = kzalloc(TLS_OFFLOAD_CONTEXT_SIZE_RX, GFP_KERNEL); + context = kzalloc(sizeof(*context), GFP_KERNEL); if (!context) { rc = -ENOMEM; goto release_lock; @@ -1250,7 +1233,7 @@ int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx) context->resync_nh_reset = 1; ctx->priv_ctx_rx = context; - rc = tls_set_sw_offload(sk, ctx, 0); + rc = tls_set_sw_offload(sk, 0); if (rc) goto release_ctx; diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c index 1d743f310f..4e7228f275 100644 --- a/net/tls/tls_device_fallback.c +++ b/net/tls/tls_device_fallback.c @@ -54,7 +54,7 @@ static int tls_enc_record(struct aead_request *aead_req, struct scatter_walk *out, int *in_len, struct tls_prot_info *prot) { - unsigned char buf[TLS_HEADER_SIZE + MAX_IV_SIZE]; + unsigned char buf[TLS_HEADER_SIZE + TLS_MAX_IV_SIZE]; const struct tls_cipher_desc *cipher_desc; struct scatterlist sg_in[3]; struct scatterlist sg_out[3]; @@ -62,14 +62,8 @@ static int tls_enc_record(struct aead_request *aead_req, u16 len; int rc; - switch (prot->cipher_type) { - case TLS_CIPHER_AES_GCM_128: - case TLS_CIPHER_AES_GCM_256: - break; - default: - return -EINVAL; - } cipher_desc = get_cipher_desc(prot->cipher_type); + DEBUG_NET_WARN_ON_ONCE(!cipher_desc || !cipher_desc->offloadable); buf_size = TLS_HEADER_SIZE + cipher_desc->iv; len = min_t(int, *in_len, buf_size); @@ -338,17 +332,9 @@ static struct sk_buff *tls_enc_skb(struct tls_context *tls_ctx, if (!aead_req) return NULL; - switch (tls_ctx->crypto_send.info.cipher_type) { - case TLS_CIPHER_AES_GCM_128: - salt = tls_ctx->crypto_send.aes_gcm_128.salt; - break; - case TLS_CIPHER_AES_GCM_256: - salt = tls_ctx->crypto_send.aes_gcm_256.salt; - break; - default: - goto free_req; - } cipher_desc = get_cipher_desc(tls_ctx->crypto_send.info.cipher_type); + DEBUG_NET_WARN_ON_ONCE(!cipher_desc || !cipher_desc->offloadable); + buf_len = cipher_desc->salt + cipher_desc->iv + TLS_AAD_SPACE_SIZE + sync_size + cipher_desc->tag; buf = kmalloc(buf_len, 
GFP_ATOMIC); @@ -356,6 +342,7 @@ static struct sk_buff *tls_enc_skb(struct tls_context *tls_ctx, goto free_req; iv = buf; + salt = crypto_info_salt(&tls_ctx->crypto_send.info, cipher_desc); memcpy(iv, salt, cipher_desc->salt); aad = buf + cipher_desc->salt + cipher_desc->iv; dummy_buf = aad + TLS_AAD_SPACE_SIZE; diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 002483e60c..b4674f03d7 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -59,7 +59,8 @@ enum { }; #define CHECK_CIPHER_DESC(cipher,ci) \ - static_assert(cipher ## _IV_SIZE <= MAX_IV_SIZE); \ + static_assert(cipher ## _IV_SIZE <= TLS_MAX_IV_SIZE); \ + static_assert(cipher ## _SALT_SIZE <= TLS_MAX_SALT_SIZE); \ static_assert(cipher ## _REC_SEQ_SIZE <= TLS_MAX_REC_SEQ_SIZE); \ static_assert(cipher ## _TAG_SIZE == TLS_TAG_SIZE); \ static_assert(sizeof_field(struct ci, iv) == cipher ## _IV_SIZE); \ @@ -348,8 +349,6 @@ static void tls_sk_proto_cleanup(struct sock *sk, /* We need these for tls_sw_fallback handling of other packets */ if (ctx->tx_conf == TLS_SW) { - kfree(ctx->tx.rec_seq); - kfree(ctx->tx.iv); tls_sw_release_resources_tx(sk); TLS_DEC_STATS(sock_net(sk), LINUX_MIB_TLSCURRTXSW); } else if (ctx->tx_conf == TLS_HW) { @@ -585,6 +584,31 @@ static int tls_getsockopt(struct sock *sk, int level, int optname, return do_tls_getsockopt(sk, optname, optval, optlen); } +static int validate_crypto_info(const struct tls_crypto_info *crypto_info, + const struct tls_crypto_info *alt_crypto_info) +{ + if (crypto_info->version != TLS_1_2_VERSION && + crypto_info->version != TLS_1_3_VERSION) + return -EINVAL; + + switch (crypto_info->cipher_type) { + case TLS_CIPHER_ARIA_GCM_128: + case TLS_CIPHER_ARIA_GCM_256: + if (crypto_info->version != TLS_1_2_VERSION) + return -EINVAL; + break; + } + + /* Ensure that TLS version and ciphers are same in both directions */ + if (TLS_CRYPTO_INFO_READY(alt_crypto_info)) { + if (alt_crypto_info->version != crypto_info->version || + alt_crypto_info->cipher_type != crypto_info->cipher_type) + return -EINVAL; + } + + return 0; +} + static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval, unsigned int optlen, int tx) { @@ -616,21 +640,9 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval, goto err_crypto_info; } - /* check version */ - if (crypto_info->version != TLS_1_2_VERSION && - crypto_info->version != TLS_1_3_VERSION) { - rc = -EINVAL; + rc = validate_crypto_info(crypto_info, alt_crypto_info); + if (rc) goto err_crypto_info; - } - - /* Ensure that TLS version and ciphers are same in both directions */ - if (TLS_CRYPTO_INFO_READY(alt_crypto_info)) { - if (alt_crypto_info->version != crypto_info->version || - alt_crypto_info->cipher_type != crypto_info->cipher_type) { - rc = -EINVAL; - goto err_crypto_info; - } - } cipher_desc = get_cipher_desc(crypto_info->cipher_type); if (!cipher_desc) { @@ -638,16 +650,6 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval, goto err_crypto_info; } - switch (crypto_info->cipher_type) { - case TLS_CIPHER_ARIA_GCM_128: - case TLS_CIPHER_ARIA_GCM_256: - if (crypto_info->version != TLS_1_2_VERSION) { - rc = -EINVAL; - goto err_crypto_info; - } - break; - } - if (optlen != cipher_desc->crypto_info) { rc = -EINVAL; goto err_crypto_info; @@ -662,13 +664,13 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval, } if (tx) { - rc = tls_set_device_offload(sk, ctx); + rc = tls_set_device_offload(sk); conf = TLS_HW; if (!rc) { TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSTXDEVICE); 
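
The CHECK_CIPHER_DESC hunk earlier in this file adds a salt-size bound next to the existing IV and rec_seq checks, so an oversized cipher constant now breaks the build instead of overflowing the fixed-size buffers at runtime. A standalone sketch of the same _Static_assert-in-a-macro pattern, with invented sizes:

#include <stdio.h>

#define TLS_MAX_IV_SIZE   16
#define TLS_MAX_SALT_SIZE  4

/* Invented cipher constants, for illustration only. */
#define DEMO_CIPHER_IV_SIZE   8
#define DEMO_CIPHER_SALT_SIZE 4

/* One macro instantiation per cipher: every bound is verified at
 * compile time, so misdeclaring a cipher cannot even compile.
 */
#define CHECK_CIPHER(cipher)						\
	_Static_assert(cipher ## _IV_SIZE <= TLS_MAX_IV_SIZE,		\
		       #cipher " IV too large");			\
	_Static_assert(cipher ## _SALT_SIZE <= TLS_MAX_SALT_SIZE,	\
		       #cipher " salt too large")

CHECK_CIPHER(DEMO_CIPHER);

int main(void)
{
	puts("all cipher size bounds passed at compile time");
	return 0;
}
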
TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSCURRTXDEVICE); } else { - rc = tls_set_sw_offload(sk, ctx, 1); + rc = tls_set_sw_offload(sk, 1); if (rc) goto err_crypto_info; TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSTXSW); @@ -682,7 +684,7 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval, TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSRXDEVICE); TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSCURRRXDEVICE); } else { - rc = tls_set_sw_offload(sk, ctx, 0); + rc = tls_set_sw_offload(sk, 0); if (rc) goto err_crypto_info; TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSRXSW); @@ -1001,7 +1003,7 @@ static u16 tls_user_config(struct tls_context *ctx, bool tx) return 0; } -static int tls_get_info(const struct sock *sk, struct sk_buff *skb) +static int tls_get_info(struct sock *sk, struct sk_buff *skb) { u16 version, cipher_type; struct tls_context *ctx; diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index dba523cdc7..de96959336 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -60,9 +60,10 @@ struct tls_decrypt_arg { struct tls_decrypt_ctx { struct sock *sk; - u8 iv[MAX_IV_SIZE]; + u8 iv[TLS_MAX_IV_SIZE]; u8 aad[TLS_MAX_AAD_SIZE]; u8 tail; + bool free_sgout; struct scatterlist sg[]; }; @@ -187,7 +188,6 @@ static void tls_decrypt_done(void *data, int err) struct aead_request *aead_req = data; struct crypto_aead *aead = crypto_aead_reqtfm(aead_req); struct scatterlist *sgout = aead_req->dst; - struct scatterlist *sgin = aead_req->src; struct tls_sw_context_rx *ctx; struct tls_decrypt_ctx *dctx; struct tls_context *tls_ctx; @@ -196,6 +196,17 @@ static void tls_decrypt_done(void *data, int err) struct sock *sk; int aead_size; + /* If requests get too backlogged crypto API returns -EBUSY and calls + * ->complete(-EINPROGRESS) immediately followed by ->complete(0) + * to make waiting for backlog to flush with crypto_wait_req() easier. + * First wait converts -EBUSY -> -EINPROGRESS, and the second one + * -EINPROGRESS -> 0. + * We have a single struct crypto_async_request per direction, this + * scheme doesn't help us, so just ignore the first ->complete(). 
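
A userspace model of the biased-counter scheme that replaces the decrypt_compl_lock/encrypt_compl_lock spinlocks here: the counter starts at 1, each submission adds one, each completion subtracts one and only signals at zero, and the waiter drops the bias before sleeping and restores it afterwards. Sketch only, with C11 atomics plus a pthread condvar standing in for struct crypto_wait:

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

static atomic_int pending = 1;	/* starts at 1: the waiter's bias */
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static int done;

/* Completion side: drop one reference; the count can only reach zero
 * once the waiter has dropped its bias, so at most one signal fires.
 */
static void complete_one(void)
{
	if (atomic_fetch_sub(&pending, 1) == 1) {
		pthread_mutex_lock(&lock);
		done = 1;
		pthread_cond_signal(&cond);
		pthread_mutex_unlock(&lock);
	}
}

static void *async_op(void *arg)
{
	(void)arg;
	usleep(10000);		/* pretend the crypto op takes a while */
	complete_one();
	return NULL;
}

/* Waiter side, mirroring tls_{en,de}crypt_async_wait(): drop the bias,
 * sleep if ops remain in flight, then restore the bias for reuse.
 */
static void wait_for_all(void)
{
	if (atomic_fetch_sub(&pending, 1) != 1) {
		pthread_mutex_lock(&lock);
		while (!done)
			pthread_cond_wait(&cond, &lock);
		pthread_mutex_unlock(&lock);
	}
	done = 0;		/* nothing in flight anymore; safe to reset */
	atomic_fetch_add(&pending, 1);
}

int main(void)
{
	pthread_t t[4];

	for (int i = 0; i < 4; i++) {
		atomic_fetch_add(&pending, 1);	/* submit one op */
		pthread_create(&t[i], NULL, async_op, NULL);
	}
	wait_for_all();
	printf("all ops completed, pending=%d\n", atomic_load(&pending));
	for (int i = 0; i < 4; i++)
		pthread_join(t[i], NULL);
	return 0;
}
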
+ */ + if (err == -EINPROGRESS) + return; + aead_size = sizeof(*aead_req) + crypto_aead_reqsize(aead); aead_size = ALIGN(aead_size, __alignof__(*dctx)); dctx = (void *)((u8 *)aead_req + aead_size); @@ -213,7 +224,7 @@ static void tls_decrypt_done(void *data, int err) } /* Free the destination pages if skb was not decrypted inplace */ - if (sgout != sgin) { + if (dctx->free_sgout) { /* Skip the first S/G entry as it points to AAD */ for_each_sg(sg_next(sgout), sg, UINT_MAX, pages) { if (!sg) @@ -224,10 +235,17 @@ static void tls_decrypt_done(void *data, int err) kfree(aead_req); - spin_lock_bh(&ctx->decrypt_compl_lock); - if (!atomic_dec_return(&ctx->decrypt_pending)) + if (atomic_dec_and_test(&ctx->decrypt_pending)) complete(&ctx->async_wait.completion); - spin_unlock_bh(&ctx->decrypt_compl_lock); +} + +static int tls_decrypt_async_wait(struct tls_sw_context_rx *ctx) +{ + if (!atomic_dec_and_test(&ctx->decrypt_pending)) + crypto_wait_req(-EINPROGRESS, &ctx->async_wait); + atomic_inc(&ctx->decrypt_pending); + + return ctx->async_wait.err; } static int tls_do_decryption(struct sock *sk, @@ -253,6 +271,7 @@ static int tls_do_decryption(struct sock *sk, aead_request_set_callback(aead_req, CRYPTO_TFM_REQ_MAY_BACKLOG, tls_decrypt_done, aead_req); + DEBUG_NET_WARN_ON_ONCE(atomic_read(&ctx->decrypt_pending) < 1); atomic_inc(&ctx->decrypt_pending); } else { aead_request_set_callback(aead_req, @@ -261,6 +280,10 @@ static int tls_do_decryption(struct sock *sk, } ret = crypto_aead_decrypt(aead_req); + if (ret == -EBUSY) { + ret = tls_decrypt_async_wait(ctx); + ret = ret ?: -EINPROGRESS; + } if (ret == -EINPROGRESS) { if (darg->async) return 0; @@ -439,9 +462,10 @@ static void tls_encrypt_done(void *data, int err) struct tls_rec *rec = data; struct scatterlist *sge; struct sk_msg *msg_en; - bool ready = false; struct sock *sk; - int pending; + + if (err == -EINPROGRESS) /* see the comment in tls_decrypt_done() */ + return; msg_en = &rec->msg_encrypted; @@ -476,23 +500,25 @@ static void tls_encrypt_done(void *data, int err) /* If received record is at head of tx_list, schedule tx */ first_rec = list_first_entry(&ctx->tx_list, struct tls_rec, list); - if (rec == first_rec) - ready = true; + if (rec == first_rec) { + /* Schedule the transmission */ + if (!test_and_set_bit(BIT_TX_SCHEDULED, + &ctx->tx_bitmask)) + schedule_delayed_work(&ctx->tx_work.work, 1); + } } - spin_lock_bh(&ctx->encrypt_compl_lock); - pending = atomic_dec_return(&ctx->encrypt_pending); - - if (!pending && ctx->async_notify) + if (atomic_dec_and_test(&ctx->encrypt_pending)) complete(&ctx->async_wait.completion); - spin_unlock_bh(&ctx->encrypt_compl_lock); +} - if (!ready) - return; +static int tls_encrypt_async_wait(struct tls_sw_context_tx *ctx) +{ + if (!atomic_dec_and_test(&ctx->encrypt_pending)) + crypto_wait_req(-EINPROGRESS, &ctx->async_wait); + atomic_inc(&ctx->encrypt_pending); - /* Schedule the transmission */ - if (!test_and_set_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask)) - schedule_delayed_work(&ctx->tx_work.work, 1); + return ctx->async_wait.err; } static int tls_do_encryption(struct sock *sk, @@ -541,9 +567,14 @@ static int tls_do_encryption(struct sock *sk, /* Add the record in tx_list */ list_add_tail((struct list_head *)&rec->list, &ctx->tx_list); + DEBUG_NET_WARN_ON_ONCE(atomic_read(&ctx->encrypt_pending) < 1); atomic_inc(&ctx->encrypt_pending); rc = crypto_aead_encrypt(aead_req); + if (rc == -EBUSY) { + rc = tls_encrypt_async_wait(ctx); + rc = rc ?: -EINPROGRESS; + } if (!rc || rc != -EINPROGRESS) { 
atomic_dec(&ctx->encrypt_pending); sge->offset -= prot->prepend_size; @@ -984,7 +1015,6 @@ static int tls_sw_sendmsg_locked(struct sock *sk, struct msghdr *msg, int num_zc = 0; int orig_size; int ret = 0; - int pending; if (!eor && (msg->msg_flags & MSG_EOR)) return -EINVAL; @@ -1163,24 +1193,12 @@ trim_sgl: if (!num_async) { goto send_end; } else if (num_zc) { - /* Wait for pending encryptions to get completed */ - spin_lock_bh(&ctx->encrypt_compl_lock); - ctx->async_notify = true; - - pending = atomic_read(&ctx->encrypt_pending); - spin_unlock_bh(&ctx->encrypt_compl_lock); - if (pending) - crypto_wait_req(-EINPROGRESS, &ctx->async_wait); - else - reinit_completion(&ctx->async_wait.completion); - - /* There can be no concurrent accesses, since we have no - * pending encrypt operations - */ - WRITE_ONCE(ctx->async_notify, false); + int err; - if (ctx->async_wait.err) { - ret = ctx->async_wait.err; + /* Wait for pending encryptions to get completed */ + err = tls_encrypt_async_wait(ctx); + if (err) { + ret = err; copied = 0; } } @@ -1229,7 +1247,6 @@ void tls_sw_splice_eof(struct socket *sock) ssize_t copied = 0; bool retrying = false; int ret = 0; - int pending; if (!ctx->open_rec) return; @@ -1264,22 +1281,7 @@ retry: } /* Wait for pending encryptions to get completed */ - spin_lock_bh(&ctx->encrypt_compl_lock); - ctx->async_notify = true; - - pending = atomic_read(&ctx->encrypt_pending); - spin_unlock_bh(&ctx->encrypt_compl_lock); - if (pending) - crypto_wait_req(-EINPROGRESS, &ctx->async_wait); - else - reinit_completion(&ctx->async_wait.completion); - - /* There can be no concurrent accesses, since we have no pending - * encrypt operations - */ - WRITE_ONCE(ctx->async_notify, false); - - if (ctx->async_wait.err) + if (tls_encrypt_async_wait(ctx)) goto unlock; /* Transmit if any encryptions have completed */ @@ -1581,6 +1583,7 @@ static int tls_decrypt_sg(struct sock *sk, struct iov_iter *out_iov, } else if (out_sg) { memcpy(sgout, out_sg, n_sgout * sizeof(*sgout)); } + dctx->free_sgout = !!pages; /* Prepare and submit AEAD request */ err = tls_do_decryption(sk, sgin, sgout, dctx->iv, @@ -1769,7 +1772,8 @@ static int process_rx_list(struct tls_sw_context_rx *ctx, u8 *control, size_t skip, size_t len, - bool is_peek) + bool is_peek, + bool *more) { struct sk_buff *skb = skb_peek(&ctx->rx_list); struct tls_msg *tlm; @@ -1782,7 +1786,7 @@ static int process_rx_list(struct tls_sw_context_rx *ctx, err = tls_record_content_type(msg, tlm, control); if (err <= 0) - goto out; + goto more; if (skip < rxm->full_len) break; @@ -1800,12 +1804,12 @@ static int process_rx_list(struct tls_sw_context_rx *ctx, err = tls_record_content_type(msg, tlm, control); if (err <= 0) - goto out; + goto more; err = skb_copy_datagram_msg(skb, rxm->offset + skip, msg, chunk); if (err < 0) - goto out; + goto more; len = len - chunk; copied = copied + chunk; @@ -1841,6 +1845,10 @@ static int process_rx_list(struct tls_sw_context_rx *ctx, out: return copied ? : err; +more: + if (more) + *more = true; + goto out; } static bool @@ -1944,6 +1952,7 @@ int tls_sw_recvmsg(struct sock *sk, int target, err; bool is_kvec = iov_iter_is_kvec(&msg->msg_iter); bool is_peek = flags & MSG_PEEK; + bool rx_more = false; bool released = true; bool bpf_strp_enabled; bool zc_capable; @@ -1963,12 +1972,12 @@ int tls_sw_recvmsg(struct sock *sk, goto end; /* Process pending decrypted records. 
It must be non-zero-copy */ - err = process_rx_list(ctx, msg, &control, 0, len, is_peek); + err = process_rx_list(ctx, msg, &control, 0, len, is_peek, &rx_more); if (err < 0) goto end; copied = err; - if (len <= copied) + if (len <= copied || (copied && control != TLS_RECORD_TYPE_DATA) || rx_more) goto end; target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); @@ -2061,6 +2070,8 @@ put_on_rx_list: decrypted += chunk; len -= chunk; __skb_queue_tail(&ctx->rx_list, skb); + if (unlikely(control != TLS_RECORD_TYPE_DATA)) + break; continue; } @@ -2109,16 +2120,10 @@ put_on_rx_list: recv_end: if (async) { - int ret, pending; + int ret; /* Wait for all previously submitted records to be decrypted */ - spin_lock_bh(&ctx->decrypt_compl_lock); - reinit_completion(&ctx->async_wait.completion); - pending = atomic_read(&ctx->decrypt_pending); - spin_unlock_bh(&ctx->decrypt_compl_lock); - ret = 0; - if (pending) - ret = crypto_wait_req(-EINPROGRESS, &ctx->async_wait); + ret = tls_decrypt_async_wait(ctx); __skb_queue_purge(&ctx->async_hold); if (ret) { @@ -2131,11 +2136,10 @@ recv_end: /* Drain records from the rx_list & copy if required */ if (is_peek || is_kvec) err = process_rx_list(ctx, msg, &control, copied, - decrypted, is_peek); + decrypted, is_peek, NULL); else err = process_rx_list(ctx, msg, &control, 0, - async_copy_bytes, is_peek); - decrypted += max(err, 0); + async_copy_bytes, is_peek, NULL); } copied += decrypted; @@ -2335,7 +2339,7 @@ int tls_rx_msg_size(struct tls_strparser *strp, struct sk_buff *skb) { struct tls_context *tls_ctx = tls_get_ctx(strp->sk); struct tls_prot_info *prot = &tls_ctx->prot_info; - char header[TLS_HEADER_SIZE + MAX_IV_SIZE]; + char header[TLS_HEADER_SIZE + TLS_MAX_IV_SIZE]; size_t cipher_overhead; size_t data_len = 0; int ret; @@ -2435,16 +2439,9 @@ void tls_sw_release_resources_tx(struct sock *sk) struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); struct tls_rec *rec, *tmp; - int pending; /* Wait for any pending async encryptions to complete */ - spin_lock_bh(&ctx->encrypt_compl_lock); - ctx->async_notify = true; - pending = atomic_read(&ctx->encrypt_pending); - spin_unlock_bh(&ctx->encrypt_compl_lock); - - if (pending) - crypto_wait_req(-EINPROGRESS, &ctx->async_wait); + tls_encrypt_async_wait(ctx); tls_tx_records(sk, -1); @@ -2483,9 +2480,6 @@ void tls_sw_release_resources_rx(struct sock *sk) struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); - kfree(tls_ctx->rx.rec_seq); - kfree(tls_ctx->rx.iv); - if (ctx->aead_recv) { __skb_queue_purge(&ctx->rx_list); crypto_free_aead(ctx->aead_recv); @@ -2597,69 +2591,113 @@ void tls_update_rx_zc_capable(struct tls_context *tls_ctx) tls_ctx->prot_info.version != TLS_1_3_VERSION; } -int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) +static struct tls_sw_context_tx *init_ctx_tx(struct tls_context *ctx, struct sock *sk) +{ + struct tls_sw_context_tx *sw_ctx_tx; + + if (!ctx->priv_ctx_tx) { + sw_ctx_tx = kzalloc(sizeof(*sw_ctx_tx), GFP_KERNEL); + if (!sw_ctx_tx) + return NULL; + } else { + sw_ctx_tx = ctx->priv_ctx_tx; + } + + crypto_init_wait(&sw_ctx_tx->async_wait); + atomic_set(&sw_ctx_tx->encrypt_pending, 1); + INIT_LIST_HEAD(&sw_ctx_tx->tx_list); + INIT_DELAYED_WORK(&sw_ctx_tx->tx_work.work, tx_work_handler); + sw_ctx_tx->tx_work.sk = sk; + + return sw_ctx_tx; +} + +static struct tls_sw_context_rx *init_ctx_rx(struct tls_context *ctx) +{ + struct tls_sw_context_rx *sw_ctx_rx; + + if 
(!ctx->priv_ctx_rx) { + sw_ctx_rx = kzalloc(sizeof(*sw_ctx_rx), GFP_KERNEL); + if (!sw_ctx_rx) + return NULL; + } else { + sw_ctx_rx = ctx->priv_ctx_rx; + } + + crypto_init_wait(&sw_ctx_rx->async_wait); + atomic_set(&sw_ctx_rx->decrypt_pending, 1); + init_waitqueue_head(&sw_ctx_rx->wq); + skb_queue_head_init(&sw_ctx_rx->rx_list); + skb_queue_head_init(&sw_ctx_rx->async_hold); + + return sw_ctx_rx; +} + +int init_prot_info(struct tls_prot_info *prot, + const struct tls_crypto_info *crypto_info, + const struct tls_cipher_desc *cipher_desc) +{ + u16 nonce_size = cipher_desc->nonce; + + if (crypto_info->version == TLS_1_3_VERSION) { + nonce_size = 0; + prot->aad_size = TLS_HEADER_SIZE; + prot->tail_size = 1; + } else { + prot->aad_size = TLS_AAD_SPACE_SIZE; + prot->tail_size = 0; + } + + /* Sanity-check the sizes for stack allocations. */ + if (nonce_size > TLS_MAX_IV_SIZE || prot->aad_size > TLS_MAX_AAD_SIZE) + return -EINVAL; + + prot->version = crypto_info->version; + prot->cipher_type = crypto_info->cipher_type; + prot->prepend_size = TLS_HEADER_SIZE + nonce_size; + prot->tag_size = cipher_desc->tag; + prot->overhead_size = prot->prepend_size + prot->tag_size + prot->tail_size; + prot->iv_size = cipher_desc->iv; + prot->salt_size = cipher_desc->salt; + prot->rec_seq_size = cipher_desc->rec_seq; + + return 0; +} + +int tls_set_sw_offload(struct sock *sk, int tx) { - struct tls_context *tls_ctx = tls_get_ctx(sk); - struct tls_prot_info *prot = &tls_ctx->prot_info; - struct tls_crypto_info *crypto_info; struct tls_sw_context_tx *sw_ctx_tx = NULL; struct tls_sw_context_rx *sw_ctx_rx = NULL; + const struct tls_cipher_desc *cipher_desc; + struct tls_crypto_info *crypto_info; + char *iv, *rec_seq, *key, *salt; struct cipher_context *cctx; + struct tls_prot_info *prot; struct crypto_aead **aead; + struct tls_context *ctx; struct crypto_tfm *tfm; - char *iv, *rec_seq, *key, *salt; - const struct tls_cipher_desc *cipher_desc; - u16 nonce_size; int rc = 0; - if (!ctx) { - rc = -EINVAL; - goto out; - } + ctx = tls_get_ctx(sk); + prot = &ctx->prot_info; if (tx) { - if (!ctx->priv_ctx_tx) { - sw_ctx_tx = kzalloc(sizeof(*sw_ctx_tx), GFP_KERNEL); - if (!sw_ctx_tx) { - rc = -ENOMEM; - goto out; - } - ctx->priv_ctx_tx = sw_ctx_tx; - } else { - sw_ctx_tx = - (struct tls_sw_context_tx *)ctx->priv_ctx_tx; - } - } else { - if (!ctx->priv_ctx_rx) { - sw_ctx_rx = kzalloc(sizeof(*sw_ctx_rx), GFP_KERNEL); - if (!sw_ctx_rx) { - rc = -ENOMEM; - goto out; - } - ctx->priv_ctx_rx = sw_ctx_rx; - } else { - sw_ctx_rx = - (struct tls_sw_context_rx *)ctx->priv_ctx_rx; - } - } + ctx->priv_ctx_tx = init_ctx_tx(ctx, sk); + if (!ctx->priv_ctx_tx) + return -ENOMEM; - if (tx) { - crypto_init_wait(&sw_ctx_tx->async_wait); - spin_lock_init(&sw_ctx_tx->encrypt_compl_lock); + sw_ctx_tx = ctx->priv_ctx_tx; crypto_info = &ctx->crypto_send.info; cctx = &ctx->tx; aead = &sw_ctx_tx->aead_send; - INIT_LIST_HEAD(&sw_ctx_tx->tx_list); - INIT_DELAYED_WORK(&sw_ctx_tx->tx_work.work, tx_work_handler); - sw_ctx_tx->tx_work.sk = sk; } else { - crypto_init_wait(&sw_ctx_rx->async_wait); - spin_lock_init(&sw_ctx_rx->decrypt_compl_lock); - init_waitqueue_head(&sw_ctx_rx->wq); + ctx->priv_ctx_rx = init_ctx_rx(ctx); + if (!ctx->priv_ctx_rx) + return -ENOMEM; + + sw_ctx_rx = ctx->priv_ctx_rx; crypto_info = &ctx->crypto_recv.info; cctx = &ctx->rx; - skb_queue_head_init(&sw_ctx_rx->rx_list); - skb_queue_head_init(&sw_ctx_rx->async_hold); aead = &sw_ctx_rx->aead_recv; } @@ -2669,58 +2707,25 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, 
int tx) goto free_priv; } - nonce_size = cipher_desc->nonce; + rc = init_prot_info(prot, crypto_info, cipher_desc); + if (rc) + goto free_priv; iv = crypto_info_iv(crypto_info, cipher_desc); key = crypto_info_key(crypto_info, cipher_desc); salt = crypto_info_salt(crypto_info, cipher_desc); rec_seq = crypto_info_rec_seq(crypto_info, cipher_desc); - if (crypto_info->version == TLS_1_3_VERSION) { - nonce_size = 0; - prot->aad_size = TLS_HEADER_SIZE; - prot->tail_size = 1; - } else { - prot->aad_size = TLS_AAD_SPACE_SIZE; - prot->tail_size = 0; - } - - /* Sanity-check the sizes for stack allocations. */ - if (nonce_size > MAX_IV_SIZE || prot->aad_size > TLS_MAX_AAD_SIZE) { - rc = -EINVAL; - goto free_priv; - } - - prot->version = crypto_info->version; - prot->cipher_type = crypto_info->cipher_type; - prot->prepend_size = TLS_HEADER_SIZE + nonce_size; - prot->tag_size = cipher_desc->tag; - prot->overhead_size = prot->prepend_size + - prot->tag_size + prot->tail_size; - prot->iv_size = cipher_desc->iv; - prot->salt_size = cipher_desc->salt; - cctx->iv = kmalloc(cipher_desc->iv + cipher_desc->salt, GFP_KERNEL); - if (!cctx->iv) { - rc = -ENOMEM; - goto free_priv; - } - /* Note: 128 & 256 bit salt are the same size */ - prot->rec_seq_size = cipher_desc->rec_seq; memcpy(cctx->iv, salt, cipher_desc->salt); memcpy(cctx->iv + cipher_desc->salt, iv, cipher_desc->iv); - - cctx->rec_seq = kmemdup(rec_seq, cipher_desc->rec_seq, GFP_KERNEL); - if (!cctx->rec_seq) { - rc = -ENOMEM; - goto free_iv; - } + memcpy(cctx->rec_seq, rec_seq, cipher_desc->rec_seq); if (!*aead) { *aead = crypto_alloc_aead(cipher_desc->cipher_name, 0, 0); if (IS_ERR(*aead)) { rc = PTR_ERR(*aead); *aead = NULL; - goto free_rec_seq; + goto free_priv; } } @@ -2752,12 +2757,6 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) free_aead: crypto_free_aead(*aead); *aead = NULL; -free_rec_seq: - kfree(cctx->rec_seq); - cctx->rec_seq = NULL; -free_iv: - kfree(cctx->iv); - cctx->iv = NULL; free_priv: if (tx) { kfree(ctx->priv_ctx_tx); diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 1e1a88bd4e..0748e7ea52 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -116,6 +116,7 @@ #include <linux/freezer.h> #include <linux/file.h> #include <linux/btf_ids.h> +#include <linux/bpf-cgroup.h> #include "scm.h" @@ -781,19 +782,6 @@ static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t); static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t, int); -static int unix_set_peek_off(struct sock *sk, int val) -{ - struct unix_sock *u = unix_sk(sk); - - if (mutex_lock_interruptible(&u->iolock)) - return -EINTR; - - WRITE_ONCE(sk->sk_peek_off, val); - mutex_unlock(&u->iolock); - - return 0; -} - #ifdef CONFIG_PROC_FS static int unix_count_nr_fds(struct sock *sk) { @@ -861,7 +849,7 @@ static const struct proto_ops unix_stream_ops = { .read_skb = unix_stream_read_skb, .mmap = sock_no_mmap, .splice_read = unix_stream_splice_read, - .set_peek_off = unix_set_peek_off, + .set_peek_off = sk_set_peek_off, .show_fdinfo = unix_show_fdinfo, }; @@ -885,7 +873,7 @@ static const struct proto_ops unix_dgram_ops = { .read_skb = unix_read_skb, .recvmsg = unix_dgram_recvmsg, .mmap = sock_no_mmap, - .set_peek_off = unix_set_peek_off, + .set_peek_off = sk_set_peek_off, .show_fdinfo = unix_show_fdinfo, }; @@ -908,7 +896,7 @@ static const struct proto_ops unix_seqpacket_ops = { .sendmsg = unix_seqpacket_sendmsg, .recvmsg = unix_seqpacket_recvmsg, .mmap = sock_no_mmap, - .set_peek_off = unix_set_peek_off, + 
.set_peek_off = sk_set_peek_off, .show_fdinfo = unix_show_fdinfo, }; @@ -1343,13 +1331,11 @@ static void unix_state_double_lock(struct sock *sk1, struct sock *sk2) unix_state_lock(sk1); return; } - if (sk1 < sk2) { - unix_state_lock(sk1); - unix_state_lock_nested(sk2); - } else { - unix_state_lock(sk2); - unix_state_lock_nested(sk1); - } + if (sk1 > sk2) + swap(sk1, sk2); + + unix_state_lock(sk1); + unix_state_lock_nested(sk2, U_LOCK_SECOND); } static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2) @@ -1379,6 +1365,10 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr, if (err) goto out; + err = BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, addr, &alen); + if (err) + goto out; + if ((test_bit(SOCK_PASSCRED, &sock->flags) || test_bit(SOCK_PASSPIDFD, &sock->flags)) && !unix_sk(sk)->addr) { @@ -1488,6 +1478,10 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, if (err) goto out; + err = BPF_CGROUP_RUN_PROG_UNIX_CONNECT_LOCK(sk, uaddr, &addr_len); + if (err) + goto out; + if ((test_bit(SOCK_PASSCRED, &sock->flags) || test_bit(SOCK_PASSPIDFD, &sock->flags)) && !u->addr) { err = unix_autobind(sk); @@ -1582,7 +1576,7 @@ restart: goto out_unlock; } - unix_state_lock_nested(sk); + unix_state_lock_nested(sk, U_LOCK_SECOND); if (sk->sk_state != st) { unix_state_unlock(sk); @@ -1768,6 +1762,13 @@ static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer) } else { err = addr->len; memcpy(sunaddr, addr->name, addr->len); + + if (peer) + BPF_CGROUP_RUN_SA_PROG(sk, uaddr, &err, + CGROUP_UNIX_GETPEERNAME); + else + BPF_CGROUP_RUN_SA_PROG(sk, uaddr, &err, + CGROUP_UNIX_GETSOCKNAME); } sock_put(sk); out: @@ -1920,6 +1921,13 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, err = unix_validate_addr(sunaddr, msg->msg_namelen); if (err) goto out; + + err = BPF_CGROUP_RUN_PROG_UNIX_SENDMSG_LOCK(sk, + msg->msg_name, + &msg->msg_namelen, + NULL); + if (err) + goto out; } else { sunaddr = NULL; err = -ENOTCONN; @@ -2388,9 +2396,14 @@ int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size, EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND); - if (msg->msg_name) + if (msg->msg_name) { unix_copy_addr(msg, skb->sk); + BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk, + msg->msg_name, + &msg->msg_namelen); + } + if (size > skb->len - skip) size = skb->len - skip; else if (size < skb->len - skip) @@ -2743,6 +2756,11 @@ unlock: DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, state->msg->msg_name); unix_copy_addr(state->msg, skb->sk); + + BPF_CGROUP_RUN_PROG_UNIX_RECVMSG_LOCK(sk, + state->msg->msg_name, + &state->msg->msg_namelen); + sunaddr = NULL; } @@ -3310,7 +3328,7 @@ static const struct seq_operations unix_seq_ops = { .show = unix_seq_show, }; -#if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) +#ifdef CONFIG_BPF_SYSCALL struct bpf_unix_iter_state { struct seq_net_private p; unsigned int cur_sk; @@ -3572,7 +3590,7 @@ static struct pernet_operations unix_net_ops = { .exit = unix_net_exit, }; -#if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) +#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) DEFINE_BPF_ITER_FUNC(unix, struct bpf_iter_meta *meta, struct unix_sock *unix_sk, uid_t uid) @@ -3672,7 +3690,7 @@ static int __init af_unix_init(void) register_pernet_subsys(&unix_net_ops); unix_bpf_build_proto(); -#if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) +#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) 
bpf_iter_register(); #endif @@ -3680,20 +3698,5 @@ out: return rc; } -static void __exit af_unix_exit(void) -{ - sock_unregister(PF_UNIX); - proto_unregister(&unix_dgram_proto); - proto_unregister(&unix_stream_proto); - unregister_pernet_subsys(&unix_net_ops); -} - -/* Earlier than device_initcall() so that other drivers invoking - request_module() don't end up in a loop when modprobe tries - to use a UNIX socket. But later than subsys_initcall() because - we depend on stuff initialised there */ +/* Later than subsys_initcall() because we depend on stuff initialised there */ fs_initcall(af_unix_init); -module_exit(af_unix_exit); - -MODULE_LICENSE("GPL"); -MODULE_ALIAS_NETPROTO(PF_UNIX); diff --git a/net/unix/diag.c b/net/unix/diag.c index 616b55c5b8..be19827eca 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -84,7 +84,7 @@ static int sk_diag_dump_icons(struct sock *sk, struct sk_buff *nlskb) * queue lock. With the other's queue locked it's * OK to lock the state. */ - unix_state_lock_nested(req); + unix_state_lock_nested(req, U_LOCK_DIAG); peer = unix_sk(req)->peer; buf[i++] = (peer ? sock_i_ino(peer) : 0); unix_state_unlock(req); @@ -339,4 +339,5 @@ static void __exit unix_diag_exit(void) module_init(unix_diag_init); module_exit(unix_diag_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("UNIX socket monitoring via SOCK_DIAG"); MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 1 /* AF_LOCAL */); diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 2405f0f9af..8f63f0b4bf 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -314,6 +314,17 @@ void unix_gc(void) /* Here we are. Hitlist is filled. Die. */ __skb_queue_purge(&hitlist); +#if IS_ENABLED(CONFIG_AF_UNIX_OOB) + list_for_each_entry_safe(u, next, &gc_candidates, link) { + struct sk_buff *skb = u->oob_skb; + + if (skb) { + u->oob_skb = NULL; + kfree_skb(skb); + } + } +#endif + spin_lock(&unix_gc_lock); /* There could be io_uring registered files, just push them back to diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 4afb6a541c..54ba7316f8 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1032,7 +1032,7 @@ static __poll_t vsock_poll(struct file *file, struct socket *sock, poll_wait(file, sk_sleep(sk), wait); mask = 0; - if (sk->sk_err) + if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue)) /* Signify that there has been an error on this socket. */ mask |= EPOLLERR; @@ -1406,6 +1406,17 @@ static int vsock_connect(struct socket *sock, struct sockaddr *addr, goto out; } + if (vsock_msgzerocopy_allow(transport)) { + set_bit(SOCK_SUPPORT_ZC, &sk->sk_socket->flags); + } else if (sock_flag(sk, SOCK_ZEROCOPY)) { + /* If this option was set before 'connect()', + * when transport was unknown, check that this + * feature is supported here. 
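
With SOL_SOCKET now handled inside vsock's setsockopt, the userspace sequence matches TCP's MSG_ZEROCOPY flow. Roughly like this sketch (the CID and port are examples, completion reaping via MSG_ERRQUEUE is omitted):

#include <stdio.h>
#include <sys/socket.h>
#include <linux/vm_sockets.h>

#ifndef SO_ZEROCOPY
#define SO_ZEROCOPY 60			/* asm-generic value, for older libcs */
#endif
#ifndef MSG_ZEROCOPY
#define MSG_ZEROCOPY 0x4000000
#endif

int main(void)
{
	struct sockaddr_vm addr = {
		.svm_family = AF_VSOCK,
		.svm_cid = 2,		/* example: the host CID */
		.svm_port = 1234,	/* example port */
	};
	int one = 1;
	char buf[4096] = "hello";
	int fd = socket(AF_VSOCK, SOCK_STREAM, 0);

	/* May be set before connect(); per the hunk above, the connect
	 * path then re-checks that the chosen transport supports
	 * zerocopy and fails with EOPNOTSUPP if it does not.
	 */
	if (setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &one, sizeof(one)) < 0)
		perror("SO_ZEROCOPY");

	if (connect(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0)
		perror("connect");

	/* Zerocopy stays per-call opt-in, as with TCP. */
	if (send(fd, buf, sizeof(buf), MSG_ZEROCOPY) < 0)
		perror("send");

	return 0;
}
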
+ */ + err = -EOPNOTSUPP; + goto out; + } + err = vsock_auto_bind(vsk); if (err) goto out; @@ -1560,6 +1571,9 @@ static int vsock_accept(struct socket *sock, struct socket *newsock, int flags, } else { newsock->state = SS_CONNECTED; sock_graft(connected, newsock); + if (vsock_msgzerocopy_allow(vconnected->transport)) + set_bit(SOCK_SUPPORT_ZC, + &connected->sk_socket->flags); } release_sock(connected); @@ -1637,7 +1651,7 @@ static int vsock_connectible_setsockopt(struct socket *sock, const struct vsock_transport *transport; u64 val; - if (level != AF_VSOCK) + if (level != AF_VSOCK && level != SOL_SOCKET) return -ENOPROTOOPT; #define COPY_IN(_v) \ @@ -1660,6 +1674,33 @@ static int vsock_connectible_setsockopt(struct socket *sock, transport = vsk->transport; + if (level == SOL_SOCKET) { + int zerocopy; + + if (optname != SO_ZEROCOPY) { + release_sock(sk); + return sock_setsockopt(sock, level, optname, optval, optlen); + } + + /* Use 'int' type here, because variable to + * set this option usually has this type. + */ + COPY_IN(zerocopy); + + if (zerocopy < 0 || zerocopy > 1) { + err = -EINVAL; + goto exit; + } + + if (transport && !vsock_msgzerocopy_allow(transport)) { + err = -EOPNOTSUPP; + goto exit; + } + + sock_valbool_flag(sk, SOCK_ZEROCOPY, zerocopy); + goto exit; + } + switch (optname) { case SO_VM_SOCKETS_BUFFER_SIZE: COPY_IN(val); @@ -1824,6 +1865,12 @@ static int vsock_connectible_sendmsg(struct socket *sock, struct msghdr *msg, goto out; } + if (msg->msg_flags & MSG_ZEROCOPY && + !vsock_msgzerocopy_allow(transport)) { + err = -EOPNOTSUPP; + goto out; + } + /* Wait for room in the produce queue to enqueue our user's data. */ timeout = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); @@ -1923,6 +1970,9 @@ out_err: err = total_written; } out: + if (sk->sk_type == SOCK_STREAM) + err = sk_stream_error(sk, msg->msg_flags, err); + release_sock(sk); return err; } @@ -2312,6 +2362,12 @@ static int vsock_create(struct net *net, struct socket *sock, } } + /* SOCK_DGRAM doesn't have 'setsockopt' callback set in its + * proto_ops, so there is no handler for custom logic. + */ + if (sock_type_connectible(sock->type)) + set_bit(SOCK_CUSTOM_SOCKOPT, &sk->sk_socket->flags); + vsock_insert_unbound(vsk); return 0; diff --git a/net/vmw_vsock/diag.c b/net/vmw_vsock/diag.c index a2823b1c5e..2e29994f92 100644 --- a/net/vmw_vsock/diag.c +++ b/net/vmw_vsock/diag.c @@ -174,5 +174,6 @@ static void __exit vsock_diag_exit(void) module_init(vsock_diag_init); module_exit(vsock_diag_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("VMware Virtual Sockets monitoring via SOCK_DIAG"); MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 40 /* AF_VSOCK */); diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index a64bf601b4..f495b9e518 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -63,6 +63,17 @@ struct virtio_vsock { u32 guest_cid; bool seqpacket_allow; + + /* These fields are used only in tx path in function + * 'virtio_transport_send_pkt_work()', so to save + * stack space in it, place both of them here. Each + * pointer from 'out_sgs' points to the corresponding + * element in 'out_bufs' - this is initialized in + * 'virtio_vsock_probe()'. Both fields are protected + * by 'tx_lock'. +1 is needed for packet header. 
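
The probe-time wiring this comment refers to is one pointer assignment per slot, so the tx worker can fill scatterlists by index without per-packet stack arrays. Schematically, with a stand-in scatterlist type and MAX_SKB_FRAGS taken as 17 (its common default) for the example:

#include <stdio.h>

#define MAX_FRAGS (17 + 1)	/* MAX_SKB_FRAGS + 1 header slot; illustrative */

struct scatterlist { void *addr; unsigned int len; };	/* stand-in */

struct vsock_tx {
	struct scatterlist *out_sgs[MAX_FRAGS];
	struct scatterlist out_bufs[MAX_FRAGS];
};

int main(void)
{
	struct vsock_tx tx;

	/* Done once at probe time: each pointer permanently refers to
	 * the matching preallocated element.
	 */
	for (int i = 0; i < MAX_FRAGS; i++)
		tx.out_sgs[i] = &tx.out_bufs[i];

	printf("slot 3 wired correctly: %d\n", tx.out_sgs[3] == &tx.out_bufs[3]);
	return 0;
}
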
+ */ + struct scatterlist *out_sgs[MAX_SKB_FRAGS + 1]; + struct scatterlist out_bufs[MAX_SKB_FRAGS + 1]; }; static u32 virtio_transport_get_local_cid(void) @@ -100,8 +111,8 @@ virtio_transport_send_pkt_work(struct work_struct *work) vq = vsock->vqs[VSOCK_VQ_TX]; for (;;) { - struct scatterlist hdr, buf, *sgs[2]; int ret, in_sg = 0, out_sg = 0; + struct scatterlist **sgs; struct sk_buff *skb; bool reply; @@ -111,12 +122,43 @@ virtio_transport_send_pkt_work(struct work_struct *work) virtio_transport_deliver_tap_pkt(skb); reply = virtio_vsock_skb_reply(skb); - - sg_init_one(&hdr, virtio_vsock_hdr(skb), sizeof(*virtio_vsock_hdr(skb))); - sgs[out_sg++] = &hdr; - if (skb->len > 0) { - sg_init_one(&buf, skb->data, skb->len); - sgs[out_sg++] = &buf; + sgs = vsock->out_sgs; + sg_init_one(sgs[out_sg], virtio_vsock_hdr(skb), + sizeof(*virtio_vsock_hdr(skb))); + out_sg++; + + if (!skb_is_nonlinear(skb)) { + if (skb->len > 0) { + sg_init_one(sgs[out_sg], skb->data, skb->len); + out_sg++; + } + } else { + struct skb_shared_info *si; + int i; + + /* If skb is nonlinear, then its buffer must contain + * only header and nothing more. Data is stored in + * the fragged part. + */ + WARN_ON_ONCE(skb_headroom(skb) != sizeof(*virtio_vsock_hdr(skb))); + + si = skb_shinfo(skb); + + for (i = 0; i < si->nr_frags; i++) { + skb_frag_t *skb_frag = &si->frags[i]; + void *va; + + /* We will use 'page_to_virt()' for the userspace page + * here, because virtio or dma-mapping layers will call + * 'virt_to_phys()' later to fill the buffer descriptor. + * We don't touch memory at "virtual" address of this page. + */ + va = page_to_virt(skb_frag->bv_page); + sg_init_one(sgs[out_sg], + va + skb_frag->bv_offset, + skb_frag->bv_len); + out_sg++; + } } ret = virtqueue_add_sgs(vq, sgs, out_sg, in_sg, skb, GFP_KERNEL); @@ -413,6 +455,42 @@ static void virtio_vsock_rx_done(struct virtqueue *vq) queue_work(virtio_vsock_workqueue, &vsock->rx_work); } +static bool virtio_transport_can_msgzerocopy(int bufs_num) +{ + struct virtio_vsock *vsock; + bool res = false; + + rcu_read_lock(); + + vsock = rcu_dereference(the_virtio_vsock); + if (vsock) { + struct virtqueue *vq = vsock->vqs[VSOCK_VQ_TX]; + + /* Check that tx queue is large enough to keep whole + * data to send. This is needed, because when there is + * not enough free space in the queue, current skb to + * send will be reinserted to the head of tx list of + * the socket to retry transmission later, so if skb + * is bigger than whole queue, it will be reinserted + * again and again, thus blocking other skbs to be sent. + * Each page of the user provided buffer will be added + * as a single buffer to the tx virtqueue, so compare + * number of pages against maximum capacity of the queue. 
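
So the test reduces to: pages spanned by the user buffer, plus one for the header, must not exceed vq->num_max. The per-segment page count is the usual offset-aware calculation; a minimal version, assuming 4 KiB pages for the example:

#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

#define PAGE_SIZE 4096u		/* assumed for the example */

/* Number of pages a buffer of 'len' bytes at address 'addr' touches,
 * i.e. the quantity iov_iter_npages() reports per segment.
 */
static unsigned int npages(uintptr_t addr, size_t len)
{
	uintptr_t first = addr / PAGE_SIZE;
	uintptr_t last = (addr + len - 1) / PAGE_SIZE;

	return len ? (unsigned int)(last - first + 1) : 0;
}

int main(void)
{
	/* 8 KiB starting 100 bytes into a page spans three pages, so the
	 * tx queue must have room for 3 + 1 descriptors (+1: header).
	 */
	unsigned int bufs = npages(0x1000 + 100, 8192) + 1;

	printf("descriptors needed: %u\n", bufs);	/* prints 4 */
	return 0;
}
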
+ */ + if (bufs_num <= vq->num_max) + res = true; + } + + rcu_read_unlock(); + + return res; +} + +static bool virtio_transport_msgzerocopy_allow(void) +{ + return true; +} + static bool virtio_transport_seqpacket_allow(u32 remote_cid); static struct virtio_transport virtio_transport = { @@ -446,6 +524,8 @@ static struct virtio_transport virtio_transport = { .seqpacket_allow = virtio_transport_seqpacket_allow, .seqpacket_has_data = virtio_transport_seqpacket_has_data, + .msgzerocopy_allow = virtio_transport_msgzerocopy_allow, + .notify_poll_in = virtio_transport_notify_poll_in, .notify_poll_out = virtio_transport_notify_poll_out, .notify_recv_init = virtio_transport_notify_recv_init, @@ -463,6 +543,7 @@ static struct virtio_transport virtio_transport = { }, .send_pkt = virtio_transport_send_pkt, + .can_msgzerocopy = virtio_transport_can_msgzerocopy, }; static bool virtio_transport_seqpacket_allow(u32 remote_cid) @@ -636,6 +717,7 @@ static int virtio_vsock_probe(struct virtio_device *vdev) { struct virtio_vsock *vsock = NULL; int ret; + int i; ret = mutex_lock_interruptible(&the_virtio_vsock_mutex); if (ret) @@ -678,6 +760,9 @@ static int virtio_vsock_probe(struct virtio_device *vdev) if (ret < 0) goto out; + for (i = 0; i < ARRAY_SIZE(vsock->out_sgs); i++) + vsock->out_sgs[i] = &vsock->out_bufs[i]; + rcu_assign_pointer(the_virtio_vsock, vsock); virtio_vsock_vqs_start(vsock); diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index e87fd9480a..16ff976a86 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -37,27 +37,88 @@ virtio_transport_get_ops(struct vsock_sock *vsk) return container_of(t, struct virtio_transport, transport); } -/* Returns a new packet on success, otherwise returns NULL. - * - * If NULL is returned, errp is set to a negative errno. - */ -static struct sk_buff * -virtio_transport_alloc_skb(struct virtio_vsock_pkt_info *info, - size_t len, - u32 src_cid, - u32 src_port, - u32 dst_cid, - u32 dst_port) -{ - const size_t skb_len = VIRTIO_VSOCK_SKB_HEADROOM + len; - struct virtio_vsock_hdr *hdr; - struct sk_buff *skb; - void *payload; - int err; +static bool virtio_transport_can_zcopy(const struct virtio_transport *t_ops, + struct virtio_vsock_pkt_info *info, + size_t pkt_len) +{ + struct iov_iter *iov_iter; - skb = virtio_vsock_alloc_skb(skb_len, GFP_KERNEL); - if (!skb) - return NULL; + if (!info->msg) + return false; + + iov_iter = &info->msg->msg_iter; + + if (iov_iter->iov_offset) + return false; + + /* We can't send whole iov. */ + if (iov_iter->count > pkt_len) + return false; + + /* Check that transport can send data in zerocopy mode. */ + t_ops = virtio_transport_get_ops(info->vsk); + + if (t_ops->can_msgzerocopy) { + int pages_to_send = iov_iter_npages(iov_iter, MAX_SKB_FRAGS); + + /* +1 is for packet header. */ + return t_ops->can_msgzerocopy(pages_to_send + 1); + } + + return true; +} + +static int virtio_transport_init_zcopy_skb(struct vsock_sock *vsk, + struct sk_buff *skb, + struct msghdr *msg, + bool zerocopy) +{ + struct ubuf_info *uarg; + + if (msg->msg_ubuf) { + uarg = msg->msg_ubuf; + net_zcopy_get(uarg); + } else { + struct iov_iter *iter = &msg->msg_iter; + struct ubuf_info_msgzc *uarg_zc; + + uarg = msg_zerocopy_realloc(sk_vsock(vsk), + iter->count, + NULL); + if (!uarg) + return -1; + + uarg_zc = uarg_to_msgzc(uarg); + uarg_zc->zerocopy = zerocopy ? 
1 : 0; + } + + skb_zcopy_init(skb, uarg); + + return 0; +} + +static int virtio_transport_fill_skb(struct sk_buff *skb, + struct virtio_vsock_pkt_info *info, + size_t len, + bool zcopy) +{ + if (zcopy) + return __zerocopy_sg_from_iter(info->msg, NULL, skb, + &info->msg->msg_iter, + len); + + return memcpy_from_msg(skb_put(skb, len), info->msg, len); +} + +static void virtio_transport_init_hdr(struct sk_buff *skb, + struct virtio_vsock_pkt_info *info, + size_t payload_len, + u32 src_cid, + u32 src_port, + u32 dst_cid, + u32 dst_port) +{ + struct virtio_vsock_hdr *hdr; hdr = virtio_vsock_hdr(skb); hdr->type = cpu_to_le16(info->type); @@ -67,45 +128,30 @@ virtio_transport_alloc_skb(struct virtio_vsock_pkt_info *info, hdr->src_port = cpu_to_le32(src_port); hdr->dst_port = cpu_to_le32(dst_port); hdr->flags = cpu_to_le32(info->flags); - hdr->len = cpu_to_le32(len); + hdr->len = cpu_to_le32(payload_len); hdr->buf_alloc = cpu_to_le32(0); hdr->fwd_cnt = cpu_to_le32(0); +} - if (info->msg && len > 0) { - payload = skb_put(skb, len); - err = memcpy_from_msg(payload, info->msg, len); - if (err) - goto out; - - if (msg_data_left(info->msg) == 0 && - info->type == VIRTIO_VSOCK_TYPE_SEQPACKET) { - hdr->flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOM); - - if (info->msg->msg_flags & MSG_EOR) - hdr->flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR); - } - } - - if (info->reply) - virtio_vsock_skb_set_reply(skb); +static void virtio_transport_copy_nonlinear_skb(const struct sk_buff *skb, + void *dst, + size_t len) +{ + struct iov_iter iov_iter = { 0 }; + struct kvec kvec; + size_t to_copy; - trace_virtio_transport_alloc_pkt(src_cid, src_port, - dst_cid, dst_port, - len, - info->type, - info->op, - info->flags); + kvec.iov_base = dst; + kvec.iov_len = len; - if (info->vsk && !skb_set_owner_sk_safe(skb, sk_vsock(info->vsk))) { - WARN_ONCE(1, "failed to allocate skb on vsock socket with sk_refcnt == 0\n"); - goto out; - } + iov_iter.iter_type = ITER_KVEC; + iov_iter.kvec = &kvec; + iov_iter.nr_segs = 1; - return skb; + to_copy = min_t(size_t, len, skb->len); -out: - kfree_skb(skb); - return NULL; + skb_copy_datagram_iter(skb, VIRTIO_VSOCK_SKB_CB(skb)->offset, + &iov_iter, to_copy); } /* Packet capture */ @@ -116,7 +162,6 @@ static struct sk_buff *virtio_transport_build_skb(void *opaque) struct af_vsockmon_hdr *hdr; struct sk_buff *skb; size_t payload_len; - void *payload_buf; /* A packet could be split to fit the RX buffer, so we can retrieve * the payload length from the header and the buffer pointer taking @@ -124,7 +169,6 @@ static struct sk_buff *virtio_transport_build_skb(void *opaque) */ pkt_hdr = virtio_vsock_hdr(pkt); payload_len = pkt->len; - payload_buf = pkt->data; skb = alloc_skb(sizeof(*hdr) + sizeof(*pkt_hdr) + payload_len, GFP_ATOMIC); @@ -167,7 +211,13 @@ static struct sk_buff *virtio_transport_build_skb(void *opaque) skb_put_data(skb, pkt_hdr, sizeof(*pkt_hdr)); if (payload_len) { - skb_put_data(skb, payload_buf, payload_len); + if (skb_is_nonlinear(pkt)) { + void *data = skb_put(skb, payload_len); + + virtio_transport_copy_nonlinear_skb(pkt, data, payload_len); + } else { + skb_put_data(skb, pkt->data, payload_len); + } } return skb; @@ -191,6 +241,82 @@ static u16 virtio_transport_get_type(struct sock *sk) return VIRTIO_VSOCK_TYPE_SEQPACKET; } +/* Returns new sk_buff on success, otherwise returns NULL. 
*/ +static struct sk_buff *virtio_transport_alloc_skb(struct virtio_vsock_pkt_info *info, + size_t payload_len, + bool zcopy, + u32 src_cid, + u32 src_port, + u32 dst_cid, + u32 dst_port) +{ + struct vsock_sock *vsk; + struct sk_buff *skb; + size_t skb_len; + + skb_len = VIRTIO_VSOCK_SKB_HEADROOM; + + if (!zcopy) + skb_len += payload_len; + + skb = virtio_vsock_alloc_skb(skb_len, GFP_KERNEL); + if (!skb) + return NULL; + + virtio_transport_init_hdr(skb, info, payload_len, src_cid, src_port, + dst_cid, dst_port); + + vsk = info->vsk; + + /* If 'vsk' != NULL then payload is always present, so we + * will never call '__zerocopy_sg_from_iter()' below without + * setting skb owner in 'skb_set_owner_w()'. The only case + * when 'vsk' == NULL is VIRTIO_VSOCK_OP_RST control message + * without payload. + */ + WARN_ON_ONCE(!(vsk && (info->msg && payload_len)) && zcopy); + + /* Set owner here, because '__zerocopy_sg_from_iter()' uses + * owner of skb without check to update 'sk_wmem_alloc'. + */ + if (vsk) + skb_set_owner_w(skb, sk_vsock(vsk)); + + if (info->msg && payload_len > 0) { + int err; + + err = virtio_transport_fill_skb(skb, info, payload_len, zcopy); + if (err) + goto out; + + if (msg_data_left(info->msg) == 0 && + info->type == VIRTIO_VSOCK_TYPE_SEQPACKET) { + struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb); + + hdr->flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOM); + + if (info->msg->msg_flags & MSG_EOR) + hdr->flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR); + } + } + + if (info->reply) + virtio_vsock_skb_set_reply(skb); + + trace_virtio_transport_alloc_pkt(src_cid, src_port, + dst_cid, dst_port, + payload_len, + info->type, + info->op, + info->flags, + zcopy); + + return skb; +out: + kfree_skb(skb); + return NULL; +} + /* This function can only be used on connecting/connected sockets, * since a socket assigned to a transport is required. * @@ -199,10 +325,12 @@ static u16 virtio_transport_get_type(struct sock *sk) static int virtio_transport_send_pkt_info(struct vsock_sock *vsk, struct virtio_vsock_pkt_info *info) { + u32 max_skb_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE; u32 src_cid, src_port, dst_cid, dst_port; const struct virtio_transport *t_ops; struct virtio_vsock_sock *vvs; u32 pkt_len = info->pkt_len; + bool can_zcopy = false; u32 rest_len; int ret; @@ -231,15 +359,30 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk, if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW) return pkt_len; + if (info->msg) { + /* If zerocopy is not enabled by 'setsockopt()', we behave as + * there is no MSG_ZEROCOPY flag set. + */ + if (!sock_flag(sk_vsock(vsk), SOCK_ZEROCOPY)) + info->msg->msg_flags &= ~MSG_ZEROCOPY; + + if (info->msg->msg_flags & MSG_ZEROCOPY) + can_zcopy = virtio_transport_can_zcopy(t_ops, info, pkt_len); + + if (can_zcopy) + max_skb_len = min_t(u32, VIRTIO_VSOCK_MAX_PKT_BUF_SIZE, + (MAX_SKB_FRAGS * PAGE_SIZE)); + } + rest_len = pkt_len; do { struct sk_buff *skb; size_t skb_len; - skb_len = min_t(u32, VIRTIO_VSOCK_MAX_PKT_BUF_SIZE, rest_len); + skb_len = min(max_skb_len, rest_len); - skb = virtio_transport_alloc_skb(info, skb_len, + skb = virtio_transport_alloc_skb(info, skb_len, can_zcopy, src_cid, src_port, dst_cid, dst_port); if (!skb) { @@ -247,6 +390,21 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk, break; } + /* We process buffer part by part, allocating skb on + * each iteration. If this is last skb for this buffer + * and MSG_ZEROCOPY mode is in use - we must allocate + * completion for the current syscall. 
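
That is, one completion object per sendmsg() call, attached only to the skb carrying the final chunk, since userspace expects exactly one zerocopy notification per syscall. In outline:

#include <stdbool.h>
#include <stdio.h>

/* Model of the chunking loop: the buffer is sent skb by skb, and only
 * the last skb of the buffer gets the zerocopy completion attached.
 */
int main(void)
{
	const unsigned int max_chunk = 4096;	/* stand-in for max_skb_len */
	unsigned int rest = 10000;		/* bytes left to send */

	while (rest) {
		unsigned int chunk = rest < max_chunk ? rest : max_chunk;
		bool last = (chunk == rest);

		printf("skb of %u bytes%s\n", chunk,
		       last ? " (attach zerocopy completion)" : "");
		rest -= chunk;
	}
	return 0;
}
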
+ */ + if (info->msg && info->msg->msg_flags & MSG_ZEROCOPY && + skb_len == rest_len && info->op == VIRTIO_VSOCK_OP_RW) { + if (virtio_transport_init_zcopy_skb(vsk, skb, + info->msg, + can_zcopy)) { + ret = -ENOMEM; + break; + } + } + virtio_transport_inc_tx_pkt(vvs, skb); ret = t_ops->send_pkt(skb); @@ -366,9 +524,10 @@ virtio_transport_stream_do_peek(struct vsock_sock *vsk, spin_unlock_bh(&vvs->rx_lock); /* sk_lock is held by caller so no one else can dequeue. - * Unlock rx_lock since memcpy_to_msg() may sleep. + * Unlock rx_lock since skb_copy_datagram_iter() may sleep. */ - err = memcpy_to_msg(msg, skb->data, bytes); + err = skb_copy_datagram_iter(skb, VIRTIO_VSOCK_SKB_CB(skb)->offset, + &msg->msg_iter, bytes); if (err) goto out; @@ -414,25 +573,27 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk, while (total < len && !skb_queue_empty(&vvs->rx_queue)) { skb = skb_peek(&vvs->rx_queue); - bytes = len - total; - if (bytes > skb->len) - bytes = skb->len; + bytes = min_t(size_t, len - total, + skb->len - VIRTIO_VSOCK_SKB_CB(skb)->offset); /* sk_lock is held by caller so no one else can dequeue. - * Unlock rx_lock since memcpy_to_msg() may sleep. + * Unlock rx_lock since skb_copy_datagram_iter() may sleep. */ spin_unlock_bh(&vvs->rx_lock); - err = memcpy_to_msg(msg, skb->data, bytes); + err = skb_copy_datagram_iter(skb, + VIRTIO_VSOCK_SKB_CB(skb)->offset, + &msg->msg_iter, bytes); if (err) goto out; spin_lock_bh(&vvs->rx_lock); total += bytes; - skb_pull(skb, bytes); - if (skb->len == 0) { + VIRTIO_VSOCK_SKB_CB(skb)->offset += bytes; + + if (skb->len == VIRTIO_VSOCK_SKB_CB(skb)->offset) { u32 pkt_len = le32_to_cpu(virtio_vsock_hdr(skb)->len); virtio_transport_dec_rx_pkt(vvs, pkt_len); @@ -501,9 +662,10 @@ virtio_transport_seqpacket_do_peek(struct vsock_sock *vsk, spin_unlock_bh(&vvs->rx_lock); /* sk_lock is held by caller so no one else can dequeue. - * Unlock rx_lock since memcpy_to_msg() may sleep. + * Unlock rx_lock since skb_copy_datagram_iter() may sleep. */ - err = memcpy_to_msg(msg, skb->data, bytes); + err = skb_copy_datagram_iter(skb, VIRTIO_VSOCK_SKB_CB(skb)->offset, + &msg->msg_iter, bytes); if (err) return err; @@ -562,11 +724,13 @@ static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk, int err; /* sk_lock is held by caller so no one else can dequeue. - * Unlock rx_lock since memcpy_to_msg() may sleep. + * Unlock rx_lock since skb_copy_datagram_iter() may sleep. */ spin_unlock_bh(&vvs->rx_lock); - err = memcpy_to_msg(msg, skb->data, bytes_to_copy); + err = skb_copy_datagram_iter(skb, 0, + &msg->msg_iter, + bytes_to_copy); if (err) { /* Copy of message failed. Rest of * fragments will be freed without copy. 
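
The rx side of the same series stops skb_pull()ing consumed data and instead advances a per-packet offset kept in the skb control block, which also works when the payload sits in frag pages rather than the linear area. A reduced model of that dequeue path:

#include <stdio.h>
#include <string.h>

/* Stand-in for an rx packet: 'offset' plays the role of
 * VIRTIO_VSOCK_SKB_CB(skb)->offset in the hunks above.
 */
struct pkt {
	const char *data;
	unsigned int len;
	unsigned int offset;
};

static unsigned int dequeue(struct pkt *p, char *dst, unsigned int want)
{
	unsigned int avail = p->len - p->offset;
	unsigned int n = want < avail ? want : avail;

	memcpy(dst, p->data + p->offset, n);
	p->offset += n;		/* packet is drained when offset == len */
	return n;
}

int main(void)
{
	struct pkt p = { .data = "abcdefgh", .len = 8, .offset = 0 };
	char buf[8];
	unsigned int n;

	n = dequeue(&p, buf, 3);
	printf("got %u bytes, offset now %u\n", n, p.offset);
	n = dequeue(&p, buf, 8);
	printf("got %u bytes, drained: %d\n", n, p.offset == p.len);
	return 0;
}
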
@@ -963,7 +1127,7 @@ static int virtio_transport_reset_no_sock(const struct virtio_transport *t, if (!t) return -ENOTCONN; - reply = virtio_transport_alloc_skb(&info, 0, + reply = virtio_transport_alloc_skb(&info, 0, false, le64_to_cpu(hdr->dst_cid), le32_to_cpu(hdr->dst_port), le64_to_cpu(hdr->src_cid), diff --git a/net/vmw_vsock/vsock_loopback.c b/net/vmw_vsock/vsock_loopback.c index 0ce65d0a4a..6dea6119f5 100644 --- a/net/vmw_vsock/vsock_loopback.c +++ b/net/vmw_vsock/vsock_loopback.c @@ -47,6 +47,10 @@ static int vsock_loopback_cancel_pkt(struct vsock_sock *vsk) } static bool vsock_loopback_seqpacket_allow(u32 remote_cid); +static bool vsock_loopback_msgzerocopy_allow(void) +{ + return true; +} static struct virtio_transport loopback_transport = { .transport = { @@ -79,6 +83,8 @@ static struct virtio_transport loopback_transport = { .seqpacket_allow = vsock_loopback_seqpacket_allow, .seqpacket_has_data = virtio_transport_seqpacket_has_data, + .msgzerocopy_allow = vsock_loopback_msgzerocopy_allow, + .notify_poll_in = virtio_transport_notify_poll_in, .notify_poll_out = virtio_transport_notify_poll_out, .notify_recv_init = virtio_transport_notify_recv_init, diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig index f620acd2a0..a9ac85e09a 100644 --- a/net/wireless/Kconfig +++ b/net/wireless/Kconfig @@ -201,6 +201,17 @@ config CFG80211_WEXT_EXPORT Drivers should select this option if they require cfg80211's wext compatibility symbols to be exported. +config CFG80211_KUNIT_TEST + tristate "KUnit tests for cfg80211" if !KUNIT_ALL_TESTS + depends on KUNIT + depends on CFG80211 + default KUNIT_ALL_TESTS + depends on !KERNEL_6_2 + help + Enable this option to test cfg80211 functions with kunit. + + If unsure, say N. + endif # CFG80211 config LIB80211 diff --git a/net/wireless/Makefile b/net/wireless/Makefile index 527ae669f6..089c841528 100644 --- a/net/wireless/Makefile +++ b/net/wireless/Makefile @@ -4,6 +4,7 @@ obj-$(CONFIG_LIB80211) += lib80211.o obj-$(CONFIG_LIB80211_CRYPT_WEP) += lib80211_crypt_wep.o obj-$(CONFIG_LIB80211_CRYPT_CCMP) += lib80211_crypt_ccmp.o obj-$(CONFIG_LIB80211_CRYPT_TKIP) += lib80211_crypt_tkip.o +obj-y += tests/ obj-$(CONFIG_WEXT_CORE) += wext-core.o obj-$(CONFIG_WEXT_PROC) += wext-proc.o diff --git a/net/wireless/ap.c b/net/wireless/ap.c index 0962770303..9a9a870806 100644 --- a/net/wireless/ap.c +++ b/net/wireless/ap.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* * Parts of this file are - * Copyright (C) 2022 Intel Corporation + * Copyright (C) 2022-2023 Intel Corporation */ #include <linux/ieee80211.h> #include <linux/export.h> @@ -18,7 +18,7 @@ static int ___cfg80211_stop_ap(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev = dev->ieee80211_ptr; int err; - ASSERT_WDEV_LOCK(wdev); + lockdep_assert_wiphy(wdev->wiphy); if (!rdev->ops->stop_ap) return -EOPNOTSUPP; @@ -52,9 +52,9 @@ static int ___cfg80211_stop_ap(struct cfg80211_registered_device *rdev, return err; } -int __cfg80211_stop_ap(struct cfg80211_registered_device *rdev, - struct net_device *dev, int link_id, - bool notify) +int cfg80211_stop_ap(struct cfg80211_registered_device *rdev, + struct net_device *dev, int link_id, + bool notify) { unsigned int link; int ret = 0; @@ -72,17 +72,3 @@ int __cfg80211_stop_ap(struct cfg80211_registered_device *rdev, return ret; } - -int cfg80211_stop_ap(struct cfg80211_registered_device *rdev, - struct net_device *dev, int link_id, - bool notify) -{ - struct wireless_dev *wdev = dev->ieee80211_ptr; - int err; - - wdev_lock(wdev); - err = 
__cfg80211_stop_ap(rdev, dev, link_id, notify); - wdev_unlock(wdev); - - return err; -} diff --git a/net/wireless/chan.c b/net/wireless/chan.c index 0b7e81db38..2d21e423ab 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -6,7 +6,7 @@ * * Copyright 2009 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH - * Copyright 2018-2022 Intel Corporation + * Copyright 2018-2023 Intel Corporation */ #include <linux/export.h> @@ -666,6 +666,7 @@ bool cfg80211_chandef_dfs_usable(struct wiphy *wiphy, return (r1 + r2 > 0); } +EXPORT_SYMBOL(cfg80211_chandef_dfs_usable); /* * Checks if center frequency of chan falls with in the bandwidth @@ -713,7 +714,7 @@ bool cfg80211_beaconing_iface_active(struct wireless_dev *wdev) { unsigned int link; - ASSERT_WDEV_LOCK(wdev); + lockdep_assert_wiphy(wdev->wiphy); switch (wdev->iftype) { case NL80211_IFTYPE_AP: @@ -782,18 +783,14 @@ static bool cfg80211_is_wiphy_oper_chan(struct wiphy *wiphy, { struct wireless_dev *wdev; + lockdep_assert_wiphy(wiphy); + list_for_each_entry(wdev, &wiphy->wdev_list, list) { - wdev_lock(wdev); - if (!cfg80211_beaconing_iface_active(wdev)) { - wdev_unlock(wdev); + if (!cfg80211_beaconing_iface_active(wdev)) continue; - } - if (cfg80211_wdev_on_sub_chan(wdev, chan, false)) { - wdev_unlock(wdev); + if (cfg80211_wdev_on_sub_chan(wdev, chan, false)) return true; - } - wdev_unlock(wdev); } return false; @@ -823,14 +820,18 @@ bool cfg80211_any_wiphy_oper_chan(struct wiphy *wiphy, if (!(chan->flags & IEEE80211_CHAN_RADAR)) return false; - list_for_each_entry(rdev, &cfg80211_rdev_list, list) { + for_each_rdev(rdev) { + bool found; + if (!reg_dfs_domain_same(wiphy, &rdev->wiphy)) continue; - if (cfg80211_is_wiphy_oper_chan(&rdev->wiphy, chan)) - return true; + wiphy_lock(&rdev->wiphy); + found = cfg80211_is_wiphy_oper_chan(&rdev->wiphy, chan) || + cfg80211_offchan_chain_is_active(rdev, chan); + wiphy_unlock(&rdev->wiphy); - if (cfg80211_offchan_chain_is_active(rdev, chan)) + if (found) return true; } @@ -965,6 +966,7 @@ cfg80211_chandef_dfs_cac_time(struct wiphy *wiphy, return max(t1, t2); } +EXPORT_SYMBOL(cfg80211_chandef_dfs_cac_time); static bool cfg80211_secondary_chans_ok(struct wiphy *wiphy, u32 center_freq, u32 bandwidth, @@ -1162,8 +1164,7 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy, if (!sband) return false; - for (i = 0; i < sband->n_iftype_data; i++) { - iftd = &sband->iftype_data[i]; + for_each_sband_iftype_data(sband, i, iftd) { if (!iftd->eht_cap.has_eht) continue; @@ -1321,10 +1322,7 @@ static bool cfg80211_ir_permissive_chan(struct wiphy *wiphy, list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { bool ret; - wdev_lock(wdev); ret = cfg80211_ir_permissive_check_wdev(iftype, wdev, chan); - wdev_unlock(wdev); - if (ret) return ret; } @@ -1433,17 +1431,10 @@ EXPORT_SYMBOL(cfg80211_any_usable_channels); struct cfg80211_chan_def *wdev_chandef(struct wireless_dev *wdev, unsigned int link_id) { - /* - * We need to sort out the locking here - in some cases - * where we get here we really just don't care (yet) - * about the valid links, but in others we do. But we - * get here with various driver cases, so we cannot - * easily require the wdev mutex. 
- */ - if (link_id || wdev->valid_links & BIT(0)) { - ASSERT_WDEV_LOCK(wdev); - WARN_ON(!(wdev->valid_links & BIT(link_id))); - } + lockdep_assert_wiphy(wdev->wiphy); + + WARN_ON(wdev->valid_links && !(wdev->valid_links & BIT(link_id))); + WARN_ON(!wdev->valid_links && link_id > 0); switch (wdev->iftype) { case NL80211_IFTYPE_MESH_POINT: diff --git a/net/wireless/core.c b/net/wireless/core.c index f6ada0a729..3fb1b63735 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -5,7 +5,7 @@ * Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018-2022 Intel Corporation + * Copyright (C) 2018-2024 Intel Corporation */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -60,7 +60,7 @@ struct cfg80211_registered_device *cfg80211_rdev_by_wiphy_idx(int wiphy_idx) ASSERT_RTNL(); - list_for_each_entry(rdev, &cfg80211_rdev_list, list) { + for_each_rdev(rdev) { if (rdev->wiphy_idx == wiphy_idx) { result = rdev; break; @@ -116,7 +116,7 @@ static int cfg80211_dev_check_name(struct cfg80211_registered_device *rdev, } /* Ensure another device does not already have this name. */ - list_for_each_entry(rdev2, &cfg80211_rdev_list, list) + for_each_rdev(rdev2) if (strcmp(newname, wiphy_name(&rdev2->wiphy)) == 0) return -EINVAL; @@ -191,13 +191,13 @@ int cfg80211_switch_netns(struct cfg80211_registered_device *rdev, return err; } + wiphy_lock(&rdev->wiphy); list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { if (!wdev->netdev) continue; nl80211_notify_iface(rdev, wdev, NL80211_CMD_DEL_INTERFACE); } - wiphy_lock(&rdev->wiphy); nl80211_notify_wiphy(rdev, NL80211_CMD_DEL_WIPHY); wiphy_net_set(&rdev->wiphy, net); @@ -206,13 +206,13 @@ int cfg80211_switch_netns(struct cfg80211_registered_device *rdev, WARN_ON(err); nl80211_notify_wiphy(rdev, NL80211_CMD_NEW_WIPHY); - wiphy_unlock(&rdev->wiphy); list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { if (!wdev->netdev) continue; nl80211_notify_iface(rdev, wdev, NL80211_CMD_NEW_INTERFACE); } + wiphy_unlock(&rdev->wiphy); return 0; } @@ -823,6 +823,7 @@ int wiphy_register(struct wiphy *wiphy) /* sanity check supported bands/channels */ for (band = 0; band < NUM_NL80211_BANDS; band++) { + const struct ieee80211_sband_iftype_data *iftd; u16 types = 0; bool have_he = false; @@ -879,14 +880,11 @@ int wiphy_register(struct wiphy *wiphy) return -EINVAL; } - for (i = 0; i < sband->n_iftype_data; i++) { - const struct ieee80211_sband_iftype_data *iftd; + for_each_sband_iftype_data(sband, i, iftd) { bool has_ap, has_non_ap; u32 ap_bits = BIT(NL80211_IFTYPE_AP) | BIT(NL80211_IFTYPE_P2P_GO); - iftd = &sband->iftype_data[i]; - if (WARN_ON(!iftd->types_mask)) return -EINVAL; if (WARN_ON(types & iftd->types_mask)) @@ -1278,14 +1276,13 @@ void cfg80211_update_iface_num(struct cfg80211_registered_device *rdev, rdev->num_running_monitor_ifaces += num; } -void __cfg80211_leave(struct cfg80211_registered_device *rdev, - struct wireless_dev *wdev) +void cfg80211_leave(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev) { struct net_device *dev = wdev->netdev; struct cfg80211_sched_scan_request *pos, *tmp; lockdep_assert_held(&rdev->wiphy.mtx); - ASSERT_WDEV_LOCK(wdev); cfg80211_pmsr_wdev_down(wdev); @@ -1293,7 +1290,7 @@ void __cfg80211_leave(struct cfg80211_registered_device *rdev, switch (wdev->iftype) { case NL80211_IFTYPE_ADHOC: - __cfg80211_leave_ibss(rdev, dev, true); + cfg80211_leave_ibss(rdev, dev, true); break; case 
NL80211_IFTYPE_P2P_CLIENT: case NL80211_IFTYPE_STATION: @@ -1313,14 +1310,14 @@ void __cfg80211_leave(struct cfg80211_registered_device *rdev, WLAN_REASON_DEAUTH_LEAVING, true); break; case NL80211_IFTYPE_MESH_POINT: - __cfg80211_leave_mesh(rdev, dev); + cfg80211_leave_mesh(rdev, dev); break; case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: - __cfg80211_stop_ap(rdev, dev, -1, true); + cfg80211_stop_ap(rdev, dev, -1, true); break; case NL80211_IFTYPE_OCB: - __cfg80211_leave_ocb(rdev, dev); + cfg80211_leave_ocb(rdev, dev); break; case NL80211_IFTYPE_P2P_DEVICE: case NL80211_IFTYPE_NAN: @@ -1338,14 +1335,6 @@ void __cfg80211_leave(struct cfg80211_registered_device *rdev, } } -void cfg80211_leave(struct cfg80211_registered_device *rdev, - struct wireless_dev *wdev) -{ - wdev_lock(wdev); - __cfg80211_leave(rdev, wdev); - wdev_unlock(wdev); -} - void cfg80211_stop_iface(struct wiphy *wiphy, struct wireless_dev *wdev, gfp_t gfp) { @@ -1370,7 +1359,6 @@ EXPORT_SYMBOL(cfg80211_stop_iface); void cfg80211_init_wdev(struct wireless_dev *wdev) { - mutex_init(&wdev->mtx); INIT_LIST_HEAD(&wdev->event_list); spin_lock_init(&wdev->event_lock); INIT_LIST_HEAD(&wdev->mgmt_registrations); @@ -1535,7 +1523,6 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, case NETDEV_UP: wiphy_lock(&rdev->wiphy); cfg80211_update_iface_num(rdev, wdev->iftype, 1); - wdev_lock(wdev); switch (wdev->iftype) { #ifdef CONFIG_CFG80211_WEXT case NL80211_IFTYPE_ADHOC: @@ -1565,7 +1552,6 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, default: break; } - wdev_unlock(wdev); rdev->opencount++; /* @@ -1608,7 +1594,7 @@ static void __net_exit cfg80211_pernet_exit(struct net *net) struct cfg80211_registered_device *rdev; rtnl_lock(); - list_for_each_entry(rdev, &cfg80211_rdev_list, list) { + for_each_rdev(rdev) { if (net_eq(wiphy_net(&rdev->wiphy), net)) WARN_ON(cfg80211_switch_netns(rdev, &init_net)); } @@ -1675,6 +1661,7 @@ void wiphy_delayed_work_queue(struct wiphy *wiphy, unsigned long delay) { if (!delay) { + del_timer(&dwork->timer); wiphy_work_queue(wiphy, &dwork->work); return; } diff --git a/net/wireless/core.h b/net/wireless/core.h index f0a3a23176..cb61d33d4f 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -160,6 +160,16 @@ extern struct workqueue_struct *cfg80211_wq; extern struct list_head cfg80211_rdev_list; extern int cfg80211_rdev_list_generation; +/* This is constructed like this so it can be used in if/else */ +static inline int for_each_rdev_check_rtnl(void) +{ + ASSERT_RTNL(); + return 0; +} +#define for_each_rdev(rdev) \ + if (for_each_rdev_check_rtnl()) {} else \ + list_for_each_entry(rdev, &cfg80211_rdev_list, list) + struct cfg80211_internal_bss { struct list_head list; struct list_head hidden_list; @@ -225,22 +235,6 @@ void cfg80211_init_wdev(struct wireless_dev *wdev); void cfg80211_register_wdev(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev); -static inline void wdev_lock(struct wireless_dev *wdev) - __acquires(wdev) -{ - mutex_lock(&wdev->mtx); - __acquire(wdev->mtx); -} - -static inline void wdev_unlock(struct wireless_dev *wdev) - __releases(wdev) -{ - __release(wdev->mtx); - mutex_unlock(&wdev->mtx); -} - -#define ASSERT_WDEV_LOCK(wdev) lockdep_assert_held(&(wdev)->mtx) - static inline bool cfg80211_has_monitors_only(struct cfg80211_registered_device *rdev) { lockdep_assert_held(&rdev->wiphy.mtx); @@ -276,7 +270,7 @@ struct cfg80211_event { struct ieee80211_channel *channel; } ij; struct { - u8 bssid[ETH_ALEN]; + u8 
peer_addr[ETH_ALEN]; const u8 *td_bitmap; u8 td_bitmap_len; } pa; @@ -330,8 +324,6 @@ int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev, struct cfg80211_ibss_params *params, struct cfg80211_cached_keys *connkeys); void cfg80211_clear_ibss(struct net_device *dev, bool nowext); -int __cfg80211_leave_ibss(struct cfg80211_registered_device *rdev, - struct net_device *dev, bool nowext); int cfg80211_leave_ibss(struct cfg80211_registered_device *rdev, struct net_device *dev, bool nowext); void __cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid, @@ -346,8 +338,6 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, struct net_device *dev, struct mesh_setup *setup, const struct mesh_config *conf); -int __cfg80211_leave_mesh(struct cfg80211_registered_device *rdev, - struct net_device *dev); int cfg80211_leave_mesh(struct cfg80211_registered_device *rdev, struct net_device *dev); int cfg80211_set_mesh_channel(struct cfg80211_registered_device *rdev, @@ -355,21 +345,13 @@ int cfg80211_set_mesh_channel(struct cfg80211_registered_device *rdev, struct cfg80211_chan_def *chandef); /* OCB */ -int __cfg80211_join_ocb(struct cfg80211_registered_device *rdev, - struct net_device *dev, - struct ocb_setup *setup); int cfg80211_join_ocb(struct cfg80211_registered_device *rdev, struct net_device *dev, struct ocb_setup *setup); -int __cfg80211_leave_ocb(struct cfg80211_registered_device *rdev, - struct net_device *dev); int cfg80211_leave_ocb(struct cfg80211_registered_device *rdev, struct net_device *dev); /* AP */ -int __cfg80211_stop_ap(struct cfg80211_registered_device *rdev, - struct net_device *dev, int link, - bool notify); int cfg80211_stop_ap(struct cfg80211_registered_device *rdev, struct net_device *dev, int link, bool notify); @@ -423,7 +405,7 @@ int cfg80211_disconnect(struct cfg80211_registered_device *rdev, bool wextev); void __cfg80211_roamed(struct wireless_dev *wdev, struct cfg80211_roam_info *info); -void __cfg80211_port_authorized(struct wireless_dev *wdev, const u8 *bssid, +void __cfg80211_port_authorized(struct wireless_dev *wdev, const u8 *peer_addr, const u8 *td_bitmap, u8 td_bitmap_len); int cfg80211_mgd_wext_connect(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev); @@ -476,29 +458,12 @@ int cfg80211_scan(struct cfg80211_registered_device *rdev); extern struct work_struct cfg80211_disconnect_work; -/** - * cfg80211_chandef_dfs_usable - checks if chandef is DFS usable - * @wiphy: the wiphy to validate against - * @chandef: the channel definition to check - * - * Checks if chandef is usable and we can/need start CAC on such channel. 
- * - * Return: true if all channels available and at least - * one channel requires CAC (NL80211_DFS_USABLE) - */ -bool cfg80211_chandef_dfs_usable(struct wiphy *wiphy, - const struct cfg80211_chan_def *chandef); - void cfg80211_set_dfs_state(struct wiphy *wiphy, const struct cfg80211_chan_def *chandef, enum nl80211_dfs_state dfs_state); void cfg80211_dfs_channels_update_work(struct work_struct *work); -unsigned int -cfg80211_chandef_dfs_cac_time(struct wiphy *wiphy, - const struct cfg80211_chan_def *chandef); - void cfg80211_sched_dfs_chan_update(struct cfg80211_registered_device *rdev); int @@ -547,8 +512,6 @@ int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev, void cfg80211_update_iface_num(struct cfg80211_registered_device *rdev, enum nl80211_iftype iftype, int num); -void __cfg80211_leave(struct cfg80211_registered_device *rdev, - struct wireless_dev *wdev); void cfg80211_leave(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev); diff --git a/net/wireless/debugfs.c b/net/wireless/debugfs.c index 0878b16289..40e49074e2 100644 --- a/net/wireless/debugfs.c +++ b/net/wireless/debugfs.c @@ -4,6 +4,7 @@ * * Copyright 2009 Luis R. Rodriguez <lrodriguez@atheros.com> * Copyright 2007 Johannes Berg <johannes@sipsolutions.net> + * Copyright (C) 2023 Intel Corporation */ #include <linux/slab.h> @@ -109,3 +110,162 @@ void cfg80211_debugfs_rdev_add(struct cfg80211_registered_device *rdev) DEBUGFS_ADD(long_retry_limit); DEBUGFS_ADD(ht40allow_map); } + +struct debugfs_read_work { + struct wiphy_work work; + ssize_t (*handler)(struct wiphy *wiphy, + struct file *file, + char *buf, + size_t count, + void *data); + struct wiphy *wiphy; + struct file *file; + char *buf; + size_t bufsize; + void *data; + ssize_t ret; + struct completion completion; +}; + +static void wiphy_locked_debugfs_read_work(struct wiphy *wiphy, + struct wiphy_work *work) +{ + struct debugfs_read_work *w = container_of(work, typeof(*w), work); + + w->ret = w->handler(w->wiphy, w->file, w->buf, w->bufsize, w->data); + complete(&w->completion); +} + +static void wiphy_locked_debugfs_read_cancel(struct dentry *dentry, + void *data) +{ + struct debugfs_read_work *w = data; + + wiphy_work_cancel(w->wiphy, &w->work); + complete(&w->completion); +} + +ssize_t wiphy_locked_debugfs_read(struct wiphy *wiphy, struct file *file, + char *buf, size_t bufsize, + char __user *userbuf, size_t count, + loff_t *ppos, + ssize_t (*handler)(struct wiphy *wiphy, + struct file *file, + char *buf, + size_t bufsize, + void *data), + void *data) +{ + struct debugfs_read_work work = { + .handler = handler, + .wiphy = wiphy, + .file = file, + .buf = buf, + .bufsize = bufsize, + .data = data, + .ret = -ENODEV, + .completion = COMPLETION_INITIALIZER_ONSTACK(work.completion), + }; + struct debugfs_cancellation cancellation = { + .cancel = wiphy_locked_debugfs_read_cancel, + .cancel_data = &work, + }; + + /* don't leak stack data or whatever */ + memset(buf, 0, bufsize); + + wiphy_work_init(&work.work, wiphy_locked_debugfs_read_work); + wiphy_work_queue(wiphy, &work.work); + + debugfs_enter_cancellation(file, &cancellation); + wait_for_completion(&work.completion); + debugfs_leave_cancellation(file, &cancellation); + + if (work.ret < 0) + return work.ret; + + if (WARN_ON(work.ret > bufsize)) + return -EINVAL; + + return simple_read_from_buffer(userbuf, count, ppos, buf, work.ret); +} +EXPORT_SYMBOL_GPL(wiphy_locked_debugfs_read); + +struct debugfs_write_work { + struct wiphy_work work; + ssize_t (*handler)(struct wiphy *wiphy, 
+ struct file *file, + char *buf, + size_t count, + void *data); + struct wiphy *wiphy; + struct file *file; + char *buf; + size_t count; + void *data; + ssize_t ret; + struct completion completion; +}; + +static void wiphy_locked_debugfs_write_work(struct wiphy *wiphy, + struct wiphy_work *work) +{ + struct debugfs_write_work *w = container_of(work, typeof(*w), work); + + w->ret = w->handler(w->wiphy, w->file, w->buf, w->count, w->data); + complete(&w->completion); +} + +static void wiphy_locked_debugfs_write_cancel(struct dentry *dentry, + void *data) +{ + struct debugfs_write_work *w = data; + + wiphy_work_cancel(w->wiphy, &w->work); + complete(&w->completion); +} + +ssize_t wiphy_locked_debugfs_write(struct wiphy *wiphy, + struct file *file, char *buf, size_t bufsize, + const char __user *userbuf, size_t count, + ssize_t (*handler)(struct wiphy *wiphy, + struct file *file, + char *buf, + size_t count, + void *data), + void *data) +{ + struct debugfs_write_work work = { + .handler = handler, + .wiphy = wiphy, + .file = file, + .buf = buf, + .count = count, + .data = data, + .ret = -ENODEV, + .completion = COMPLETION_INITIALIZER_ONSTACK(work.completion), + }; + struct debugfs_cancellation cancellation = { + .cancel = wiphy_locked_debugfs_write_cancel, + .cancel_data = &work, + }; + + /* mostly used for strings so enforce NUL-termination for safety */ + if (count >= bufsize) + return -EINVAL; + + memset(buf, 0, bufsize); + + if (copy_from_user(buf, userbuf, count)) + return -EFAULT; + + wiphy_work_init(&work.work, wiphy_locked_debugfs_write_work); + wiphy_work_queue(wiphy, &work.work); + + debugfs_enter_cancellation(file, &cancellation); + wait_for_completion(&work.completion); + debugfs_leave_cancellation(file, &cancellation); + + return work.ret; +} +EXPORT_SYMBOL_GPL(wiphy_locked_debugfs_write); diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index e6fdb0b818..9f02ee5f08 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -3,7 +3,7 @@ * Some IBSS support code for cfg80211. 
* * Copyright 2009 Johannes Berg <johannes@sipsolutions.net> - * Copyright (C) 2020-2022 Intel Corporation + * Copyright (C) 2020-2023 Intel Corporation */ #include <linux/etherdevice.h> @@ -93,7 +93,6 @@ int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev, int err; lockdep_assert_held(&rdev->wiphy.mtx); - ASSERT_WDEV_LOCK(wdev); if (wdev->u.ibss.ssid_len) return -EALREADY; @@ -151,13 +150,13 @@ int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev, return 0; } -static void __cfg80211_clear_ibss(struct net_device *dev, bool nowext) +void cfg80211_clear_ibss(struct net_device *dev, bool nowext) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); int i; - ASSERT_WDEV_LOCK(wdev); + lockdep_assert_wiphy(wdev->wiphy); kfree_sensitive(wdev->connect_keys); wdev->connect_keys = NULL; @@ -187,22 +186,13 @@ static void __cfg80211_clear_ibss(struct net_device *dev, bool nowext) cfg80211_sched_dfs_chan_update(rdev); } -void cfg80211_clear_ibss(struct net_device *dev, bool nowext) -{ - struct wireless_dev *wdev = dev->ieee80211_ptr; - - wdev_lock(wdev); - __cfg80211_clear_ibss(dev, nowext); - wdev_unlock(wdev); -} - -int __cfg80211_leave_ibss(struct cfg80211_registered_device *rdev, - struct net_device *dev, bool nowext) +int cfg80211_leave_ibss(struct cfg80211_registered_device *rdev, + struct net_device *dev, bool nowext) { struct wireless_dev *wdev = dev->ieee80211_ptr; int err; - ASSERT_WDEV_LOCK(wdev); + lockdep_assert_wiphy(wdev->wiphy); if (!wdev->u.ibss.ssid_len) return -ENOLINK; @@ -213,24 +203,11 @@ int __cfg80211_leave_ibss(struct cfg80211_registered_device *rdev, return err; wdev->conn_owner_nlportid = 0; - __cfg80211_clear_ibss(dev, nowext); + cfg80211_clear_ibss(dev, nowext); return 0; } -int cfg80211_leave_ibss(struct cfg80211_registered_device *rdev, - struct net_device *dev, bool nowext) -{ - struct wireless_dev *wdev = dev->ieee80211_ptr; - int err; - - wdev_lock(wdev); - err = __cfg80211_leave_ibss(rdev, dev, nowext); - wdev_unlock(wdev); - - return err; -} - #ifdef CONFIG_CFG80211_WEXT int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev) @@ -239,7 +216,7 @@ int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev, enum nl80211_band band; int i, err; - ASSERT_WDEV_LOCK(wdev); + lockdep_assert_wiphy(wdev->wiphy); if (!wdev->wext.ibss.beacon_interval) wdev->wext.ibss.beacon_interval = 100; @@ -336,11 +313,9 @@ int cfg80211_ibss_wext_siwfreq(struct net_device *dev, if (wdev->wext.ibss.chandef.chan == chan) return 0; - wdev_lock(wdev); err = 0; if (wdev->u.ibss.ssid_len) - err = __cfg80211_leave_ibss(rdev, dev, true); - wdev_unlock(wdev); + err = cfg80211_leave_ibss(rdev, dev, true); if (err) return err; @@ -354,11 +329,7 @@ int cfg80211_ibss_wext_siwfreq(struct net_device *dev, wdev->wext.ibss.channel_fixed = false; } - wdev_lock(wdev); - err = cfg80211_ibss_wext_join(rdev, wdev); - wdev_unlock(wdev); - - return err; + return cfg80211_ibss_wext_join(rdev, wdev); } int cfg80211_ibss_wext_giwfreq(struct net_device *dev, @@ -372,12 +343,10 @@ int cfg80211_ibss_wext_giwfreq(struct net_device *dev, if (WARN_ON(wdev->iftype != NL80211_IFTYPE_ADHOC)) return -EINVAL; - wdev_lock(wdev); if (wdev->u.ibss.current_bss) chan = wdev->u.ibss.current_bss->pub.channel; else if (wdev->wext.ibss.chandef.chan) chan = wdev->wext.ibss.chandef.chan; - wdev_unlock(wdev); if (chan) { freq->m = chan->center_freq; @@ -405,11 +374,9 @@ int 
cfg80211_ibss_wext_siwessid(struct net_device *dev, if (!rdev->ops->join_ibss) return -EOPNOTSUPP; - wdev_lock(wdev); err = 0; if (wdev->u.ibss.ssid_len) - err = __cfg80211_leave_ibss(rdev, dev, true); - wdev_unlock(wdev); + err = cfg80211_leave_ibss(rdev, dev, true); if (err) return err; @@ -422,11 +389,7 @@ int cfg80211_ibss_wext_siwessid(struct net_device *dev, wdev->wext.ibss.ssid = wdev->u.ibss.ssid; wdev->wext.ibss.ssid_len = len; - wdev_lock(wdev); - err = cfg80211_ibss_wext_join(rdev, wdev); - wdev_unlock(wdev); - - return err; + return cfg80211_ibss_wext_join(rdev, wdev); } int cfg80211_ibss_wext_giwessid(struct net_device *dev, @@ -441,7 +404,6 @@ int cfg80211_ibss_wext_giwessid(struct net_device *dev, data->flags = 0; - wdev_lock(wdev); if (wdev->u.ibss.ssid_len) { data->flags = 1; data->length = wdev->u.ibss.ssid_len; @@ -451,7 +413,6 @@ int cfg80211_ibss_wext_giwessid(struct net_device *dev, data->length = wdev->wext.ibss.ssid_len; memcpy(ssid, wdev->wext.ibss.ssid, data->length); } - wdev_unlock(wdev); return 0; } @@ -491,11 +452,9 @@ int cfg80211_ibss_wext_siwap(struct net_device *dev, ether_addr_equal(bssid, wdev->wext.ibss.bssid)) return 0; - wdev_lock(wdev); err = 0; if (wdev->u.ibss.ssid_len) - err = __cfg80211_leave_ibss(rdev, dev, true); - wdev_unlock(wdev); + err = cfg80211_leave_ibss(rdev, dev, true); if (err) return err; @@ -506,11 +465,7 @@ int cfg80211_ibss_wext_siwap(struct net_device *dev, } else wdev->wext.ibss.bssid = NULL; - wdev_lock(wdev); - err = cfg80211_ibss_wext_join(rdev, wdev); - wdev_unlock(wdev); - - return err; + return cfg80211_ibss_wext_join(rdev, wdev); } int cfg80211_ibss_wext_giwap(struct net_device *dev, @@ -525,7 +480,6 @@ int cfg80211_ibss_wext_giwap(struct net_device *dev, ap_addr->sa_family = ARPHRD_ETHER; - wdev_lock(wdev); if (wdev->u.ibss.current_bss) memcpy(ap_addr->sa_data, wdev->u.ibss.current_bss->pub.bssid, ETH_ALEN); @@ -534,8 +488,6 @@ int cfg80211_ibss_wext_giwap(struct net_device *dev, else eth_zero_addr(ap_addr->sa_data); - wdev_unlock(wdev); - return 0; } #endif diff --git a/net/wireless/lib80211_crypt_tkip.c b/net/wireless/lib80211_crypt_tkip.c index 1b4d6c87a5..5c8cdf7681 100644 --- a/net/wireless/lib80211_crypt_tkip.c +++ b/net/wireless/lib80211_crypt_tkip.c @@ -662,12 +662,12 @@ static int lib80211_tkip_get_key(void *key, int len, u8 * seq, void *priv) memcpy(key, tkey->key, TKIP_KEY_LEN); if (seq) { - /* Return the sequence number of the last transmitted frame. */ - u16 iv16 = tkey->tx_iv16; - u32 iv32 = tkey->tx_iv32; - if (iv16 == 0) - iv32--; - iv16--; + /* + * Not clear if this should return the value as is + * or - as the code previously seemed to partially + * have been written as - subtract one from it. It + * was working this way for a long time so leave it. 
+ */ seq[0] = tkey->tx_iv16; seq[1] = tkey->tx_iv16 >> 8; seq[2] = tkey->tx_iv32; diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c index 59a3c5c092..83306979fb 100644 --- a/net/wireless/mesh.c +++ b/net/wireless/mesh.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* * Portions - * Copyright (C) 2022 Intel Corporation + * Copyright (C) 2022-2023 Intel Corporation */ #include <linux/ieee80211.h> #include <linux/export.h> @@ -109,7 +109,7 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, BUILD_BUG_ON(IEEE80211_MAX_SSID_LEN != IEEE80211_MAX_MESH_ID_LEN); - ASSERT_WDEV_LOCK(wdev); + lockdep_assert_wiphy(wdev->wiphy); if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT) return -EOPNOTSUPP; @@ -172,7 +172,6 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, * basic rates */ if (!setup->basic_rates) { - enum nl80211_bss_scan_width scan_width; struct ieee80211_supported_band *sband = rdev->wiphy.bands[setup->chandef.chan->band]; @@ -193,9 +192,7 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, } } } else { - scan_width = cfg80211_chandef_to_scan_width(&setup->chandef); - setup->basic_rates = ieee80211_mandatory_rates(sband, - scan_width); + setup->basic_rates = ieee80211_mandatory_rates(sband); } } @@ -257,13 +254,13 @@ int cfg80211_set_mesh_channel(struct cfg80211_registered_device *rdev, return 0; } -int __cfg80211_leave_mesh(struct cfg80211_registered_device *rdev, - struct net_device *dev) +int cfg80211_leave_mesh(struct cfg80211_registered_device *rdev, + struct net_device *dev) { struct wireless_dev *wdev = dev->ieee80211_ptr; int err; - ASSERT_WDEV_LOCK(wdev); + lockdep_assert_wiphy(wdev->wiphy); if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT) return -EOPNOTSUPP; @@ -287,16 +284,3 @@ int __cfg80211_leave_mesh(struct cfg80211_registered_device *rdev, return err; } - -int cfg80211_leave_mesh(struct cfg80211_registered_device *rdev, - struct net_device *dev) -{ - struct wireless_dev *wdev = dev->ieee80211_ptr; - int err; - - wdev_lock(wdev); - err = __cfg80211_leave_mesh(rdev, dev); - wdev_unlock(wdev); - - return err; -} diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 55a1d36338..bad9e4fd84 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -4,7 +4,7 @@ * * Copyright (c) 2009, Jouni Malinen <j@w1.fi> * Copyright (c) 2015 Intel Deutschland GmbH - * Copyright (C) 2019-2020, 2022 Intel Corporation + * Copyright (C) 2019-2020, 2022-2023 Intel Corporation */ #include <linux/kernel.h> @@ -22,7 +22,7 @@ void cfg80211_rx_assoc_resp(struct net_device *dev, - struct cfg80211_rx_assoc_resp *data) + struct cfg80211_rx_assoc_resp_data *data) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; @@ -151,7 +151,7 @@ void cfg80211_rx_mlme_mgmt(struct net_device *dev, const u8 *buf, size_t len) struct wireless_dev *wdev = dev->ieee80211_ptr; struct ieee80211_mgmt *mgmt = (void *)buf; - ASSERT_WDEV_LOCK(wdev); + lockdep_assert_wiphy(wdev->wiphy); trace_cfg80211_rx_mlme_mgmt(dev, buf, len); @@ -216,7 +216,7 @@ void cfg80211_tx_mlme_mgmt(struct net_device *dev, const u8 *buf, size_t len, struct wireless_dev *wdev = dev->ieee80211_ptr; struct ieee80211_mgmt *mgmt = (void *)buf; - ASSERT_WDEV_LOCK(wdev); + lockdep_assert_wiphy(wdev->wiphy); trace_cfg80211_tx_mlme_mgmt(dev, buf, len, reconnect); @@ -264,7 +264,7 @@ int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, { struct wireless_dev *wdev = dev->ieee80211_ptr; - ASSERT_WDEV_LOCK(wdev); + 
lockdep_assert_wiphy(wdev->wiphy); if (!req->bss) return -ENOENT; @@ -333,7 +333,7 @@ int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev = dev->ieee80211_ptr; int err, i, j; - ASSERT_WDEV_LOCK(wdev); + lockdep_assert_wiphy(wdev->wiphy); for (i = 1; i < ARRAY_SIZE(req->links); i++) { if (!req->links[i].bss) @@ -395,7 +395,7 @@ int cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev, .local_state_change = local_state_change, }; - ASSERT_WDEV_LOCK(wdev); + lockdep_assert_wiphy(wdev->wiphy); if (local_state_change && (!wdev->connected || @@ -425,7 +425,7 @@ int cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev, }; int err; - ASSERT_WDEV_LOCK(wdev); + lockdep_assert_wiphy(wdev->wiphy); if (!wdev->connected) return -ENOTCONN; @@ -448,7 +448,7 @@ void cfg80211_mlme_down(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev = dev->ieee80211_ptr; u8 bssid[ETH_ALEN]; - ASSERT_WDEV_LOCK(wdev); + lockdep_assert_wiphy(wdev->wiphy); if (!rdev->ops->deauth) return; @@ -728,6 +728,8 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, const struct ieee80211_mgmt *mgmt; u16 stype; + lockdep_assert_wiphy(&rdev->wiphy); + if (!wdev->wiphy->mgmt_stypes) return -EOPNOTSUPP; @@ -750,8 +752,6 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, mgmt->u.action.category != WLAN_CATEGORY_PUBLIC) { int err = 0; - wdev_lock(wdev); - switch (wdev->iftype) { case NL80211_IFTYPE_ADHOC: /* @@ -816,7 +816,6 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, err = -EOPNOTSUPP; break; } - wdev_unlock(wdev); if (err) return err; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 0b0dfecedc..fbf95b7ff6 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -106,7 +106,7 @@ __cfg80211_wdev_from_attrs(struct cfg80211_registered_device *rdev, ASSERT_RTNL(); - list_for_each_entry(rdev, &cfg80211_rdev_list, list) { + for_each_rdev(rdev) { struct wireless_dev *wdev; if (wiphy_net(&rdev->wiphy) != netns) @@ -463,7 +463,7 @@ nl80211_sta_wme_policy[NL80211_STA_WME_MAX + 1] = { [NL80211_STA_WME_MAX_SP] = { .type = NLA_U8 }, }; -static struct netlink_range_validation nl80211_punct_bitmap_range = { +static const struct netlink_range_validation nl80211_punct_bitmap_range = { .min = 0, .max = 0xffff, }; @@ -1115,6 +1115,10 @@ static int nl80211_msg_put_channel(struct sk_buff *msg, struct wiphy *wiphy, if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_OFFSET, chan->freq_offset)) goto nla_put_failure; + if ((chan->flags & IEEE80211_CHAN_PSD) && + nla_put_s8(msg, NL80211_FREQUENCY_ATTR_PSD, chan->psd)) + goto nla_put_failure; + if ((chan->flags & IEEE80211_CHAN_DISABLED) && nla_put_flag(msg, NL80211_FREQUENCY_ATTR_DISABLED)) goto nla_put_failure; @@ -1544,7 +1548,7 @@ nl80211_parse_connkeys(struct cfg80211_registered_device *rdev, static int nl80211_key_allowed(struct wireless_dev *wdev) { - ASSERT_WDEV_LOCK(wdev); + lockdep_assert_wiphy(wdev->wiphy); switch (wdev->iftype) { case NL80211_IFTYPE_AP: @@ -1913,20 +1917,20 @@ static int nl80211_send_band_rateinfo(struct sk_buff *msg, struct nlattr *nl_iftype_data = nla_nest_start_noflag(msg, NL80211_BAND_ATTR_IFTYPE_DATA); + const struct ieee80211_sband_iftype_data *iftd; int err; if (!nl_iftype_data) return -ENOBUFS; - for (i = 0; i < sband->n_iftype_data; i++) { + for_each_sband_iftype_data(sband, i, iftd) { struct nlattr *iftdata; iftdata = nla_nest_start_noflag(msg, i + 1); if (!iftdata) return -ENOBUFS; - err = nl80211_send_iftype_data(msg, 
sband, - &sband->iftype_data[i]); + err = nl80211_send_iftype_data(msg, sband, iftd); if (err) return err; @@ -3075,7 +3079,7 @@ static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb) cb->args[0] = (long)state; } - list_for_each_entry(rdev, &cfg80211_rdev_list, list) { + for_each_rdev(rdev) { if (!net_eq(wiphy_net(&rdev->wiphy), sock_net(skb->sk))) continue; if (++idx <= state->start) @@ -3423,13 +3427,8 @@ static int nl80211_set_channel(struct sk_buff *skb, struct genl_info *info) struct cfg80211_registered_device *rdev = info->user_ptr[0]; int link_id = nl80211_link_id_or_invalid(info->attrs); struct net_device *netdev = info->user_ptr[1]; - int ret; - - wdev_lock(netdev->ieee80211_ptr); - ret = __nl80211_set_channel(rdev, netdev, info, link_id); - wdev_unlock(netdev->ieee80211_ptr); - return ret; + return __nl80211_set_channel(rdev, netdev, info, link_id); } static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) @@ -3536,7 +3535,6 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) txq_params.link_id = nl80211_link_id_or_invalid(info->attrs); - wdev_lock(netdev->ieee80211_ptr); if (txq_params.link_id >= 0 && !(netdev->ieee80211_ptr->valid_links & BIT(txq_params.link_id))) @@ -3547,7 +3545,6 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) else result = rdev_set_txq_params(rdev, netdev, &txq_params); - wdev_unlock(netdev->ieee80211_ptr); if (result) goto out; } @@ -3557,12 +3554,10 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) int link_id = nl80211_link_id_or_invalid(info->attrs); if (wdev) { - wdev_lock(wdev); result = __nl80211_set_channel( rdev, nl80211_can_set_dev_channel(wdev) ? netdev : NULL, info, link_id); - wdev_unlock(wdev); } else { result = __nl80211_set_channel(rdev, netdev, info, link_id); } @@ -3827,6 +3822,8 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag struct net_device *dev = wdev->netdev; void *hdr; + lockdep_assert_wiphy(&rdev->wiphy); + WARN_ON(cmd != NL80211_CMD_NEW_INTERFACE && cmd != NL80211_CMD_DEL_INTERFACE && cmd != NL80211_CMD_SET_INTERFACE); @@ -3870,33 +3867,31 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag goto nla_put_failure; } - wdev_lock(wdev); switch (wdev->iftype) { case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: if (wdev->u.ap.ssid_len && nla_put(msg, NL80211_ATTR_SSID, wdev->u.ap.ssid_len, wdev->u.ap.ssid)) - goto nla_put_failure_locked; + goto nla_put_failure; break; case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_P2P_CLIENT: if (wdev->u.client.ssid_len && nla_put(msg, NL80211_ATTR_SSID, wdev->u.client.ssid_len, wdev->u.client.ssid)) - goto nla_put_failure_locked; + goto nla_put_failure; break; case NL80211_IFTYPE_ADHOC: if (wdev->u.ibss.ssid_len && nla_put(msg, NL80211_ATTR_SSID, wdev->u.ibss.ssid_len, wdev->u.ibss.ssid)) - goto nla_put_failure_locked; + goto nla_put_failure; break; default: /* nothing */ break; } - wdev_unlock(wdev); if (rdev->ops->get_txq_stats) { struct cfg80211_txq_stats txqstats = {}; @@ -3943,8 +3938,6 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag genlmsg_end(msg, hdr); return 0; - nla_put_failure_locked: - wdev_unlock(wdev); nla_put_failure: genlmsg_cancel(msg, hdr); return -EMSGSIZE; @@ -3985,7 +3978,7 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback * filter_wiphy = cb->args[2] - 1; } - list_for_each_entry(rdev, &cfg80211_rdev_list, list) { + 
for_each_rdev(rdev) { if (!net_eq(wiphy_net(&rdev->wiphy), sock_net(skb->sk))) continue; if (wp_idx < wp_start) { @@ -3998,6 +3991,7 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback * if_idx = 0; + wiphy_lock(&rdev->wiphy); list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { if (if_idx < if_start) { if_idx++; @@ -4007,11 +4001,14 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback * cb->nlh->nlmsg_seq, NLM_F_MULTI, rdev, wdev, NL80211_CMD_NEW_INTERFACE) < 0) { + wiphy_unlock(&rdev->wiphy); goto out; } if_idx++; } + wiphy_unlock(&rdev->wiphy); + if_start = 0; wp_idx++; } out: @@ -4191,7 +4188,6 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) if (netif_running(dev)) return -EBUSY; - wdev_lock(wdev); BUILD_BUG_ON(IEEE80211_MAX_SSID_LEN != IEEE80211_MAX_MESH_ID_LEN); wdev->u.mesh.id_up_len = @@ -4199,7 +4195,6 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) memcpy(wdev->u.mesh.id, nla_data(info->attrs[NL80211_ATTR_MESH_ID]), wdev->u.mesh.id_up_len); - wdev_unlock(wdev); } if (info->attrs[NL80211_ATTR_4ADDR]) { @@ -4300,7 +4295,6 @@ static int _nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) case NL80211_IFTYPE_MESH_POINT: if (!info->attrs[NL80211_ATTR_MESH_ID]) break; - wdev_lock(wdev); BUILD_BUG_ON(IEEE80211_MAX_SSID_LEN != IEEE80211_MAX_MESH_ID_LEN); wdev->u.mesh.id_up_len = @@ -4308,7 +4302,6 @@ static int _nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) memcpy(wdev->u.mesh.id, nla_data(info->attrs[NL80211_ATTR_MESH_ID]), wdev->u.mesh.id_up_len); - wdev_unlock(wdev); break; case NL80211_IFTYPE_NAN: case NL80211_IFTYPE_P2P_DEVICE: @@ -4599,79 +4592,67 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info) !(key.p.mode == NL80211_KEY_SET_TX)) return -EINVAL; - wdev_lock(wdev); - if (key.def) { - if (!rdev->ops->set_default_key) { - err = -EOPNOTSUPP; - goto out; - } + if (!rdev->ops->set_default_key) + return -EOPNOTSUPP; err = nl80211_key_allowed(wdev); if (err) - goto out; + return err; err = nl80211_validate_key_link_id(info, wdev, link_id, false); if (err) - goto out; + return err; err = rdev_set_default_key(rdev, dev, link_id, key.idx, key.def_uni, key.def_multi); if (err) - goto out; + return err; #ifdef CONFIG_CFG80211_WEXT wdev->wext.default_key = key.idx; #endif + return 0; } else if (key.defmgmt) { - if (key.def_uni || !key.def_multi) { - err = -EINVAL; - goto out; - } + if (key.def_uni || !key.def_multi) + return -EINVAL; - if (!rdev->ops->set_default_mgmt_key) { - err = -EOPNOTSUPP; - goto out; - } + if (!rdev->ops->set_default_mgmt_key) + return -EOPNOTSUPP; err = nl80211_key_allowed(wdev); if (err) - goto out; + return err; err = nl80211_validate_key_link_id(info, wdev, link_id, false); if (err) - goto out; + return err; err = rdev_set_default_mgmt_key(rdev, dev, link_id, key.idx); if (err) - goto out; + return err; #ifdef CONFIG_CFG80211_WEXT wdev->wext.default_mgmt_key = key.idx; #endif + return 0; } else if (key.defbeacon) { - if (key.def_uni || !key.def_multi) { - err = -EINVAL; - goto out; - } + if (key.def_uni || !key.def_multi) + return -EINVAL; - if (!rdev->ops->set_default_beacon_key) { - err = -EOPNOTSUPP; - goto out; - } + if (!rdev->ops->set_default_beacon_key) + return -EOPNOTSUPP; err = nl80211_key_allowed(wdev); if (err) - goto out; + return err; err = nl80211_validate_key_link_id(info, wdev, link_id, false); if (err) - goto out; + return err; - err = 
rdev_set_default_beacon_key(rdev, dev, link_id, key.idx); - if (err) - goto out; + return rdev_set_default_beacon_key(rdev, dev, link_id, key.idx); } else if (key.p.mode == NL80211_KEY_SET_TX && wiphy_ext_feature_isset(&rdev->wiphy, NL80211_EXT_FEATURE_EXT_KEY_ID)) { @@ -4680,25 +4661,19 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_MAC]) mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); - if (!mac_addr || key.idx < 0 || key.idx > 1) { - err = -EINVAL; - goto out; - } + if (!mac_addr || key.idx < 0 || key.idx > 1) + return -EINVAL; err = nl80211_validate_key_link_id(info, wdev, link_id, true); if (err) - goto out; + return err; - err = rdev_add_key(rdev, dev, link_id, key.idx, - NL80211_KEYTYPE_PAIRWISE, - mac_addr, &key.p); - } else { - err = -EINVAL; + return rdev_add_key(rdev, dev, link_id, key.idx, + NL80211_KEYTYPE_PAIRWISE, + mac_addr, &key.p); } - out: - wdev_unlock(wdev); - return err; + return -EINVAL; } static int nl80211_new_key(struct sk_buff *skb, struct genl_info *info) @@ -4751,7 +4726,6 @@ static int nl80211_new_key(struct sk_buff *skb, struct genl_info *info) return -EINVAL; } - wdev_lock(wdev); err = nl80211_key_allowed(wdev); if (err) GENL_SET_ERR_MSG(info, "key not allowed"); @@ -4767,7 +4741,6 @@ static int nl80211_new_key(struct sk_buff *skb, struct genl_info *info) if (err) GENL_SET_ERR_MSG(info, "key addition failed"); } - wdev_unlock(wdev); return err; } @@ -4808,7 +4781,6 @@ static int nl80211_del_key(struct sk_buff *skb, struct genl_info *info) if (!rdev->ops->del_key) return -EOPNOTSUPP; - wdev_lock(wdev); err = nl80211_key_allowed(wdev); if (key.type == NL80211_KEYTYPE_GROUP && mac_addr && @@ -4832,7 +4804,6 @@ static int nl80211_del_key(struct sk_buff *skb, struct genl_info *info) wdev->wext.default_mgmt_key = -1; } #endif - wdev_unlock(wdev); return err; } @@ -5671,11 +5642,10 @@ static int nl80211_parse_he_obss_pd(struct nlattr *attrs, static int nl80211_parse_fils_discovery(struct cfg80211_registered_device *rdev, struct nlattr *attrs, - struct cfg80211_ap_settings *params) + struct cfg80211_fils_discovery *fd) { struct nlattr *tb[NL80211_FILS_DISCOVERY_ATTR_MAX + 1]; int ret; - struct cfg80211_fils_discovery *fd = &params->fils_discovery; if (!wiphy_ext_feature_isset(&rdev->wiphy, NL80211_EXT_FEATURE_FILS_DISCOVERY)) @@ -5686,6 +5656,13 @@ static int nl80211_parse_fils_discovery(struct cfg80211_registered_device *rdev, if (ret) return ret; + if (!tb[NL80211_FILS_DISCOVERY_ATTR_INT_MIN] && + !tb[NL80211_FILS_DISCOVERY_ATTR_INT_MAX] && + !tb[NL80211_FILS_DISCOVERY_ATTR_TMPL]) { + fd->update = true; + return 0; + } + if (!tb[NL80211_FILS_DISCOVERY_ATTR_INT_MIN] || !tb[NL80211_FILS_DISCOVERY_ATTR_INT_MAX] || !tb[NL80211_FILS_DISCOVERY_ATTR_TMPL]) @@ -5695,19 +5672,17 @@ static int nl80211_parse_fils_discovery(struct cfg80211_registered_device *rdev, fd->tmpl = nla_data(tb[NL80211_FILS_DISCOVERY_ATTR_TMPL]); fd->min_interval = nla_get_u32(tb[NL80211_FILS_DISCOVERY_ATTR_INT_MIN]); fd->max_interval = nla_get_u32(tb[NL80211_FILS_DISCOVERY_ATTR_INT_MAX]); - + fd->update = true; return 0; } static int nl80211_parse_unsol_bcast_probe_resp(struct cfg80211_registered_device *rdev, struct nlattr *attrs, - struct cfg80211_ap_settings *params) + struct cfg80211_unsol_bcast_probe_resp *presp) { struct nlattr *tb[NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_MAX + 1]; int ret; - struct cfg80211_unsol_bcast_probe_resp *presp = - &params->unsol_bcast_probe_resp; if (!wiphy_ext_feature_isset(&rdev->wiphy,
NL80211_EXT_FEATURE_UNSOL_BCAST_PROBE_RESP)) @@ -5718,6 +5693,12 @@ nl80211_parse_unsol_bcast_probe_resp(struct cfg80211_registered_device *rdev, if (ret) return ret; + if (!tb[NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_INT] && + !tb[NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_TMPL]) { + presp->update = true; + return 0; + } + if (!tb[NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_INT] || !tb[NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_TMPL]) return -EINVAL; @@ -5725,6 +5706,7 @@ nl80211_parse_unsol_bcast_probe_resp(struct cfg80211_registered_device *rdev, presp->tmpl = nla_data(tb[NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_TMPL]); presp->tmpl_len = nla_len(tb[NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_TMPL]); presp->interval = nla_get_u32(tb[NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_INT]); + presp->update = true; return 0; } @@ -6087,20 +6069,18 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) goto out; } - wdev_lock(wdev); - if (info->attrs[NL80211_ATTR_TX_RATES]) { err = nl80211_parse_tx_bitrate_mask(info, info->attrs, NL80211_ATTR_TX_RATES, &params->beacon_rate, dev, false, link_id); if (err) - goto out_unlock; + goto out; err = validate_beacon_tx_rate(rdev, params->chandef.chan->band, &params->beacon_rate); if (err) - goto out_unlock; + goto out; } if (info->attrs[NL80211_ATTR_SMPS_MODE]) { @@ -6113,19 +6093,19 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) if (!(rdev->wiphy.features & NL80211_FEATURE_STATIC_SMPS)) { err = -EINVAL; - goto out_unlock; + goto out; } break; case NL80211_SMPS_DYNAMIC: if (!(rdev->wiphy.features & NL80211_FEATURE_DYNAMIC_SMPS)) { err = -EINVAL; - goto out_unlock; + goto out; } break; default: err = -EINVAL; - goto out_unlock; + goto out; } } else { params->smps_mode = NL80211_SMPS_OFF; @@ -6134,7 +6114,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) params->pbss = nla_get_flag(info->attrs[NL80211_ATTR_PBSS]); if (params->pbss && !rdev->wiphy.bands[NL80211_BAND_60GHZ]) { err = -EOPNOTSUPP; - goto out_unlock; + goto out; } if (info->attrs[NL80211_ATTR_ACL_POLICY]) { @@ -6142,7 +6122,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(params->acl)) { err = PTR_ERR(params->acl); params->acl = NULL; - goto out_unlock; + goto out; } } @@ -6154,23 +6134,23 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) info->attrs[NL80211_ATTR_HE_OBSS_PD], &params->he_obss_pd); if (err) - goto out_unlock; + goto out; } if (info->attrs[NL80211_ATTR_FILS_DISCOVERY]) { err = nl80211_parse_fils_discovery(rdev, info->attrs[NL80211_ATTR_FILS_DISCOVERY], - params); + &params->fils_discovery); if (err) - goto out_unlock; + goto out; } if (info->attrs[NL80211_ATTR_UNSOL_BCAST_PROBE_RESP]) { err = nl80211_parse_unsol_bcast_probe_resp( rdev, info->attrs[NL80211_ATTR_UNSOL_BCAST_PROBE_RESP], - params); + &params->unsol_bcast_probe_resp); if (err) - goto out_unlock; + goto out; } if (info->attrs[NL80211_ATTR_MBSSID_CONFIG]) { @@ -6181,21 +6161,21 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) params->beacon.mbssid_ies->cnt : 0); if (err) - goto out_unlock; + goto out; } if (!params->mbssid_config.ema && params->beacon.rnr_ies) { err = -EINVAL; - goto out_unlock; + goto out; } err = nl80211_calculate_ap_params(params); if (err) - goto out_unlock; + goto out; err = nl80211_validate_ap_phy_operation(params); if (err) - goto out_unlock; + goto out; if (info->attrs[NL80211_ATTR_AP_SETTINGS_FLAGS]) params->flags = nla_get_u32( @@ -6207,7 +6187,7 @@ static int nl80211_start_ap(struct sk_buff
*skb, struct genl_info *info) info->attrs[NL80211_ATTR_SOCKET_OWNER] && wdev->conn_owner_nlportid != info->snd_portid) { err = -EINVAL; - goto out_unlock; + goto out; } /* FIXME: validate MLO/link-id against driver capabilities */ @@ -6225,8 +6205,6 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) nl80211_send_ap_started(wdev, link_id); } -out_unlock: - wdev_unlock(wdev); out: kfree(params->acl); kfree(params->beacon.mbssid_ies); @@ -6246,7 +6224,8 @@ static int nl80211_set_beacon(struct sk_buff *skb, struct genl_info *info) unsigned int link_id = nl80211_link_id(info->attrs); struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_beacon_data params; + struct cfg80211_ap_update *params; + struct nlattr *attr; int err; if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && @@ -6259,17 +6238,37 @@ static int nl80211_set_beacon(struct sk_buff *skb, struct genl_info *info) if (!wdev->links[link_id].ap.beacon_interval) return -EINVAL; - err = nl80211_parse_beacon(rdev, info->attrs, &params, info->extack); + params = kzalloc(sizeof(*params), GFP_KERNEL); + if (!params) + return -ENOMEM; + + err = nl80211_parse_beacon(rdev, info->attrs, &params->beacon, + info->extack); if (err) goto out; - wdev_lock(wdev); - err = rdev_change_beacon(rdev, dev, &params); - wdev_unlock(wdev); + attr = info->attrs[NL80211_ATTR_FILS_DISCOVERY]; + if (attr) { + err = nl80211_parse_fils_discovery(rdev, attr, + &params->fils_discovery); + if (err) + goto out; + } + + attr = info->attrs[NL80211_ATTR_UNSOL_BCAST_PROBE_RESP]; + if (attr) { + err = nl80211_parse_unsol_bcast_probe_resp(rdev, attr, + &params->unsol_bcast_probe_resp); + if (err) + goto out; + } + + err = rdev_change_beacon(rdev, dev, params); out: - kfree(params.mbssid_ies); - kfree(params.rnr_ies); + kfree(params->beacon.mbssid_ies); + kfree(params->beacon.rnr_ies); + kfree(params); return err; } @@ -7324,9 +7323,7 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) } /* driver will call cfg80211_check_station_change() */ - wdev_lock(dev->ieee80211_ptr); err = rdev_change_station(rdev, dev, mac_addr, &params); - wdev_unlock(dev->ieee80211_ptr); out_put_vlan: dev_put(params.vlan); @@ -7594,7 +7591,6 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) /* be aware of params.vlan when changing code here */ - wdev_lock(dev->ieee80211_ptr); if (wdev->valid_links) { if (params.link_sta_params.link_id < 0) { err = -EINVAL; @@ -7612,7 +7608,6 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) } err = rdev_add_station(rdev, dev, mac_addr, &params); out: - wdev_unlock(dev->ieee80211_ptr); dev_put(params.vlan); return err; } @@ -7622,7 +7617,6 @@ static int nl80211_del_station(struct sk_buff *skb, struct genl_info *info) struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct station_del_parameters params; - int ret; memset(&params, 0, sizeof(params)); @@ -7670,11 +7664,7 @@ static int nl80211_del_station(struct sk_buff *skb, struct genl_info *info) params.reason_code = WLAN_REASON_PREV_AUTH_NOT_VALID; } - wdev_lock(dev->ieee80211_ptr); - ret = rdev_del_station(rdev, dev, &params); - wdev_unlock(dev->ieee80211_ptr); - - return ret; + return rdev_del_station(rdev, dev, &params); } static int nl80211_send_mpath(struct sk_buff *msg, u32 portid, u32 seq, @@ -7993,9 +7983,7 @@ static int nl80211_set_bss(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev =
info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; - struct wireless_dev *wdev = dev->ieee80211_ptr; struct bss_parameters params; - int err; memset(&params, 0, sizeof(params)); params.link_id = nl80211_link_id_or_invalid(info->attrs); @@ -8058,11 +8046,7 @@ static int nl80211_set_bss(struct sk_buff *skb, struct genl_info *info) dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) return -EOPNOTSUPP; - wdev_lock(wdev); - err = rdev_change_bss(rdev, dev, &params); - wdev_unlock(wdev); - - return err; + return rdev_change_bss(rdev, dev, &params); } static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info) @@ -8133,13 +8117,11 @@ static int nl80211_get_mesh_config(struct sk_buff *skb, if (!rdev->ops->get_mesh_config) return -EOPNOTSUPP; - wdev_lock(wdev); /* If not connected, get default parameters */ if (!wdev->u.mesh.id_len) memcpy(&cur_params, &default_mesh_config, sizeof(cur_params)); else err = rdev_get_mesh_config(rdev, dev, &cur_params); - wdev_unlock(wdev); if (err) return err; @@ -8515,15 +8497,12 @@ static int nl80211_update_mesh_config(struct sk_buff *skb, if (err) return err; - wdev_lock(wdev); if (!wdev->u.mesh.id_len) err = -ENOLINK; if (!err) err = rdev_update_mesh_config(rdev, dev, mask, &cfg); - wdev_unlock(wdev); - return err; } @@ -8578,6 +8557,11 @@ static int nl80211_put_regdom(const struct ieee80211_regdomain *regdom, reg_rule->dfs_cac_ms)) goto nla_put_failure; + if ((reg_rule->flags & NL80211_RRF_PSD) && + nla_put_s8(msg, NL80211_ATTR_POWER_RULE_PSD, + reg_rule->psd)) + goto nla_put_failure; + nla_nest_end(msg, nl_reg_rule); } @@ -9014,7 +8998,7 @@ static bool cfg80211_off_channel_oper_allowed(struct wireless_dev *wdev, unsigned int link_id; bool all_ok = true; - ASSERT_WDEV_LOCK(wdev); + lockdep_assert_wiphy(wdev->wiphy); if (!cfg80211_beaconing_iface_active(wdev)) return true; @@ -9264,7 +9248,6 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) request->n_channels = i; - wdev_lock(wdev); for (i = 0; i < request->n_channels; i++) { struct ieee80211_channel *chan = request->channels[i]; @@ -9273,12 +9256,10 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) continue; if (!cfg80211_wdev_on_sub_chan(wdev, chan, true)) { - wdev_unlock(wdev); err = -EBUSY; goto out_free; } } - wdev_unlock(wdev); i = 0; if (n_ssids) { @@ -10284,9 +10265,7 @@ skip_beacons: goto free; } - wdev_lock(wdev); err = rdev_channel_switch(rdev, dev, &params); - wdev_unlock(wdev); free: kfree(params.beacon_after.mbssid_ies); @@ -10309,7 +10288,7 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb, void *hdr; struct nlattr *bss; - ASSERT_WDEV_LOCK(wdev); + lockdep_assert_wiphy(wdev->wiphy); hdr = nl80211hdr_put(msg, NETLINK_CB(cb->skb).portid, seq, flags, NL80211_CMD_NEW_SCAN_RESULTS); @@ -10372,7 +10351,6 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb, nla_put_u32(msg, NL80211_BSS_FREQUENCY, res->channel->center_freq) || nla_put_u32(msg, NL80211_BSS_FREQUENCY_OFFSET, res->channel->freq_offset) || - nla_put_u32(msg, NL80211_BSS_CHAN_WIDTH, res->scan_width) || nla_put_u32(msg, NL80211_BSS_SEEN_MS_AGO, jiffies_to_msecs(jiffies - intbss->ts))) goto nla_put_failure; @@ -10458,7 +10436,6 @@ static int nl80211_dump_scan(struct sk_buff *skb, struct netlink_callback *cb) /* nl80211_prepare_wdev_dump acquired it in the successful case */ __acquire(&rdev->wiphy.mtx); - wdev_lock(wdev); spin_lock_bh(&rdev->bss_lock); /* @@ -10484,7 +10461,6 @@ static int nl80211_dump_scan(struct
sk_buff *skb, struct netlink_callback *cb) } spin_unlock_bh(&rdev->bss_lock); - wdev_unlock(wdev); cb->args[2] = idx; wiphy_unlock(&rdev->wiphy); @@ -10607,9 +10583,7 @@ static int nl80211_dump_survey(struct sk_buff *skb, struct netlink_callback *cb) } while (1) { - wdev_lock(wdev); res = rdev_dump_survey(rdev, wdev->netdev, survey_idx, &survey); - wdev_unlock(wdev); if (res == -ENOENT) break; if (res) @@ -10782,9 +10756,7 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info) if (!req.bss) return -ENOENT; - wdev_lock(dev->ieee80211_ptr); err = cfg80211_mlme_auth(rdev, dev, &req); - wdev_unlock(dev->ieee80211_ptr); cfg80211_put_bss(&rdev->wiphy, req.bss); @@ -10994,8 +10966,9 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) if (cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE, req.ie, req.ie_len)) { - GENL_SET_ERR_MSG(info, - "non-inheritance makes no sense"); + NL_SET_ERR_MSG_ATTR(info->extack, + info->attrs[NL80211_ATTR_IE], + "non-inheritance makes no sense"); return -EINVAL; } } @@ -11120,6 +11093,7 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) if (!attrs[NL80211_ATTR_MLO_LINK_ID]) { err = -EINVAL; + NL_SET_BAD_ATTR(info->extack, link); goto free; } @@ -11127,6 +11101,7 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) /* cannot use the same link ID again */ if (req.links[link_id].bss) { err = -EINVAL; + NL_SET_BAD_ATTR(info->extack, link); goto free; } req.links[link_id].bss = @@ -11134,6 +11109,8 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(req.links[link_id].bss)) { err = PTR_ERR(req.links[link_id].bss); req.links[link_id].bss = NULL; + NL_SET_ERR_MSG_ATTR(info->extack, + link, "Error fetching BSS for link"); goto free; } @@ -11146,8 +11123,9 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) if (cfg80211_find_elem(WLAN_EID_FRAGMENT, req.links[link_id].elems, req.links[link_id].elems_len)) { - GENL_SET_ERR_MSG(info, - "cannot deal with fragmentation"); + NL_SET_ERR_MSG_ATTR(info->extack, + attrs[NL80211_ATTR_IE], + "cannot deal with fragmentation"); err = -EINVAL; goto free; } @@ -11155,8 +11133,9 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) if (cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE, req.links[link_id].elems, req.links[link_id].elems_len)) { - GENL_SET_ERR_MSG(info, - "cannot deal with non-inheritance"); + NL_SET_ERR_MSG_ATTR(info->extack, + attrs[NL80211_ATTR_IE], + "cannot deal with non-inheritance"); err = -EINVAL; goto free; } @@ -11199,7 +11178,8 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) err = nl80211_crypto_settings(rdev, info, &req.crypto, 1); if (!err) { - wdev_lock(dev->ieee80211_ptr); + struct nlattr *link; + int rem = 0; err = cfg80211_mlme_assoc(rdev, dev, &req); @@ -11210,7 +11190,33 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) ap_addr, ETH_ALEN); } - wdev_unlock(dev->ieee80211_ptr); + /* Report error from first problematic link */ + if (info->attrs[NL80211_ATTR_MLO_LINKS]) { + nla_for_each_nested(link, + info->attrs[NL80211_ATTR_MLO_LINKS], + rem) { + struct nlattr *link_id_attr = + nla_find_nested(link, NL80211_ATTR_MLO_LINK_ID); + + if (!link_id_attr) + continue; + + link_id = nla_get_u8(link_id_attr); + + if (link_id == req.link_id) + continue; + + if (!req.links[link_id].error || + WARN_ON(req.links[link_id].error > 0)) + continue; + + WARN_ON(err >= 0); + + 
NL_SET_BAD_ATTR(info->extack, link); + err = req.links[link_id].error; + break; + } + } } free: @@ -11227,7 +11233,7 @@ static int nl80211_deauthenticate(struct sk_buff *skb, struct genl_info *info) struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; const u8 *ie = NULL, *bssid; - int ie_len = 0, err; + int ie_len = 0; u16 reason_code; bool local_state_change; @@ -11263,11 +11269,8 @@ static int nl80211_deauthenticate(struct sk_buff *skb, struct genl_info *info) local_state_change = !!info->attrs[NL80211_ATTR_LOCAL_STATE_CHANGE]; - wdev_lock(dev->ieee80211_ptr); - err = cfg80211_mlme_deauth(rdev, dev, bssid, ie, ie_len, reason_code, - local_state_change); - wdev_unlock(dev->ieee80211_ptr); - return err; + return cfg80211_mlme_deauth(rdev, dev, bssid, ie, ie_len, reason_code, + local_state_change); } static int nl80211_disassociate(struct sk_buff *skb, struct genl_info *info) @@ -11275,7 +11278,7 @@ static int nl80211_disassociate(struct sk_buff *skb, struct genl_info *info) struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; const u8 *ie = NULL, *bssid; - int ie_len = 0, err; + int ie_len = 0; u16 reason_code; bool local_state_change; @@ -11311,11 +11314,8 @@ static int nl80211_disassociate(struct sk_buff *skb, struct genl_info *info) local_state_change = !!info->attrs[NL80211_ATTR_LOCAL_STATE_CHANGE]; - wdev_lock(dev->ieee80211_ptr); - err = cfg80211_mlme_disassoc(rdev, dev, bssid, ie, ie_len, reason_code, - local_state_change); - wdev_unlock(dev->ieee80211_ptr); - return err; + return cfg80211_mlme_disassoc(rdev, dev, bssid, ie, ie_len, reason_code, + local_state_change); } static bool @@ -11493,13 +11493,11 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info) ibss.userspace_handles_dfs = nla_get_flag(info->attrs[NL80211_ATTR_HANDLE_DFS]); - wdev_lock(dev->ieee80211_ptr); err = __cfg80211_join_ibss(rdev, dev, &ibss, connkeys); if (err) kfree_sensitive(connkeys); else if (info->attrs[NL80211_ATTR_SOCKET_OWNER]) dev->ieee80211_ptr->conn_owner_nlportid = info->snd_portid; - wdev_unlock(dev->ieee80211_ptr); return err; } @@ -12032,8 +12030,6 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info) if (nla_get_flag(info->attrs[NL80211_ATTR_MLO_SUPPORT])) connect.flags |= CONNECT_REQ_MLO_SUPPORT; - wdev_lock(dev->ieee80211_ptr); - err = cfg80211_connect(rdev, dev, &connect, connkeys, connect.prev_bssid); if (err) @@ -12048,8 +12044,6 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info) eth_zero_addr(dev->ieee80211_ptr->disconnect_bssid); } - wdev_unlock(dev->ieee80211_ptr); - return err; } @@ -12063,7 +12057,6 @@ static int nl80211_update_connect_params(struct sk_buff *skb, bool fils_sk_offload; u32 auth_type; u32 changed = 0; - int ret; if (!rdev->ops->update_connect_params) return -EOPNOTSUPP; @@ -12124,14 +12117,10 @@ static int nl80211_update_connect_params(struct sk_buff *skb, changed |= UPDATE_AUTH_TYPE; } - wdev_lock(dev->ieee80211_ptr); if (!wdev->connected) - ret = -ENOLINK; - else - ret = rdev_update_connect_params(rdev, dev, &connect, changed); - wdev_unlock(dev->ieee80211_ptr); + return -ENOLINK; - return ret; + return rdev_update_connect_params(rdev, dev, &connect, changed); } static int nl80211_disconnect(struct sk_buff *skb, struct genl_info *info) @@ -12139,7 +12128,6 @@ static int nl80211_disconnect(struct sk_buff *skb, struct genl_info *info) struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct 
net_device *dev = info->user_ptr[1]; u16 reason; - int ret; if (dev->ieee80211_ptr->conn_owner_nlportid && dev->ieee80211_ptr->conn_owner_nlportid != info->snd_portid) @@ -12157,10 +12145,7 @@ static int nl80211_disconnect(struct sk_buff *skb, struct genl_info *info) dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT) return -EOPNOTSUPP; - wdev_lock(dev->ieee80211_ptr); - ret = cfg80211_disconnect(rdev, dev, reason, true); - wdev_unlock(dev->ieee80211_ptr); - return ret; + return cfg80211_disconnect(rdev, dev, reason, true); } static int nl80211_wiphy_netns(struct sk_buff *skb, struct genl_info *info) @@ -12371,7 +12356,6 @@ static int nl80211_remain_on_channel(struct sk_buff *skb, if (err) return err; - wdev_lock(wdev); if (!cfg80211_off_channel_oper_allowed(wdev, chandef.chan)) { const struct cfg80211_chan_def *oper_chandef, *compat_chandef; @@ -12380,7 +12364,6 @@ static int nl80211_remain_on_channel(struct sk_buff *skb, if (WARN_ON(!oper_chandef)) { /* cannot happen since we must beacon to get here */ WARN_ON(1); - wdev_unlock(wdev); return -EBUSY; } @@ -12388,12 +12371,9 @@ static int nl80211_remain_on_channel(struct sk_buff *skb, compat_chandef = cfg80211_chandef_compatible(&chandef, oper_chandef); - if (compat_chandef != &chandef) { - wdev_unlock(wdev); + if (compat_chandef != &chandef) return -EBUSY; - } } - wdev_unlock(wdev); msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) @@ -12452,23 +12432,18 @@ static int nl80211_set_tx_bitrate_mask(struct sk_buff *skb, unsigned int link_id = nl80211_link_id(info->attrs); struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; - struct wireless_dev *wdev = dev->ieee80211_ptr; int err; if (!rdev->ops->set_bitrate_mask) return -EOPNOTSUPP; - wdev_lock(wdev); err = nl80211_parse_tx_bitrate_mask(info, info->attrs, NL80211_ATTR_TX_RATES, &mask, dev, true, link_id); if (err) - goto out; + return err; - err = rdev_set_bitrate_mask(rdev, dev, link_id, NULL, &mask); -out: - wdev_unlock(wdev); - return err; + return rdev_set_bitrate_mask(rdev, dev, link_id, NULL, &mask); } static int nl80211_register_mgmt(struct sk_buff *skb, struct genl_info *info) @@ -12597,12 +12572,9 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) if (!chandef.chan && params.offchan) return -EINVAL; - wdev_lock(wdev); if (params.offchan && - !cfg80211_off_channel_oper_allowed(wdev, chandef.chan)) { - wdev_unlock(wdev); + !cfg80211_off_channel_oper_allowed(wdev, chandef.chan)) return -EBUSY; - } params.link_id = nl80211_link_id_or_invalid(info->attrs); /* @@ -12611,11 +12583,8 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) * to the driver. 
*/ if (params.link_id >= 0 && - !(wdev->valid_links & BIT(params.link_id))) { - wdev_unlock(wdev); + !(wdev->valid_links & BIT(params.link_id))) return -EINVAL; - } - wdev_unlock(wdev); params.buf = nla_data(info->attrs[NL80211_ATTR_FRAME]); params.len = nla_len(info->attrs[NL80211_ATTR_FRAME]); @@ -12883,8 +12852,8 @@ static int nl80211_set_cqm_rssi(struct genl_info *info, struct cfg80211_cqm_config *cqm_config = NULL, *old; struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; - int i, err; s32 prev = S32_MIN; + int i, err; /* Check all values negative and sorted */ for (i = 0; i < n_thresholds; i++) { @@ -12901,38 +12870,28 @@ static int nl80211_set_cqm_rssi(struct genl_info *info, if (n_thresholds == 1 && thresholds[0] == 0) /* Disabling */ n_thresholds = 0; - wdev_lock(wdev); - old = rcu_dereference_protected(wdev->cqm_config, - lockdep_is_held(&wdev->mtx)); + old = wiphy_dereference(wdev->wiphy, wdev->cqm_config); /* if already disabled just succeed */ - if (!n_thresholds && !old) { - err = 0; - goto unlock; - } + if (!n_thresholds && !old) + return 0; if (n_thresholds > 1) { if (!wiphy_ext_feature_isset(&rdev->wiphy, NL80211_EXT_FEATURE_CQM_RSSI_LIST) || - !rdev->ops->set_cqm_rssi_range_config) { - err = -EOPNOTSUPP; - goto unlock; - } + !rdev->ops->set_cqm_rssi_range_config) + return -EOPNOTSUPP; } else { - if (!rdev->ops->set_cqm_rssi_config) { - err = -EOPNOTSUPP; - goto unlock; - } + if (!rdev->ops->set_cqm_rssi_config) + return -EOPNOTSUPP; } if (n_thresholds) { cqm_config = kzalloc(struct_size(cqm_config, rssi_thresholds, n_thresholds), GFP_KERNEL); - if (!cqm_config) { - err = -ENOMEM; - goto unlock; - } + if (!cqm_config) + return -ENOMEM; cqm_config->rssi_hyst = hysteresis; cqm_config->n_rssi_thresholds = n_thresholds; @@ -12965,8 +12924,6 @@ static int nl80211_set_cqm_rssi(struct genl_info *info, } else { kfree_rcu(old, rcu_head); } -unlock: - wdev_unlock(wdev); return err; } @@ -13150,11 +13107,9 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info) setup.control_port_over_nl80211 = true; } - wdev_lock(dev->ieee80211_ptr); err = __cfg80211_join_mesh(rdev, dev, &setup, &cfg); if (!err && info->attrs[NL80211_ATTR_SOCKET_OWNER]) dev->ieee80211_ptr->conn_owner_nlportid = info->snd_portid; - wdev_unlock(dev->ieee80211_ptr); return err; } @@ -14098,21 +14053,13 @@ static int nl80211_set_rekey_data(struct sk_buff *skb, struct genl_info *info) if (tb[NL80211_REKEY_DATA_AKM]) rekey_data.akm = nla_get_u32(tb[NL80211_REKEY_DATA_AKM]); - wdev_lock(wdev); - if (!wdev->connected) { - err = -ENOTCONN; - goto out; - } + if (!wdev->connected) + return -ENOTCONN; - if (!rdev->ops->set_rekey_data) { - err = -EOPNOTSUPP; - goto out; - } + if (!rdev->ops->set_rekey_data) + return -EOPNOTSUPP; - err = rdev_set_rekey_data(rdev, dev, &rekey_data); - out: - wdev_unlock(wdev); - return err; + return rdev_set_rekey_data(rdev, dev, &rekey_data); } static int nl80211_register_unexpected_frame(struct sk_buff *skb, @@ -15316,11 +15263,9 @@ static int nl80211_set_qos_map(struct sk_buff *skb, memcpy(qos_map->up, pos, IEEE80211_QOS_MAP_LEN_MIN); } - wdev_lock(dev->ieee80211_ptr); ret = nl80211_key_allowed(dev->ieee80211_ptr); if (!ret) ret = rdev_set_qos_map(rdev, dev, qos_map); - wdev_unlock(dev->ieee80211_ptr); kfree(qos_map); return ret; @@ -15334,7 +15279,6 @@ static int nl80211_add_tx_ts(struct sk_buff *skb, struct genl_info *info) const u8 *peer; u8 tsid, up; u16 admitted_time = 0; - int err; if (!(rdev->wiphy.features & 
NL80211_FEATURE_SUPPORTS_WMM_ADMISSION)) return -EOPNOTSUPP; @@ -15364,34 +15308,25 @@ static int nl80211_add_tx_ts(struct sk_buff *skb, struct genl_info *info) return -EINVAL; } - wdev_lock(wdev); switch (wdev->iftype) { case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_P2P_CLIENT: if (wdev->connected) break; - err = -ENOTCONN; - goto out; + return -ENOTCONN; default: - err = -EOPNOTSUPP; - goto out; + return -EOPNOTSUPP; } - err = rdev_add_tx_ts(rdev, dev, tsid, peer, up, admitted_time); - - out: - wdev_unlock(wdev); - return err; + return rdev_add_tx_ts(rdev, dev, tsid, peer, up, admitted_time); } static int nl80211_del_tx_ts(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; - struct wireless_dev *wdev = dev->ieee80211_ptr; const u8 *peer; u8 tsid; - int err; if (!info->attrs[NL80211_ATTR_TSID] || !info->attrs[NL80211_ATTR_MAC]) return -EINVAL; @@ -15399,11 +15334,7 @@ static int nl80211_del_tx_ts(struct sk_buff *skb, struct genl_info *info) tsid = nla_get_u8(info->attrs[NL80211_ATTR_TSID]); peer = nla_data(info->attrs[NL80211_ATTR_MAC]); - wdev_lock(wdev); - err = rdev_del_tx_ts(rdev, dev, tsid, peer); - wdev_unlock(wdev); - - return err; + return rdev_del_tx_ts(rdev, dev, tsid, peer); } static int nl80211_tdls_channel_switch(struct sk_buff *skb, @@ -15459,11 +15390,7 @@ static int nl80211_tdls_channel_switch(struct sk_buff *skb, addr = nla_data(info->attrs[NL80211_ATTR_MAC]); oper_class = nla_get_u8(info->attrs[NL80211_ATTR_OPER_CLASS]); - wdev_lock(wdev); - err = rdev_tdls_channel_switch(rdev, dev, addr, oper_class, &chandef); - wdev_unlock(wdev); - - return err; + return rdev_tdls_channel_switch(rdev, dev, addr, oper_class, &chandef); } static int nl80211_tdls_cancel_channel_switch(struct sk_buff *skb, @@ -15471,7 +15398,6 @@ static int nl80211_tdls_cancel_channel_switch(struct sk_buff *skb, { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; - struct wireless_dev *wdev = dev->ieee80211_ptr; const u8 *addr; if (!rdev->ops->tdls_channel_switch || @@ -15492,9 +15418,7 @@ static int nl80211_tdls_cancel_channel_switch(struct sk_buff *skb, addr = nla_data(info->attrs[NL80211_ATTR_MAC]); - wdev_lock(wdev); rdev_tdls_cancel_channel_switch(rdev, dev, addr); - wdev_unlock(wdev); return 0; } @@ -15527,7 +15451,6 @@ static int nl80211_set_pmk(struct sk_buff *skb, struct genl_info *info) struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_pmk_conf pmk_conf = {}; - int ret; if (wdev->iftype != NL80211_IFTYPE_STATION && wdev->iftype != NL80211_IFTYPE_P2P_CLIENT) @@ -15540,34 +15463,24 @@ static int nl80211_set_pmk(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[NL80211_ATTR_MAC] || !info->attrs[NL80211_ATTR_PMK]) return -EINVAL; - wdev_lock(wdev); - if (!wdev->connected) { - ret = -ENOTCONN; - goto out; - } + if (!wdev->connected) + return -ENOTCONN; pmk_conf.aa = nla_data(info->attrs[NL80211_ATTR_MAC]); - if (memcmp(pmk_conf.aa, wdev->u.client.connected_addr, ETH_ALEN)) { - ret = -EINVAL; - goto out; - } + if (memcmp(pmk_conf.aa, wdev->u.client.connected_addr, ETH_ALEN)) + return -EINVAL; pmk_conf.pmk = nla_data(info->attrs[NL80211_ATTR_PMK]); pmk_conf.pmk_len = nla_len(info->attrs[NL80211_ATTR_PMK]); if (pmk_conf.pmk_len != WLAN_PMK_LEN && - pmk_conf.pmk_len != WLAN_PMK_LEN_SUITE_B_192) { - ret = -EINVAL; - goto out; - } + pmk_conf.pmk_len != WLAN_PMK_LEN_SUITE_B_192) + 
return -EINVAL; if (info->attrs[NL80211_ATTR_PMKR0_NAME]) pmk_conf.pmk_r0_name = nla_data(info->attrs[NL80211_ATTR_PMKR0_NAME]); - ret = rdev_set_pmk(rdev, dev, &pmk_conf); -out: - wdev_unlock(wdev); - return ret; + return rdev_set_pmk(rdev, dev, &pmk_conf); } static int nl80211_del_pmk(struct sk_buff *skb, struct genl_info *info) @@ -15576,7 +15489,6 @@ static int nl80211_del_pmk(struct sk_buff *skb, struct genl_info *info) struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; const u8 *aa; - int ret; if (wdev->iftype != NL80211_IFTYPE_STATION && wdev->iftype != NL80211_IFTYPE_P2P_CLIENT) @@ -15589,12 +15501,8 @@ static int nl80211_del_pmk(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[NL80211_ATTR_MAC]) return -EINVAL; - wdev_lock(wdev); aa = nla_data(info->attrs[NL80211_ATTR_MAC]); - ret = rdev_del_pmk(rdev, dev, aa); - wdev_unlock(wdev); - - return ret; + return rdev_del_pmk(rdev, dev, aa); } static int nl80211_external_auth(struct sk_buff *skb, struct genl_info *info) @@ -15668,8 +15576,6 @@ static int nl80211_tx_control_port(struct sk_buff *skb, struct genl_info *info) return -EINVAL; } - wdev_lock(wdev); - switch (wdev->iftype) { case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: @@ -15678,21 +15584,16 @@ static int nl80211_tx_control_port(struct sk_buff *skb, struct genl_info *info) case NL80211_IFTYPE_ADHOC: if (wdev->u.ibss.current_bss) break; - err = -ENOTCONN; - goto out; + return -ENOTCONN; case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_P2P_CLIENT: if (wdev->connected) break; - err = -ENOTCONN; - goto out; + return -ENOTCONN; default: - err = -EOPNOTSUPP; - goto out; + return -EOPNOTSUPP; } - wdev_unlock(wdev); - buf = nla_data(info->attrs[NL80211_ATTR_FRAME]); len = nla_len(info->attrs[NL80211_ATTR_FRAME]); dest = nla_data(info->attrs[NL80211_ATTR_MAC]); @@ -15708,9 +15609,6 @@ static int nl80211_tx_control_port(struct sk_buff *skb, struct genl_info *info) if (!err && !dont_wait_for_ack) nl_set_extack_cookie_u64(info->extack, cookie); return err; - out: - wdev_unlock(wdev); - return err; } static int nl80211_get_ftm_responder_stats(struct sk_buff *skb, @@ -15988,8 +15886,6 @@ static int nl80211_set_tid_config(struct sk_buff *skb, if (info->attrs[NL80211_ATTR_MAC]) tid_config->peer = nla_data(info->attrs[NL80211_ATTR_MAC]); - wdev_lock(dev->ieee80211_ptr); - nla_for_each_nested(tid, info->attrs[NL80211_ATTR_TID_CONFIG], rem_conf) { ret = nla_parse_nested(attrs, NL80211_TID_CONFIG_ATTR_MAX, @@ -16011,7 +15907,6 @@ static int nl80211_set_tid_config(struct sk_buff *skb, bad_tid_conf: kfree(tid_config); - wdev_unlock(dev->ieee80211_ptr); return ret; } @@ -16108,9 +16003,7 @@ static int nl80211_color_change(struct sk_buff *skb, struct genl_info *info) params.counter_offset_presp = offset; } - wdev_lock(wdev); err = rdev_color_change(rdev, dev, &params); - wdev_unlock(wdev); out: kfree(params.beacon_next.mbssid_ies); @@ -16166,7 +16059,6 @@ static int nl80211_add_link(struct sk_buff *skb, struct genl_info *info) !is_valid_ether_addr(nla_data(info->attrs[NL80211_ATTR_MAC]))) return -EINVAL; - wdev_lock(wdev); wdev->valid_links |= BIT(link_id); ether_addr_copy(wdev->links[link_id].addr, nla_data(info->attrs[NL80211_ATTR_MAC])); @@ -16176,7 +16068,6 @@ static int nl80211_add_link(struct sk_buff *skb, struct genl_info *info) wdev->valid_links &= ~BIT(link_id); eth_zero_addr(wdev->links[link_id].addr); } - wdev_unlock(wdev); return ret; } @@ -16198,9 +16089,7 @@ static int nl80211_remove_link(struct sk_buff *skb, struct genl_info *info)
return -EINVAL; } - wdev_lock(wdev); cfg80211_remove_link(wdev, link_id); - wdev_unlock(wdev); return 0; } @@ -16290,14 +16179,10 @@ nl80211_add_mod_link_station(struct sk_buff *skb, struct genl_info *info, if (err) return err; - wdev_lock(dev->ieee80211_ptr); if (add) - err = rdev_add_link_station(rdev, dev, &params); - else - err = rdev_mod_link_station(rdev, dev, &params); - wdev_unlock(dev->ieee80211_ptr); + return rdev_add_link_station(rdev, dev, &params); - return err; + return rdev_mod_link_station(rdev, dev, &params); } static int @@ -16318,7 +16203,6 @@ nl80211_remove_link_station(struct sk_buff *skb, struct genl_info *info) struct link_station_del_parameters params = {}; struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; - int ret; if (!rdev->ops->del_link_station) return -EOPNOTSUPP; @@ -16330,11 +16214,7 @@ nl80211_remove_link_station(struct sk_buff *skb, struct genl_info *info) params.mld_mac = nla_data(info->attrs[NL80211_ATTR_MLD_ADDR]); params.link_id = nla_get_u8(info->attrs[NL80211_ATTR_MLO_LINK_ID]); - wdev_lock(dev->ieee80211_ptr); - ret = rdev_del_link_station(rdev, dev, &params); - wdev_unlock(dev->ieee80211_ptr); - - return ret; + return rdev_del_link_station(rdev, dev, &params); } static int nl80211_set_hw_timestamp(struct sk_buff *skb, @@ -17936,7 +17816,7 @@ void nl80211_send_rx_auth(struct cfg80211_registered_device *rdev, void nl80211_send_rx_assoc(struct cfg80211_registered_device *rdev, struct net_device *netdev, - struct cfg80211_rx_assoc_resp *data) + struct cfg80211_rx_assoc_resp_data *data) { nl80211_send_mlme_event(rdev, netdev, data->buf, data->len, NL80211_CMD_ASSOCIATE, GFP_KERNEL, @@ -18261,7 +18141,7 @@ void nl80211_send_roamed(struct cfg80211_registered_device *rdev, } void nl80211_send_port_authorized(struct cfg80211_registered_device *rdev, - struct net_device *netdev, const u8 *bssid, + struct net_device *netdev, const u8 *peer_addr, const u8 *td_bitmap, u8 td_bitmap_len) { struct sk_buff *msg; @@ -18279,7 +18159,7 @@ void nl80211_send_port_authorized(struct cfg80211_registered_device *rdev, if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) || - nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, bssid)) + nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, peer_addr)) goto nla_put_failure; if ((td_bitmap_len > 0) && td_bitmap) @@ -18342,7 +18222,7 @@ void cfg80211_links_removed(struct net_device *dev, u16 link_mask) struct nlattr *links; void *hdr; - ASSERT_WDEV_LOCK(wdev); + lockdep_assert_wiphy(wdev->wiphy); trace_cfg80211_links_removed(dev, link_mask); if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION && @@ -19145,11 +19025,9 @@ void cfg80211_cqm_rssi_notify_work(struct wiphy *wiphy, struct wiphy_work *work) struct sk_buff *msg; s32 rssi_level; - wdev_lock(wdev); - cqm_config = rcu_dereference_protected(wdev->cqm_config, - lockdep_is_held(&wdev->mtx)); + cqm_config = wiphy_dereference(wdev->wiphy, wdev->cqm_config); if (!cqm_config) - goto unlock; + return; if (cqm_config->use_range_api) cfg80211_cqm_rssi_update(rdev, wdev->netdev, cqm_config); @@ -19159,7 +19037,7 @@ void cfg80211_cqm_rssi_notify_work(struct wiphy *wiphy, struct wiphy_work *work) msg = cfg80211_prepare_cqm(wdev->netdev, NULL, GFP_KERNEL); if (!msg) - goto unlock; + return; if (nla_put_u32(msg, NL80211_ATTR_CQM_RSSI_THRESHOLD_EVENT, rssi_event)) @@ -19171,12 +19049,10 @@ void cfg80211_cqm_rssi_notify_work(struct wiphy *wiphy, struct wiphy_work *work) cfg80211_send_cqm(msg, GFP_KERNEL); - goto unlock;
+ return; nla_put_failure: nlmsg_free(msg); - unlock: - wdev_unlock(wdev); } void cfg80211_cqm_txe_notify(struct net_device *dev, @@ -19420,7 +19296,7 @@ void cfg80211_ch_switch_notify(struct net_device *dev, struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); - ASSERT_WDEV_LOCK(wdev); + lockdep_assert_wiphy(wdev->wiphy); WARN_INVALID_LINK_ID(wdev, link_id); trace_cfg80211_ch_switch_notify(dev, chandef, link_id, punct_bitmap); @@ -19465,7 +19341,7 @@ void cfg80211_ch_switch_started_notify(struct net_device *dev, struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); - ASSERT_WDEV_LOCK(wdev); + lockdep_assert_wiphy(wdev->wiphy); WARN_INVALID_LINK_ID(wdev, link_id); trace_cfg80211_ch_switch_started_notify(dev, chandef, link_id, @@ -19488,7 +19364,7 @@ int cfg80211_bss_color_notify(struct net_device *dev, struct sk_buff *msg; void *hdr; - ASSERT_WDEV_LOCK(wdev); + lockdep_assert_wiphy(wdev->wiphy); trace_cfg80211_bss_color_notify(dev, cmd, count, color_bitmap); diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index b4af53f9b2..aad40240d9 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -60,7 +60,7 @@ void nl80211_send_rx_auth(struct cfg80211_registered_device *rdev, const u8 *buf, size_t len, gfp_t gfp); void nl80211_send_rx_assoc(struct cfg80211_registered_device *rdev, struct net_device *netdev, - struct cfg80211_rx_assoc_resp *data); + struct cfg80211_rx_assoc_resp_data *data); void nl80211_send_deauth(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *buf, size_t len, @@ -82,8 +82,11 @@ void nl80211_send_connect_result(struct cfg80211_registered_device *rdev, void nl80211_send_roamed(struct cfg80211_registered_device *rdev, struct net_device *netdev, struct cfg80211_roam_info *info, gfp_t gfp); +/* For STA/GC, indicate port authorized with AP/GO bssid. + * For GO/AP, use peer GC/STA mac_addr. 
+ */ void nl80211_send_port_authorized(struct cfg80211_registered_device *rdev, - struct net_device *netdev, const u8 *bssid, + struct net_device *netdev, const u8 *peer_addr, const u8 *td_bitmap, u8 td_bitmap_len); void nl80211_send_disconnected(struct cfg80211_registered_device *rdev, struct net_device *netdev, u16 reason, diff --git a/net/wireless/ocb.c b/net/wireless/ocb.c index 29afaf3da5..7d2d67f13a 100644 --- a/net/wireless/ocb.c +++ b/net/wireless/ocb.c @@ -4,7 +4,7 @@ * * Copyright: (c) 2014 Czech Technical University in Prague * (c) 2014 Volkswagen Group Research - * Copyright (C) 2022 Intel Corporation + * Copyright (C) 2022-2023 Intel Corporation * Author: Rostislav Lisovy <rostislav.lisovy@fel.cvut.cz> * Funded by: Volkswagen Group Research */ @@ -15,14 +15,14 @@ #include "core.h" #include "rdev-ops.h" -int __cfg80211_join_ocb(struct cfg80211_registered_device *rdev, - struct net_device *dev, - struct ocb_setup *setup) +int cfg80211_join_ocb(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct ocb_setup *setup) { struct wireless_dev *wdev = dev->ieee80211_ptr; int err; - ASSERT_WDEV_LOCK(wdev); + lockdep_assert_wiphy(wdev->wiphy); if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_OCB) return -EOPNOTSUPP; @@ -40,27 +40,13 @@ int __cfg80211_join_ocb(struct cfg80211_registered_device *rdev, return err; } -int cfg80211_join_ocb(struct cfg80211_registered_device *rdev, - struct net_device *dev, - struct ocb_setup *setup) -{ - struct wireless_dev *wdev = dev->ieee80211_ptr; - int err; - - wdev_lock(wdev); - err = __cfg80211_join_ocb(rdev, dev, setup); - wdev_unlock(wdev); - - return err; -} - -int __cfg80211_leave_ocb(struct cfg80211_registered_device *rdev, - struct net_device *dev) +int cfg80211_leave_ocb(struct cfg80211_registered_device *rdev, + struct net_device *dev) { struct wireless_dev *wdev = dev->ieee80211_ptr; int err; - ASSERT_WDEV_LOCK(wdev); + lockdep_assert_wiphy(wdev->wiphy); if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_OCB) return -EOPNOTSUPP; @@ -77,16 +63,3 @@ int __cfg80211_leave_ocb(struct cfg80211_registered_device *rdev, return err; } - -int cfg80211_leave_ocb(struct cfg80211_registered_device *rdev, - struct net_device *dev) -{ - struct wireless_dev *wdev = dev->ieee80211_ptr; - int err; - - wdev_lock(wdev); - err = __cfg80211_leave_ocb(rdev, dev); - wdev_unlock(wdev); - - return err; -} diff --git a/net/wireless/pmsr.c b/net/wireless/pmsr.c index 9611aa0bd0..e106dcea39 100644 --- a/net/wireless/pmsr.c +++ b/net/wireless/pmsr.c @@ -600,7 +600,7 @@ static void cfg80211_pmsr_process_abort(struct wireless_dev *wdev) struct cfg80211_pmsr_request *req, *tmp; LIST_HEAD(free_list); - lockdep_assert_held(&wdev->mtx); + lockdep_assert_wiphy(wdev->wiphy); spin_lock_bh(&wdev->pmsr_lock); list_for_each_entry_safe(req, tmp, &wdev->pmsr_list, list) { @@ -623,9 +623,7 @@ void cfg80211_pmsr_free_wk(struct work_struct *work) pmsr_free_wk); wiphy_lock(wdev->wiphy); - wdev_lock(wdev); cfg80211_pmsr_process_abort(wdev); - wdev_unlock(wdev); wiphy_unlock(wdev->wiphy); } diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 90bb7ac4b9..2214a90cf1 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -173,7 +173,7 @@ static inline int rdev_start_ap(struct cfg80211_registered_device *rdev, static inline int rdev_change_beacon(struct cfg80211_registered_device *rdev, struct net_device *dev, - struct cfg80211_beacon_data *info) + struct cfg80211_ap_update *info) { int ret; trace_rdev_change_beacon(&rdev->wiphy, dev, info); diff 
--git a/net/wireless/reg.c b/net/wireless/reg.c index 0317cf9da3..2ef4f6cc7a 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1283,7 +1283,9 @@ static bool is_valid_rd(const struct ieee80211_regdomain *rd) * 60 GHz band. * This resolution can be lowered and should be considered as we add * regulatory rule support for other "bands". - **/ + * + * Returns: whether or not the frequency is in the range + */ static bool freq_in_rule_band(const struct ieee80211_freq_range *freq_range, u32 freq_khz) { @@ -1492,6 +1494,8 @@ static void add_rule(struct ieee80211_reg_rule *rule, * Returns a pointer to the regulatory domain structure which will hold the * resulting intersection of rules between rd1 and rd2. We will * kzalloc() this structure for you. + * + * Returns: the intersected regdomain */ static struct ieee80211_regdomain * regdom_intersect(const struct ieee80211_regdomain *rd1, @@ -1589,6 +1593,8 @@ static u32 map_regdom_flags(u32 rd_flags) channel_flags |= IEEE80211_CHAN_NO_320MHZ; if (rd_flags & NL80211_RRF_NO_EHT) channel_flags |= IEEE80211_CHAN_NO_EHT; + if (rd_flags & NL80211_RRF_PSD) + channel_flags |= IEEE80211_CHAN_PSD; return channel_flags; } @@ -1795,6 +1801,9 @@ static void handle_channel_single_rule(struct wiphy *wiphy, chan->dfs_cac_ms = reg_rule->dfs_cac_ms; } + if (chan->flags & IEEE80211_CHAN_PSD) + chan->psd = reg_rule->psd; + return; } @@ -1815,6 +1824,9 @@ static void handle_channel_single_rule(struct wiphy *wiphy, chan->dfs_cac_ms = IEEE80211_DFS_MIN_CAC_TIME_MS; } + if (chan->flags & IEEE80211_CHAN_PSD) + chan->psd = reg_rule->psd; + if (chan->orig_mpwr) { /* * Devices that use REGULATORY_COUNTRY_IE_FOLLOW_POWER @@ -1884,6 +1896,12 @@ static void handle_channel_adjacent_rules(struct wiphy *wiphy, rrule2->dfs_cac_ms); } + if ((rrule1->flags & NL80211_RRF_PSD) && + (rrule2->flags & NL80211_RRF_PSD)) + chan->psd = min_t(s8, rrule1->psd, rrule2->psd); + else + chan->flags &= ~NL80211_RRF_PSD; + return; } @@ -2151,6 +2169,13 @@ static bool reg_is_world_roaming(struct wiphy *wiphy) return false; } +static void reg_call_notifier(struct wiphy *wiphy, + struct regulatory_request *request) +{ + if (wiphy->reg_notifier) + wiphy->reg_notifier(wiphy, request); +} + static void handle_reg_beacon(struct wiphy *wiphy, unsigned int chan_idx, struct reg_beacon *reg_beacon) { @@ -2158,6 +2183,7 @@ static void handle_reg_beacon(struct wiphy *wiphy, unsigned int chan_idx, struct ieee80211_channel *chan; bool channel_changed = false; struct ieee80211_channel chan_before; + struct regulatory_request *lr = get_last_request(); sband = wiphy->bands[reg_beacon->chan.band]; chan = &sband->channels[chan_idx]; @@ -2183,8 +2209,11 @@ static void handle_reg_beacon(struct wiphy *wiphy, unsigned int chan_idx, channel_changed = true; } - if (channel_changed) + if (channel_changed) { nl80211_send_beacon_hint_event(wiphy, &chan_before, chan); + if (wiphy->flags & WIPHY_FLAG_CHANNEL_CHANGE_ON_BEACON) + reg_call_notifier(wiphy, lr); + } } /* @@ -2327,13 +2356,6 @@ static void reg_process_ht_flags(struct wiphy *wiphy) reg_process_ht_flags_band(wiphy, wiphy->bands[band]); } -static void reg_call_notifier(struct wiphy *wiphy, - struct regulatory_request *request) -{ - if (wiphy->reg_notifier) - wiphy->reg_notifier(wiphy, request); -} - static bool reg_wdev_chan_valid(struct wiphy *wiphy, struct wireless_dev *wdev) { struct cfg80211_chan_def chandef = {}; @@ -2342,12 +2364,11 @@ static bool reg_wdev_chan_valid(struct wiphy *wiphy, struct wireless_dev *wdev) bool ret; int link; - wdev_lock(wdev); iftype 
= wdev->iftype; /* make sure the interface is active */ if (!wdev->netdev || !netif_running(wdev->netdev)) - goto wdev_inactive_unlock; + return true; for (link = 0; link < ARRAY_SIZE(wdev->links); link++) { struct ieee80211_channel *chan; @@ -2407,8 +2428,6 @@ static bool reg_wdev_chan_valid(struct wiphy *wiphy, struct wireless_dev *wdev) break; } - wdev_unlock(wdev); - switch (iftype) { case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: @@ -2429,16 +2448,8 @@ static bool reg_wdev_chan_valid(struct wiphy *wiphy, struct wireless_dev *wdev) default: break; } - - wdev_lock(wdev); } - wdev_unlock(wdev); - - return true; - -wdev_inactive_unlock: - wdev_unlock(wdev); return true; } @@ -2461,7 +2472,7 @@ static void reg_check_chans_work(struct work_struct *work) pr_debug("Verifying active interfaces after reg change\n"); rtnl_lock(); - list_for_each_entry(rdev, &cfg80211_rdev_list, list) + for_each_rdev(rdev) reg_leave_invalid_chans(&rdev->wiphy); rtnl_unlock(); @@ -2515,7 +2526,7 @@ static void update_all_wiphy_regulatory(enum nl80211_reg_initiator initiator) ASSERT_RTNL(); - list_for_each_entry(rdev, &cfg80211_rdev_list, list) { + for_each_rdev(rdev) { wiphy = &rdev->wiphy; wiphy_update_regulatory(wiphy, initiator); } @@ -2577,6 +2588,9 @@ static void handle_channel_custom(struct wiphy *wiphy, chan->dfs_cac_ms = IEEE80211_DFS_MIN_CAC_TIME_MS; } + if (chan->flags & IEEE80211_CHAN_PSD) + chan->psd = reg_rule->psd; + chan->max_power = chan->max_reg_power; } @@ -2663,6 +2677,9 @@ static void reg_set_request_processed(void) * * The wireless subsystem can use this function to process * a regulatory request issued by the regulatory core. + * + * Returns: %REG_REQ_OK or %REG_REQ_IGNORE, indicating if the + * hint was processed or ignored */ static enum reg_request_treatment reg_process_hint_core(struct regulatory_request *core_request) @@ -2719,6 +2736,9 @@ __reg_process_hint_user(struct regulatory_request *user_request) * * The wireless subsystem can use this function to process * a regulatory request initiated by userspace. + * + * Returns: %REG_REQ_OK or %REG_REQ_IGNORE, indicating if the + * hint was processed or ignored */ static enum reg_request_treatment reg_process_hint_user(struct regulatory_request *user_request) @@ -2774,7 +2794,7 @@ __reg_process_hint_driver(struct regulatory_request *driver_request) * The wireless subsystem can use this function to process * a regulatory request issued by an 802.11 driver. * - * Returns one of the different reg request treatment values. + * Returns: one of the different reg request treatment values. */ static enum reg_request_treatment reg_process_hint_driver(struct wiphy *wiphy, @@ -2878,7 +2898,7 @@ __reg_process_hint_country_ie(struct wiphy *wiphy, * The wireless subsystem can use this function to process * a regulatory request issued by a country Information Element. * - * Returns one of the different reg request treatment values. + * Returns: one of the different reg request treatment values. 
*/ static enum reg_request_treatment reg_process_hint_country_ie(struct wiphy *wiphy, @@ -2991,7 +3011,7 @@ static void wiphy_all_share_dfs_chan_state(struct wiphy *wiphy) ASSERT_RTNL(); - list_for_each_entry(rdev, &cfg80211_rdev_list, list) { + for_each_rdev(rdev) { if (wiphy == &rdev->wiphy) continue; wiphy_share_dfs_chan_state(wiphy, &rdev->wiphy); @@ -3057,7 +3077,7 @@ static void notify_self_managed_wiphys(struct regulatory_request *request) struct cfg80211_registered_device *rdev; struct wiphy *wiphy; - list_for_each_entry(rdev, &cfg80211_rdev_list, list) { + for_each_rdev(rdev) { wiphy = &rdev->wiphy; if (wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED && request->initiator == NL80211_REGDOM_SET_BY_USER) @@ -3122,7 +3142,7 @@ static void reg_process_pending_beacon_hints(void) list_del_init(&pending_beacon->list); /* Applies the beacon hint to current wiphys */ - list_for_each_entry(rdev, &cfg80211_rdev_list, list) + for_each_rdev(rdev) wiphy_update_new_beacon(&rdev->wiphy, pending_beacon); /* Remembers the beacon hint for new wiphys or reg changes */ @@ -3177,7 +3197,7 @@ static void reg_process_self_managed_hints(void) ASSERT_RTNL(); - list_for_each_entry(rdev, &cfg80211_rdev_list, list) { + for_each_rdev(rdev) { wiphy_lock(&rdev->wiphy); reg_process_self_managed_hint(&rdev->wiphy); wiphy_unlock(&rdev->wiphy); @@ -3517,7 +3537,7 @@ static void restore_regulatory_settings(bool reset_user, bool cached) world_alpha2[0] = cfg80211_world_regdom->alpha2[0]; world_alpha2[1] = cfg80211_world_regdom->alpha2[1]; - list_for_each_entry(rdev, &cfg80211_rdev_list, list) { + for_each_rdev(rdev) { if (rdev->wiphy.regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED) continue; if (rdev->wiphy.regulatory_flags & REGULATORY_CUSTOM_REG) @@ -3574,15 +3594,15 @@ static bool is_wiphy_all_set_reg_flag(enum ieee80211_regulatory_flags flag) struct cfg80211_registered_device *rdev; struct wireless_dev *wdev; - list_for_each_entry(rdev, &cfg80211_rdev_list, list) { + for_each_rdev(rdev) { + wiphy_lock(&rdev->wiphy); list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { - wdev_lock(wdev); if (!(wdev->wiphy->regulatory_flags & flag)) { - wdev_unlock(wdev); + wiphy_unlock(&rdev->wiphy); return false; } - wdev_unlock(wdev); } + wiphy_unlock(&rdev->wiphy); } return true; @@ -3838,7 +3858,7 @@ static int reg_set_rd_driver(const struct ieee80211_regdomain *rd, { const struct ieee80211_regdomain *regd; const struct ieee80211_regdomain *intersected_rd = NULL; - const struct ieee80211_regdomain *tmp; + const struct ieee80211_regdomain *tmp = NULL; struct wiphy *request_wiphy; if (is_world_regdom(rd->alpha2)) @@ -3861,10 +3881,8 @@ static int reg_set_rd_driver(const struct ieee80211_regdomain *rd, if (!driver_request->intersect) { ASSERT_RTNL(); wiphy_lock(request_wiphy); - if (request_wiphy->regd) { - wiphy_unlock(request_wiphy); - return -EALREADY; - } + if (request_wiphy->regd) + tmp = get_wiphy_regdom(request_wiphy); regd = reg_copy_regd(rd); if (IS_ERR(regd)) { @@ -3873,6 +3891,7 @@ static int reg_set_rd_driver(const struct ieee80211_regdomain *rd, } rcu_assign_pointer(request_wiphy->regd, regd); + rcu_free_regdom(tmp); wiphy_unlock(request_wiphy); reset_regdomains(false, rd); return 0; @@ -4244,7 +4263,7 @@ void regulatory_propagate_dfs_state(struct wiphy *wiphy, if (WARN_ON(!cfg80211_chandef_valid(chandef))) return; - list_for_each_entry(rdev, &cfg80211_rdev_list, list) { + for_each_rdev(rdev) { if (wiphy == &rdev->wiphy) continue; diff --git a/net/wireless/reg.h b/net/wireless/reg.h index 
f3707f7290..a703e53c23 100644 --- a/net/wireless/reg.h +++ b/net/wireless/reg.h @@ -5,7 +5,7 @@ /* * Copyright 2008-2011 Luis R. Rodriguez <mcgrof@qca.qualcomm.com> - * Copyright (C) 2019 Intel Corporation + * Copyright (C) 2019, 2023 Intel Corporation * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -133,7 +133,7 @@ void regulatory_hint_disconnect(void); /** * cfg80211_get_unii - get the U-NII band for the frequency * @freq: the frequency for which we want to get the UNII band. - + * * Get a value specifying the U-NII band frequency belongs to. * U-NII bands are defined by the FCC in C.F.R 47 part 15. * @@ -156,11 +156,11 @@ bool regulatory_indoor_allowed(void); /** * regulatory_propagate_dfs_state - Propagate DFS channel state to other wiphys - * @wiphy - wiphy on which radar is detected and the event will be propagated + * @wiphy: wiphy on which radar is detected and the event will be propagated * to other available wiphys having the same DFS domain - * @chandef - Channel definition of radar detected channel - * @dfs_state - DFS channel state to be set - * @event - Type of radar event which triggered this DFS state change + * @chandef: Channel definition of radar detected channel + * @dfs_state: DFS channel state to be set + * @event: Type of radar event which triggered this DFS state change * * This function should be called with rtnl lock held. */ @@ -171,8 +171,8 @@ void regulatory_propagate_dfs_state(struct wiphy *wiphy, /** * reg_dfs_domain_same - Checks if both wiphy have same DFS domain configured - * @wiphy1 - wiphy it's dfs_region to be checked against that of wiphy2 - * @wiphy2 - wiphy it's dfs_region to be checked against that of wiphy1 + * @wiphy1: wiphy it's dfs_region to be checked against that of wiphy2 + * @wiphy2: wiphy it's dfs_region to be checked against that of wiphy1 */ bool reg_dfs_domain_same(struct wiphy *wiphy1, struct wiphy *wiphy2); diff --git a/net/wireless/scan.c b/net/wireless/scan.c index bd4dd75e44..3f49f5c699 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -830,10 +830,47 @@ static int cfg80211_scan_6ghz(struct cfg80211_registered_device *rdev) list_for_each_entry(intbss, &rdev->bss_list, list) { struct cfg80211_bss *res = &intbss->pub; const struct cfg80211_bss_ies *ies; + const struct element *ssid_elem; + struct cfg80211_colocated_ap *entry; + u32 s_ssid_tmp; + int ret; ies = rcu_access_pointer(res->ies); count += cfg80211_parse_colocated_ap(ies, &coloc_ap_list); + + /* In case the scan request specified a specific BSSID + * and the BSS is found and operating on 6GHz band then + * add this AP to the collocated APs list. + * This is relevant for ML probe requests when the lower + * band APs have not been discovered. 
+ */ + if (is_broadcast_ether_addr(rdev_req->bssid) || + !ether_addr_equal(rdev_req->bssid, res->bssid) || + res->channel->band != NL80211_BAND_6GHZ) + continue; + + ret = cfg80211_calc_short_ssid(ies, &ssid_elem, + &s_ssid_tmp); + if (ret) + continue; + + entry = kzalloc(sizeof(*entry) + IEEE80211_MAX_SSID_LEN, + GFP_ATOMIC); + + if (!entry) + continue; + + memcpy(entry->bssid, res->bssid, ETH_ALEN); + entry->short_ssid = s_ssid_tmp; + memcpy(entry->ssid, ssid_elem->data, + ssid_elem->datalen); + entry->ssid_len = ssid_elem->datalen; + entry->short_ssid_valid = true; + entry->center_freq = res->channel->center_freq; + + list_add_tail(&entry->list, &coloc_ap_list); + count++; } spin_unlock_bh(&rdev->bss_lock); } @@ -1642,8 +1679,6 @@ static bool cfg80211_combine_bsses(struct cfg80211_registered_device *rdev, continue; if (bss->pub.channel != new->pub.channel) continue; - if (bss->pub.scan_width != new->pub.scan_width) - continue; if (rcu_access_pointer(bss->pub.beacon_ies)) continue; ies = rcu_access_pointer(bss->pub.ies); @@ -1690,6 +1725,61 @@ static void cfg80211_update_hidden_bsses(struct cfg80211_internal_bss *known, } } +static void cfg80211_check_stuck_ecsa(struct cfg80211_registered_device *rdev, + struct cfg80211_internal_bss *known, + const struct cfg80211_bss_ies *old) +{ + const struct ieee80211_ext_chansw_ie *ecsa; + const struct element *elem_new, *elem_old; + const struct cfg80211_bss_ies *new, *bcn; + + if (known->pub.proberesp_ecsa_stuck) + return; + + new = rcu_dereference_protected(known->pub.proberesp_ies, + lockdep_is_held(&rdev->bss_lock)); + if (WARN_ON(!new)) + return; + + if (new->tsf - old->tsf < USEC_PER_SEC) + return; + + elem_old = cfg80211_find_elem(WLAN_EID_EXT_CHANSWITCH_ANN, + old->data, old->len); + if (!elem_old) + return; + + elem_new = cfg80211_find_elem(WLAN_EID_EXT_CHANSWITCH_ANN, + new->data, new->len); + if (!elem_new) + return; + + bcn = rcu_dereference_protected(known->pub.beacon_ies, + lockdep_is_held(&rdev->bss_lock)); + if (bcn && + cfg80211_find_elem(WLAN_EID_EXT_CHANSWITCH_ANN, + bcn->data, bcn->len)) + return; + + if (elem_new->datalen != elem_old->datalen) + return; + if (elem_new->datalen < sizeof(struct ieee80211_ext_chansw_ie)) + return; + if (memcmp(elem_new->data, elem_old->data, elem_new->datalen)) + return; + + ecsa = (void *)elem_new->data; + + if (!ecsa->mode) + return; + + if (ecsa->new_ch_num != + ieee80211_frequency_to_channel(known->pub.channel->center_freq)) + return; + + known->pub.proberesp_ecsa_stuck = 1; +} + static bool cfg80211_update_known_bss(struct cfg80211_registered_device *rdev, struct cfg80211_internal_bss *known, @@ -1709,9 +1799,13 @@ cfg80211_update_known_bss(struct cfg80211_registered_device *rdev, /* Override possible earlier Beacon frame IEs */ rcu_assign_pointer(known->pub.ies, new->pub.proberesp_ies); - if (old) + if (old) { + cfg80211_check_stuck_ecsa(rdev, known, old); kfree_rcu((struct cfg80211_bss_ies *)old, rcu_head); - } else if (rcu_access_pointer(new->pub.beacon_ies)) { + } + } + + if (rcu_access_pointer(new->pub.beacon_ies)) { const struct cfg80211_bss_ies *old; if (known->pub.hidden_beacon_bss && @@ -1829,8 +1923,12 @@ __cfg80211_bss_update(struct cfg80211_registered_device *rdev, list_add(&new->hidden_list, &hidden->hidden_list); hidden->refcount++; + + ies = (void *)rcu_access_pointer(new->pub.beacon_ies); rcu_assign_pointer(new->pub.beacon_ies, hidden->pub.beacon_ies); + if (ies) + kfree_rcu(ies, rcu_head); } } else { /* @@ -1940,8 +2038,7 @@ EXPORT_SYMBOL(cfg80211_get_ies_channel_number); */ 
static struct ieee80211_channel * cfg80211_get_bss_channel(struct wiphy *wiphy, const u8 *ie, size_t ielen, - struct ieee80211_channel *channel, - enum nl80211_bss_scan_width scan_width) + struct ieee80211_channel *channel) { u32 freq; int channel_number; @@ -1981,16 +2078,6 @@ cfg80211_get_bss_channel(struct wiphy *wiphy, const u8 *ie, size_t ielen, return channel; } - if (scan_width == NL80211_BSS_CHAN_WIDTH_10 || - scan_width == NL80211_BSS_CHAN_WIDTH_5) { - /* - * Ignore channel number in 5 and 10 MHz channels where there - * may not be an n:1 or 1:n mapping between frequencies and - * channel numbers. - */ - return channel; - } - /* * Use the channel determined through the payload channel number * instead of the RX channel reported by the driver. @@ -2050,14 +2137,12 @@ cfg80211_inform_single_bss_data(struct wiphy *wiphy, channel = data->channel; if (!channel) channel = cfg80211_get_bss_channel(wiphy, data->ie, data->ielen, - drv_data->chan, - drv_data->scan_width); + drv_data->chan); if (!channel) return NULL; memcpy(tmp.pub.bssid, data->bssid, ETH_ALEN); tmp.pub.channel = channel; - tmp.pub.scan_width = drv_data->scan_width; if (data->bss_source != BSS_SOURCE_STA_PROFILE) tmp.pub.signal = drv_data->signal; else @@ -2833,8 +2918,7 @@ cfg80211_inform_single_bss_frame_data(struct wiphy *wiphy, variable = ext->u.s1g_beacon.variable; } - channel = cfg80211_get_bss_channel(wiphy, variable, - ielen, data->chan, data->scan_width); + channel = cfg80211_get_bss_channel(wiphy, variable, ielen, data->chan); if (!channel) return NULL; @@ -2887,7 +2971,6 @@ cfg80211_inform_single_bss_frame_data(struct wiphy *wiphy, tmp.pub.beacon_interval = beacon_int; tmp.pub.capability = capability; tmp.pub.channel = channel; - tmp.pub.scan_width = data->scan_width; tmp.pub.signal = data->signal; tmp.ts_boottime = data->boottime_ns; tmp.parent_tsf = data->parent_tsf; @@ -3441,59 +3524,63 @@ ieee80211_bss(struct wiphy *wiphy, struct iw_request_info *info, cfg = (u8 *)ie + 2; memset(&iwe, 0, sizeof(iwe)); iwe.cmd = IWEVCUSTOM; - sprintf(buf, "Mesh Network Path Selection Protocol ID: " - "0x%02X", cfg[0]); - iwe.u.data.length = strlen(buf); + iwe.u.data.length = sprintf(buf, + "Mesh Network Path Selection Protocol ID: 0x%02X", + cfg[0]); current_ev = iwe_stream_add_point_check(info, current_ev, end_buf, &iwe, buf); if (IS_ERR(current_ev)) goto unlock; - sprintf(buf, "Path Selection Metric ID: 0x%02X", - cfg[1]); - iwe.u.data.length = strlen(buf); + iwe.u.data.length = sprintf(buf, + "Path Selection Metric ID: 0x%02X", + cfg[1]); current_ev = iwe_stream_add_point_check(info, current_ev, end_buf, &iwe, buf); if (IS_ERR(current_ev)) goto unlock; - sprintf(buf, "Congestion Control Mode ID: 0x%02X", - cfg[2]); - iwe.u.data.length = strlen(buf); + iwe.u.data.length = sprintf(buf, + "Congestion Control Mode ID: 0x%02X", + cfg[2]); current_ev = iwe_stream_add_point_check(info, current_ev, end_buf, &iwe, buf); if (IS_ERR(current_ev)) goto unlock; - sprintf(buf, "Synchronization ID: 0x%02X", cfg[3]); - iwe.u.data.length = strlen(buf); + iwe.u.data.length = sprintf(buf, + "Synchronization ID: 0x%02X", + cfg[3]); current_ev = iwe_stream_add_point_check(info, current_ev, end_buf, &iwe, buf); if (IS_ERR(current_ev)) goto unlock; - sprintf(buf, "Authentication ID: 0x%02X", cfg[4]); - iwe.u.data.length = strlen(buf); + iwe.u.data.length = sprintf(buf, + "Authentication ID: 0x%02X", + cfg[4]); current_ev = iwe_stream_add_point_check(info, current_ev, end_buf, &iwe, buf); if (IS_ERR(current_ev)) goto unlock; - sprintf(buf, 
"Formation Info: 0x%02X", cfg[5]); - iwe.u.data.length = strlen(buf); + iwe.u.data.length = sprintf(buf, + "Formation Info: 0x%02X", + cfg[5]); current_ev = iwe_stream_add_point_check(info, current_ev, end_buf, &iwe, buf); if (IS_ERR(current_ev)) goto unlock; - sprintf(buf, "Capabilities: 0x%02X", cfg[6]); - iwe.u.data.length = strlen(buf); + iwe.u.data.length = sprintf(buf, + "Capabilities: 0x%02X", + cfg[6]); current_ev = iwe_stream_add_point_check(info, current_ev, end_buf, @@ -3549,17 +3636,16 @@ ieee80211_bss(struct wiphy *wiphy, struct iw_request_info *info, memset(&iwe, 0, sizeof(iwe)); iwe.cmd = IWEVCUSTOM; - sprintf(buf, "tsf=%016llx", (unsigned long long)(ies->tsf)); - iwe.u.data.length = strlen(buf); + iwe.u.data.length = sprintf(buf, "tsf=%016llx", + (unsigned long long)(ies->tsf)); current_ev = iwe_stream_add_point_check(info, current_ev, end_buf, &iwe, buf); if (IS_ERR(current_ev)) goto unlock; memset(&iwe, 0, sizeof(iwe)); iwe.cmd = IWEVCUSTOM; - sprintf(buf, " Last beacon: %ums ago", - elapsed_jiffies_msecs(bss->ts)); - iwe.u.data.length = strlen(buf); + iwe.u.data.length = sprintf(buf, " Last beacon: %ums ago", + elapsed_jiffies_msecs(bss->ts)); current_ev = iwe_stream_add_point_check(info, current_ev, end_buf, &iwe, buf); if (IS_ERR(current_ev)) diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 9bba233b5a..acfe66da71 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -67,7 +67,7 @@ static int cfg80211_conn_scan(struct wireless_dev *wdev) struct cfg80211_scan_request *request; int n_channels, err; - ASSERT_WDEV_LOCK(wdev); + lockdep_assert_wiphy(wdev->wiphy); if (rdev->scan_req || rdev->scan_msg) return -EBUSY; @@ -151,7 +151,7 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev, struct cfg80211_assoc_request req = {}; int err; - ASSERT_WDEV_LOCK(wdev); + lockdep_assert_wiphy(wdev->wiphy); if (!wdev->conn) return 0; @@ -255,16 +255,13 @@ void cfg80211_conn_work(struct work_struct *work) if (!wdev->netdev) continue; - wdev_lock(wdev); - if (!netif_running(wdev->netdev)) { - wdev_unlock(wdev); + if (!netif_running(wdev->netdev)) continue; - } + if (!wdev->conn || - wdev->conn->state == CFG80211_CONN_CONNECTED) { - wdev_unlock(wdev); + wdev->conn->state == CFG80211_CONN_CONNECTED) continue; - } + if (wdev->conn->params.bssid) { memcpy(bssid_buf, wdev->conn->params.bssid, ETH_ALEN); bssid = bssid_buf; @@ -279,7 +276,6 @@ void cfg80211_conn_work(struct work_struct *work) cr.timeout_reason = treason; __cfg80211_connect_result(wdev->netdev, &cr, false); } - wdev_unlock(wdev); } wiphy_unlock(&rdev->wiphy); @@ -300,7 +296,7 @@ static struct cfg80211_bss *cfg80211_get_conn_bss(struct wireless_dev *wdev) struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_bss *bss; - ASSERT_WDEV_LOCK(wdev); + lockdep_assert_wiphy(wdev->wiphy); bss = cfg80211_get_bss(wdev->wiphy, wdev->conn->params.channel, wdev->conn->params.bssid, @@ -317,13 +313,13 @@ static struct cfg80211_bss *cfg80211_get_conn_bss(struct wireless_dev *wdev) return bss; } -static void __cfg80211_sme_scan_done(struct net_device *dev) +void cfg80211_sme_scan_done(struct net_device *dev) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_bss *bss; - ASSERT_WDEV_LOCK(wdev); + lockdep_assert_wiphy(wdev->wiphy); if (!wdev->conn) return; @@ -339,15 +335,6 @@ static void __cfg80211_sme_scan_done(struct net_device *dev) schedule_work(&rdev->conn_work); } -void cfg80211_sme_scan_done(struct 
net_device *dev) -{ - struct wireless_dev *wdev = dev->ieee80211_ptr; - - wdev_lock(wdev); - __cfg80211_sme_scan_done(dev); - wdev_unlock(wdev); -} - void cfg80211_sme_rx_auth(struct wireless_dev *wdev, const u8 *buf, size_t len) { struct wiphy *wiphy = wdev->wiphy; @@ -355,7 +342,7 @@ void cfg80211_sme_rx_auth(struct wireless_dev *wdev, const u8 *buf, size_t len) struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buf; u16 status_code = le16_to_cpu(mgmt->u.auth.status_code); - ASSERT_WDEV_LOCK(wdev); + lockdep_assert_wiphy(wdev->wiphy); if (!wdev->conn || wdev->conn->state == CFG80211_CONN_CONNECTED) return; @@ -702,14 +689,14 @@ static bool cfg80211_is_all_idle(void) * need not issue a disconnect hint and reset any info such * as chan dfs state, etc. */ - list_for_each_entry(rdev, &cfg80211_rdev_list, list) { + for_each_rdev(rdev) { + wiphy_lock(&rdev->wiphy); list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { - wdev_lock(wdev); if (wdev->conn || wdev->connected || cfg80211_beaconing_iface_active(wdev)) is_all_idle = false; - wdev_unlock(wdev); } + wiphy_unlock(&rdev->wiphy); } return is_all_idle; @@ -761,7 +748,7 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *connected_addr; bool bss_not_found = false; - ASSERT_WDEV_LOCK(wdev); + lockdep_assert_wiphy(wdev->wiphy); if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION && wdev->iftype != NL80211_IFTYPE_P2P_CLIENT)) @@ -1093,7 +1080,7 @@ void __cfg80211_roamed(struct wireless_dev *wdev, unsigned int link; const u8 *connected_addr; - ASSERT_WDEV_LOCK(wdev); + lockdep_assert_wiphy(wdev->wiphy); if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION && wdev->iftype != NL80211_IFTYPE_P2P_CLIENT)) @@ -1294,24 +1281,29 @@ out: } EXPORT_SYMBOL(cfg80211_roamed); -void __cfg80211_port_authorized(struct wireless_dev *wdev, const u8 *bssid, +void __cfg80211_port_authorized(struct wireless_dev *wdev, const u8 *peer_addr, const u8 *td_bitmap, u8 td_bitmap_len) { - ASSERT_WDEV_LOCK(wdev); + lockdep_assert_wiphy(wdev->wiphy); if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION && - wdev->iftype != NL80211_IFTYPE_P2P_CLIENT)) + wdev->iftype != NL80211_IFTYPE_P2P_CLIENT && + wdev->iftype != NL80211_IFTYPE_AP && + wdev->iftype != NL80211_IFTYPE_P2P_GO)) return; - if (WARN_ON(!wdev->connected) || - WARN_ON(!ether_addr_equal(wdev->u.client.connected_addr, bssid))) - return; + if (wdev->iftype == NL80211_IFTYPE_STATION || + wdev->iftype == NL80211_IFTYPE_P2P_CLIENT) { + if (WARN_ON(!wdev->connected) || + WARN_ON(!ether_addr_equal(wdev->u.client.connected_addr, peer_addr))) + return; + } nl80211_send_port_authorized(wiphy_to_rdev(wdev->wiphy), wdev->netdev, - bssid, td_bitmap, td_bitmap_len); + peer_addr, td_bitmap, td_bitmap_len); } -void cfg80211_port_authorized(struct net_device *dev, const u8 *bssid, +void cfg80211_port_authorized(struct net_device *dev, const u8 *peer_addr, const u8 *td_bitmap, u8 td_bitmap_len, gfp_t gfp) { struct wireless_dev *wdev = dev->ieee80211_ptr; @@ -1319,7 +1311,7 @@ void cfg80211_port_authorized(struct net_device *dev, const u8 *bssid, struct cfg80211_event *ev; unsigned long flags; - if (WARN_ON(!bssid)) + if (WARN_ON(!peer_addr)) return; ev = kzalloc(sizeof(*ev) + td_bitmap_len, gfp); @@ -1327,7 +1319,7 @@ void cfg80211_port_authorized(struct net_device *dev, const u8 *bssid, return; ev->type = EVENT_PORT_AUTHORIZED; - memcpy(ev->pa.bssid, bssid, ETH_ALEN); + memcpy(ev->pa.peer_addr, peer_addr, ETH_ALEN); ev->pa.td_bitmap = ((u8 *)ev) + sizeof(*ev); ev->pa.td_bitmap_len = td_bitmap_len; memcpy((void 
*)ev->pa.td_bitmap, td_bitmap, td_bitmap_len); @@ -1353,7 +1345,7 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie, union iwreq_data wrqu; #endif - ASSERT_WDEV_LOCK(wdev); + lockdep_assert_wiphy(wdev->wiphy); if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION && wdev->iftype != NL80211_IFTYPE_P2P_CLIENT)) @@ -1443,7 +1435,7 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev = dev->ieee80211_ptr; int err; - ASSERT_WDEV_LOCK(wdev); + lockdep_assert_wiphy(wdev->wiphy); /* * If we have an ssid_len, we're trying to connect or are @@ -1549,7 +1541,7 @@ int cfg80211_disconnect(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev = dev->ieee80211_ptr; int err = 0; - ASSERT_WDEV_LOCK(wdev); + lockdep_assert_wiphy(wdev->wiphy); kfree_sensitive(wdev->connect_keys); wdev->connect_keys = NULL; @@ -1585,19 +1577,18 @@ void cfg80211_autodisconnect_wk(struct work_struct *work) struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); wiphy_lock(wdev->wiphy); - wdev_lock(wdev); if (wdev->conn_owner_nlportid) { switch (wdev->iftype) { case NL80211_IFTYPE_ADHOC: - __cfg80211_leave_ibss(rdev, wdev->netdev, false); + cfg80211_leave_ibss(rdev, wdev->netdev, false); break; case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: - __cfg80211_stop_ap(rdev, wdev->netdev, -1, false); + cfg80211_stop_ap(rdev, wdev->netdev, -1, false); break; case NL80211_IFTYPE_MESH_POINT: - __cfg80211_leave_mesh(rdev, wdev->netdev); + cfg80211_leave_mesh(rdev, wdev->netdev); break; case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_P2P_CLIENT: @@ -1622,6 +1613,5 @@ void cfg80211_autodisconnect_wk(struct work_struct *work) } } - wdev_unlock(wdev); wiphy_unlock(wdev->wiphy); } diff --git a/net/wireless/tests/Makefile b/net/wireless/tests/Makefile new file mode 100644 index 0000000000..fa8e297bbc --- /dev/null +++ b/net/wireless/tests/Makefile @@ -0,0 +1,3 @@ +cfg80211-tests-y += module.o fragmentation.o + +obj-$(CONFIG_CFG80211_KUNIT_TEST) += cfg80211-tests.o diff --git a/net/wireless/tests/fragmentation.c b/net/wireless/tests/fragmentation.c new file mode 100644 index 0000000000..49a339ca88 --- /dev/null +++ b/net/wireless/tests/fragmentation.c @@ -0,0 +1,157 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * KUnit tests for element fragmentation + * + * Copyright (C) 2023 Intel Corporation + */ +#include <linux/ieee80211.h> +#include <net/cfg80211.h> +#include <kunit/test.h> + +static void defragment_0(struct kunit *test) +{ + ssize_t ret; + static const u8 input[] = { + [0] = WLAN_EID_EXTENSION, + [1] = 254, + [2] = WLAN_EID_EXT_EHT_MULTI_LINK, + [27] = 27, + [123] = 123, + [254 + 2] = WLAN_EID_FRAGMENT, + [254 + 3] = 7, + [254 + 3 + 7] = 0, /* for size */ + }; + u8 *data = kunit_kzalloc(test, sizeof(input), GFP_KERNEL); + + KUNIT_ASSERT_NOT_NULL(test, data); + + ret = cfg80211_defragment_element((void *)input, + input, sizeof(input), + data, sizeof(input), + WLAN_EID_FRAGMENT); + KUNIT_EXPECT_EQ(test, ret, 253); + KUNIT_EXPECT_MEMEQ(test, data, input + 3, 253); +} + +static void defragment_1(struct kunit *test) +{ + ssize_t ret; + static const u8 input[] = { + [0] = WLAN_EID_EXTENSION, + [1] = 255, + [2] = WLAN_EID_EXT_EHT_MULTI_LINK, + [27] = 27, + [123] = 123, + [255 + 2] = WLAN_EID_FRAGMENT, + [255 + 3] = 7, + [255 + 3 + 1] = 0xaa, + [255 + 3 + 8] = WLAN_EID_FRAGMENT, /* not used */ + [255 + 3 + 9] = 1, + [255 + 3 + 10] = 0, /* for size */ + }; + u8 *data = kunit_kzalloc(test, sizeof(input), GFP_KERNEL); + const struct element *elem; + int count = 
0; + + KUNIT_ASSERT_NOT_NULL(test, data); + + for_each_element(elem, input, sizeof(input)) + count++; + + /* check the elements are right */ + KUNIT_ASSERT_EQ(test, count, 3); + + ret = cfg80211_defragment_element((void *)input, + input, sizeof(input), + data, sizeof(input), + WLAN_EID_FRAGMENT); + /* this means the last fragment was not used */ + KUNIT_EXPECT_EQ(test, ret, 254 + 7); + KUNIT_EXPECT_MEMEQ(test, data, input + 3, 254); + KUNIT_EXPECT_MEMEQ(test, data + 254, input + 255 + 4, 7); +} + +static void defragment_2(struct kunit *test) +{ + ssize_t ret; + static const u8 input[] = { + [0] = WLAN_EID_EXTENSION, + [1] = 255, + [2] = WLAN_EID_EXT_EHT_MULTI_LINK, + [27] = 27, + [123] = 123, + + [257 + 0] = WLAN_EID_FRAGMENT, + [257 + 1] = 255, + [257 + 20] = 0xaa, + + [2 * 257 + 0] = WLAN_EID_FRAGMENT, + [2 * 257 + 1] = 1, + [2 * 257 + 2] = 0xcc, + [2 * 257 + 3] = WLAN_EID_FRAGMENT, /* not used */ + [2 * 257 + 4] = 1, + [2 * 257 + 5] = 0, /* for size */ + }; + u8 *data = kunit_kzalloc(test, sizeof(input), GFP_KERNEL); + const struct element *elem; + int count = 0; + + KUNIT_ASSERT_NOT_NULL(test, data); + + for_each_element(elem, input, sizeof(input)) + count++; + + /* check the elements are right */ + KUNIT_ASSERT_EQ(test, count, 4); + + ret = cfg80211_defragment_element((void *)input, + input, sizeof(input), + data, sizeof(input), + WLAN_EID_FRAGMENT); + /* this means the last fragment was not used */ + KUNIT_EXPECT_EQ(test, ret, 254 + 255 + 1); + KUNIT_EXPECT_MEMEQ(test, data, input + 3, 254); + KUNIT_EXPECT_MEMEQ(test, data + 254, input + 257 + 2, 255); + KUNIT_EXPECT_MEMEQ(test, data + 254 + 255, input + 2 * 257 + 2, 1); +} + +static void defragment_at_end(struct kunit *test) +{ + ssize_t ret; + static const u8 input[] = { + [0] = WLAN_EID_EXTENSION, + [1] = 255, + [2] = WLAN_EID_EXT_EHT_MULTI_LINK, + [27] = 27, + [123] = 123, + [255 + 2] = WLAN_EID_FRAGMENT, + [255 + 3] = 7, + [255 + 3 + 7] = 0, /* for size */ + }; + u8 *data = kunit_kzalloc(test, sizeof(input), GFP_KERNEL); + + KUNIT_ASSERT_NOT_NULL(test, data); + + ret = cfg80211_defragment_element((void *)input, + input, sizeof(input), + data, sizeof(input), + WLAN_EID_FRAGMENT); + KUNIT_EXPECT_EQ(test, ret, 254 + 7); + KUNIT_EXPECT_MEMEQ(test, data, input + 3, 254); + KUNIT_EXPECT_MEMEQ(test, data + 254, input + 255 + 4, 7); +} + +static struct kunit_case element_fragmentation_test_cases[] = { + KUNIT_CASE(defragment_0), + KUNIT_CASE(defragment_1), + KUNIT_CASE(defragment_2), + KUNIT_CASE(defragment_at_end), + {} +}; + +static struct kunit_suite element_fragmentation = { + .name = "cfg80211-element-defragmentation", + .test_cases = element_fragmentation_test_cases, +}; + +kunit_test_suite(element_fragmentation); diff --git a/net/wireless/tests/module.c b/net/wireless/tests/module.c new file mode 100644 index 0000000000..9ff7b2c123 --- /dev/null +++ b/net/wireless/tests/module.c @@ -0,0 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * This is just module boilerplate for the cfg80211 kunit module. 
+ * + * Copyright (C) 2023 Intel Corporation + */ +#include <linux/module.h> + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("tests for cfg80211"); diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 617c0d0dfa..30cd1bd58a 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -615,49 +615,47 @@ TRACE_EVENT(rdev_start_ap, TRACE_EVENT(rdev_change_beacon, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, - struct cfg80211_beacon_data *info), + struct cfg80211_ap_update *info), TP_ARGS(wiphy, netdev, info), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY __field(int, link_id) - __dynamic_array(u8, head, info ? info->head_len : 0) - __dynamic_array(u8, tail, info ? info->tail_len : 0) - __dynamic_array(u8, beacon_ies, info ? info->beacon_ies_len : 0) - __dynamic_array(u8, proberesp_ies, - info ? info->proberesp_ies_len : 0) - __dynamic_array(u8, assocresp_ies, - info ? info->assocresp_ies_len : 0) - __dynamic_array(u8, probe_resp, info ? info->probe_resp_len : 0) - ), - TP_fast_assign( - WIPHY_ASSIGN; - NETDEV_ASSIGN; - if (info) { - __entry->link_id = info->link_id; - if (info->head) - memcpy(__get_dynamic_array(head), info->head, - info->head_len); - if (info->tail) - memcpy(__get_dynamic_array(tail), info->tail, - info->tail_len); - if (info->beacon_ies) - memcpy(__get_dynamic_array(beacon_ies), - info->beacon_ies, info->beacon_ies_len); - if (info->proberesp_ies) - memcpy(__get_dynamic_array(proberesp_ies), - info->proberesp_ies, - info->proberesp_ies_len); - if (info->assocresp_ies) - memcpy(__get_dynamic_array(assocresp_ies), - info->assocresp_ies, - info->assocresp_ies_len); - if (info->probe_resp) - memcpy(__get_dynamic_array(probe_resp), - info->probe_resp, info->probe_resp_len); - } else { - __entry->link_id = -1; - } + __dynamic_array(u8, head, info->beacon.head_len) + __dynamic_array(u8, tail, info->beacon.tail_len) + __dynamic_array(u8, beacon_ies, info->beacon.beacon_ies_len) + __dynamic_array(u8, proberesp_ies, info->beacon.proberesp_ies_len) + __dynamic_array(u8, assocresp_ies, info->beacon.assocresp_ies_len) + __dynamic_array(u8, probe_resp, info->beacon.probe_resp_len) + ), + TP_fast_assign( + WIPHY_ASSIGN; + NETDEV_ASSIGN; + __entry->link_id = info->beacon.link_id; + if (info->beacon.head) + memcpy(__get_dynamic_array(head), + info->beacon.head, + info->beacon.head_len); + if (info->beacon.tail) + memcpy(__get_dynamic_array(tail), + info->beacon.tail, + info->beacon.tail_len); + if (info->beacon.beacon_ies) + memcpy(__get_dynamic_array(beacon_ies), + info->beacon.beacon_ies, + info->beacon.beacon_ies_len); + if (info->beacon.proberesp_ies) + memcpy(__get_dynamic_array(proberesp_ies), + info->beacon.proberesp_ies, + info->beacon.proberesp_ies_len); + if (info->beacon.assocresp_ies) + memcpy(__get_dynamic_array(assocresp_ies), + info->beacon.assocresp_ies, + info->beacon.assocresp_ies_len); + if (info->beacon.probe_resp) + memcpy(__get_dynamic_array(probe_resp), + info->beacon.probe_resp, + info->beacon.probe_resp_len); ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", link_id:%d", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->link_id) @@ -1323,16 +1321,18 @@ TRACE_EVENT(rdev_deauth, NETDEV_ENTRY MAC_ENTRY(bssid) __field(u16, reason_code) + __field(bool, local_state_change) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(bssid, req->bssid); __entry->reason_code = req->reason_code; + __entry->local_state_change = req->local_state_change; ), - TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", bssid: %pM, reason: %u", + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", 
bssid: %pM, reason: %u, local_state_change:%d", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->bssid, - __entry->reason_code) + __entry->reason_code, __entry->local_state_change) ); TRACE_EVENT(rdev_disassoc, @@ -2928,7 +2928,7 @@ DEFINE_EVENT(netdev_evt_only, cfg80211_send_rx_auth, TRACE_EVENT(cfg80211_send_rx_assoc, TP_PROTO(struct net_device *netdev, - struct cfg80211_rx_assoc_resp *data), + struct cfg80211_rx_assoc_resp_data *data), TP_ARGS(netdev, data), TP_STRUCT__entry( NETDEV_ENTRY @@ -3590,7 +3590,6 @@ TRACE_EVENT(cfg80211_inform_bss_frame, TP_STRUCT__entry( WIPHY_ENTRY CHAN_ENTRY - __field(enum nl80211_bss_scan_width, scan_width) __dynamic_array(u8, mgmt, len) __field(s32, signal) __field(u64, ts_boottime) @@ -3600,7 +3599,6 @@ TRACE_EVENT(cfg80211_inform_bss_frame, TP_fast_assign( WIPHY_ASSIGN; CHAN_ASSIGN(data->chan); - __entry->scan_width = data->scan_width; if (mgmt) memcpy(__get_dynamic_array(mgmt), mgmt, len); __entry->signal = data->signal; @@ -3609,8 +3607,8 @@ TRACE_EVENT(cfg80211_inform_bss_frame, MAC_ASSIGN(parent_bssid, data->parent_bssid); ), TP_printk(WIPHY_PR_FMT ", " CHAN_PR_FMT - "(scan_width: %d) signal: %d, tsb:%llu, detect_tsf:%llu, tsf_bssid: %pM", - WIPHY_PR_ARG, CHAN_PR_ARG, __entry->scan_width, + "signal: %d, tsb:%llu, detect_tsf:%llu, tsf_bssid: %pM", + WIPHY_PR_ARG, CHAN_PR_ARG, __entry->signal, (unsigned long long)__entry->ts_boottime, (unsigned long long)__entry->parent_tsf, __entry->parent_bssid) diff --git a/net/wireless/util.c b/net/wireless/util.c index 1783ab9d57..626b858b4b 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -43,8 +43,7 @@ ieee80211_get_response_rate(struct ieee80211_supported_band *sband, } EXPORT_SYMBOL(ieee80211_get_response_rate); -u32 ieee80211_mandatory_rates(struct ieee80211_supported_band *sband, - enum nl80211_bss_scan_width scan_width) +u32 ieee80211_mandatory_rates(struct ieee80211_supported_band *sband) { struct ieee80211_rate *bitrates; u32 mandatory_rates = 0; @@ -54,15 +53,10 @@ u32 ieee80211_mandatory_rates(struct ieee80211_supported_band *sband, if (WARN_ON(!sband)) return 1; - if (sband->band == NL80211_BAND_2GHZ) { - if (scan_width == NL80211_BSS_CHAN_WIDTH_5 || - scan_width == NL80211_BSS_CHAN_WIDTH_10) - mandatory_flag = IEEE80211_RATE_MANDATORY_G; - else - mandatory_flag = IEEE80211_RATE_MANDATORY_B; - } else { + if (sband->band == NL80211_BAND_2GHZ) + mandatory_flag = IEEE80211_RATE_MANDATORY_B; + else mandatory_flag = IEEE80211_RATE_MANDATORY_A; - } bitrates = sband->bitrates; for (i = 0; i < sband->n_bitrates; i++) @@ -1044,7 +1038,6 @@ void cfg80211_process_wdev_events(struct wireless_dev *wdev) list_del(&ev->list); spin_unlock_irqrestore(&wdev->event_lock, flags); - wdev_lock(wdev); switch (ev->type) { case EVENT_CONNECT_RESULT: __cfg80211_connect_result( @@ -1066,15 +1059,14 @@ void cfg80211_process_wdev_events(struct wireless_dev *wdev) ev->ij.channel); break; case EVENT_STOPPED: - __cfg80211_leave(wiphy_to_rdev(wdev->wiphy), wdev); + cfg80211_leave(wiphy_to_rdev(wdev->wiphy), wdev); break; case EVENT_PORT_AUTHORIZED: - __cfg80211_port_authorized(wdev, ev->pa.bssid, + __cfg80211_port_authorized(wdev, ev->pa.peer_addr, ev->pa.td_bitmap, ev->pa.td_bitmap_len); break; } - wdev_unlock(wdev); kfree(ev); @@ -1124,9 +1116,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, return -EBUSY; dev->ieee80211_ptr->use_4addr = false; - wdev_lock(dev->ieee80211_ptr); rdev_set_qos_map(rdev, dev, NULL); - wdev_unlock(dev->ieee80211_ptr); switch (otype) { case NL80211_IFTYPE_AP: @@ -1138,10 +1128,8 
@@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, break; case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_P2P_CLIENT: - wdev_lock(dev->ieee80211_ptr); cfg80211_disconnect(rdev, dev, WLAN_REASON_DEAUTH_LEAVING, true); - wdev_unlock(dev->ieee80211_ptr); break; case NL80211_IFTYPE_MESH_POINT: /* mesh should be handled? */ @@ -1972,6 +1960,35 @@ size_t ieee80211_ie_split_ric(const u8 *ies, size_t ielen, } EXPORT_SYMBOL(ieee80211_ie_split_ric); +void ieee80211_fragment_element(struct sk_buff *skb, u8 *len_pos, u8 frag_id) +{ + unsigned int elem_len; + + if (!len_pos) + return; + + elem_len = skb->data + skb->len - len_pos - 1; + + while (elem_len > 255) { + /* this one is 255 */ + *len_pos = 255; + /* remaining data gets smaller */ + elem_len -= 255; + /* make space for the fragment ID/len in SKB */ + skb_put(skb, 2); + /* shift back the remaining data to place fragment ID/len */ + memmove(len_pos + 255 + 3, len_pos + 255 + 1, elem_len); + /* place the fragment ID */ + len_pos += 255 + 1; + *len_pos = frag_id; + /* and point to fragment length to update later */ + len_pos++; + } + + *len_pos = elem_len; +} +EXPORT_SYMBOL(ieee80211_fragment_element); + bool ieee80211_operating_class_to_band(u8 operating_class, enum nl80211_band *band) { @@ -1982,6 +1999,7 @@ bool ieee80211_operating_class_to_band(u8 operating_class, *band = NL80211_BAND_5GHZ; return true; case 131 ... 135: + case 137: *band = NL80211_BAND_6GHZ; return true; case 81: @@ -2647,12 +2665,12 @@ void cfg80211_remove_link(struct wireless_dev *wdev, unsigned int link_id) { struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); - ASSERT_WDEV_LOCK(wdev); + lockdep_assert_wiphy(wdev->wiphy); switch (wdev->iftype) { case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: - __cfg80211_stop_ap(rdev, wdev->netdev, link_id, true); + cfg80211_stop_ap(rdev, wdev->netdev, link_id, true); break; default: /* per-link not relevant */ @@ -2677,12 +2695,10 @@ void cfg80211_remove_links(struct wireless_dev *wdev) if (wdev->iftype != NL80211_IFTYPE_AP) return; - wdev_lock(wdev); if (wdev->valid_links) { for_each_valid_link(wdev, link_id) cfg80211_remove_link(wdev, link_id); } - wdev_unlock(wdev); } int cfg80211_remove_virtual_intf(struct cfg80211_registered_device *rdev, diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index e3acfac743..2371069f3c 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -7,7 +7,7 @@ * we directly assign the wireless handlers of wireless interfaces. * * Copyright 2008-2009 Johannes Berg <johannes@sipsolutions.net> - * Copyright (C) 2019-2022 Intel Corporation + * Copyright (C) 2019-2023 Intel Corporation */ #include <linux/export.h> @@ -227,7 +227,7 @@ EXPORT_WEXT_HANDLER(cfg80211_wext_giwrange); * cfg80211_wext_freq - get wext frequency for non-"auto" * @freq: the wext freq encoding * - * Returns a frequency, or a negative error code, or 0 for auto. + * Returns: a frequency, or a negative error code, or 0 for auto. 
*/ int cfg80211_wext_freq(struct iw_freq *freq) { @@ -415,10 +415,10 @@ int cfg80211_wext_giwretry(struct net_device *dev, } EXPORT_WEXT_HANDLER(cfg80211_wext_giwretry); -static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev, - struct net_device *dev, bool pairwise, - const u8 *addr, bool remove, bool tx_key, - int idx, struct key_params *params) +static int cfg80211_set_encryption(struct cfg80211_registered_device *rdev, + struct net_device *dev, bool pairwise, + const u8 *addr, bool remove, bool tx_key, + int idx, struct key_params *params) { struct wireless_dev *wdev = dev->ieee80211_ptr; int err, i; @@ -471,7 +471,7 @@ static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev, */ if (idx == wdev->wext.default_key && wdev->iftype == NL80211_IFTYPE_ADHOC) { - __cfg80211_leave_ibss(rdev, wdev->netdev, true); + cfg80211_leave_ibss(rdev, wdev->netdev, true); rejoin = true; } @@ -552,7 +552,7 @@ static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev, */ if (wdev->iftype == NL80211_IFTYPE_ADHOC && wdev->wext.default_key == -1) { - __cfg80211_leave_ibss(rdev, wdev->netdev, true); + cfg80211_leave_ibss(rdev, wdev->netdev, true); rejoin = true; } err = rdev_set_default_key(rdev, dev, -1, idx, true, @@ -580,21 +580,6 @@ static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev, return 0; } -static int cfg80211_set_encryption(struct cfg80211_registered_device *rdev, - struct net_device *dev, bool pairwise, - const u8 *addr, bool remove, bool tx_key, - int idx, struct key_params *params) -{ - int err; - - wdev_lock(dev->ieee80211_ptr); - err = __cfg80211_set_encryption(rdev, dev, pairwise, addr, - remove, tx_key, idx, params); - wdev_unlock(dev->ieee80211_ptr); - - return err; -} - static int cfg80211_wext_siwencode(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *keybuf) @@ -639,7 +624,6 @@ static int cfg80211_wext_siwencode(struct net_device *dev, else if (erq->length == 0) { /* No key data - just set the default TX key index */ err = 0; - wdev_lock(wdev); if (wdev->connected || (wdev->iftype == NL80211_IFTYPE_ADHOC && wdev->u.ibss.current_bss)) @@ -647,7 +631,6 @@ static int cfg80211_wext_siwencode(struct net_device *dev, true); if (!err) wdev->wext.default_key = idx; - wdev_unlock(wdev); goto out; } @@ -697,12 +680,8 @@ static int cfg80211_wext_siwencodeext(struct net_device *dev, !rdev->ops->set_default_key) return -EOPNOTSUPP; - wdev_lock(wdev); - if (wdev->valid_links) { - wdev_unlock(wdev); + if (wdev->valid_links) return -EOPNOTSUPP; - } - wdev_unlock(wdev); switch (ext->alg) { case IW_ENCODE_ALG_NONE: @@ -1341,13 +1320,11 @@ static int cfg80211_wext_giwrate(struct net_device *dev, return -EOPNOTSUPP; err = 0; - wdev_lock(wdev); if (!wdev->valid_links && wdev->links[0].client.current_bss) memcpy(addr, wdev->links[0].client.current_bss->pub.bssid, ETH_ALEN); else err = -EOPNOTSUPP; - wdev_unlock(wdev); if (err) return err; @@ -1387,17 +1364,15 @@ static struct iw_statistics *cfg80211_wireless_stats(struct net_device *dev) return NULL; /* Grab BSSID of current BSS, if any */ - wdev_lock(wdev); + wiphy_lock(&rdev->wiphy); if (wdev->valid_links || !wdev->links[0].client.current_bss) { - wdev_unlock(wdev); + wiphy_unlock(&rdev->wiphy); return NULL; } memcpy(bssid, wdev->links[0].client.current_bss->pub.bssid, ETH_ALEN); - wdev_unlock(wdev); memset(&sinfo, 0, sizeof(sinfo)); - wiphy_lock(&rdev->wiphy); ret = rdev_get_station(rdev, dev, bssid, &sinfo); wiphy_unlock(&rdev->wiphy); 
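The wext-compat conversion above is one instance of a pattern applied across these cfg80211 files: the per-wdev mutex (wdev_lock()/wdev_unlock()) is retired in favour of the single per-wiphy lock, so locked wrapper functions such as the old cfg80211_set_encryption() are deleted and their lock-free "__"-prefixed helpers are renamed to the plain name. A minimal userspace sketch of that refactoring pattern, with hypothetical names (subsys, obj, do_set_key) standing in for the kernel structures, not actual cfg80211 API:

#include <pthread.h>

struct subsys { pthread_mutex_t lock; };             /* models the wiphy mutex  */
struct obj    { struct subsys *parent; int key; };   /* models the wireless_dev */

/* Was the "__"-prefixed helper: requires the lock but takes none itself. */
static int do_set_key(struct obj *o, int key)
{
	o->key = key;    /* caller must hold o->parent->lock */
	return 0;
}

/* New pattern: the entry point takes the one subsystem-wide lock, so the
 * old per-object lock/unlock wrapper around the helper can be deleted. */
int entry_point(struct obj *o, int key)
{
	int err;

	pthread_mutex_lock(&o->parent->lock);
	err = do_set_key(o, key);
	pthread_mutex_unlock(&o->parent->lock);
	return err;
}

The trade-off is coarser granularity (one lock covering all interfaces of a wiphy) in exchange for a simpler lock hierarchy that can be asserted with lockdep_assert_wiphy(), as the wext-sme.c hunks below do.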
diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c index f3eaa33886..8edd9ada69 100644 --- a/net/wireless/wext-sme.c +++ b/net/wireless/wext-sme.c @@ -23,7 +23,7 @@ int cfg80211_mgd_wext_connect(struct cfg80211_registered_device *rdev, int err, i; ASSERT_RTNL(); - ASSERT_WDEV_LOCK(wdev); + lockdep_assert_wiphy(wdev->wiphy); if (!netif_running(wdev->netdev)) return 0; @@ -87,15 +87,11 @@ int cfg80211_mgd_wext_siwfreq(struct net_device *dev, return -EINVAL; } - wdev_lock(wdev); - if (wdev->conn) { bool event = true; - if (wdev->wext.connect.channel == chan) { - err = 0; - goto out; - } + if (wdev->wext.connect.channel == chan) + return 0; /* if SSID set, we'll try right again, avoid event */ if (wdev->wext.connect.ssid_len) @@ -103,14 +99,11 @@ int cfg80211_mgd_wext_siwfreq(struct net_device *dev, err = cfg80211_disconnect(rdev, dev, WLAN_REASON_DEAUTH_LEAVING, event); if (err) - goto out; + return err; } wdev->wext.connect.channel = chan; - err = cfg80211_mgd_wext_connect(rdev, wdev); - out: - wdev_unlock(wdev); - return err; + return cfg80211_mgd_wext_connect(rdev, wdev); } int cfg80211_mgd_wext_giwfreq(struct net_device *dev, @@ -127,12 +120,10 @@ int cfg80211_mgd_wext_giwfreq(struct net_device *dev, if (wdev->valid_links) return -EOPNOTSUPP; - wdev_lock(wdev); if (wdev->links[0].client.current_bss) chan = wdev->links[0].client.current_bss->pub.channel; else if (wdev->wext.connect.channel) chan = wdev->wext.connect.channel; - wdev_unlock(wdev); if (chan) { freq->m = chan->center_freq; @@ -164,17 +155,13 @@ int cfg80211_mgd_wext_siwessid(struct net_device *dev, if (len > 0 && ssid[len - 1] == '\0') len--; - wdev_lock(wdev); - - err = 0; - if (wdev->conn) { bool event = true; if (wdev->wext.connect.ssid && len && len == wdev->wext.connect.ssid_len && memcmp(wdev->wext.connect.ssid, ssid, len) == 0) - goto out; + return 0; /* if SSID set now, we'll try to connect, avoid event */ if (len) @@ -182,7 +169,7 @@ int cfg80211_mgd_wext_siwessid(struct net_device *dev, err = cfg80211_disconnect(rdev, dev, WLAN_REASON_DEAUTH_LEAVING, event); if (err) - goto out; + return err; } wdev->wext.prev_bssid_valid = false; @@ -194,10 +181,7 @@ int cfg80211_mgd_wext_siwessid(struct net_device *dev, wdev->wext.connect.crypto.control_port_ethertype = cpu_to_be16(ETH_P_PAE); - err = cfg80211_mgd_wext_connect(rdev, wdev); - out: - wdev_unlock(wdev); - return err; + return cfg80211_mgd_wext_connect(rdev, wdev); } int cfg80211_mgd_wext_giwessid(struct net_device *dev, @@ -216,7 +200,6 @@ int cfg80211_mgd_wext_giwessid(struct net_device *dev, data->flags = 0; - wdev_lock(wdev); if (wdev->links[0].client.current_bss) { const struct element *ssid_elem; @@ -238,7 +221,6 @@ int cfg80211_mgd_wext_giwessid(struct net_device *dev, data->length = wdev->wext.connect.ssid_len; memcpy(ssid, wdev->wext.connect.ssid, data->length); } - wdev_unlock(wdev); return ret; } @@ -263,23 +245,20 @@ int cfg80211_mgd_wext_siwap(struct net_device *dev, if (is_zero_ether_addr(bssid) || is_broadcast_ether_addr(bssid)) bssid = NULL; - wdev_lock(wdev); - if (wdev->conn) { - err = 0; /* both automatic */ if (!bssid && !wdev->wext.connect.bssid) - goto out; + return 0; /* fixed already - and no change */ if (wdev->wext.connect.bssid && bssid && ether_addr_equal(bssid, wdev->wext.connect.bssid)) - goto out; + return 0; err = cfg80211_disconnect(rdev, dev, WLAN_REASON_DEAUTH_LEAVING, false); if (err) - goto out; + return err; } if (bssid) { @@ -288,10 +267,7 @@ int cfg80211_mgd_wext_siwap(struct net_device *dev, } else 
wdev->wext.connect.bssid = NULL; - err = cfg80211_mgd_wext_connect(rdev, wdev); - out: - wdev_unlock(wdev); - return err; + return cfg80211_mgd_wext_connect(rdev, wdev); } int cfg80211_mgd_wext_giwap(struct net_device *dev, @@ -306,18 +282,15 @@ int cfg80211_mgd_wext_giwap(struct net_device *dev, ap_addr->sa_family = ARPHRD_ETHER; - wdev_lock(wdev); - if (wdev->valid_links) { - wdev_unlock(wdev); + if (wdev->valid_links) return -EOPNOTSUPP; - } + if (wdev->links[0].client.current_bss) memcpy(ap_addr->sa_data, wdev->links[0].client.current_bss->pub.bssid, ETH_ALEN); else eth_zero_addr(ap_addr->sa_data); - wdev_unlock(wdev); return 0; } @@ -339,7 +312,6 @@ int cfg80211_wext_siwgenie(struct net_device *dev, ie = NULL; wiphy_lock(wdev->wiphy); - wdev_lock(wdev); /* no change */ err = 0; @@ -370,7 +342,6 @@ int cfg80211_wext_siwgenie(struct net_device *dev, /* userspace better not think we'll reconnect */ err = 0; out: - wdev_unlock(wdev); wiphy_unlock(wdev->wiphy); return err; } @@ -396,7 +367,6 @@ int cfg80211_wext_siwmlme(struct net_device *dev, return -EINVAL; wiphy_lock(&rdev->wiphy); - wdev_lock(wdev); switch (mlme->cmd) { case IW_MLME_DEAUTH: case IW_MLME_DISASSOC: @@ -406,7 +376,6 @@ int cfg80211_wext_siwmlme(struct net_device *dev, err = -EOPNOTSUPP; break; } - wdev_unlock(wdev); wiphy_unlock(&rdev->wiphy); return err; diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 0fb5143bec..aad8ffeaee 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -598,7 +598,7 @@ static struct sock *x25_make_new(struct sock *osk) x25 = x25_sk(sk); sk->sk_type = osk->sk_type; - sk->sk_priority = osk->sk_priority; + sk->sk_priority = READ_ONCE(osk->sk_priority); sk->sk_protocol = osk->sk_protocol; sk->sk_rcvbuf = osk->sk_rcvbuf; sk->sk_sndbuf = osk->sk_sndbuf; diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index d849dc04a3..da1582de6e 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -33,6 +33,7 @@ #include "xsk.h" #define TX_BATCH_SIZE 32 +#define MAX_PER_SOCKET_BUDGET (TX_BATCH_SIZE) static DEFINE_PER_CPU(struct list_head, xskmap_flush_list); @@ -395,6 +396,16 @@ void __xsk_map_flush(void) } } +#ifdef CONFIG_DEBUG_NET +bool xsk_map_check_flush(void) +{ + if (list_empty(this_cpu_ptr(&xskmap_flush_list))) + return false; + __xsk_map_flush(); + return true; +} +#endif + void xsk_tx_completed(struct xsk_buff_pool *pool, u32 nb_entries) { xskq_prod_submit_n(pool->cq, nb_entries); @@ -417,16 +428,25 @@ EXPORT_SYMBOL(xsk_tx_release); bool xsk_tx_peek_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc) { + bool budget_exhausted = false; struct xdp_sock *xs; rcu_read_lock(); +again: list_for_each_entry_rcu(xs, &pool->xsk_tx_list, tx_list) { + if (xs->tx_budget_spent >= MAX_PER_SOCKET_BUDGET) { + budget_exhausted = true; + continue; + } + if (!xskq_cons_peek_desc(xs->tx, desc, pool)) { if (xskq_has_descs(xs->tx)) xskq_cons_release(xs->tx); continue; } + xs->tx_budget_spent++; + /* This is the backpressure mechanism for the Tx path. * Reserve space in the completion queue and only proceed * if there is space in it. 
This avoids having to implement @@ -440,6 +460,14 @@ bool xsk_tx_peek_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc) return true; } + if (budget_exhausted) { + list_for_each_entry_rcu(xs, &pool->xsk_tx_list, tx_list) + xs->tx_budget_spent = 0; + + budget_exhausted = false; + goto again; + } + out: rcu_read_unlock(); return false; @@ -683,12 +711,13 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs, memcpy(vaddr, buffer, len); kunmap_local(vaddr); - skb_add_rx_frag(skb, nr_frags, page, 0, len, 0); + skb_add_rx_frag(skb, nr_frags, page, 0, len, PAGE_SIZE); + refcount_add(PAGE_SIZE, &xs->sk.sk_wmem_alloc); } } skb->dev = dev; - skb->priority = xs->sk.sk_priority; + skb->priority = READ_ONCE(xs->sk.sk_priority); skb->mark = READ_ONCE(xs->sk.sk_mark); skb->destructor = xsk_destruct_skb; xsk_set_destructor_arg(skb); diff --git a/net/xdp/xsk_diag.c b/net/xdp/xsk_diag.c index 22b36c8143..9f89553672 100644 --- a/net/xdp/xsk_diag.c +++ b/net/xdp/xsk_diag.c @@ -211,4 +211,5 @@ static void __exit xsk_diag_exit(void) module_init(xsk_diag_init); module_exit(xsk_diag_exit); MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("XDP socket monitoring via SOCK_DIAG"); MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, AF_XDP); diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig index 3adf31a83a..d7b16f2c23 100644 --- a/net/xfrm/Kconfig +++ b/net/xfrm/Kconfig @@ -15,6 +15,7 @@ config XFRM_ALGO tristate select XFRM select CRYPTO + select CRYPTO_AEAD select CRYPTO_HASH select CRYPTO_SKCIPHER diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c index 094734fbec..41533c6314 100644 --- a/net/xfrm/xfrm_algo.c +++ b/net/xfrm/xfrm_algo.c @@ -5,6 +5,7 @@ * Copyright (c) 2002 James Morris <jmorris@intercode.com.au> */ +#include <crypto/aead.h> #include <crypto/hash.h> #include <crypto/skcipher.h> #include <linux/module.h> @@ -644,38 +645,33 @@ static inline int calg_entries(void) } struct xfrm_algo_list { + int (*find)(const char *name, u32 type, u32 mask); struct xfrm_algo_desc *algs; int entries; - u32 type; - u32 mask; }; static const struct xfrm_algo_list xfrm_aead_list = { + .find = crypto_has_aead, .algs = aead_list, .entries = ARRAY_SIZE(aead_list), - .type = CRYPTO_ALG_TYPE_AEAD, - .mask = CRYPTO_ALG_TYPE_MASK, }; static const struct xfrm_algo_list xfrm_aalg_list = { + .find = crypto_has_ahash, .algs = aalg_list, .entries = ARRAY_SIZE(aalg_list), - .type = CRYPTO_ALG_TYPE_HASH, - .mask = CRYPTO_ALG_TYPE_HASH_MASK, }; static const struct xfrm_algo_list xfrm_ealg_list = { + .find = crypto_has_skcipher, .algs = ealg_list, .entries = ARRAY_SIZE(ealg_list), - .type = CRYPTO_ALG_TYPE_SKCIPHER, - .mask = CRYPTO_ALG_TYPE_MASK, }; static const struct xfrm_algo_list xfrm_calg_list = { + .find = crypto_has_comp, .algs = calg_list, .entries = ARRAY_SIZE(calg_list), - .type = CRYPTO_ALG_TYPE_COMPRESS, - .mask = CRYPTO_ALG_TYPE_MASK, }; static struct xfrm_algo_desc *xfrm_find_algo( @@ -696,8 +692,7 @@ static struct xfrm_algo_desc *xfrm_find_algo( if (!probe) break; - status = crypto_has_alg(list[i].name, algo_list->type, - algo_list->mask); + status = algo_list->find(list[i].name, 0, 0); if (!status) break; diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index d5ee96789d..bd4ce21d76 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -462,7 +462,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) struct xfrm_offload *xo = xfrm_offload(skb); struct sec_path *sp; - if (encap_type < 0) { + if (encap_type < 0 || (xo && xo->flags & XFRM_GRO)) { x = 
xfrm_input_state(skb); if (unlikely(x->km.state != XFRM_STATE_VALID)) { @@ -485,9 +485,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) seq = XFRM_SKB_CB(skb)->seq.input.low; goto resume; } - - /* encap_type < -1 indicates a GRO call. */ - encap_type = 0; + /* GRO call */ seq = XFRM_SPI_SKB_CB(skb)->seq; if (xo && (xo->flags & CRYPTO_DONE)) { diff --git a/net/xfrm/xfrm_interface_bpf.c b/net/xfrm/xfrm_interface_bpf.c index d74f3fd20f..7d5e920141 100644 --- a/net/xfrm/xfrm_interface_bpf.c +++ b/net/xfrm/xfrm_interface_bpf.c @@ -27,9 +27,7 @@ struct bpf_xfrm_info { int link; }; -__diag_push(); -__diag_ignore_all("-Wmissing-prototypes", - "Global functions as their definitions will be in xfrm_interface BTF"); +__bpf_kfunc_start_defs(); /* bpf_skb_get_xfrm_info - Get XFRM metadata * @@ -93,7 +91,7 @@ __bpf_kfunc int bpf_skb_set_xfrm_info(struct __sk_buff *skb_ctx, const struct bp return 0; } -__diag_pop() +__bpf_kfunc_end_defs(); BTF_SET8_START(xfrm_ifc_kfunc_set) BTF_ID_FLAGS(func, bpf_skb_get_xfrm_info) diff --git a/net/xfrm/xfrm_interface_core.c b/net/xfrm/xfrm_interface_core.c index e21cc71095..21d50d75c2 100644 --- a/net/xfrm/xfrm_interface_core.c +++ b/net/xfrm/xfrm_interface_core.c @@ -536,7 +536,7 @@ static netdev_tx_t xfrmi_xmit(struct sk_buff *skb, struct net_device *dev) switch (skb->protocol) { case htons(ETH_P_IPV6): memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); - xfrm_decode_session(skb, &fl, AF_INET6); + xfrm_decode_session(dev_net(dev), skb, &fl, AF_INET6); if (!dst) { fl.u.ip6.flowi6_oif = dev->ifindex; fl.u.ip6.flowi6_flags |= FLOWI_FLAG_ANYSRC; @@ -551,7 +551,7 @@ static netdev_tx_t xfrmi_xmit(struct sk_buff *skb, struct net_device *dev) break; case htons(ETH_P_IP): memset(IPCB(skb), 0, sizeof(*IPCB(skb))); - xfrm_decode_session(skb, &fl, AF_INET); + xfrm_decode_session(dev_net(dev), skb, &fl, AF_INET); if (!dst) { struct rtable *rt; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index d24b4d4f62..c13dc3ef79 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -149,6 +149,21 @@ struct xfrm_pol_inexact_candidates { struct hlist_head *res[XFRM_POL_CAND_MAX]; }; +struct xfrm_flow_keys { + struct flow_dissector_key_basic basic; + struct flow_dissector_key_control control; + union { + struct flow_dissector_key_ipv4_addrs ipv4; + struct flow_dissector_key_ipv6_addrs ipv6; + } addrs; + struct flow_dissector_key_ip ip; + struct flow_dissector_key_icmp icmp; + struct flow_dissector_key_ports ports; + struct flow_dissector_key_keyid gre; +}; + +static struct flow_dissector xfrm_session_dissector __ro_after_init; + static DEFINE_SPINLOCK(xfrm_if_cb_lock); static struct xfrm_if_cb const __rcu *xfrm_if_cb __read_mostly; @@ -2566,7 +2581,7 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family) default: BUG(); } - xdst = dst_alloc(dst_ops, NULL, 1, DST_OBSOLETE_NONE, 0); + xdst = dst_alloc(dst_ops, NULL, DST_OBSOLETE_NONE, 0); if (likely(xdst)) { memset_after(xdst, 0, u.dst); @@ -2858,7 +2873,7 @@ static void xfrm_policy_queue_process(struct timer_list *t) /* Fixup the mark to support VTI. */ skb_mark = skb->mark; skb->mark = pol->mark.v; - xfrm_decode_session(skb, &fl, dst->ops->family); + xfrm_decode_session(net, skb, &fl, dst->ops->family); skb->mark = skb_mark; spin_unlock(&pq->hold_queue.lock); @@ -2894,7 +2909,7 @@ static void xfrm_policy_queue_process(struct timer_list *t) /* Fixup the mark to support VTI. 
*/ skb_mark = skb->mark; skb->mark = pol->mark.v; - xfrm_decode_session(skb, &fl, skb_dst(skb)->ops->family); + xfrm_decode_session(net, skb, &fl, skb_dst(skb)->ops->family); skb->mark = skb_mark; dst_hold(xfrm_dst_path(skb_dst(skb))); @@ -3372,209 +3387,106 @@ xfrm_policy_ok(const struct xfrm_tmpl *tmpl, const struct sec_path *sp, int star } static void -decode_session4(struct sk_buff *skb, struct flowi *fl, bool reverse) +decode_session4(const struct xfrm_flow_keys *flkeys, struct flowi *fl, bool reverse) { - const struct iphdr *iph = ip_hdr(skb); - int ihl = iph->ihl; - u8 *xprth = skb_network_header(skb) + ihl * 4; struct flowi4 *fl4 = &fl->u.ip4; - int oif = 0; - - if (skb_dst(skb) && skb_dst(skb)->dev) - oif = skb_dst(skb)->dev->ifindex; memset(fl4, 0, sizeof(struct flowi4)); - fl4->flowi4_mark = skb->mark; - fl4->flowi4_oif = reverse ? skb->skb_iif : oif; - - fl4->flowi4_proto = iph->protocol; - fl4->daddr = reverse ? iph->saddr : iph->daddr; - fl4->saddr = reverse ? iph->daddr : iph->saddr; - fl4->flowi4_tos = iph->tos & ~INET_ECN_MASK; - - if (!ip_is_fragment(iph)) { - switch (iph->protocol) { - case IPPROTO_UDP: - case IPPROTO_UDPLITE: - case IPPROTO_TCP: - case IPPROTO_SCTP: - case IPPROTO_DCCP: - if (xprth + 4 < skb->data || - pskb_may_pull(skb, xprth + 4 - skb->data)) { - __be16 *ports; - - xprth = skb_network_header(skb) + ihl * 4; - ports = (__be16 *)xprth; - - fl4->fl4_sport = ports[!!reverse]; - fl4->fl4_dport = ports[!reverse]; - } - break; - case IPPROTO_ICMP: - if (xprth + 2 < skb->data || - pskb_may_pull(skb, xprth + 2 - skb->data)) { - u8 *icmp; - xprth = skb_network_header(skb) + ihl * 4; - icmp = xprth; + if (reverse) { + fl4->saddr = flkeys->addrs.ipv4.dst; + fl4->daddr = flkeys->addrs.ipv4.src; + fl4->fl4_sport = flkeys->ports.dst; + fl4->fl4_dport = flkeys->ports.src; + } else { + fl4->saddr = flkeys->addrs.ipv4.src; + fl4->daddr = flkeys->addrs.ipv4.dst; + fl4->fl4_sport = flkeys->ports.src; + fl4->fl4_dport = flkeys->ports.dst; + } - fl4->fl4_icmp_type = icmp[0]; - fl4->fl4_icmp_code = icmp[1]; - } - break; - case IPPROTO_GRE: - if (xprth + 12 < skb->data || - pskb_may_pull(skb, xprth + 12 - skb->data)) { - __be16 *greflags; - __be32 *gre_hdr; - - xprth = skb_network_header(skb) + ihl * 4; - greflags = (__be16 *)xprth; - gre_hdr = (__be32 *)xprth; - - if (greflags[0] & GRE_KEY) { - if (greflags[0] & GRE_CSUM) - gre_hdr++; - fl4->fl4_gre_key = gre_hdr[1]; - } - } - break; - default: - break; - } + switch (flkeys->basic.ip_proto) { + case IPPROTO_GRE: + fl4->fl4_gre_key = flkeys->gre.keyid; + break; + case IPPROTO_ICMP: + fl4->fl4_icmp_type = flkeys->icmp.type; + fl4->fl4_icmp_code = flkeys->icmp.code; + break; } + + fl4->flowi4_proto = flkeys->basic.ip_proto; + fl4->flowi4_tos = flkeys->ip.tos; } #if IS_ENABLED(CONFIG_IPV6) static void -decode_session6(struct sk_buff *skb, struct flowi *fl, bool reverse) +decode_session6(const struct xfrm_flow_keys *flkeys, struct flowi *fl, bool reverse) { struct flowi6 *fl6 = &fl->u.ip6; - int onlyproto = 0; - const struct ipv6hdr *hdr = ipv6_hdr(skb); - u32 offset = sizeof(*hdr); - struct ipv6_opt_hdr *exthdr; - const unsigned char *nh = skb_network_header(skb); - u16 nhoff = IP6CB(skb)->nhoff; - int oif = 0; - u8 nexthdr; - - if (!nhoff) - nhoff = offsetof(struct ipv6hdr, nexthdr); - - nexthdr = nh[nhoff]; - - if (skb_dst(skb) && skb_dst(skb)->dev) - oif = skb_dst(skb)->dev->ifindex; memset(fl6, 0, sizeof(struct flowi6)); - fl6->flowi6_mark = skb->mark; - fl6->flowi6_oif = reverse ? 
skb->skb_iif : oif; - - fl6->daddr = reverse ? hdr->saddr : hdr->daddr; - fl6->saddr = reverse ? hdr->daddr : hdr->saddr; - - while (nh + offset + sizeof(*exthdr) < skb->data || - pskb_may_pull(skb, nh + offset + sizeof(*exthdr) - skb->data)) { - nh = skb_network_header(skb); - exthdr = (struct ipv6_opt_hdr *)(nh + offset); - - switch (nexthdr) { - case NEXTHDR_FRAGMENT: - onlyproto = 1; - fallthrough; - case NEXTHDR_ROUTING: - case NEXTHDR_HOP: - case NEXTHDR_DEST: - offset += ipv6_optlen(exthdr); - nexthdr = exthdr->nexthdr; - break; - case IPPROTO_UDP: - case IPPROTO_UDPLITE: - case IPPROTO_TCP: - case IPPROTO_SCTP: - case IPPROTO_DCCP: - if (!onlyproto && (nh + offset + 4 < skb->data || - pskb_may_pull(skb, nh + offset + 4 - skb->data))) { - __be16 *ports; - - nh = skb_network_header(skb); - ports = (__be16 *)(nh + offset); - fl6->fl6_sport = ports[!!reverse]; - fl6->fl6_dport = ports[!reverse]; - } - fl6->flowi6_proto = nexthdr; - return; - case IPPROTO_ICMPV6: - if (!onlyproto && (nh + offset + 2 < skb->data || - pskb_may_pull(skb, nh + offset + 2 - skb->data))) { - u8 *icmp; - - nh = skb_network_header(skb); - icmp = (u8 *)(nh + offset); - fl6->fl6_icmp_type = icmp[0]; - fl6->fl6_icmp_code = icmp[1]; - } - fl6->flowi6_proto = nexthdr; - return; - case IPPROTO_GRE: - if (!onlyproto && - (nh + offset + 12 < skb->data || - pskb_may_pull(skb, nh + offset + 12 - skb->data))) { - struct gre_base_hdr *gre_hdr; - __be32 *gre_key; - - nh = skb_network_header(skb); - gre_hdr = (struct gre_base_hdr *)(nh + offset); - gre_key = (__be32 *)(gre_hdr + 1); - - if (gre_hdr->flags & GRE_KEY) { - if (gre_hdr->flags & GRE_CSUM) - gre_key++; - fl6->fl6_gre_key = *gre_key; - } - } - fl6->flowi6_proto = nexthdr; - return; -#if IS_ENABLED(CONFIG_IPV6_MIP6) - case IPPROTO_MH: - offset += ipv6_optlen(exthdr); - if (!onlyproto && (nh + offset + 3 < skb->data || - pskb_may_pull(skb, nh + offset + 3 - skb->data))) { - struct ip6_mh *mh; - - nh = skb_network_header(skb); - mh = (struct ip6_mh *)(nh + offset); - fl6->fl6_mh_type = mh->ip6mh_type; - } - fl6->flowi6_proto = nexthdr; - return; -#endif - default: - fl6->flowi6_proto = nexthdr; - return; - } + if (reverse) { + fl6->saddr = flkeys->addrs.ipv6.dst; + fl6->daddr = flkeys->addrs.ipv6.src; + fl6->fl6_sport = flkeys->ports.dst; + fl6->fl6_dport = flkeys->ports.src; + } else { + fl6->saddr = flkeys->addrs.ipv6.src; + fl6->daddr = flkeys->addrs.ipv6.dst; + fl6->fl6_sport = flkeys->ports.src; + fl6->fl6_dport = flkeys->ports.dst; + } + + switch (flkeys->basic.ip_proto) { + case IPPROTO_GRE: + fl6->fl6_gre_key = flkeys->gre.keyid; + break; + case IPPROTO_ICMPV6: + fl6->fl6_icmp_type = flkeys->icmp.type; + fl6->fl6_icmp_code = flkeys->icmp.code; + break; } + + fl6->flowi6_proto = flkeys->basic.ip_proto; } #endif -int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, +int __xfrm_decode_session(struct net *net, struct sk_buff *skb, struct flowi *fl, unsigned int family, int reverse) { + struct xfrm_flow_keys flkeys; + + memset(&flkeys, 0, sizeof(flkeys)); + __skb_flow_dissect(net, skb, &xfrm_session_dissector, &flkeys, + NULL, 0, 0, 0, FLOW_DISSECTOR_F_STOP_AT_ENCAP); + switch (family) { case AF_INET: - decode_session4(skb, fl, reverse); + decode_session4(&flkeys, fl, reverse); break; #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: - decode_session6(skb, fl, reverse); + decode_session6(&flkeys, fl, reverse); break; #endif default: return -EAFNOSUPPORT; } + fl->flowi_mark = skb->mark; + if (reverse) { + fl->flowi_oif = skb->skb_iif; + } else { + int oif = 0; 
+ + if (skb_dst(skb) && skb_dst(skb)->dev) + oif = skb_dst(skb)->dev->ifindex; + + fl->flowi_oif = oif; + } + return security_xfrm_decode_session(skb, &fl->flowi_secid); } EXPORT_SYMBOL(__xfrm_decode_session); @@ -3623,7 +3535,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, reverse = dir & ~XFRM_POLICY_MASK; dir &= XFRM_POLICY_MASK; - if (__xfrm_decode_session(skb, &fl, family, reverse) < 0) { + if (__xfrm_decode_session(net, skb, &fl, family, reverse) < 0) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR); return 0; } @@ -3779,7 +3691,7 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family) struct dst_entry *dst; int res = 1; - if (xfrm_decode_session(skb, &fl, family) < 0) { + if (xfrm_decode_session(net, skb, &fl, family) < 0) { XFRM_INC_STATS(net, LINUX_MIB_XFRMFWDHDRERROR); return 0; } @@ -4258,8 +4170,47 @@ static struct pernet_operations __net_initdata xfrm_net_ops = { .exit = xfrm_net_exit, }; +static const struct flow_dissector_key xfrm_flow_dissector_keys[] = { + { + .key_id = FLOW_DISSECTOR_KEY_CONTROL, + .offset = offsetof(struct xfrm_flow_keys, control), + }, + { + .key_id = FLOW_DISSECTOR_KEY_BASIC, + .offset = offsetof(struct xfrm_flow_keys, basic), + }, + { + .key_id = FLOW_DISSECTOR_KEY_IPV4_ADDRS, + .offset = offsetof(struct xfrm_flow_keys, addrs.ipv4), + }, + { + .key_id = FLOW_DISSECTOR_KEY_IPV6_ADDRS, + .offset = offsetof(struct xfrm_flow_keys, addrs.ipv6), + }, + { + .key_id = FLOW_DISSECTOR_KEY_PORTS, + .offset = offsetof(struct xfrm_flow_keys, ports), + }, + { + .key_id = FLOW_DISSECTOR_KEY_GRE_KEYID, + .offset = offsetof(struct xfrm_flow_keys, gre), + }, + { + .key_id = FLOW_DISSECTOR_KEY_IP, + .offset = offsetof(struct xfrm_flow_keys, ip), + }, + { + .key_id = FLOW_DISSECTOR_KEY_ICMP, + .offset = offsetof(struct xfrm_flow_keys, icmp), + }, +}; + void __init xfrm_init(void) { + skb_flow_dissector_init(&xfrm_session_dissector, + xfrm_flow_dissector_keys, + ARRAY_SIZE(xfrm_flow_dissector_keys)); + register_pernet_subsys(&xfrm_net_ops); xfrm_dev_init(); xfrm_input_init();
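The xfrm_policy.c rework above swaps roughly 200 lines of hand-rolled IPv4/IPv6 header walking in decode_session4()/decode_session6() (per the hunk header, 209 lines become 106) for the kernel's generic flow dissector: a table of (key id, offsetof() into struct xfrm_flow_keys) pairs is registered once in xfrm_init(), and a single __skb_flow_dissect() call in __xfrm_decode_session() then fills the whole key struct, including GRE key IDs and ICMP type/code, before the per-family functions merely copy fields into the flowi. A minimal userspace model of that declarative dissection pattern, with hypothetical names (key_id, flow_keys, put_key) rather than the kernel's actual flow-dissector API:

#include <stddef.h>
#include <stdint.h>
#include <string.h>

enum key_id { KEY_BASIC, KEY_PORTS };

struct key_basic { uint8_t ip_proto; };
struct key_ports { uint16_t src, dst; };

/* Caller-owned output block, playing the role of struct xfrm_flow_keys. */
struct flow_keys {
	struct key_basic basic;
	struct key_ports ports;
};

/* Declarative key table: which fields to extract, and where to put them. */
struct dissector_key { enum key_id id; size_t offset; };

static const struct dissector_key keys[] = {
	{ KEY_BASIC, offsetof(struct flow_keys, basic) },
	{ KEY_PORTS, offsetof(struct flow_keys, ports) },
};

/* The "dissector" core: store a parsed field at the offset registered
 * for its key id; unregistered keys are silently skipped. */
static void put_key(void *out, enum key_id id, const void *val, size_t len)
{
	for (size_t i = 0; i < sizeof(keys) / sizeof(keys[0]); i++) {
		if (keys[i].id == id) {
			memcpy((char *)out + keys[i].offset, val, len);
			return;
		}
	}
}

/* Usage: a protocol parser calls put_key() once per field it recognises,
 * and the caller reads the results out of its own flow_keys struct. */
void dissect_demo(struct flow_keys *out, uint8_t proto,
		  uint16_t sport, uint16_t dport)
{
	struct key_basic b = { .ip_proto = proto };
	struct key_ports p = { .src = sport, .dst = dport };

	put_key(out, KEY_BASIC, &b, sizeof(b));
	put_key(out, KEY_PORTS, &p, sizeof(p));
}

The benefit visible in the diff is that protocol quirks (the GRE checksum-before-key layout, IPv6 extension-header chains, fragment handling) are parsed once by the shared dissector instead of being re-implemented, with subtle differences, in each decode_session variant.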