From: Sebastian Andrzej Siewior Date: Thu, 1 Feb 2024 15:39:56 +0100 Subject: [PATCH 15/15] net: Move per-CPU flush-lists to bpf_net_context on PREEMPT_RT. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.10/older/patches-6.10.2-rt14.tar.xz The per-CPU flush lists, which are accessed from within the NAPI callback (xdp_do_flush() for instance), are per-CPU. There are subject to the same problem as struct bpf_redirect_info. Add the per-CPU lists cpu_map_flush_list, dev_map_flush_list and xskmap_map_flush_list to struct bpf_net_context. Add wrappers for the access. The lists initialized on first usage (similar to bpf_net_ctx_get_ri()). Cc: "Björn Töpel" Cc: Alexei Starovoitov Cc: Andrii Nakryiko Cc: Eduard Zingerman Cc: Hao Luo Cc: Jesper Dangaard Brouer Cc: Jiri Olsa Cc: John Fastabend Cc: Jonathan Lemon Cc: KP Singh Cc: Maciej Fijalkowski Cc: Magnus Karlsson Cc: Martin KaFai Lau Cc: Song Liu Cc: Stanislav Fomichev Cc: Toke Høiland-Jørgensen Cc: Yonghong Song Cc: bpf@vger.kernel.org Reviewed-by: Toke Høiland-Jørgensen Signed-off-by: Sebastian Andrzej Siewior --- include/linux/filter.h | 42 ++++++++++++++++++++++++++++++++++++++++++ kernel/bpf/cpumap.c | 19 +++---------------- kernel/bpf/devmap.c | 11 +++-------- net/xdp/xsk.c | 12 ++++-------- 4 files changed, 52 insertions(+), 32 deletions(-) --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -736,6 +736,9 @@ struct bpf_nh_params { /* flags for bpf_redirect_info kern_flags */ #define BPF_RI_F_RF_NO_DIRECT BIT(0) /* no napi_direct on return_frame */ #define BPF_RI_F_RI_INIT BIT(1) +#define BPF_RI_F_CPU_MAP_INIT BIT(2) +#define BPF_RI_F_DEV_MAP_INIT BIT(3) +#define BPF_RI_F_XSK_MAP_INIT BIT(4) struct bpf_redirect_info { u64 tgt_index; @@ -750,6 +753,9 @@ struct bpf_redirect_info { struct bpf_net_context { struct bpf_redirect_info ri; + struct list_head cpu_map_flush_list; + struct list_head dev_map_flush_list; + struct list_head xskmap_map_flush_list; }; static inline struct bpf_net_context *bpf_net_ctx_set(struct bpf_net_context *bpf_net_ctx) @@ -787,6 +793,42 @@ static inline struct bpf_redirect_info * return &bpf_net_ctx->ri; } +static inline struct list_head *bpf_net_ctx_get_cpu_map_flush_list(void) +{ + struct bpf_net_context *bpf_net_ctx = bpf_net_ctx_get(); + + if (!(bpf_net_ctx->ri.kern_flags & BPF_RI_F_CPU_MAP_INIT)) { + INIT_LIST_HEAD(&bpf_net_ctx->cpu_map_flush_list); + bpf_net_ctx->ri.kern_flags |= BPF_RI_F_CPU_MAP_INIT; + } + + return &bpf_net_ctx->cpu_map_flush_list; +} + +static inline struct list_head *bpf_net_ctx_get_dev_flush_list(void) +{ + struct bpf_net_context *bpf_net_ctx = bpf_net_ctx_get(); + + if (!(bpf_net_ctx->ri.kern_flags & BPF_RI_F_DEV_MAP_INIT)) { + INIT_LIST_HEAD(&bpf_net_ctx->dev_map_flush_list); + bpf_net_ctx->ri.kern_flags |= BPF_RI_F_DEV_MAP_INIT; + } + + return &bpf_net_ctx->dev_map_flush_list; +} + +static inline struct list_head *bpf_net_ctx_get_xskmap_flush_list(void) +{ + struct bpf_net_context *bpf_net_ctx = bpf_net_ctx_get(); + + if (!(bpf_net_ctx->ri.kern_flags & BPF_RI_F_XSK_MAP_INIT)) { + INIT_LIST_HEAD(&bpf_net_ctx->xskmap_map_flush_list); + bpf_net_ctx->ri.kern_flags |= BPF_RI_F_XSK_MAP_INIT; + } + + return &bpf_net_ctx->xskmap_map_flush_list; +} + /* Compute the linear packet data range [data, data_end) which * will be accessed by various program types (cls_bpf, act_bpf, * lwt, ...). Subsystems allowing direct data access must (!) --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -79,8 +79,6 @@ struct bpf_cpu_map { struct bpf_cpu_map_entry __rcu **cpu_map; }; -static DEFINE_PER_CPU(struct list_head, cpu_map_flush_list); - static struct bpf_map *cpu_map_alloc(union bpf_attr *attr) { u32 value_size = attr->value_size; @@ -709,7 +707,7 @@ static void bq_flush_to_queue(struct xdp */ static void bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf) { - struct list_head *flush_list = this_cpu_ptr(&cpu_map_flush_list); + struct list_head *flush_list = bpf_net_ctx_get_cpu_map_flush_list(); struct xdp_bulk_queue *bq = this_cpu_ptr(rcpu->bulkq); if (unlikely(bq->count == CPU_MAP_BULK_SIZE)) @@ -761,7 +759,7 @@ int cpu_map_generic_redirect(struct bpf_ void __cpu_map_flush(void) { - struct list_head *flush_list = this_cpu_ptr(&cpu_map_flush_list); + struct list_head *flush_list = bpf_net_ctx_get_cpu_map_flush_list(); struct xdp_bulk_queue *bq, *tmp; list_for_each_entry_safe(bq, tmp, flush_list, flush_node) { @@ -775,20 +773,9 @@ void __cpu_map_flush(void) #ifdef CONFIG_DEBUG_NET bool cpu_map_check_flush(void) { - if (list_empty(this_cpu_ptr(&cpu_map_flush_list))) + if (list_empty(bpf_net_ctx_get_cpu_map_flush_list())) return false; __cpu_map_flush(); return true; } #endif - -static int __init cpu_map_init(void) -{ - int cpu; - - for_each_possible_cpu(cpu) - INIT_LIST_HEAD(&per_cpu(cpu_map_flush_list, cpu)); - return 0; -} - -subsys_initcall(cpu_map_init); --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -83,7 +83,6 @@ struct bpf_dtab { u32 n_buckets; }; -static DEFINE_PER_CPU(struct list_head, dev_flush_list); static DEFINE_SPINLOCK(dev_map_lock); static LIST_HEAD(dev_map_list); @@ -415,7 +414,7 @@ static void bq_xmit_all(struct xdp_dev_b */ void __dev_flush(void) { - struct list_head *flush_list = this_cpu_ptr(&dev_flush_list); + struct list_head *flush_list = bpf_net_ctx_get_dev_flush_list(); struct xdp_dev_bulk_queue *bq, *tmp; list_for_each_entry_safe(bq, tmp, flush_list, flush_node) { @@ -429,7 +428,7 @@ void __dev_flush(void) #ifdef CONFIG_DEBUG_NET bool dev_check_flush(void) { - if (list_empty(this_cpu_ptr(&dev_flush_list))) + if (list_empty(bpf_net_ctx_get_dev_flush_list())) return false; __dev_flush(); return true; @@ -460,7 +459,7 @@ static void *__dev_map_lookup_elem(struc static void bq_enqueue(struct net_device *dev, struct xdp_frame *xdpf, struct net_device *dev_rx, struct bpf_prog *xdp_prog) { - struct list_head *flush_list = this_cpu_ptr(&dev_flush_list); + struct list_head *flush_list = bpf_net_ctx_get_dev_flush_list(); struct xdp_dev_bulk_queue *bq = this_cpu_ptr(dev->xdp_bulkq); if (unlikely(bq->count == DEV_MAP_BULK_SIZE)) @@ -1160,15 +1159,11 @@ static struct notifier_block dev_map_not static int __init dev_map_init(void) { - int cpu; - /* Assure tracepoint shadow struct _bpf_dtab_netdev is in sync */ BUILD_BUG_ON(offsetof(struct bpf_dtab_netdev, dev) != offsetof(struct _bpf_dtab_netdev, dev)); register_netdevice_notifier(&dev_map_notifier); - for_each_possible_cpu(cpu) - INIT_LIST_HEAD(&per_cpu(dev_flush_list, cpu)); return 0; } --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -35,8 +35,6 @@ #define TX_BATCH_SIZE 32 #define MAX_PER_SOCKET_BUDGET (TX_BATCH_SIZE) -static DEFINE_PER_CPU(struct list_head, xskmap_flush_list); - void xsk_set_rx_need_wakeup(struct xsk_buff_pool *pool) { if (pool->cached_need_wakeup & XDP_WAKEUP_RX) @@ -372,7 +370,7 @@ static int xsk_rcv(struct xdp_sock *xs, int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp) { - struct list_head *flush_list = this_cpu_ptr(&xskmap_flush_list); + struct list_head *flush_list = bpf_net_ctx_get_xskmap_flush_list(); int err; err = xsk_rcv(xs, xdp); @@ -387,7 +385,7 @@ int __xsk_map_redirect(struct xdp_sock * void __xsk_map_flush(void) { - struct list_head *flush_list = this_cpu_ptr(&xskmap_flush_list); + struct list_head *flush_list = bpf_net_ctx_get_xskmap_flush_list(); struct xdp_sock *xs, *tmp; list_for_each_entry_safe(xs, tmp, flush_list, flush_node) { @@ -399,7 +397,7 @@ void __xsk_map_flush(void) #ifdef CONFIG_DEBUG_NET bool xsk_map_check_flush(void) { - if (list_empty(this_cpu_ptr(&xskmap_flush_list))) + if (list_empty(bpf_net_ctx_get_xskmap_flush_list())) return false; __xsk_map_flush(); return true; @@ -1772,7 +1770,7 @@ static struct pernet_operations xsk_net_ static int __init xsk_init(void) { - int err, cpu; + int err; err = proto_register(&xsk_proto, 0 /* no slab */); if (err) @@ -1790,8 +1788,6 @@ static int __init xsk_init(void) if (err) goto out_pernet; - for_each_possible_cpu(cpu) - INIT_LIST_HEAD(&per_cpu(xskmap_flush_list, cpu)); return 0; out_pernet: