From: Sebastian Andrzej Siewior Date: Thu, 4 Jul 2024 19:03:39 +0200 Subject: [PATCH 5/7] perf: Move swevent_htable::recursion into task_struct. Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.10/older/patches-6.10.2-rt14.tar.xz The swevent_htable::recursion counter is used to avoid creating an swevent while an event is processed to avoid recursion. The counter is per-CPU and preemption must be disabled to have a stable counter. perf_pending_task() disables preemption to access the counter and then signal. This is problematic on PREEMPT_RT because sending a signal uses a spinlock_t which must not be acquired in atomic on PREEMPT_RT because it becomes a sleeping lock. The atomic context can be avoided by moving the counter into the task_struct. There is a 4 byte hole between futex_state (usually always on) and the following perf pointer (perf_event_ctxp). After the recursion lost some weight it fits perfectly. Move swevent_htable::recursion into task_struct. Tested-by: Marco Elver Link: https://lore.kernel.org/r/20240704170424.1466941-6-bigeasy@linutronix.de Signed-off-by: Sebastian Andrzej Siewior --- include/linux/perf_event.h | 6 ------ include/linux/sched.h | 7 +++++++ kernel/events/core.c | 13 +++---------- kernel/events/internal.h | 2 +- 4 files changed, 11 insertions(+), 17 deletions(-) --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -970,12 +970,6 @@ struct perf_event_context { local_t nr_pending; }; -/* - * Number of contexts where an event can trigger: - * task, softirq, hardirq, nmi. - */ -#define PERF_NR_CONTEXTS 4 - struct perf_cpu_pmu_context { struct perf_event_pmu_context epc; struct perf_event_pmu_context *task_epc; --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -734,6 +734,12 @@ enum perf_event_task_context { perf_nr_task_contexts, }; +/* + * Number of contexts where an event can trigger: + * task, softirq, hardirq, nmi. + */ +#define PERF_NR_CONTEXTS 4 + struct wake_q_node { struct wake_q_node *next; }; @@ -1256,6 +1262,7 @@ struct task_struct { unsigned int futex_state; #endif #ifdef CONFIG_PERF_EVENTS + u8 perf_recursion[PERF_NR_CONTEXTS]; struct perf_event_context *perf_event_ctxp; struct mutex perf_event_mutex; struct list_head perf_event_list; --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -9774,11 +9774,7 @@ struct swevent_htable { struct swevent_hlist *swevent_hlist; struct mutex hlist_mutex; int hlist_refcount; - - /* Recursion avoidance in each contexts */ - u8 recursion[PERF_NR_CONTEXTS]; }; - static DEFINE_PER_CPU(struct swevent_htable, swevent_htable); /* @@ -9976,17 +9972,13 @@ DEFINE_PER_CPU(struct pt_regs, __perf_re int perf_swevent_get_recursion_context(void) { - struct swevent_htable *swhash = this_cpu_ptr(&swevent_htable); - - return get_recursion_context(swhash->recursion); + return get_recursion_context(current->perf_recursion); } EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context); void perf_swevent_put_recursion_context(int rctx) { - struct swevent_htable *swhash = this_cpu_ptr(&swevent_htable); - - put_recursion_context(swhash->recursion, rctx); + put_recursion_context(current->perf_recursion, rctx); } void ___perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) @@ -13653,6 +13645,7 @@ int perf_event_init_task(struct task_str { int ret; + memset(child->perf_recursion, 0, sizeof(child->perf_recursion)); child->perf_event_ctxp = NULL; mutex_init(&child->perf_event_mutex); INIT_LIST_HEAD(&child->perf_event_list); --- a/kernel/events/internal.h +++ b/kernel/events/internal.h @@ -221,7 +221,7 @@ static inline int get_recursion_context( return rctx; } -static inline void put_recursion_context(u8 *recursion, int rctx) +static inline void put_recursion_context(u8 *recursion, unsigned char rctx) { barrier(); recursion[rctx]--;