From f7bf6055eb1e7ff603f0907b18ece35c72b05302 Mon Sep 17 00:00:00 2001
From: Daniel Baumann
Date: Sat, 18 May 2024 20:50:31 +0200
Subject: Adding debian version 6.8.9-1.

Signed-off-by: Daniel Baumann
---
 ...perf-Enqueue-SIGTRAP-always-via-task_work.patch | 160 +++++++++++++++++++++
 1 file changed, 160 insertions(+)
 create mode 100644 debian/patches-rt/0002-perf-Enqueue-SIGTRAP-always-via-task_work.patch

diff --git a/debian/patches-rt/0002-perf-Enqueue-SIGTRAP-always-via-task_work.patch b/debian/patches-rt/0002-perf-Enqueue-SIGTRAP-always-via-task_work.patch
new file mode 100644
index 0000000000..e6e684ad51
--- /dev/null
+++ b/debian/patches-rt/0002-perf-Enqueue-SIGTRAP-always-via-task_work.patch
@@ -0,0 +1,160 @@
+From: Sebastian Andrzej Siewior
+Date: Tue, 12 Mar 2024 19:01:50 +0100
+Subject: [PATCH 2/4] perf: Enqueue SIGTRAP always via task_work.
+Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.8/older/patches-6.8.2-rt11.tar.xz
+
+A signal is delivered by raising irq_work() which works from any context
+including NMI. irq_work() can be delayed if the architecture does not
+provide an interrupt vector. In order not to lose a signal, the signal
+is injected via task_work during event_sched_out().
+
+Instead of going via irq_work, the signal could be added directly via
+task_work. The signal is sent to current and can be enqueued on its
+return path to userland instead of triggering irq_work. A dummy IRQ is
+required in the NMI case to ensure the task_work is handled before
+returning to userland. For this irq_work is used. An alternative would
+be just raising an interrupt like arch_send_call_function_single_ipi().
+
+During testing with `remove_on_exec' it became visible that the event
+can be enqueued via NMI during execve(). The task_work must not be kept
+because free_event() will complain later. Also the new task will not
+have a sighandler installed.
+
+Queue the signal via task_work. Remove perf_event::pending_sigtrap
+and use perf_event::pending_work instead. Raise irq_work in the NMI case
+for a dummy interrupt. Remove the task_work if the event is freed.
+
+Tested-by: Marco Elver
+Tested-by: Arnaldo Carvalho de Melo
+Reported-by: Arnaldo Carvalho de Melo
+Link: https://lore.kernel.org/r/20240312180814.3373778-3-bigeasy@linutronix.de
+Signed-off-by: Sebastian Andrzej Siewior
+---
+ include/linux/perf_event.h |  3 --
+ kernel/events/core.c       | 57 +++++++++++++++++++++++++--------------------
+ 2 files changed, 33 insertions(+), 27 deletions(-)
+
+--- a/include/linux/perf_event.h
++++ b/include/linux/perf_event.h
+@@ -781,7 +781,6 @@ struct perf_event {
+ 	unsigned int			pending_wakeup;
+ 	unsigned int			pending_kill;
+ 	unsigned int			pending_disable;
+-	unsigned int			pending_sigtrap;
+ 	unsigned long			pending_addr;	/* SIGTRAP */
+ 	struct irq_work			pending_irq;
+ 	struct callback_head		pending_task;
+@@ -959,7 +958,7 @@ struct perf_event_context {
+ 	struct rcu_head			rcu_head;
+ 
+ 	/*
+-	 * Sum (event->pending_sigtrap + event->pending_work)
++	 * Sum (event->pending_work + event->pending_work)
+ 	 *
+ 	 * The SIGTRAP is targeted at ctx->task, as such it won't do changing
+ 	 * that until the signal is delivered.
+--- a/kernel/events/core.c
++++ b/kernel/events/core.c
+@@ -2283,21 +2283,6 @@ event_sched_out(struct perf_event *event
+ 		state = PERF_EVENT_STATE_OFF;
+ 	}
+ 
+-	if (event->pending_sigtrap) {
+-		bool dec = true;
+-
+-		event->pending_sigtrap = 0;
+-		if (state != PERF_EVENT_STATE_OFF &&
+-		    !event->pending_work) {
+-			event->pending_work = 1;
+-			dec = false;
+-			WARN_ON_ONCE(!atomic_long_inc_not_zero(&event->refcount));
+-			task_work_add(current, &event->pending_task, TWA_RESUME);
+-		}
+-		if (dec)
+-			local_dec(&event->ctx->nr_pending);
+-	}
+-
+ 	perf_event_set_state(event, state);
+ 
+ 	if (!is_software_event(event))
+@@ -6741,11 +6726,6 @@ static void __perf_pending_irq(struct pe
+ 	 * Yay, we hit home and are in the context of the event.
+ 	 */
+ 	if (cpu == smp_processor_id()) {
+-		if (event->pending_sigtrap) {
+-			event->pending_sigtrap = 0;
+-			perf_sigtrap(event);
+-			local_dec(&event->ctx->nr_pending);
+-		}
+ 		if (event->pending_disable) {
+ 			event->pending_disable = 0;
+ 			perf_event_disable_local(event);
+@@ -9592,14 +9572,23 @@ static int __perf_event_overflow(struct
+ 
+ 	if (regs)
+ 		pending_id = hash32_ptr((void *)instruction_pointer(regs)) ?: 1;
+-	if (!event->pending_sigtrap) {
+-		event->pending_sigtrap = pending_id;
++	if (!event->pending_work) {
++		event->pending_work = pending_id;
+ 		local_inc(&event->ctx->nr_pending);
+-		irq_work_queue(&event->pending_irq);
++		WARN_ON_ONCE(!atomic_long_inc_not_zero(&event->refcount));
++		task_work_add(current, &event->pending_task, TWA_RESUME);
++		/*
++		 * The NMI path returns directly to userland. The
++		 * irq_work is raised as a dummy interrupt to ensure
++		 * regular return path to user is taken and task_work
++		 * is processed.
++		 */
++		if (in_nmi())
++			irq_work_queue(&event->pending_irq);
+ 	} else if (event->attr.exclude_kernel && valid_sample) {
+ 		/*
+ 		 * Should not be able to return to user space without
+-		 * consuming pending_sigtrap; with exceptions:
++		 * consuming pending_work; with exceptions:
+ 		 *
+ 		 * 1. Where !exclude_kernel, events can overflow again
+ 		 *    in the kernel without returning to user space.
+@@ -9609,7 +9598,7 @@ static int __perf_event_overflow(struct
+ 		 * To approximate progress (with false negatives),
+ 		 * check 32-bit hash of the current IP.
+ 		 */
+-		WARN_ON_ONCE(event->pending_sigtrap != pending_id);
++		WARN_ON_ONCE(event->pending_work != pending_id);
+ 	}
+ 
+ 	event->pending_addr = 0;
+@@ -13049,6 +13038,13 @@ static void sync_child_event(struct perf
+ 			      &parent_event->child_total_time_running);
+ }
+ 
++static bool task_work_cb_match(struct callback_head *cb, void *data)
++{
++	struct perf_event *event = container_of(cb, struct perf_event, pending_task);
++
++	return event == data;
++}
++
+ static void
+ perf_event_exit_event(struct perf_event *event, struct perf_event_context *ctx)
+ {
+@@ -13088,6 +13084,17 @@ perf_event_exit_event(struct perf_event
+ 		 * Kick perf_poll() for is_event_hup();
+ 		 */
+ 		perf_event_wakeup(parent_event);
++		/*
++		 * Cancel pending task_work and update counters if it has not
++		 * yet been delivered to userland. free_event() expects the
++		 * reference counter at 1, and keeping the event around until
++		 * the task returns to userland would be unexpected.
++		 */
++		if (event->pending_work &&
++		    task_work_cancel_match(current, task_work_cb_match, event)) {
++			put_event(event);
++			local_dec(&event->ctx->nr_pending);
++		}
+ 		free_event(event);
+ 		put_event(parent_event);
+ 		return;
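
The sketch below is not part of the patch or of the kernel tree; it is a minimal illustration of the pattern the hunks above rely on, using invented names (pending_signal and the pending_signal_* helpers): defer delivery to the current task with task_work_add(..., TWA_RESUME), raise an empty irq_work purely as a dummy interrupt when the enqueue happens in NMI context, and drop a never-delivered work with task_work_cancel_match() on teardown. Only task_work_add(), task_work_cancel_match(), irq_work_queue(), init_task_work(), init_irq_work() and in_nmi() are real kernel interfaces; everything else is assumed for the example.

/*
 * Illustrative sketch only -- not taken from the patch above. The
 * pending_signal structure and pending_signal_*() helpers are invented;
 * the kernel APIs they call are the ones the patch uses.
 */
#include <linux/container_of.h>
#include <linux/irq_work.h>
#include <linux/preempt.h>
#include <linux/sched.h>
#include <linux/task_work.h>

struct pending_signal {				/* hypothetical per-event state */
	struct callback_head	twork;		/* deferred work run by the task */
	struct irq_work		kick;		/* dummy interrupt for the NMI case */
};

static void pending_signal_deliver(struct callback_head *head)
{
	struct pending_signal *p = container_of(head, struct pending_signal, twork);

	/* Task context, on the way back to userland: safe to deliver from here. */
	(void)p;
}

static void pending_signal_kick(struct irq_work *work)
{
	/* Intentionally empty: it only forces a regular interrupt return path. */
}

static void pending_signal_init(struct pending_signal *p)
{
	init_task_work(&p->twork, pending_signal_deliver);
	init_irq_work(&p->kick, pending_signal_kick);
}

/* Overflow/NMI path: queue the delivery on current. */
static void pending_signal_enqueue(struct pending_signal *p)
{
	if (task_work_add(current, &p->twork, TWA_RESUME))
		return;		/* task is already exiting, nothing to deliver */

	/*
	 * An NMI returns straight to where it hit, so raise a dummy irq_work
	 * to force an ordinary interrupt exit that notices the pending
	 * task_work before userland runs again.
	 */
	if (in_nmi())
		irq_work_queue(&p->kick);
}

/* Teardown, mirroring perf_event_exit_event(): drop a work that never ran. */
static bool pending_signal_match(struct callback_head *head, void *data)
{
	return head == data;
}

static void pending_signal_cancel(struct pending_signal *p)
{
	task_work_cancel_match(current, pending_signal_match, &p->twork);
}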