Diffstat (limited to 'debian/patches-rt/0002-perf-Enqueue-SIGTRAP-always-via-task_work.patch')
-rw-r--r--  debian/patches-rt/0002-perf-Enqueue-SIGTRAP-always-via-task_work.patch  160
1 file changed, 160 insertions, 0 deletions
diff --git a/debian/patches-rt/0002-perf-Enqueue-SIGTRAP-always-via-task_work.patch b/debian/patches-rt/0002-perf-Enqueue-SIGTRAP-always-via-task_work.patch
new file mode 100644
index 000000000..e6e684ad5
--- /dev/null
+++ b/debian/patches-rt/0002-perf-Enqueue-SIGTRAP-always-via-task_work.patch
@@ -0,0 +1,160 @@
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Tue, 12 Mar 2024 19:01:50 +0100
+Subject: [PATCH 2/4] perf: Enqueue SIGTRAP always via task_work.
+Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.8/older/patches-6.8.2-rt11.tar.xz
+
+A signal is delivered by raising irq_work() which works from any context
+including NMI. irq_work() can be delayed if the architecture does not
+provide an interrupt vector. In order not to lose a signal, the signal
+is injected via task_work during event_sched_out().
+
+Instead of going via irq_work, the signal could be added directly via
+task_work. The signal is sent to current and can be enqueued on its
+return path to userland instead of triggering irq_work. A dummy IRQ is
+required in the NMI case to ensure the task_work is handled before
+returning to userland. For this, irq_work is used. An alternative would
+be just raising an interrupt like arch_send_call_function_single_ipi().
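
As a rough sketch of the scheme described above (condensed from the
__perf_event_overflow() hunk further down; the pending_work, pending_task
and pending_irq names come from that hunk, this is an illustration, not
the literal diff), the overflow path now does roughly:

	if (!event->pending_work) {
		event->pending_work = pending_id;
		local_inc(&event->ctx->nr_pending);
		/* Keep the event alive until the task_work has run. */
		WARN_ON_ONCE(!atomic_long_inc_not_zero(&event->refcount));
		task_work_add(current, &event->pending_task, TWA_RESUME);
		/*
		 * An NMI returns straight to userland; raise a dummy
		 * interrupt so the regular return path (and with it the
		 * task_work) is taken.
		 */
		if (in_nmi())
			irq_work_queue(&event->pending_irq);
	}
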
+
+During testing with `remove_on_exec' it became visible that the event
+can be enqueued via NMI during execve(). The task_work must not be kept
+because free_event() will complain later. Also the new task will not
+have a sighandler installed.
+
+Queue signal via task_work. Remove perf_event::pending_sigtrap and
+use perf_event::pending_work instead. Raise irq_work in the NMI case
+for a dummy interrupt. Remove the task_work if the event is freed.
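
The teardown side can be sketched the same way (condensed from the
task_work_cb_match()/perf_event_exit_event() hunks below; again an
illustration, not the literal diff): if the task_work is still queued
when the event is freed, it is cancelled, the extra reference is dropped
and nr_pending is corrected before free_event():

	static bool task_work_cb_match(struct callback_head *cb, void *data)
	{
		struct perf_event *event = container_of(cb, struct perf_event, pending_task);

		return event == data;
	}

	/* Before free_event(): drop a task_work that was queued but never ran. */
	if (event->pending_work &&
	    task_work_cancel_match(current, task_work_cb_match, event)) {
		put_event(event);			/* reference taken at enqueue time */
		local_dec(&event->ctx->nr_pending);
	}
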
+
+Tested-by: Marco Elver <elver@google.com>
+Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Reported-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Link: https://lore.kernel.org/r/20240312180814.3373778-3-bigeasy@linutronix.de
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ include/linux/perf_event.h | 3 --
+ kernel/events/core.c | 57 +++++++++++++++++++++++++--------------------
+ 2 files changed, 33 insertions(+), 27 deletions(-)
+
+--- a/include/linux/perf_event.h
++++ b/include/linux/perf_event.h
+@@ -781,7 +781,6 @@ struct perf_event {
+ unsigned int pending_wakeup;
+ unsigned int pending_kill;
+ unsigned int pending_disable;
+- unsigned int pending_sigtrap;
+ unsigned long pending_addr; /* SIGTRAP */
+ struct irq_work pending_irq;
+ struct callback_head pending_task;
+@@ -959,7 +958,7 @@ struct perf_event_context {
+ struct rcu_head rcu_head;
+
+ /*
+- * Sum (event->pending_sigtrap + event->pending_work)
++ * Sum (event->pending_work + event->pending_work)
+ *
+ * The SIGTRAP is targeted at ctx->task, as such it won't do changing
+ * that until the signal is delivered.
+--- a/kernel/events/core.c
++++ b/kernel/events/core.c
+@@ -2283,21 +2283,6 @@ event_sched_out(struct perf_event *event
+ state = PERF_EVENT_STATE_OFF;
+ }
+
+- if (event->pending_sigtrap) {
+- bool dec = true;
+-
+- event->pending_sigtrap = 0;
+- if (state != PERF_EVENT_STATE_OFF &&
+- !event->pending_work) {
+- event->pending_work = 1;
+- dec = false;
+- WARN_ON_ONCE(!atomic_long_inc_not_zero(&event->refcount));
+- task_work_add(current, &event->pending_task, TWA_RESUME);
+- }
+- if (dec)
+- local_dec(&event->ctx->nr_pending);
+- }
+-
+ perf_event_set_state(event, state);
+
+ if (!is_software_event(event))
+@@ -6741,11 +6726,6 @@ static void __perf_pending_irq(struct pe
+ * Yay, we hit home and are in the context of the event.
+ */
+ if (cpu == smp_processor_id()) {
+- if (event->pending_sigtrap) {
+- event->pending_sigtrap = 0;
+- perf_sigtrap(event);
+- local_dec(&event->ctx->nr_pending);
+- }
+ if (event->pending_disable) {
+ event->pending_disable = 0;
+ perf_event_disable_local(event);
+@@ -9592,14 +9572,23 @@ static int __perf_event_overflow(struct
+
+ if (regs)
+ pending_id = hash32_ptr((void *)instruction_pointer(regs)) ?: 1;
+- if (!event->pending_sigtrap) {
+- event->pending_sigtrap = pending_id;
++ if (!event->pending_work) {
++ event->pending_work = pending_id;
+ local_inc(&event->ctx->nr_pending);
+- irq_work_queue(&event->pending_irq);
++ WARN_ON_ONCE(!atomic_long_inc_not_zero(&event->refcount));
++ task_work_add(current, &event->pending_task, TWA_RESUME);
++ /*
++ * The NMI path returns directly to userland. The
++ * irq_work is raised as a dummy interrupt to ensure
++ * regular return path to user is taken and task_work
++ * is processed.
++ */
++ if (in_nmi())
++ irq_work_queue(&event->pending_irq);
+ } else if (event->attr.exclude_kernel && valid_sample) {
+ /*
+ * Should not be able to return to user space without
+- * consuming pending_sigtrap; with exceptions:
++ * consuming pending_work; with exceptions:
+ *
+ * 1. Where !exclude_kernel, events can overflow again
+ * in the kernel without returning to user space.
+@@ -9609,7 +9598,7 @@ static int __perf_event_overflow(struct
+ * To approximate progress (with false negatives),
+ * check 32-bit hash of the current IP.
+ */
+- WARN_ON_ONCE(event->pending_sigtrap != pending_id);
++ WARN_ON_ONCE(event->pending_work != pending_id);
+ }
+
+ event->pending_addr = 0;
+@@ -13049,6 +13038,13 @@ static void sync_child_event(struct perf
+ &parent_event->child_total_time_running);
+ }
+
++static bool task_work_cb_match(struct callback_head *cb, void *data)
++{
++ struct perf_event *event = container_of(cb, struct perf_event, pending_task);
++
++ return event == data;
++}
++
+ static void
+ perf_event_exit_event(struct perf_event *event, struct perf_event_context *ctx)
+ {
+@@ -13088,6 +13084,17 @@ perf_event_exit_event(struct perf_event
+ * Kick perf_poll() for is_event_hup();
+ */
+ perf_event_wakeup(parent_event);
++ /*
++ * Cancel pending task_work and update counters if it has not
++ * yet been delivered to userland. free_event() expects the
++ * reference counter at 1 and keeping the event around until the
+	 * task returns to userland would be unexpected.
++ */
++ if (event->pending_work &&
++ task_work_cancel_match(current, task_work_cb_match, event)) {
++ put_event(event);
++ local_dec(&event->ctx->nr_pending);
++ }
+ free_event(event);
+ put_event(parent_event);
+ return;