From: Sebastian Andrzej Siewior Date: Thu, 4 Jul 2024 19:03:36 +0200 Subject: [PATCH 2/7] task_work: Add TWA_NMI_CURRENT as an additional notify mode. Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.10/older/patches-6.10.2-rt14.tar.xz Adding task_work from NMI context requires the following: - The kasan_record_aux_stack() is not NMU safe and must be avoided. - Using TWA_RESUME is NMI safe. If the NMI occurs while the CPU is in userland then it will continue in userland and not invoke the `work' callback. Add TWA_NMI_CURRENT as an additional notify mode. In this mode skip kasan and use irq_work in hardirq-mode to for needed interrupt. Set TIF_NOTIFY_RESUME within the irq_work callback due to k[ac]san instrumentation in test_and_set_bit() which does not look NMI safe in case of a report. Suggested-by: Peter Zijlstra Link: https://lore.kernel.org/r/20240704170424.1466941-3-bigeasy@linutronix.de Signed-off-by: Sebastian Andrzej Siewior --- include/linux/task_work.h | 1 + kernel/task_work.c | 25 ++++++++++++++++++++++--- 2 files changed, 23 insertions(+), 3 deletions(-) --- a/include/linux/task_work.h +++ b/include/linux/task_work.h @@ -18,6 +18,7 @@ enum task_work_notify_mode { TWA_RESUME, TWA_SIGNAL, TWA_SIGNAL_NO_IPI, + TWA_NMI_CURRENT, }; static inline bool task_work_pending(struct task_struct *task) --- a/kernel/task_work.c +++ b/kernel/task_work.c @@ -1,10 +1,19 @@ // SPDX-License-Identifier: GPL-2.0 +#include #include #include #include +#include static struct callback_head work_exited; /* all we need is ->next == NULL */ +static void task_work_set_notify_irq(struct irq_work *entry) +{ + test_and_set_tsk_thread_flag(current, TIF_NOTIFY_RESUME); +} +static DEFINE_PER_CPU(struct irq_work, irq_work_NMI_resume) = + IRQ_WORK_INIT_HARD(task_work_set_notify_irq); + /** * task_work_add - ask the @task to execute @work->func() * @task: the task which should run the callback @@ -12,7 +21,7 @@ static struct callback_head work_exited; * @notify: how to notify the targeted task * * Queue @work for task_work_run() below and notify the @task if @notify - * is @TWA_RESUME, @TWA_SIGNAL, or @TWA_SIGNAL_NO_IPI. + * is @TWA_RESUME, @TWA_SIGNAL, @TWA_SIGNAL_NO_IPI or @TWA_NMI_CURRENT. * * @TWA_SIGNAL works like signals, in that the it will interrupt the targeted * task and run the task_work, regardless of whether the task is currently @@ -24,6 +33,8 @@ static struct callback_head work_exited; * kernel anyway. * @TWA_RESUME work is run only when the task exits the kernel and returns to * user mode, or before entering guest mode. + * @TWA_NMI_CURRENT works like @TWA_RESUME, except it can only be used for the + * current @task and if the current context is NMI. * * Fails if the @task is exiting/exited and thus it can't process this @work. * Otherwise @work->func() will be called when the @task goes through one of @@ -44,8 +55,13 @@ int task_work_add(struct task_struct *ta { struct callback_head *head; - /* record the work call stack in order to print it in KASAN reports */ - kasan_record_aux_stack(work); + if (notify == TWA_NMI_CURRENT) { + if (WARN_ON_ONCE(task != current)) + return -EINVAL; + } else { + /* record the work call stack in order to print it in KASAN reports */ + kasan_record_aux_stack(work); + } head = READ_ONCE(task->task_works); do { @@ -66,6 +82,9 @@ int task_work_add(struct task_struct *ta case TWA_SIGNAL_NO_IPI: __set_notify_signal(task); break; + case TWA_NMI_CURRENT: + irq_work_queue(this_cpu_ptr(&irq_work_NMI_resume)); + break; default: WARN_ON_ONCE(1); break;