author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 10:06:00 +0000
---|---|---
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 10:06:00 +0000
commit | b15a952c52a6825376d3e7f6c1bf5c886c6d8b74 (patch) |
tree | 1500f2f8f276908a36d8126cb632c0d6b1276764 /debian/patches-rt/0308-irq_work-Handle-some-irq_work-in-a-per-CPU-thread-on.patch |
parent | Adding upstream version 5.10.209. (diff) |
download | linux-b15a952c52a6825376d3e7f6c1bf5c886c6d8b74.tar.xz linux-b15a952c52a6825376d3e7f6c1bf5c886c6d8b74.zip |
Adding debian version 5.10.209-2. (tag: debian/5.10.209-2)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'debian/patches-rt/0308-irq_work-Handle-some-irq_work-in-a-per-CPU-thread-on.patch')
-rw-r--r-- | debian/patches-rt/0308-irq_work-Handle-some-irq_work-in-a-per-CPU-thread-on.patch | 307 |
1 file changed, 307 insertions, 0 deletions
diff --git a/debian/patches-rt/0308-irq_work-Handle-some-irq_work-in-a-per-CPU-thread-on.patch b/debian/patches-rt/0308-irq_work-Handle-some-irq_work-in-a-per-CPU-thread-on.patch
new file mode 100644
index 000000000..a6aea41c0
--- /dev/null
+++ b/debian/patches-rt/0308-irq_work-Handle-some-irq_work-in-a-per-CPU-thread-on.patch
@@ -0,0 +1,307 @@
+From 2b803272d74039863a77523d79f79cc938eff7cf Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Wed, 24 Nov 2021 17:12:20 +0100
+Subject: [PATCH 308/323] irq_work: Handle some irq_work in a per-CPU thread on
+ PREEMPT_RT
+Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.10/older/patches-5.10.204-rt100.tar.xz
+
+The irq_work callback is invoked in hard IRQ context. By default all
+callbacks are scheduled for invocation right away (given supported by
+the architecture) except for the ones marked IRQ_WORK_LAZY which are
+delayed until the next timer-tick.
+
+While looking over the callbacks, some of them may acquire locks
+(spinlock_t, rwlock_t) which are transformed into sleeping locks on
+PREEMPT_RT and must not be acquired in hard IRQ context.
+Changing the locks into locks which could be acquired in this context
+will lead to other problems such as increased latencies if everything
+in the chain has IRQ-off locks. This will not solve all the issues as
+one callback has been noticed which invoked kref_put() and its callback
+invokes kfree() and this can not be invoked in hardirq context.
+
+Some callbacks are required to be invoked in hardirq context even on
+PREEMPT_RT to work properly. This includes for instance the NO_HZ
+callback which needs to be able to observe the idle context.
+
+The callbacks which require to be run in hardirq have already been
+marked. Use this information to split the callbacks onto the two lists
+on PREEMPT_RT:
+- lazy_list
+  Work items which are not marked with IRQ_WORK_HARD_IRQ will be added
+  to this list. Callbacks on this list will be invoked from a per-CPU
+  thread.
+  The handler here may acquire sleeping locks such as spinlock_t and
+  invoke kfree().
+
+- raised_list
+  Work items which are marked with IRQ_WORK_HARD_IRQ will be added to
+  this list. They will be invoked in hardirq context and must not
+  acquire any sleeping locks.
+
+The wake up of the per-CPU thread occurs from irq_work handler/
+hardirq context. The thread runs with lowest RT priority to ensure it
+runs before any SCHED_OTHER tasks do.
+
+[bigeasy: melt tglx's irq_work_tick_soft() which splits irq_work_tick() into a
+ hard and soft variant. Collected fixes over time from Steven
+ Rostedt and Mike Galbraith. Move to per-CPU threads instead of
+ softirq as suggested by PeterZ.]
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lkml.kernel.org/r/20211007092646.uhshe3ut2wkrcfzv@linutronix.de
+Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
+---
+ include/linux/irq_work.h | 16 +++--
+ kernel/irq_work.c | 131 ++++++++++++++++++++++++++++-----------
+ kernel/time/timer.c | 2 -
+ 3 files changed, 106 insertions(+), 43 deletions(-)
+
+diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h
+index 3c6d3a96bca0..f551ba9c99d4 100644
+--- a/include/linux/irq_work.h
++++ b/include/linux/irq_work.h
+@@ -40,6 +40,16 @@ void init_irq_work(struct irq_work *work, void (*func)(struct irq_work *))
+ .irqwait = __RCUWAIT_INITIALIZER(irqwait), \
+ }
+
++#define __IRQ_WORK_INIT(_func, _flags) (struct irq_work){ \
++ .flags = ATOMIC_INIT(_flags), \
++ .func = (_func), \
++ .irqwait = __RCUWAIT_INITIALIZER(irqwait), \
++}
++
++#define IRQ_WORK_INIT(_func) __IRQ_WORK_INIT(_func, 0)
++#define IRQ_WORK_INIT_LAZY(_func) __IRQ_WORK_INIT(_func, IRQ_WORK_LAZY)
++#define IRQ_WORK_INIT_HARD(_func) __IRQ_WORK_INIT(_func, IRQ_WORK_HARD_IRQ)
++
+ static inline bool irq_work_is_busy(struct irq_work *work)
+ {
+ return atomic_read(&work->flags) & IRQ_WORK_BUSY;
+@@ -63,10 +73,4 @@ static inline void irq_work_run(void) { }
+ static inline void irq_work_single(void *arg) { }
+ #endif
+
+-#if defined(CONFIG_IRQ_WORK) && defined(CONFIG_PREEMPT_RT)
+-void irq_work_tick_soft(void);
+-#else
+-static inline void irq_work_tick_soft(void) { }
+-#endif
+-
+ #endif /* _LINUX_IRQ_WORK_H */
+diff --git a/kernel/irq_work.c b/kernel/irq_work.c
+index 8969aff790e2..03d09d779ee1 100644
+--- a/kernel/irq_work.c
++++ b/kernel/irq_work.c
+@@ -18,12 +18,37 @@
+ #include <linux/cpu.h>
+ #include <linux/notifier.h>
+ #include <linux/smp.h>
++#include <linux/smpboot.h>
+ #include <linux/interrupt.h>
+ #include <asm/processor.h>
+
+
+ static DEFINE_PER_CPU(struct llist_head, raised_list);
+ static DEFINE_PER_CPU(struct llist_head, lazy_list);
++static DEFINE_PER_CPU(struct task_struct *, irq_workd);
++
++static void wake_irq_workd(void)
++{
++ struct task_struct *tsk = __this_cpu_read(irq_workd);
++
++ if (!llist_empty(this_cpu_ptr(&lazy_list)) && tsk)
++ wake_up_process(tsk);
++}
++
++#ifdef CONFIG_SMP
++static void irq_work_wake(struct irq_work *entry)
++{
++ wake_irq_workd();
++}
++
++static DEFINE_PER_CPU(struct irq_work, irq_work_wakeup) =
++ IRQ_WORK_INIT_HARD(irq_work_wake);
++#endif
++
++static int irq_workd_should_run(unsigned int cpu)
++{
++ return !llist_empty(this_cpu_ptr(&lazy_list));
++}
+
+ /*
+ * Claim the entry so that no one else will poke at it.
+@@ -54,20 +79,28 @@ void __weak arch_irq_work_raise(void)
+ static void __irq_work_queue_local(struct irq_work *work)
+ {
+ struct llist_head *list;
+- bool lazy_work, realtime = IS_ENABLED(CONFIG_PREEMPT_RT);
+-
+- lazy_work = atomic_read(&work->flags) & IRQ_WORK_LAZY;
+-
+- /* If the work is "lazy", handle it from next tick if any */
+- if (lazy_work || (realtime && !(atomic_read(&work->flags) & IRQ_WORK_HARD_IRQ)))
++ bool rt_lazy_work = false;
++ bool lazy_work = false;
++ int work_flags;
++
++ work_flags = atomic_read(&work->flags);
++ if (work_flags & IRQ_WORK_LAZY)
++ lazy_work = true;
++ else if (IS_ENABLED(CONFIG_PREEMPT_RT) &&
++ !(work_flags & IRQ_WORK_HARD_IRQ))
++ rt_lazy_work = true;
++
++ if (lazy_work || rt_lazy_work)
+ list = this_cpu_ptr(&lazy_list);
+ else
+ list = this_cpu_ptr(&raised_list);
+
+- if (llist_add(&work->llnode, list)) {
+- if (!lazy_work || tick_nohz_tick_stopped())
+- arch_irq_work_raise();
+- }
++ if (!llist_add(&work->llnode, list))
++ return;
++
++ /* If the work is "lazy", handle it from next tick if any */
++ if (!lazy_work || tick_nohz_tick_stopped())
++ arch_irq_work_raise();
+ }
+
+ /* Enqueue the irq work @work on the current CPU */
+@@ -110,15 +143,27 @@ bool irq_work_queue_on(struct irq_work *work, int cpu)
+ /* Arch remote IPI send/receive backend aren't NMI safe */
+ WARN_ON_ONCE(in_nmi());
+
+- if (IS_ENABLED(CONFIG_PREEMPT_RT) && !(atomic_read(&work->flags) & IRQ_WORK_HARD_IRQ)) {
+- if (llist_add(&work->llnode, &per_cpu(lazy_list, cpu)))
+- arch_send_call_function_single_ipi(cpu);
+- } else {
+- __smp_call_single_queue(cpu, &work->llnode);
++ /*
++ * On PREEMPT_RT the items which are not marked as
++ * IRQ_WORK_HARD_IRQ are added to the lazy list and a HARD work
++ * item is used on the remote CPU to wake the thread.
++ */
++ if (IS_ENABLED(CONFIG_PREEMPT_RT) &&
++ !(atomic_read(&work->flags) & IRQ_WORK_HARD_IRQ)) {
++
++ if (!llist_add(&work->llnode, &per_cpu(lazy_list, cpu)))
++ goto out;
++
++ work = &per_cpu(irq_work_wakeup, cpu);
++ if (!irq_work_claim(work))
++ goto out;
+ }
++
++ __smp_call_single_queue(cpu, &work->llnode);
+ } else {
+ __irq_work_queue_local(work);
+ }
++out:
+ preempt_enable();
+
+ return true;
+@@ -175,12 +220,13 @@ static void irq_work_run_list(struct llist_head *list)
+ struct irq_work *work, *tmp;
+ struct llist_node *llnode;
+
+-#ifndef CONFIG_PREEMPT_RT
+ /*
+- * nort: On RT IRQ-work may run in SOFTIRQ context.
++ * On PREEMPT_RT IRQ-work which is not marked as HARD will be processed
++ * in a per-CPU thread in preemptible context. Only the items which are
++ * marked as IRQ_WORK_HARD_IRQ will be processed in hardirq context.
+ */
+- BUG_ON(!irqs_disabled());
+-#endif
++ BUG_ON(!irqs_disabled() && !IS_ENABLED(CONFIG_PREEMPT_RT));
++
+ if (llist_empty(list))
+ return;
+
+@@ -196,16 +242,10 @@ static void irq_work_run_list(struct llist_head *list)
+ void irq_work_run(void)
+ {
+ irq_work_run_list(this_cpu_ptr(&raised_list));
+- if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
+- /*
+- * NOTE: we raise softirq via IPI for safety,
+- * and execute in irq_work_tick() to move the
+- * overhead from hard to soft irq context.
+- */
+- if (!llist_empty(this_cpu_ptr(&lazy_list)))
+- raise_softirq(TIMER_SOFTIRQ);
+- } else
++ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ irq_work_run_list(this_cpu_ptr(&lazy_list));
++ else
++ wake_irq_workd();
+ }
+ EXPORT_SYMBOL_GPL(irq_work_run);
+
+@@ -218,15 +258,10 @@ void irq_work_tick(void)
+
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ irq_work_run_list(this_cpu_ptr(&lazy_list));
++ else
++ wake_irq_workd();
+ }
+
+-#if defined(CONFIG_IRQ_WORK) && defined(CONFIG_PREEMPT_RT)
+-void irq_work_tick_soft(void)
+-{
+- irq_work_run_list(this_cpu_ptr(&lazy_list));
+-}
+-#endif
+-
+ /*
+ * Synchronize against the irq_work @entry, ensures the entry is not
+ * currently in use.
+@@ -246,3 +281,29 @@ void irq_work_sync(struct irq_work *work)
+ cpu_relax();
+ }
+ EXPORT_SYMBOL_GPL(irq_work_sync);
++
++static void run_irq_workd(unsigned int cpu)
++{
++ irq_work_run_list(this_cpu_ptr(&lazy_list));
++}
++
++static void irq_workd_setup(unsigned int cpu)
++{
++ sched_set_fifo_low(current);
++}
++
++static struct smp_hotplug_thread irqwork_threads = {
++ .store = &irq_workd,
++ .setup = irq_workd_setup,
++ .thread_should_run = irq_workd_should_run,
++ .thread_fn = run_irq_workd,
++ .thread_comm = "irq_work/%u",
++};
++
++static __init int irq_work_init_threads(void)
++{
++ if (IS_ENABLED(CONFIG_PREEMPT_RT))
++ BUG_ON(smpboot_register_percpu_thread(&irqwork_threads));
++ return 0;
++}
++early_initcall(irq_work_init_threads);
+diff --git a/kernel/time/timer.c b/kernel/time/timer.c
+index 1cad0efd635c..a4fdc7cfb723 100644
+--- a/kernel/time/timer.c
++++ b/kernel/time/timer.c
+@@ -1770,8 +1770,6 @@ static __latent_entropy void run_timer_softirq(struct softirq_action *h)
+ {
+ struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
+
+- irq_work_tick_soft();
+-
+ __run_timers(base);
+ if (IS_ENABLED(CONFIG_NO_HZ_COMMON))
+ __run_timers(this_cpu_ptr(&timer_bases[BASE_DEF]));
+--
+2.43.0
+
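For readers of the patch, a minimal, hypothetical sketch (not part of the commit; the demo_* names are invented) of what the split means for a caller of the irq_work API after this change: a plain item is deferred to the per-CPU irq_work/N thread on PREEMPT_RT, while an item declared with IRQ_WORK_INIT_HARD() keeps running from raised_list in hardirq context.

```c
/*
 * Illustrative sketch only -- not part of the patch. The demo_* names are
 * hypothetical; the API used (IRQ_WORK_INIT, IRQ_WORK_INIT_HARD,
 * irq_work_queue) is the one provided by the patched <linux/irq_work.h>.
 */
#include <linux/irq_work.h>
#include <linux/spinlock.h>
#include <linux/printk.h>

static DEFINE_SPINLOCK(demo_lock);	/* spinlock_t: a sleeping lock on PREEMPT_RT */

/* Not marked HARD: on PREEMPT_RT this runs from the per-CPU irq_work/N
 * thread, so taking a spinlock_t (or calling kfree()) is fine here. */
static void demo_lazy_cb(struct irq_work *work)
{
	spin_lock(&demo_lock);
	pr_info("demo: lazy irq_work callback ran\n");
	spin_unlock(&demo_lock);
}

/* Marked HARD: stays on raised_list and runs in hardirq context even on
 * PREEMPT_RT; it must not take sleeping locks. */
static void demo_hard_cb(struct irq_work *work)
{
	pr_info_ratelimited("demo: hard irq_work callback ran\n");
}

static struct irq_work demo_lazy = IRQ_WORK_INIT(demo_lazy_cb);
static struct irq_work demo_hard = IRQ_WORK_INIT_HARD(demo_hard_cb);

/* Typically called from hardirq/NMI code that cannot do the work itself. */
static void demo_kick(void)
{
	irq_work_queue(&demo_lazy);	/* PREEMPT_RT: lazy_list, wakes the thread */
	irq_work_queue(&demo_hard);	/* always: raised_list via arch_irq_work_raise() */
}
```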
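The remote path keeps the same caller-visible API: on PREEMPT_RT, irq_work_queue_on() adds a non-HARD item to the target CPU's lazy_list and claims that CPU's IRQ_WORK_HARD_IRQ-marked irq_work_wakeup item, so the IPI only has to wake irq_work/N. A hypothetical caller (names invented) looks unchanged:

```c
/*
 * Illustrative sketch only -- not part of the patch; the demo_* names are
 * hypothetical. irq_work_queue_on() is the existing API; the routing to the
 * remote lazy_list and irq_work/N thread happens inside the patched code.
 */
#include <linux/irq_work.h>
#include <linux/printk.h>

/* On PREEMPT_RT this runs in the target CPU's irq_work/N kthread;
 * on !PREEMPT_RT it runs from the IPI in hardirq context. */
static void demo_remote_cb(struct irq_work *work)
{
	pr_debug("demo: remote irq_work executed\n");
}

static struct irq_work demo_remote = IRQ_WORK_INIT(demo_remote_cb);

static void demo_kick_cpu(int cpu)
{
	/* Returns false if the item was already claimed and is still pending. */
	if (!irq_work_queue_on(&demo_remote, cpu))
		pr_debug("demo: irq_work already pending\n");
}
```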