Diffstat (limited to 'debian/patches-rt/0205-irqwork-push-most-work-into-softirq-context.patch')
-rw-r--r-- | debian/patches-rt/0205-irqwork-push-most-work-into-softirq-context.patch | 264
1 file changed, 264 insertions, 0 deletions
diff --git a/debian/patches-rt/0205-irqwork-push-most-work-into-softirq-context.patch b/debian/patches-rt/0205-irqwork-push-most-work-into-softirq-context.patch
new file mode 100644
index 000000000..964c52d2e
--- /dev/null
+++ b/debian/patches-rt/0205-irqwork-push-most-work-into-softirq-context.patch
@@ -0,0 +1,264 @@
+From c0405a2b67942dd7f8564a25fa5580c8f2774765 Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Tue, 23 Jun 2015 15:32:51 +0200
+Subject: [PATCH 205/347] irqwork: push most work into softirq context
+Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.19/older/patches-4.19.246-rt110.tar.xz
+
+Initially we deferred all irqwork into softirq because we didn't want the
+latency spikes if perf or another user was busy and delayed the RT task.
+The NOHZ trigger (nohz_full_kick_work) was the first user that did not work
+as expected if it did not run in the original irqwork context, so we had to
+bring it back somehow for it. push_irq_work_func is the second one that
+requires this.
+
+This patch adds the IRQ_WORK_HARD_IRQ flag, which makes sure the callback
+runs in raw-irq context. Everything else is deferred into softirq context.
+Without -RT we have the original behavior.
+
+This patch incorporates tglx's original work, reworked a little to bring back
+arch_irq_work_raise() where possible, and a few fixes from Steven Rostedt and
+Mike Galbraith.
+
+[bigeasy: melt tglx's irq_work_tick_soft() which splits irq_work_tick() into a
+ hard and soft variant]
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ include/linux/irq_work.h |  8 +++++
+ kernel/irq_work.c        | 75 ++++++++++++++++++++++++++++++----------
+ kernel/rcu/tree.c        |  1 +
+ kernel/sched/topology.c  |  1 +
+ kernel/time/tick-sched.c |  1 +
+ kernel/time/timer.c      |  2 ++
+ 6 files changed, 70 insertions(+), 18 deletions(-)
+
+diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h
+index b11fcdfd0770..0c50559987c5 100644
+--- a/include/linux/irq_work.h
++++ b/include/linux/irq_work.h
+@@ -18,6 +18,8 @@
+ 
+ /* Doesn't want IPI, wait for tick: */
+ #define IRQ_WORK_LAZY		BIT(2)
++/* Run hard IRQ context, even on RT */
++#define IRQ_WORK_HARD_IRQ	BIT(3)
+ 
+ #define IRQ_WORK_CLAIMED	(IRQ_WORK_PENDING | IRQ_WORK_BUSY)
+ 
+@@ -52,4 +54,10 @@ static inline bool irq_work_needs_cpu(void) { return false; }
+ static inline void irq_work_run(void) { }
+ #endif
+ 
++#if defined(CONFIG_IRQ_WORK) && defined(CONFIG_PREEMPT_RT_FULL)
++void irq_work_tick_soft(void);
++#else
++static inline void irq_work_tick_soft(void) { }
++#endif
++
+ #endif /* _LINUX_IRQ_WORK_H */
+diff --git a/kernel/irq_work.c b/kernel/irq_work.c
+index 73288914ed5e..2940622da5b3 100644
+--- a/kernel/irq_work.c
++++ b/kernel/irq_work.c
+@@ -17,6 +17,7 @@
+ #include <linux/cpu.h>
+ #include <linux/notifier.h>
+ #include <linux/smp.h>
++#include <linux/interrupt.h>
+ #include <asm/processor.h>
+ 
+ 
+@@ -57,29 +58,35 @@ void __weak arch_irq_work_raise(void)
+ }
+ 
+ /* Enqueue on current CPU, work must already be claimed and preempt disabled */
+-static void __irq_work_queue_local(struct irq_work *work)
++static void __irq_work_queue_local(struct irq_work *work, struct llist_head *list)
+ {
+-	/* If the work is "lazy", handle it from next tick if any */
+-	if (work->flags & IRQ_WORK_LAZY) {
+-		if (llist_add(&work->llnode, this_cpu_ptr(&lazy_list)) &&
+-		    tick_nohz_tick_stopped())
+-			arch_irq_work_raise();
+-	} else {
+-		if (llist_add(&work->llnode, this_cpu_ptr(&raised_list)))
+-			arch_irq_work_raise();
+-	}
++	bool empty;
++
++	empty = llist_add(&work->llnode, list);
++
++	if (empty &&
++	    (!(work->flags & IRQ_WORK_LAZY) ||
++	     tick_nohz_tick_stopped()))
++		arch_irq_work_raise();
+ }
+ 
+ /* Enqueue the irq work @work on the current CPU */
+ bool irq_work_queue(struct irq_work *work)
+ {
++	struct llist_head *list;
++
+ 	/* Only queue if not already pending */
+ 	if (!irq_work_claim(work))
+ 		return false;
+ 
+ 	/* Queue the entry and raise the IPI if needed. */
+ 	preempt_disable();
+-	__irq_work_queue_local(work);
++	if (IS_ENABLED(CONFIG_PREEMPT_RT_FULL) && !(work->flags & IRQ_WORK_HARD_IRQ))
++		list = this_cpu_ptr(&lazy_list);
++	else
++		list = this_cpu_ptr(&raised_list);
++
++	__irq_work_queue_local(work, list);
+ 	preempt_enable();
+ 
+ 	return true;
+@@ -98,6 +105,9 @@ bool irq_work_queue_on(struct irq_work *work, int cpu)
+ 	return irq_work_queue(work);
+ 
+ #else /* CONFIG_SMP: */
++	struct llist_head *list;
++	bool lazy_work, realtime = IS_ENABLED(CONFIG_PREEMPT_RT_FULL);
++
+ 	/* All work should have been flushed before going offline */
+ 	WARN_ON_ONCE(cpu_is_offline(cpu));
+ 
+@@ -106,13 +116,21 @@ bool irq_work_queue_on(struct irq_work *work, int cpu)
+ 		return false;
+ 
+ 	preempt_disable();
++
++	lazy_work = work->flags & IRQ_WORK_LAZY;
++
++	if (lazy_work || (realtime && !(work->flags & IRQ_WORK_HARD_IRQ)))
++		list = &per_cpu(lazy_list, cpu);
++	else
++		list = &per_cpu(raised_list, cpu);
++
+ 	if (cpu != smp_processor_id()) {
+ 		/* Arch remote IPI send/receive backend aren't NMI safe */
+ 		WARN_ON_ONCE(in_nmi());
+-		if (llist_add(&work->llnode, &per_cpu(raised_list, cpu)))
++		if (llist_add(&work->llnode, list))
+ 			arch_send_call_function_single_ipi(cpu);
+ 	} else {
+-		__irq_work_queue_local(work);
++		__irq_work_queue_local(work, list);
+ 	}
+ 	preempt_enable();
+ 
+@@ -128,9 +146,8 @@ bool irq_work_needs_cpu(void)
+ 	raised = this_cpu_ptr(&raised_list);
+ 	lazy = this_cpu_ptr(&lazy_list);
+ 
+-	if (llist_empty(raised) || arch_irq_work_has_interrupt())
+-		if (llist_empty(lazy))
+-			return false;
++	if (llist_empty(raised) && llist_empty(lazy))
++		return false;
+ 
+ 	/* All work should have been flushed before going offline */
+ 	WARN_ON_ONCE(cpu_is_offline(smp_processor_id()));
+@@ -144,8 +161,12 @@ static void irq_work_run_list(struct llist_head *list)
+ 	struct llist_node *llnode;
+ 	unsigned long flags;
+ 
++#ifndef CONFIG_PREEMPT_RT_FULL
++	/*
++	 * nort: On RT IRQ-work may run in SOFTIRQ context.
++	 */
+ 	BUG_ON(!irqs_disabled());
+-
++#endif
+ 	if (llist_empty(list))
+ 		return;
+ 
+@@ -177,7 +198,16 @@ static void irq_work_run_list(struct llist_head *list)
+ void irq_work_run(void)
+ {
+ 	irq_work_run_list(this_cpu_ptr(&raised_list));
+-	irq_work_run_list(this_cpu_ptr(&lazy_list));
++	if (IS_ENABLED(CONFIG_PREEMPT_RT_FULL)) {
++		/*
++		 * NOTE: we raise softirq via IPI for safety,
++		 * and execute in irq_work_tick() to move the
++		 * overhead from hard to soft irq context.
++		 */
++		if (!llist_empty(this_cpu_ptr(&lazy_list)))
++			raise_softirq(TIMER_SOFTIRQ);
++	} else
++		irq_work_run_list(this_cpu_ptr(&lazy_list));
+ }
+ EXPORT_SYMBOL_GPL(irq_work_run);
+ 
+@@ -187,8 +217,17 @@ void irq_work_tick(void)
+ 
+ 	if (!llist_empty(raised) && !arch_irq_work_has_interrupt())
+ 		irq_work_run_list(raised);
++
++	if (!IS_ENABLED(CONFIG_PREEMPT_RT_FULL))
++		irq_work_run_list(this_cpu_ptr(&lazy_list));
++}
++
++#if defined(CONFIG_IRQ_WORK) && defined(CONFIG_PREEMPT_RT_FULL)
++void irq_work_tick_soft(void)
++{
+ 	irq_work_run_list(this_cpu_ptr(&lazy_list));
+ }
++#endif
+ 
+ /*
+  * Synchronize against the irq_work @entry, ensures the entry is not
+diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
+index f162a4f54b05..278fe66bfb70 100644
+--- a/kernel/rcu/tree.c
++++ b/kernel/rcu/tree.c
+@@ -1296,6 +1296,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
+ 	    !rdp->rcu_iw_pending && rdp->rcu_iw_gp_seq != rnp->gp_seq &&
+ 	    (rnp->ffmask & rdp->grpmask)) {
+ 		init_irq_work(&rdp->rcu_iw, rcu_iw_handler);
++		rdp->rcu_iw.flags = IRQ_WORK_HARD_IRQ;
+ 		rdp->rcu_iw_pending = true;
+ 		rdp->rcu_iw_gp_seq = rnp->gp_seq;
+ 		irq_work_queue_on(&rdp->rcu_iw, rdp->cpu);
+diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
+index 02e85cd233d4..208af1181bf5 100644
+--- a/kernel/sched/topology.c
++++ b/kernel/sched/topology.c
+@@ -279,6 +279,7 @@ static int init_rootdomain(struct root_domain *rd)
+ 	rd->rto_cpu = -1;
+ 	raw_spin_lock_init(&rd->rto_lock);
+ 	init_irq_work(&rd->rto_push_work, rto_push_irq_work_func);
++	rd->rto_push_work.flags |= IRQ_WORK_HARD_IRQ;
+ #endif
+ 
+ 	init_dl_bw(&rd->dl_bw);
+diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
+index 2b0ddd50e879..4d31ec98e968 100644
+--- a/kernel/time/tick-sched.c
++++ b/kernel/time/tick-sched.c
+@@ -236,6 +236,7 @@ static void nohz_full_kick_func(struct irq_work *work)
+ 
+ static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = {
+ 	.func = nohz_full_kick_func,
++	.flags = IRQ_WORK_HARD_IRQ,
+ };
+ 
+ /*
+diff --git a/kernel/time/timer.c b/kernel/time/timer.c
+index 3cb79167852f..c7bd68db6f63 100644
+--- a/kernel/time/timer.c
++++ b/kernel/time/timer.c
+@@ -1747,6 +1747,8 @@ static __latent_entropy void run_timer_softirq(struct softirq_action *h)
+ {
+ 	struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
+ 
++	irq_work_tick_soft();
++
+ 	__run_timers(base);
+ 	if (IS_ENABLED(CONFIG_NO_HZ_COMMON))
+ 		__run_timers(this_cpu_ptr(&timer_bases[BASE_DEF]));
+-- 
+2.36.1
+
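
For context, the sketch below shows how a kernel-side caller would opt one of its irq_work items out of the softirq deferral this patch introduces, mirroring the kernel/sched/topology.c and kernel/time/tick-sched.c hunks above. It is a minimal illustration, not part of the patch: the example_* identifiers are hypothetical, while init_irq_work(), irq_work_queue() and IRQ_WORK_HARD_IRQ are the interfaces the patch touches.

#include <linux/irq_work.h>

/*
 * Hypothetical callback: with IRQ_WORK_HARD_IRQ set it is invoked in
 * hard interrupt context even on PREEMPT_RT_FULL.
 */
static void example_hardirq_cb(struct irq_work *work)
{
}

static struct irq_work example_work;

static void example_queue_hard(void)
{
	init_irq_work(&example_work, example_hardirq_cb);
	/*
	 * Without this flag, an RT kernel would defer the callback to the
	 * timer softirq, where irq_work_tick_soft() runs the lazy list.
	 */
	example_work.flags |= IRQ_WORK_HARD_IRQ;
	irq_work_queue(&example_work);
}

Work items that do not set the flag keep the upstream behaviour on non-RT kernels and, on RT, are executed from irq_work_tick_soft() in the TIMER_SOFTIRQ handler.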