diff options
Diffstat (limited to 'debian/patches-rt/0058-mm-page_alloc-Use-write_seqlock_irqsave-instead-writ.patch')
-rw-r--r-- | debian/patches-rt/0058-mm-page_alloc-Use-write_seqlock_irqsave-instead-writ.patch | 97 |
1 files changed, 97 insertions, 0 deletions
diff --git a/debian/patches-rt/0058-mm-page_alloc-Use-write_seqlock_irqsave-instead-writ.patch b/debian/patches-rt/0058-mm-page_alloc-Use-write_seqlock_irqsave-instead-writ.patch new file mode 100644 index 000000000..68d9f5008 --- /dev/null +++ b/debian/patches-rt/0058-mm-page_alloc-Use-write_seqlock_irqsave-instead-writ.patch @@ -0,0 +1,97 @@ +From 9512a9467dec62e03f2df4f15af9a38332b8de58 Mon Sep 17 00:00:00 2001 +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Fri, 23 Jun 2023 22:15:17 +0200 +Subject: [PATCH 58/62] mm/page_alloc: Use write_seqlock_irqsave() instead + write_seqlock() + local_irq_save(). +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.1/older/patches-6.1.69-rt21.tar.xz + +__build_all_zonelists() acquires zonelist_update_seq by first disabling +interrupts via local_irq_save() and then acquiring the seqlock with +write_seqlock(). This is troublesome and leads to problems on +PREEMPT_RT. The problem is that the inner spinlock_t becomes a sleeping +lock on PREEMPT_RT and must not be acquired with disabled interrupts. + +The API provides write_seqlock_irqsave() which does the right thing in +one step. +printk_deferred_enter() has to be invoked in non-migrate-able context to +ensure that deferred printing is enabled and disabled on the same CPU. +This is the case after zonelist_update_seq has been acquired. + +There was discussion on the first submission that the order should be: + local_irq_disable(); + printk_deferred_enter(); + write_seqlock(); + +to avoid pitfalls like having an unaccounted printk() coming from +write_seqlock_irqsave() before printk_deferred_enter() is invoked. The +only origin of such a printk() can be a lockdep splat because the +lockdep annotation happens after the sequence count is incremented. +This is exceptional and subject to change. + +It was also pointed out that PREEMPT_RT can be affected by the printk +problem since its write_seqlock_irqsave() does not really disable +interrupts. 
This isn't the case because PREEMPT_RT's printk +implementation differs from the mainline implementation in two important +aspects: +- Printing happens in dedicated threads and not during the + invocation of printk(). +- In emergency cases where synchronous printing is used, a different + driver is used which does not use tty_port::lock. + +Acquire zonelist_update_seq with write_seqlock_irqsave() and then defer +printk output. + +Fixes: 1007843a91909 ("mm/page_alloc: fix potential deadlock on zonelist_update_seq seqlock") +Acked-by: Michal Hocko <mhocko@suse.com> +Reviewed-by: David Hildenbrand <david@redhat.com> +Link: https://lore.kernel.org/r/20230623201517.yw286Knb@linutronix.de +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +(cherry picked from commit 4d1139baae8bc4fff3728d1d204bdb04c13dbe10) +Signed-off-by: Clark Williams <clark.williams@gmail.com> +--- + mm/page_alloc.c | 15 ++++++--------- + 1 file changed, 6 insertions(+), 9 deletions(-) + +diff --git a/mm/page_alloc.c b/mm/page_alloc.c +index 4583f8a42d91..835b69a64f4f 100644 +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -6588,19 +6588,17 @@ static void __build_all_zonelists(void *data) + unsigned long flags; + + /* +- * Explicitly disable this CPU's interrupts before taking seqlock +- * to prevent any IRQ handler from calling into the page allocator +- * (e.g. GFP_ATOMIC) that could hit zonelist_iter_begin and livelock. ++ * The zonelist_update_seq must be acquired with irqsave because the ++ * reader can be invoked from IRQ with GFP_ATOMIC. 
+ */ +- local_irq_save(flags); ++ write_seqlock_irqsave(&zonelist_update_seq, flags); + /* +- * Explicitly disable this CPU's synchronous printk() before taking +- * seqlock to prevent any printk() from trying to hold port->lock, for ++ * Also disable synchronous printk() to prevent any printk() from ++ * trying to hold port->lock, for + * tty_insert_flip_string_and_push_buffer() on other CPU might be + * calling kmalloc(GFP_ATOMIC | __GFP_NOWARN) with port->lock held. + */ + printk_deferred_enter(); +- write_seqlock(&zonelist_update_seq); + + #ifdef CONFIG_NUMA + memset(node_load, 0, sizeof(node_load)); +@@ -6637,9 +6635,8 @@ static void __build_all_zonelists(void *data) + #endif + } + +- write_sequnlock(&zonelist_update_seq); + printk_deferred_exit(); +- local_irq_restore(flags); ++ write_sequnlock_irqrestore(&zonelist_update_seq, flags); + } + + static noinline void __init +-- +2.43.0 + |