summaryrefslogtreecommitdiffstats
path: root/debian/patches-rt/0058-mm-page_alloc-Use-write_seqlock_irqsave-instead-writ.patch
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--debian/patches-rt/0058-mm-page_alloc-Use-write_seqlock_irqsave-instead-writ.patch97
1 files changed, 97 insertions, 0 deletions
diff --git a/debian/patches-rt/0058-mm-page_alloc-Use-write_seqlock_irqsave-instead-writ.patch b/debian/patches-rt/0058-mm-page_alloc-Use-write_seqlock_irqsave-instead-writ.patch
new file mode 100644
index 000000000..68d9f5008
--- /dev/null
+++ b/debian/patches-rt/0058-mm-page_alloc-Use-write_seqlock_irqsave-instead-writ.patch
@@ -0,0 +1,97 @@
+From 9512a9467dec62e03f2df4f15af9a38332b8de58 Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Fri, 23 Jun 2023 22:15:17 +0200
+Subject: [PATCH 58/62] mm/page_alloc: Use write_seqlock_irqsave() instead
+ write_seqlock() + local_irq_save().
+Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.1/older/patches-6.1.69-rt21.tar.xz
+
+__build_all_zonelists() acquires zonelist_update_seq by first disabling
+interrupts via local_irq_save() and then acquiring the seqlock with
+write_seqlock(). This is troublesome and leads to problems on
+PREEMPT_RT. The problem is that the inner spinlock_t becomes a sleeping
+lock on PREEMPT_RT and must not be acquired with disabled interrupts.
+
+The API provides write_seqlock_irqsave() which does the right thing in
+one step.
+printk_deferred_enter() has to be invoked in non-migrate-able context to
+ensure that deferred printing is enabled and disabled on the same CPU.
+This is the case after zonelist_update_seq has been acquired.
+
+There was discussion on the first submission that the order should be:
+ local_irq_disable();
+ printk_deferred_enter();
+ write_seqlock();
+
+to avoid pitfalls like having an unaccounted printk() coming from
+write_seqlock_irqsave() before printk_deferred_enter() is invoked. The
+only origin of such a printk() can be a lockdep splat because the
+lockdep annotation happens after the sequence count is incremented.
+This is exceptional and subject to change.
+
+It was also pointed that PREEMPT_RT can be affected by the printk
+problem since its write_seqlock_irqsave() does not really disable
+interrupts. This isn't the case because PREEMPT_RT's printk
+implementation differs from the mainline implementation in two important
+aspects:
+- Printing happens in a dedicated threads and not at during the
+ invocation of printk().
+- In emergency cases where synchronous printing is used, a different
+ driver is used which does not use tty_port::lock.
+
+Acquire zonelist_update_seq with write_seqlock_irqsave() and then defer
+printk output.
+
+Fixes: 1007843a91909 ("mm/page_alloc: fix potential deadlock on zonelist_update_seq seqlock")
+Acked-by: Michal Hocko <mhocko@suse.com>
+Reviewed-by: David Hildenbrand <david@redhat.com>
+Link: https://lore.kernel.org/r/20230623201517.yw286Knb@linutronix.de
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+(cherry picked from commit 4d1139baae8bc4fff3728d1d204bdb04c13dbe10)
+Signed-off-by: Clark Williams <clark.williams@gmail.com>
+---
+ mm/page_alloc.c | 15 ++++++---------
+ 1 file changed, 6 insertions(+), 9 deletions(-)
+
+diff --git a/mm/page_alloc.c b/mm/page_alloc.c
+index 4583f8a42d91..835b69a64f4f 100644
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -6588,19 +6588,17 @@ static void __build_all_zonelists(void *data)
+ unsigned long flags;
+
+ /*
+- * Explicitly disable this CPU's interrupts before taking seqlock
+- * to prevent any IRQ handler from calling into the page allocator
+- * (e.g. GFP_ATOMIC) that could hit zonelist_iter_begin and livelock.
++ * The zonelist_update_seq must be acquired with irqsave because the
++ * reader can be invoked from IRQ with GFP_ATOMIC.
+ */
+- local_irq_save(flags);
++ write_seqlock_irqsave(&zonelist_update_seq, flags);
+ /*
+- * Explicitly disable this CPU's synchronous printk() before taking
+- * seqlock to prevent any printk() from trying to hold port->lock, for
++ * Also disable synchronous printk() to prevent any printk() from
++ * trying to hold port->lock, for
+ * tty_insert_flip_string_and_push_buffer() on other CPU might be
+ * calling kmalloc(GFP_ATOMIC | __GFP_NOWARN) with port->lock held.
+ */
+ printk_deferred_enter();
+- write_seqlock(&zonelist_update_seq);
+
+ #ifdef CONFIG_NUMA
+ memset(node_load, 0, sizeof(node_load));
+@@ -6637,9 +6635,8 @@ static void __build_all_zonelists(void *data)
+ #endif
+ }
+
+- write_sequnlock(&zonelist_update_seq);
+ printk_deferred_exit();
+- local_irq_restore(flags);
++ write_sequnlock_irqrestore(&zonelist_update_seq, flags);
+ }
+
+ static noinline void __init
+--
+2.43.0
+