Diffstat (limited to 'debian/patches-rt/0047-sched-highmem-Store-local-kmaps-in-task-struct.patch')
-rw-r--r--  debian/patches-rt/0047-sched-highmem-Store-local-kmaps-in-task-struct.patch | 309
1 file changed, 309 insertions(+), 0 deletions(-)
diff --git a/debian/patches-rt/0047-sched-highmem-Store-local-kmaps-in-task-struct.patch b/debian/patches-rt/0047-sched-highmem-Store-local-kmaps-in-task-struct.patch
new file mode 100644
index 000000000..ecd08de62
--- /dev/null
+++ b/debian/patches-rt/0047-sched-highmem-Store-local-kmaps-in-task-struct.patch
@@ -0,0 +1,309 @@
+From 7aa261f53a405c0862f4857b46ef3344bbe6385f Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Tue, 3 Nov 2020 10:27:36 +0100
+Subject: [PATCH 047/323] sched: highmem: Store local kmaps in task struct
+Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.10/older/patches-5.10.204-rt100.tar.xz
+
+Instead of storing the map per CPU, provide and use per-task storage. That
+prepares for local kmaps which are preemptible.
+
+The context switch code is preparatory and not yet in use because
+kmap_atomic() runs with preemption disabled. It will be made usable in the
+next step.
+
+The context switch logic is safe even when an interrupt happens after
+clearing or before restoring the kmaps. The kmap index in task struct is
+not modified, so any nested kmap in an interrupt will use unused indices,
+and on return the counter is the same as before.
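+
+A minimal sketch of that invariant (illustrative only; non-debug case where
+KM_INCR is 1, and using the preemptible kmap_local_page()/kunmap_local()
+interface this series introduces):
+
+	kaddr = kmap_local_page(page);	  /* task: idx 0 -> 1, uses slot 0 */
+	  --- interrupt entry ---
+	  iaddr = kmap_local_page(ipage); /* irq:  idx 1 -> 2, uses slot 1 */
+	  kunmap_local(iaddr);		  /* irq:  idx 2 -> 1 */
+	  --- interrupt exit ---
+	kunmap_local(kaddr);		  /* task: idx 1 -> 0, same as before */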
+
+Also add an assert into the return-to-user-space code. Going back to user
+space with an active kmap_local mapping is a no-no.
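+
+For illustration only (not part of this patch), a buggy pattern that the
+assert would catch on the way back to user space, again assuming the
+kmap_local_page()/kunmap_local() helpers:
+
+	static long copy_page_to_user(void __user *ubuf, struct page *page,
+				      size_t len)
+	{
+		void *kaddr = kmap_local_page(page);
+
+		/* Error path forgets to unmap -> kmap_assert_nomap() triggers */
+		if (copy_to_user(ubuf, kaddr, len))
+			return -EFAULT;
+
+		kunmap_local(kaddr);
+		return 0;
+	}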
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ include/linux/highmem-internal.h | 10 ++++
+ include/linux/sched.h | 9 +++
+ kernel/entry/common.c | 2 +
+ kernel/fork.c | 1 +
+ kernel/sched/core.c | 18 ++++++
+ mm/highmem.c | 99 ++++++++++++++++++++++++++++----
+ 6 files changed, 129 insertions(+), 10 deletions(-)
+
+diff --git a/include/linux/highmem-internal.h b/include/linux/highmem-internal.h
+index 6ceed907b14e..c5a22177db85 100644
+--- a/include/linux/highmem-internal.h
++++ b/include/linux/highmem-internal.h
+@@ -9,6 +9,16 @@
+ void *__kmap_local_pfn_prot(unsigned long pfn, pgprot_t prot);
+ void *__kmap_local_page_prot(struct page *page, pgprot_t prot);
+ void kunmap_local_indexed(void *vaddr);
++void kmap_local_fork(struct task_struct *tsk);
++void __kmap_local_sched_out(void);
++void __kmap_local_sched_in(void);
++static inline void kmap_assert_nomap(void)
++{
++ DEBUG_LOCKS_WARN_ON(current->kmap_ctrl.idx);
++}
++#else
++static inline void kmap_local_fork(struct task_struct *tsk) { }
++static inline void kmap_assert_nomap(void) { }
+ #endif
+
+ #ifdef CONFIG_HIGHMEM
+diff --git a/include/linux/sched.h b/include/linux/sched.h
+index bff48e9f32db..82de1ab42497 100644
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -34,6 +34,7 @@
+ #include <linux/rseq.h>
+ #include <linux/seqlock.h>
+ #include <linux/kcsan.h>
++#include <asm/kmap_size.h>
+
+ /* task_struct member predeclarations (sorted alphabetically): */
+ struct audit_context;
+@@ -641,6 +642,13 @@ struct wake_q_node {
+ struct wake_q_node *next;
+ };
+
++struct kmap_ctrl {
++#ifdef CONFIG_KMAP_LOCAL
++ int idx;
++ pte_t pteval[KM_MAX_IDX];
++#endif
++};
++
+ struct task_struct {
+ #ifdef CONFIG_THREAD_INFO_IN_TASK
+ /*
+@@ -1323,6 +1331,7 @@ struct task_struct {
+ unsigned int sequential_io;
+ unsigned int sequential_io_avg;
+ #endif
++ struct kmap_ctrl kmap_ctrl;
+ #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
+ unsigned long task_state_change;
+ #endif
+diff --git a/kernel/entry/common.c b/kernel/entry/common.c
+index 09f58853f692..e6a66de1202a 100644
+--- a/kernel/entry/common.c
++++ b/kernel/entry/common.c
+@@ -2,6 +2,7 @@
+
+ #include <linux/context_tracking.h>
+ #include <linux/entry-common.h>
++#include <linux/highmem.h>
+ #include <linux/livepatch.h>
+ #include <linux/audit.h>
+
+@@ -202,6 +203,7 @@ static void exit_to_user_mode_prepare(struct pt_regs *regs)
+
+ /* Ensure that the address limit is intact and no locks are held */
+ addr_limit_user_check();
++ kmap_assert_nomap();
+ lockdep_assert_irqs_disabled();
+ lockdep_sys_exit();
+ }
+diff --git a/kernel/fork.c b/kernel/fork.c
+index 633b0af1d1a7..32b9d7205ac1 100644
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -942,6 +942,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
+ account_kernel_stack(tsk, 1);
+
+ kcov_task_init(tsk);
++ kmap_local_fork(tsk);
+
+ #ifdef CONFIG_FAULT_INJECTION
+ tsk->fail_nth = 0;
+diff --git a/kernel/sched/core.c b/kernel/sched/core.c
+index 8a6135a0b2ee..390b51366f5e 100644
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -4092,6 +4092,22 @@ static inline void finish_lock_switch(struct rq *rq)
+ # define finish_arch_post_lock_switch() do { } while (0)
+ #endif
+
++static inline void kmap_local_sched_out(void)
++{
++#ifdef CONFIG_KMAP_LOCAL
++ if (unlikely(current->kmap_ctrl.idx))
++ __kmap_local_sched_out();
++#endif
++}
++
++static inline void kmap_local_sched_in(void)
++{
++#ifdef CONFIG_KMAP_LOCAL
++ if (unlikely(current->kmap_ctrl.idx))
++ __kmap_local_sched_in();
++#endif
++}
++
+ /**
+ * prepare_task_switch - prepare to switch tasks
+ * @rq: the runqueue preparing to switch
+@@ -4114,6 +4130,7 @@ prepare_task_switch(struct rq *rq, struct task_struct *prev,
+ perf_event_task_sched_out(prev, next);
+ rseq_preempt(prev);
+ fire_sched_out_preempt_notifiers(prev, next);
++ kmap_local_sched_out();
+ prepare_task(next);
+ prepare_arch_switch(next);
+ }
+@@ -4180,6 +4197,7 @@ static struct rq *finish_task_switch(struct task_struct *prev)
+ finish_lock_switch(rq);
+ finish_arch_post_lock_switch();
+ kcov_finish_switch(current);
++ kmap_local_sched_in();
+
+ fire_sched_in_preempt_notifiers(current);
+ /*
+diff --git a/mm/highmem.c b/mm/highmem.c
+index 54bd233846c9..d7a1c80001d0 100644
+--- a/mm/highmem.c
++++ b/mm/highmem.c
+@@ -365,8 +365,6 @@ EXPORT_SYMBOL(kunmap_high);
+
+ #include <asm/kmap_size.h>
+
+-static DEFINE_PER_CPU(int, __kmap_local_idx);
+-
+ /*
+ * With DEBUG_HIGHMEM the stack depth is doubled and every second
+ * slot is unused which acts as a guard page
+@@ -379,23 +377,21 @@ static DEFINE_PER_CPU(int, __kmap_local_idx);
+
+ static inline int kmap_local_idx_push(void)
+ {
+- int idx = __this_cpu_add_return(__kmap_local_idx, KM_INCR) - 1;
+-
+ WARN_ON_ONCE(in_irq() && !irqs_disabled());
+- BUG_ON(idx >= KM_MAX_IDX);
+- return idx;
++ current->kmap_ctrl.idx += KM_INCR;
++ BUG_ON(current->kmap_ctrl.idx >= KM_MAX_IDX);
++ return current->kmap_ctrl.idx - 1;
+ }
+
+ static inline int kmap_local_idx(void)
+ {
+- return __this_cpu_read(__kmap_local_idx) - 1;
++ return current->kmap_ctrl.idx - 1;
+ }
+
+ static inline void kmap_local_idx_pop(void)
+ {
+- int idx = __this_cpu_sub_return(__kmap_local_idx, KM_INCR);
+-
+- BUG_ON(idx < 0);
++ current->kmap_ctrl.idx -= KM_INCR;
++ BUG_ON(current->kmap_ctrl.idx < 0);
+ }
+
+ #ifndef arch_kmap_local_post_map
+@@ -461,6 +457,7 @@ void *__kmap_local_pfn_prot(unsigned long pfn, pgprot_t prot)
+ pteval = pfn_pte(pfn, prot);
+ set_pte_at(&init_mm, vaddr, kmap_pte - idx, pteval);
+ arch_kmap_local_post_map(vaddr, pteval);
++ current->kmap_ctrl.pteval[kmap_local_idx()] = pteval;
+ preempt_enable();
+
+ return (void *)vaddr;
+@@ -505,10 +502,92 @@ void kunmap_local_indexed(void *vaddr)
+ arch_kmap_local_pre_unmap(addr);
+ pte_clear(&init_mm, addr, kmap_pte - idx);
+ arch_kmap_local_post_unmap(addr);
++ current->kmap_ctrl.pteval[kmap_local_idx()] = __pte(0);
+ kmap_local_idx_pop();
+ preempt_enable();
+ }
+ EXPORT_SYMBOL(kunmap_local_indexed);
++
++/*
++ * Invoked before switch_to(). This is safe even when during or after
++ * clearing the maps an interrupt which needs a kmap_local happens because
++ * the task::kmap_ctrl.idx is not modified by the unmapping code so a
++ * nested kmap_local will use the next unused index and restore the index
++ * on unmap. The already cleared kmaps of the outgoing task are irrelevant
++ * because the interrupt context does not know about them. The same applies
++ * when scheduling back in for an interrupt which happens before the
++ * restore is complete.
++ */
++void __kmap_local_sched_out(void)
++{
++ struct task_struct *tsk = current;
++ pte_t *kmap_pte = kmap_get_pte();
++ int i;
++
++ /* Clear kmaps */
++ for (i = 0; i < tsk->kmap_ctrl.idx; i++) {
++ pte_t pteval = tsk->kmap_ctrl.pteval[i];
++ unsigned long addr;
++ int idx;
++
++ /* With debug all even slots are unmapped and act as guard */
++ if (IS_ENABLED(CONFIG_DEBUG_HIGHMEM) && !(i & 0x01)) {
++ WARN_ON_ONCE(!pte_none(pteval));
++ continue;
++ }
++ if (WARN_ON_ONCE(pte_none(pteval)))
++ continue;
++
++ /*
++ * This is a horrible hack for XTENSA to calculate the
++ * coloured PTE index. Uses the PFN encoded into the pteval
++ * and the map index calculation because the actual mapped
++ * virtual address is not stored in task::kmap_ctrl.
++ * For any sane architecture this is optimized out.
++ */
++ idx = arch_kmap_local_map_idx(i, pte_pfn(pteval));
++
++ addr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
++ arch_kmap_local_pre_unmap(addr);
++ pte_clear(&init_mm, addr, kmap_pte - idx);
++ arch_kmap_local_post_unmap(addr);
++ }
++}
++
++void __kmap_local_sched_in(void)
++{
++ struct task_struct *tsk = current;
++ pte_t *kmap_pte = kmap_get_pte();
++ int i;
++
++ /* Restore kmaps */
++ for (i = 0; i < tsk->kmap_ctrl.idx; i++) {
++ pte_t pteval = tsk->kmap_ctrl.pteval[i];
++ unsigned long addr;
++ int idx;
++
++ /* With debug all even slots are unmapped and act as guard */
++ if (IS_ENABLED(CONFIG_DEBUG_HIGHMEM) && !(i & 0x01)) {
++ WARN_ON_ONCE(!pte_none(pteval));
++ continue;
++ }
++ if (WARN_ON_ONCE(pte_none(pteval)))
++ continue;
++
++ /* See comment in __kmap_local_sched_out() */
++ idx = arch_kmap_local_map_idx(i, pte_pfn(pteval));
++ addr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
++ set_pte_at(&init_mm, addr, kmap_pte - idx, pteval);
++ arch_kmap_local_post_map(addr, pteval);
++ }
++}
++
++void kmap_local_fork(struct task_struct *tsk)
++{
++ if (WARN_ON_ONCE(tsk->kmap_ctrl.idx))
++ memset(&tsk->kmap_ctrl, 0, sizeof(tsk->kmap_ctrl));
++}
++
+ #endif
+
+ #if defined(HASHED_PAGE_VIRTUAL)
+--
+2.43.0
+