Diffstat (limited to 'debian/patches-rt/0047-sched-highmem-Store-local-kmaps-in-task-struct.patch')
-rw-r--r-- | debian/patches-rt/0047-sched-highmem-Store-local-kmaps-in-task-struct.patch | 309
1 files changed, 309 insertions, 0 deletions
diff --git a/debian/patches-rt/0047-sched-highmem-Store-local-kmaps-in-task-struct.patch b/debian/patches-rt/0047-sched-highmem-Store-local-kmaps-in-task-struct.patch
new file mode 100644
index 000000000..ecd08de62
--- /dev/null
+++ b/debian/patches-rt/0047-sched-highmem-Store-local-kmaps-in-task-struct.patch
@@ -0,0 +1,309 @@
+From 7aa261f53a405c0862f4857b46ef3344bbe6385f Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Tue, 3 Nov 2020 10:27:36 +0100
+Subject: [PATCH 047/323] sched: highmem: Store local kmaps in task struct
+Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.10/older/patches-5.10.204-rt100.tar.xz
+
+Instead of storing the map per CPU provide and use per task storage. That
+prepares for local kmaps which are preemptible.
+
+The context switch code is preparatory and not yet in use because
+kmap_atomic() runs with preemption disabled. Will be made usable in the
+next step.
+
+The context switch logic is safe even when an interrupt happens after
+clearing or before restoring the kmaps. The kmap index in task struct is
+not modified so any nesting kmap in an interrupt will use unused indices
+and on return the counter is the same as before.
+
+Also add an assert into the return to user space code. Going back to user
+space with an active kmap local is a nono.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ include/linux/highmem-internal.h | 10 ++++
+ include/linux/sched.h            |  9 +++
+ kernel/entry/common.c            |  2 +
+ kernel/fork.c                    |  1 +
+ kernel/sched/core.c              | 18 ++++++
+ mm/highmem.c                     | 99 ++++++++++++++++++++++++++++----
+ 6 files changed, 129 insertions(+), 10 deletions(-)
+
+diff --git a/include/linux/highmem-internal.h b/include/linux/highmem-internal.h
+index 6ceed907b14e..c5a22177db85 100644
+--- a/include/linux/highmem-internal.h
++++ b/include/linux/highmem-internal.h
+@@ -9,6 +9,16 @@
+ void *__kmap_local_pfn_prot(unsigned long pfn, pgprot_t prot);
+ void *__kmap_local_page_prot(struct page *page, pgprot_t prot);
+ void kunmap_local_indexed(void *vaddr);
++void kmap_local_fork(struct task_struct *tsk);
++void __kmap_local_sched_out(void);
++void __kmap_local_sched_in(void);
++static inline void kmap_assert_nomap(void)
++{
++	DEBUG_LOCKS_WARN_ON(current->kmap_ctrl.idx);
++}
++#else
++static inline void kmap_local_fork(struct task_struct *tsk) { }
++static inline void kmap_assert_nomap(void) { }
+ #endif
+ 
+ #ifdef CONFIG_HIGHMEM
+diff --git a/include/linux/sched.h b/include/linux/sched.h
+index bff48e9f32db..82de1ab42497 100644
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -34,6 +34,7 @@
+ #include <linux/rseq.h>
+ #include <linux/seqlock.h>
+ #include <linux/kcsan.h>
++#include <asm/kmap_size.h>
+ 
+ /* task_struct member predeclarations (sorted alphabetically): */
+ struct audit_context;
+@@ -641,6 +642,13 @@ struct wake_q_node {
+ 	struct wake_q_node *next;
+ };
+ 
++struct kmap_ctrl {
++#ifdef CONFIG_KMAP_LOCAL
++	int				idx;
++	pte_t				pteval[KM_MAX_IDX];
++#endif
++};
++
+ struct task_struct {
+ #ifdef CONFIG_THREAD_INFO_IN_TASK
+ 	/*
+@@ -1323,6 +1331,7 @@ struct task_struct {
+ 	unsigned int			sequential_io;
+ 	unsigned int			sequential_io_avg;
+ #endif
++	struct kmap_ctrl		kmap_ctrl;
+ #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
+ 	unsigned long			task_state_change;
+ #endif
+diff --git a/kernel/entry/common.c b/kernel/entry/common.c
+index 09f58853f692..e6a66de1202a 100644
+--- a/kernel/entry/common.c
++++ b/kernel/entry/common.c
+@@ -2,6 +2,7 @@
+ 
+ #include <linux/context_tracking.h>
+ #include <linux/entry-common.h>
++#include <linux/highmem.h>
+ #include <linux/livepatch.h>
+ #include <linux/audit.h>
+ 
+@@ -202,6 +203,7 @@ static void exit_to_user_mode_prepare(struct pt_regs *regs)
+ 
+ 	/* Ensure that the address limit is intact and no locks are held */
+ 	addr_limit_user_check();
++	kmap_assert_nomap();
+ 	lockdep_assert_irqs_disabled();
+ 	lockdep_sys_exit();
+ }
+diff --git a/kernel/fork.c b/kernel/fork.c
+index 633b0af1d1a7..32b9d7205ac1 100644
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -942,6 +942,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
+ 	account_kernel_stack(tsk, 1);
+ 
+ 	kcov_task_init(tsk);
++	kmap_local_fork(tsk);
+ 
+ #ifdef CONFIG_FAULT_INJECTION
+ 	tsk->fail_nth = 0;
+diff --git a/kernel/sched/core.c b/kernel/sched/core.c
+index 8a6135a0b2ee..390b51366f5e 100644
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -4092,6 +4092,22 @@ static inline void finish_lock_switch(struct rq *rq)
+ # define finish_arch_post_lock_switch()	do { } while (0)
+ #endif
+ 
++static inline void kmap_local_sched_out(void)
++{
++#ifdef CONFIG_KMAP_LOCAL
++	if (unlikely(current->kmap_ctrl.idx))
++		__kmap_local_sched_out();
++#endif
++}
++
++static inline void kmap_local_sched_in(void)
++{
++#ifdef CONFIG_KMAP_LOCAL
++	if (unlikely(current->kmap_ctrl.idx))
++		__kmap_local_sched_in();
++#endif
++}
++
+ /**
+  * prepare_task_switch - prepare to switch tasks
+  * @rq: the runqueue preparing to switch
+@@ -4114,6 +4130,7 @@ prepare_task_switch(struct rq *rq, struct task_struct *prev,
+ 	perf_event_task_sched_out(prev, next);
+ 	rseq_preempt(prev);
+ 	fire_sched_out_preempt_notifiers(prev, next);
++	kmap_local_sched_out();
+ 	prepare_task(next);
+ 	prepare_arch_switch(next);
+ }
+@@ -4180,6 +4197,7 @@ static struct rq *finish_task_switch(struct task_struct *prev)
+ 	finish_lock_switch(rq);
+ 	finish_arch_post_lock_switch();
+ 	kcov_finish_switch(current);
++	kmap_local_sched_in();
+ 
+ 	fire_sched_in_preempt_notifiers(current);
+ 	/*
+diff --git a/mm/highmem.c b/mm/highmem.c
+index 54bd233846c9..d7a1c80001d0 100644
+--- a/mm/highmem.c
++++ b/mm/highmem.c
+@@ -365,8 +365,6 @@ EXPORT_SYMBOL(kunmap_high);
+ 
+ #include <asm/kmap_size.h>
+ 
+-static DEFINE_PER_CPU(int, __kmap_local_idx);
+-
+ /*
+  * With DEBUG_HIGHMEM the stack depth is doubled and every second
+  * slot is unused which acts as a guard page
+@@ -379,23 +377,21 @@ static DEFINE_PER_CPU(int, __kmap_local_idx);
+ 
+ static inline int kmap_local_idx_push(void)
+ {
+-	int idx = __this_cpu_add_return(__kmap_local_idx, KM_INCR) - 1;
+-
+ 	WARN_ON_ONCE(in_irq() && !irqs_disabled());
+-	BUG_ON(idx >= KM_MAX_IDX);
+-	return idx;
++	current->kmap_ctrl.idx += KM_INCR;
++	BUG_ON(current->kmap_ctrl.idx >= KM_MAX_IDX);
++	return current->kmap_ctrl.idx - 1;
+ }
+ 
+ static inline int kmap_local_idx(void)
+ {
+-	return __this_cpu_read(__kmap_local_idx) - 1;
++	return current->kmap_ctrl.idx - 1;
+ }
+ 
+ static inline void kmap_local_idx_pop(void)
+ {
+-	int idx = __this_cpu_sub_return(__kmap_local_idx, KM_INCR);
+-
+-	BUG_ON(idx < 0);
++	current->kmap_ctrl.idx -= KM_INCR;
++	BUG_ON(current->kmap_ctrl.idx < 0);
+ }
+ 
+ #ifndef arch_kmap_local_post_map
+@@ -461,6 +457,7 @@ void *__kmap_local_pfn_prot(unsigned long pfn, pgprot_t prot)
+ 	pteval = pfn_pte(pfn, prot);
+ 	set_pte_at(&init_mm, vaddr, kmap_pte - idx, pteval);
+ 	arch_kmap_local_post_map(vaddr, pteval);
++	current->kmap_ctrl.pteval[kmap_local_idx()] = pteval;
+ 	preempt_enable();
+ 
+ 	return (void *)vaddr;
+@@ -505,10 +502,92 @@ void kunmap_local_indexed(void *vaddr)
+ 	arch_kmap_local_pre_unmap(addr);
+ 	pte_clear(&init_mm, addr, kmap_pte - idx);
+ 	arch_kmap_local_post_unmap(addr);
++	current->kmap_ctrl.pteval[kmap_local_idx()] = __pte(0);
+ 	kmap_local_idx_pop();
+ 	preempt_enable();
+ }
+ EXPORT_SYMBOL(kunmap_local_indexed);
++
++/*
++ * Invoked before switch_to(). This is safe even when during or after
++ * clearing the maps an interrupt which needs a kmap_local happens because
++ * the task::kmap_ctrl.idx is not modified by the unmapping code so a
++ * nested kmap_local will use the next unused index and restore the index
++ * on unmap. The already cleared kmaps of the outgoing task are irrelevant
++ * because the interrupt context does not know about them. The same applies
++ * when scheduling back in for an interrupt which happens before the
++ * restore is complete.
++ */
++void __kmap_local_sched_out(void)
++{
++	struct task_struct *tsk = current;
++	pte_t *kmap_pte = kmap_get_pte();
++	int i;
++
++	/* Clear kmaps */
++	for (i = 0; i < tsk->kmap_ctrl.idx; i++) {
++		pte_t pteval = tsk->kmap_ctrl.pteval[i];
++		unsigned long addr;
++		int idx;
++
++		/* With debug all even slots are unmapped and act as guard */
++		if (IS_ENABLED(CONFIG_DEBUG_HIGHMEM) && !(i & 0x01)) {
++			WARN_ON_ONCE(!pte_none(pteval));
++			continue;
++		}
++		if (WARN_ON_ONCE(pte_none(pteval)))
++			continue;
++
++		/*
++		 * This is a horrible hack for XTENSA to calculate the
++		 * coloured PTE index. Uses the PFN encoded into the pteval
++		 * and the map index calculation because the actual mapped
++		 * virtual address is not stored in task::kmap_ctrl.
++		 * For any sane architecture this is optimized out.
++		 */
++		idx = arch_kmap_local_map_idx(i, pte_pfn(pteval));
++
++		addr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
++		arch_kmap_local_pre_unmap(addr);
++		pte_clear(&init_mm, addr, kmap_pte - idx);
++		arch_kmap_local_post_unmap(addr);
++	}
++}
++
++void __kmap_local_sched_in(void)
++{
++	struct task_struct *tsk = current;
++	pte_t *kmap_pte = kmap_get_pte();
++	int i;
++
++	/* Restore kmaps */
++	for (i = 0; i < tsk->kmap_ctrl.idx; i++) {
++		pte_t pteval = tsk->kmap_ctrl.pteval[i];
++		unsigned long addr;
++		int idx;
++
++		/* With debug all even slots are unmapped and act as guard */
++		if (IS_ENABLED(CONFIG_DEBUG_HIGHMEM) && !(i & 0x01)) {
++			WARN_ON_ONCE(!pte_none(pteval));
++			continue;
++		}
++		if (WARN_ON_ONCE(pte_none(pteval)))
++			continue;
++
++		/* See comment in __kmap_local_sched_out() */
++		idx = arch_kmap_local_map_idx(i, pte_pfn(pteval));
++		addr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
++		set_pte_at(&init_mm, addr, kmap_pte - idx, pteval);
++		arch_kmap_local_post_map(addr, pteval);
++	}
++}
++
++void kmap_local_fork(struct task_struct *tsk)
++{
++	if (WARN_ON_ONCE(tsk->kmap_ctrl.idx))
++		memset(&tsk->kmap_ctrl, 0, sizeof(tsk->kmap_ctrl));
++}
++
+ #endif
+ 
+ #if defined(HASHED_PAGE_VIRTUAL)
+-- 
+2.43.0
+
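For context, a minimal sketch of what the per-task kmap_ctrl storage enables once a later patch in this series stops disabling preemption in the kmap_local core: a task may keep a local kmap across a reschedule, because prepare_task_switch()/finish_task_switch() now clear and restore the PTEs recorded in current->kmap_ctrl. The sketch is not part of the patch above; it assumes the kmap_local_page()/kunmap_local() helpers provided elsewhere in the series, and the function name is hypothetical.

/*
 * Illustrative sketch only -- not part of the patch above. Assumes the
 * kmap_local_page()/kunmap_local() helpers provided elsewhere in this
 * series and a later change that drops preempt_disable() from the
 * kmap_local core; the function name is hypothetical.
 */
#include <linux/highmem.h>
#include <linux/mm.h>
#include <linux/sched.h>

static void example_copy_page_preemptible(struct page *dst, struct page *src)
{
	/* Each mapping records its pte in current->kmap_ctrl.pteval[] */
	void *vdst = kmap_local_page(dst);
	void *vsrc = kmap_local_page(src);

	copy_page(vdst, vsrc);

	/*
	 * May schedule: __kmap_local_sched_out() clears both PTEs at
	 * switch-out and __kmap_local_sched_in() restores them before
	 * this task runs again, so vdst and vsrc remain usable here.
	 */
	cond_resched();

	kunmap_local(vsrc);
	kunmap_local(vdst);
}

In the completed series the kmap_local core also keeps the task on the same CPU while a mapping is held, which is what keeps the fixmap addresses stable across the reschedule.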