diff options
Diffstat (limited to '')
-rw-r--r-- | debian/patches-rt/0308-sched-Lazy-migrate_disable-processing.patch | 616 |
1 files changed, 616 insertions, 0 deletions
diff --git a/debian/patches-rt/0308-sched-Lazy-migrate_disable-processing.patch b/debian/patches-rt/0308-sched-Lazy-migrate_disable-processing.patch new file mode 100644 index 000000000..973b6e870 --- /dev/null +++ b/debian/patches-rt/0308-sched-Lazy-migrate_disable-processing.patch @@ -0,0 +1,616 @@ +From 34b182c6f6606c5a02a3ddea15356251ca28093b Mon Sep 17 00:00:00 2001 +From: Scott Wood <swood@redhat.com> +Date: Sat, 12 Oct 2019 01:52:13 -0500 +Subject: [PATCH 308/347] sched: Lazy migrate_disable processing +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.19/older/patches-4.19.246-rt110.tar.xz + +[ Upstream commit 425c5b38779a860062aa62219dc920d374b13c17 ] + +Avoid overhead on the majority of migrate disable/enable sequences by +only manipulating scheduler data (and grabbing the relevant locks) when +the task actually schedules while migrate-disabled. A kernel build +showed around a 10% reduction in system time (with CONFIG_NR_CPUS=512). + +Instead of cpuhp_pin_lock, CPU hotplug is handled by keeping a per-CPU +count of the number of pinned tasks (including tasks which have not +scheduled in the migrate-disabled section); takedown_cpu() will +wait until that reaches zero (confirmed by take_cpu_down() in stop +machine context to deal with races) before migrating tasks off of the +cpu. + +To simplify synchronization, updating cpus_mask is no longer deferred +until migrate_enable(). This lets us not have to worry about +migrate_enable() missing the update if it's on the fast path (didn't +schedule during the migrate disabled section). It also makes the code +a bit simpler and reduces deviation from mainline. + +While the main motivation for this is the performance benefit, lazy +migrate disable also eliminates the restriction on calling +migrate_disable() while atomic but leaving the atomic region prior to +calling migrate_enable() -- though this won't help with local_bh_disable() +(and thus rcutorture) unless something similar is done with the recently +added local_lock. + +Signed-off-by: Scott Wood <swood@redhat.com> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org> +--- + include/linux/cpu.h | 4 - + include/linux/sched.h | 11 +-- + init/init_task.c | 4 + + kernel/cpu.c | 103 +++++++++-------------- + kernel/sched/core.c | 182 +++++++++++++++++------------------------ + kernel/sched/sched.h | 4 + + lib/smp_processor_id.c | 3 + + 7 files changed, 129 insertions(+), 182 deletions(-) + +diff --git a/include/linux/cpu.h b/include/linux/cpu.h +index e67645924404..87347ccbba0c 100644 +--- a/include/linux/cpu.h ++++ b/include/linux/cpu.h +@@ -118,8 +118,6 @@ extern void cpu_hotplug_disable(void); + extern void cpu_hotplug_enable(void); + void clear_tasks_mm_cpumask(int cpu); + int cpu_down(unsigned int cpu); +-extern void pin_current_cpu(void); +-extern void unpin_current_cpu(void); + + #else /* CONFIG_HOTPLUG_CPU */ + +@@ -131,8 +129,6 @@ static inline int cpus_read_trylock(void) { return true; } + static inline void lockdep_assert_cpus_held(void) { } + static inline void cpu_hotplug_disable(void) { } + static inline void cpu_hotplug_enable(void) { } +-static inline void pin_current_cpu(void) { } +-static inline void unpin_current_cpu(void) { } + + #endif /* !CONFIG_HOTPLUG_CPU */ + +diff --git a/include/linux/sched.h b/include/linux/sched.h +index e567fe2d7058..65069db8923c 100644 +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -227,6 +227,8 @@ extern void io_schedule_finish(int token); + extern long io_schedule_timeout(long timeout); + extern void io_schedule(void); + ++int cpu_nr_pinned(int cpu); ++ + /** + * struct prev_cputime - snapshot of system and user cputime + * @utime: time spent in user mode +@@ -670,16 +672,13 @@ struct task_struct { + cpumask_t cpus_mask; + #if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT_BASE) + int migrate_disable; +- int migrate_disable_update; +- int pinned_on_cpu; ++ bool migrate_disable_scheduled; + # ifdef CONFIG_SCHED_DEBUG +- int migrate_disable_atomic; ++ int pinned_on_cpu; + # endif +- + #elif !defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT_BASE) + # ifdef CONFIG_SCHED_DEBUG + int migrate_disable; +- int migrate_disable_atomic; + # endif + #endif + #ifdef CONFIG_PREEMPT_RT_FULL +@@ -2058,4 +2057,6 @@ static inline void rseq_syscall(struct pt_regs *regs) + + #endif + ++extern struct task_struct *takedown_cpu_task; ++ + #endif +diff --git a/init/init_task.c b/init/init_task.c +index 634becebd713..45b84137c4b3 100644 +--- a/init/init_task.c ++++ b/init/init_task.c +@@ -80,6 +80,10 @@ struct task_struct init_task + .cpus_ptr = &init_task.cpus_mask, + .cpus_mask = CPU_MASK_ALL, + .nr_cpus_allowed= NR_CPUS, ++#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT_BASE) && \ ++ defined(CONFIG_SCHED_DEBUG) ++ .pinned_on_cpu = -1, ++#endif + .mm = NULL, + .active_mm = &init_mm, + .restart_block = { +diff --git a/kernel/cpu.c b/kernel/cpu.c +index c84a93f84164..edd4fd8da726 100644 +--- a/kernel/cpu.c ++++ b/kernel/cpu.c +@@ -77,11 +77,6 @@ static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state) = { + .fail = CPUHP_INVALID, + }; + +-#if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PREEMPT_RT_FULL) +-static DEFINE_PER_CPU(struct rt_rw_lock, cpuhp_pin_lock) = \ +- __RWLOCK_RT_INITIALIZER(cpuhp_pin_lock); +-#endif +- + #if defined(CONFIG_LOCKDEP) && defined(CONFIG_SMP) + static struct lockdep_map cpuhp_state_up_map = + STATIC_LOCKDEP_MAP_INIT("cpuhp_state-up", &cpuhp_state_up_map); +@@ -288,57 +283,6 @@ static int cpu_hotplug_disabled; + + #ifdef CONFIG_HOTPLUG_CPU + +-/** +- * pin_current_cpu - Prevent the current cpu from being unplugged +- */ +-void pin_current_cpu(void) +-{ +-#ifdef CONFIG_PREEMPT_RT_FULL +- struct rt_rw_lock *cpuhp_pin; +- unsigned int cpu; +- int ret; +- +-again: +- cpuhp_pin = this_cpu_ptr(&cpuhp_pin_lock); +- ret = __read_rt_trylock(cpuhp_pin); +- if (ret) { +- current->pinned_on_cpu = smp_processor_id(); +- return; +- } +- cpu = smp_processor_id(); +- preempt_lazy_enable(); +- preempt_enable(); +- +- sleeping_lock_inc(); +- __read_rt_lock(cpuhp_pin); +- sleeping_lock_dec(); +- +- preempt_disable(); +- preempt_lazy_disable(); +- if (cpu != smp_processor_id()) { +- __read_rt_unlock(cpuhp_pin); +- goto again; +- } +- current->pinned_on_cpu = cpu; +-#endif +-} +- +-/** +- * unpin_current_cpu - Allow unplug of current cpu +- */ +-void unpin_current_cpu(void) +-{ +-#ifdef CONFIG_PREEMPT_RT_FULL +- struct rt_rw_lock *cpuhp_pin = this_cpu_ptr(&cpuhp_pin_lock); +- +- if (WARN_ON(current->pinned_on_cpu != smp_processor_id())) +- cpuhp_pin = per_cpu_ptr(&cpuhp_pin_lock, current->pinned_on_cpu); +- +- current->pinned_on_cpu = -1; +- __read_rt_unlock(cpuhp_pin); +-#endif +-} +- + DEFINE_STATIC_PERCPU_RWSEM(cpu_hotplug_lock); + + void cpus_read_lock(void) +@@ -932,6 +876,15 @@ static int take_cpu_down(void *_param) + int err, cpu = smp_processor_id(); + int ret; + ++#ifdef CONFIG_PREEMPT_RT_BASE ++ /* ++ * If any tasks disabled migration before we got here, ++ * go back and sleep again. ++ */ ++ if (cpu_nr_pinned(cpu)) ++ return -EAGAIN; ++#endif ++ + /* Ensure this CPU doesn't handle any more interrupts. */ + err = __cpu_disable(); + if (err < 0) +@@ -959,11 +912,10 @@ static int take_cpu_down(void *_param) + return 0; + } + ++struct task_struct *takedown_cpu_task; ++ + static int takedown_cpu(unsigned int cpu) + { +-#ifdef CONFIG_PREEMPT_RT_FULL +- struct rt_rw_lock *cpuhp_pin = per_cpu_ptr(&cpuhp_pin_lock, cpu); +-#endif + struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); + int err; + +@@ -976,17 +928,38 @@ static int takedown_cpu(unsigned int cpu) + */ + irq_lock_sparse(); + +-#ifdef CONFIG_PREEMPT_RT_FULL +- __write_rt_lock(cpuhp_pin); ++#ifdef CONFIG_PREEMPT_RT_BASE ++ WARN_ON_ONCE(takedown_cpu_task); ++ takedown_cpu_task = current; ++ ++again: ++ /* ++ * If a task pins this CPU after we pass this check, take_cpu_down ++ * will return -EAGAIN. ++ */ ++ for (;;) { ++ int nr_pinned; ++ ++ set_current_state(TASK_UNINTERRUPTIBLE); ++ nr_pinned = cpu_nr_pinned(cpu); ++ if (nr_pinned == 0) ++ break; ++ schedule(); ++ } ++ set_current_state(TASK_RUNNING); + #endif + + /* + * So now all preempt/rcu users must observe !cpu_active(). + */ + err = stop_machine_cpuslocked(take_cpu_down, NULL, cpumask_of(cpu)); ++#ifdef CONFIG_PREEMPT_RT_BASE ++ if (err == -EAGAIN) ++ goto again; ++#endif + if (err) { +-#ifdef CONFIG_PREEMPT_RT_FULL +- __write_rt_unlock(cpuhp_pin); ++#ifdef CONFIG_PREEMPT_RT_BASE ++ takedown_cpu_task = NULL; + #endif + /* CPU refused to die */ + irq_unlock_sparse(); +@@ -1006,8 +979,8 @@ static int takedown_cpu(unsigned int cpu) + wait_for_ap_thread(st, false); + BUG_ON(st->state != CPUHP_AP_IDLE_DEAD); + +-#ifdef CONFIG_PREEMPT_RT_FULL +- __write_rt_unlock(cpuhp_pin); ++#ifdef CONFIG_PREEMPT_RT_BASE ++ takedown_cpu_task = NULL; + #endif + /* Interrupts are moved away from the dying cpu, reenable alloc/free */ + irq_unlock_sparse(); +diff --git a/kernel/sched/core.c b/kernel/sched/core.c +index 7eb3037c0b35..de6514e13e0c 100644 +--- a/kernel/sched/core.c ++++ b/kernel/sched/core.c +@@ -1065,7 +1065,8 @@ static int migration_cpu_stop(void *data) + void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask) + { + cpumask_copy(&p->cpus_mask, new_mask); +- p->nr_cpus_allowed = cpumask_weight(new_mask); ++ if (p->cpus_ptr == &p->cpus_mask) ++ p->nr_cpus_allowed = cpumask_weight(new_mask); + } + + #if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT_BASE) +@@ -1076,8 +1077,7 @@ int __migrate_disabled(struct task_struct *p) + EXPORT_SYMBOL_GPL(__migrate_disabled); + #endif + +-static void __do_set_cpus_allowed_tail(struct task_struct *p, +- const struct cpumask *new_mask) ++void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) + { + struct rq *rq = task_rq(p); + bool queued, running; +@@ -1106,20 +1106,6 @@ static void __do_set_cpus_allowed_tail(struct task_struct *p, + set_curr_task(rq, p); + } + +-void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) +-{ +-#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT_BASE) +- if (__migrate_disabled(p)) { +- lockdep_assert_held(&p->pi_lock); +- +- cpumask_copy(&p->cpus_mask, new_mask); +- p->migrate_disable_update = 1; +- return; +- } +-#endif +- __do_set_cpus_allowed_tail(p, new_mask); +-} +- + /* + * Change a given task's CPU affinity. Migrate the thread to a + * proper CPU and schedule it away if the CPU it's executing on +@@ -1179,7 +1165,8 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, + } + + /* Can the task run on the task's current CPU? If so, we're done */ +- if (cpumask_test_cpu(task_cpu(p), new_mask) || __migrate_disabled(p)) ++ if (cpumask_test_cpu(task_cpu(p), new_mask) || ++ p->cpus_ptr != &p->cpus_mask) + goto out; + + if (task_running(rq, p) || p->state == TASK_WAKING) { +@@ -3459,6 +3446,8 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) + BUG(); + } + ++static void migrate_disabled_sched(struct task_struct *p); ++ + /* + * __schedule() is the main scheduler function. + * +@@ -3529,6 +3518,9 @@ static void __sched notrace __schedule(bool preempt) + rq_lock(rq, &rf); + smp_mb__after_spinlock(); + ++ if (__migrate_disabled(prev)) ++ migrate_disabled_sched(prev); ++ + /* Promote REQ to ACT */ + rq->clock_update_flags <<= 1; + update_rq_clock(rq); +@@ -5777,6 +5769,8 @@ static void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf) + BUG_ON(!next); + put_prev_task(rq, next); + ++ WARN_ON_ONCE(__migrate_disabled(next)); ++ + /* + * Rules for changing task_struct::cpus_mask are holding + * both pi_lock and rq->lock, such that holding either +@@ -7241,14 +7235,9 @@ update_nr_migratory(struct task_struct *p, long delta) + static inline void + migrate_disable_update_cpus_allowed(struct task_struct *p) + { +- struct rq *rq; +- struct rq_flags rf; +- +- rq = task_rq_lock(p, &rf); + p->cpus_ptr = cpumask_of(smp_processor_id()); + update_nr_migratory(p, -1); + p->nr_cpus_allowed = 1; +- task_rq_unlock(rq, p, &rf); + } + + static inline void +@@ -7266,54 +7255,35 @@ migrate_enable_update_cpus_allowed(struct task_struct *p) + + void migrate_disable(void) + { +- struct task_struct *p = current; ++ preempt_disable(); + +- if (in_atomic() || irqs_disabled()) { ++ if (++current->migrate_disable == 1) { ++ this_rq()->nr_pinned++; ++ preempt_lazy_disable(); + #ifdef CONFIG_SCHED_DEBUG +- p->migrate_disable_atomic++; ++ WARN_ON_ONCE(current->pinned_on_cpu >= 0); ++ current->pinned_on_cpu = smp_processor_id(); + #endif +- return; +- } +-#ifdef CONFIG_SCHED_DEBUG +- if (unlikely(p->migrate_disable_atomic)) { +- tracing_off(); +- WARN_ON_ONCE(1); + } +-#endif + +- if (p->migrate_disable) { +- p->migrate_disable++; +- return; +- } ++ preempt_enable(); ++} ++EXPORT_SYMBOL(migrate_disable); + +- preempt_disable(); +- preempt_lazy_disable(); +- pin_current_cpu(); ++static void migrate_disabled_sched(struct task_struct *p) ++{ ++ if (p->migrate_disable_scheduled) ++ return; + + migrate_disable_update_cpus_allowed(p); +- p->migrate_disable = 1; +- +- preempt_enable(); ++ p->migrate_disable_scheduled = 1; + } +-EXPORT_SYMBOL(migrate_disable); + + void migrate_enable(void) + { + struct task_struct *p = current; +- +- if (in_atomic() || irqs_disabled()) { +-#ifdef CONFIG_SCHED_DEBUG +- p->migrate_disable_atomic--; +-#endif +- return; +- } +- +-#ifdef CONFIG_SCHED_DEBUG +- if (unlikely(p->migrate_disable_atomic)) { +- tracing_off(); +- WARN_ON_ONCE(1); +- } +-#endif ++ struct rq *rq = this_rq(); ++ int cpu = task_cpu(p); + + WARN_ON_ONCE(p->migrate_disable <= 0); + if (p->migrate_disable > 1) { +@@ -7323,67 +7293,69 @@ void migrate_enable(void) + + preempt_disable(); + ++#ifdef CONFIG_SCHED_DEBUG ++ WARN_ON_ONCE(current->pinned_on_cpu != cpu); ++ current->pinned_on_cpu = -1; ++#endif ++ ++ WARN_ON_ONCE(rq->nr_pinned < 1); ++ + p->migrate_disable = 0; ++ rq->nr_pinned--; ++ if (rq->nr_pinned == 0 && unlikely(!cpu_active(cpu)) && ++ takedown_cpu_task) ++ wake_up_process(takedown_cpu_task); ++ ++ if (!p->migrate_disable_scheduled) ++ goto out; ++ ++ p->migrate_disable_scheduled = 0; ++ + migrate_enable_update_cpus_allowed(p); + +- if (p->migrate_disable_update) { +- struct rq *rq; ++ WARN_ON(smp_processor_id() != cpu); ++ if (!is_cpu_allowed(p, cpu)) { ++ struct migration_arg arg = { p }; + struct rq_flags rf; +- int cpu = task_cpu(p); + + rq = task_rq_lock(p, &rf); + update_rq_clock(rq); +- +- __do_set_cpus_allowed_tail(p, &p->cpus_mask); ++ arg.dest_cpu = select_fallback_rq(cpu, p); + task_rq_unlock(rq, p, &rf); + +- p->migrate_disable_update = 0; +- +- WARN_ON(smp_processor_id() != cpu); +- if (!cpumask_test_cpu(cpu, &p->cpus_mask)) { +- struct migration_arg arg = { p }; +- struct rq_flags rf; ++ preempt_lazy_enable(); ++ preempt_enable(); + +- rq = task_rq_lock(p, &rf); +- update_rq_clock(rq); +- arg.dest_cpu = select_fallback_rq(cpu, p); +- task_rq_unlock(rq, p, &rf); +- +- unpin_current_cpu(); +- preempt_lazy_enable(); +- preempt_enable(); +- +- sleeping_lock_inc(); +- stop_one_cpu(task_cpu(p), migration_cpu_stop, &arg); +- sleeping_lock_dec(); +- tlb_migrate_finish(p->mm); ++ sleeping_lock_inc(); ++ stop_one_cpu(task_cpu(p), migration_cpu_stop, &arg); ++ sleeping_lock_dec(); ++ tlb_migrate_finish(p->mm); + +- return; +- } ++ return; + } +- unpin_current_cpu(); ++ ++out: + preempt_lazy_enable(); + preempt_enable(); + } + EXPORT_SYMBOL(migrate_enable); + +-#elif !defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT_BASE) +-void migrate_disable(void) ++int cpu_nr_pinned(int cpu) + { +-#ifdef CONFIG_SCHED_DEBUG +- struct task_struct *p = current; ++ struct rq *rq = cpu_rq(cpu); + +- if (in_atomic() || irqs_disabled()) { +- p->migrate_disable_atomic++; +- return; +- } ++ return rq->nr_pinned; ++} + +- if (unlikely(p->migrate_disable_atomic)) { +- tracing_off(); +- WARN_ON_ONCE(1); +- } ++#elif !defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT_BASE) ++static void migrate_disabled_sched(struct task_struct *p) ++{ ++} + +- p->migrate_disable++; ++void migrate_disable(void) ++{ ++#ifdef CONFIG_SCHED_DEBUG ++ current->migrate_disable++; + #endif + barrier(); + } +@@ -7394,20 +7366,14 @@ void migrate_enable(void) + #ifdef CONFIG_SCHED_DEBUG + struct task_struct *p = current; + +- if (in_atomic() || irqs_disabled()) { +- p->migrate_disable_atomic--; +- return; +- } +- +- if (unlikely(p->migrate_disable_atomic)) { +- tracing_off(); +- WARN_ON_ONCE(1); +- } +- + WARN_ON_ONCE(p->migrate_disable <= 0); + p->migrate_disable--; + #endif + barrier(); + } + EXPORT_SYMBOL(migrate_enable); ++#else ++static void migrate_disabled_sched(struct task_struct *p) ++{ ++} + #endif +diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h +index 87a05bb90124..45b3f135d205 100644 +--- a/kernel/sched/sched.h ++++ b/kernel/sched/sched.h +@@ -915,6 +915,10 @@ struct rq { + /* Must be inspected within a rcu lock section */ + struct cpuidle_state *idle_state; + #endif ++ ++#if defined(CONFIG_PREEMPT_RT_BASE) && defined(CONFIG_SMP) ++ int nr_pinned; ++#endif + }; + + static inline int cpu_of(struct rq *rq) +diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c +index b8a8a8db2d75..0c80992aa337 100644 +--- a/lib/smp_processor_id.c ++++ b/lib/smp_processor_id.c +@@ -22,6 +22,9 @@ notrace static unsigned int check_preemption_disabled(const char *what1, + * Kernel threads bound to a single CPU can safely use + * smp_processor_id(): + */ ++ if (current->migrate_disable) ++ goto out; ++ + if (current->nr_cpus_allowed == 1) + goto out; + +-- +2.36.1 + |