Diffstat (limited to 'debian/patches-rt/0308-sched-Lazy-migrate_disable-processing.patch')
-rw-r--r--  debian/patches-rt/0308-sched-Lazy-migrate_disable-processing.patch | 616
1 file changed, 616 insertions, 0 deletions
diff --git a/debian/patches-rt/0308-sched-Lazy-migrate_disable-processing.patch b/debian/patches-rt/0308-sched-Lazy-migrate_disable-processing.patch
new file mode 100644
index 000000000..973b6e870
--- /dev/null
+++ b/debian/patches-rt/0308-sched-Lazy-migrate_disable-processing.patch
@@ -0,0 +1,616 @@
+From 34b182c6f6606c5a02a3ddea15356251ca28093b Mon Sep 17 00:00:00 2001
+From: Scott Wood <swood@redhat.com>
+Date: Sat, 12 Oct 2019 01:52:13 -0500
+Subject: [PATCH 308/347] sched: Lazy migrate_disable processing
+Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.19/older/patches-4.19.246-rt110.tar.xz
+
+[ Upstream commit 425c5b38779a860062aa62219dc920d374b13c17 ]
+
+Avoid overhead on the majority of migrate disable/enable sequences by
+only manipulating scheduler data (and grabbing the relevant locks) when
+the task actually schedules while migrate-disabled. A kernel build
+showed around a 10% reduction in system time (with CONFIG_NR_CPUS=512).
+
+Instead of cpuhp_pin_lock, CPU hotplug is handled by keeping a per-CPU
+count of the number of pinned tasks (including tasks which have not
+scheduled in the migrate-disabled section); takedown_cpu() will
+wait until that reaches zero (confirmed by take_cpu_down() in stop
+machine context to deal with races) before migrating tasks off of the
+cpu.
+
+To simplify synchronization, updating cpus_mask is no longer deferred
+until migrate_enable(). This lets us not have to worry about
+migrate_enable() missing the update if it's on the fast path (didn't
+schedule during the migrate disabled section). It also makes the code
+a bit simpler and reduces deviation from mainline.
+
+While the main motivation for this is the performance benefit, lazy
+migrate disable also eliminates the restriction on calling
+migrate_disable() while atomic but leaving the atomic region prior to
+calling migrate_enable() -- though this won't help with local_bh_disable()
+(and thus rcutorture) unless something similar is done with the recently
+added local_lock.
+
+Signed-off-by: Scott Wood <swood@redhat.com>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
+---
+ include/linux/cpu.h | 4 -
+ include/linux/sched.h | 11 +--
+ init/init_task.c | 4 +
+ kernel/cpu.c | 103 +++++++++--------------
+ kernel/sched/core.c | 182 +++++++++++++++++------------------------
+ kernel/sched/sched.h | 4 +
+ lib/smp_processor_id.c | 3 +
+ 7 files changed, 129 insertions(+), 182 deletions(-)
+
+diff --git a/include/linux/cpu.h b/include/linux/cpu.h
+index e67645924404..87347ccbba0c 100644
+--- a/include/linux/cpu.h
++++ b/include/linux/cpu.h
+@@ -118,8 +118,6 @@ extern void cpu_hotplug_disable(void);
+ extern void cpu_hotplug_enable(void);
+ void clear_tasks_mm_cpumask(int cpu);
+ int cpu_down(unsigned int cpu);
+-extern void pin_current_cpu(void);
+-extern void unpin_current_cpu(void);
+
+ #else /* CONFIG_HOTPLUG_CPU */
+
+@@ -131,8 +129,6 @@ static inline int cpus_read_trylock(void) { return true; }
+ static inline void lockdep_assert_cpus_held(void) { }
+ static inline void cpu_hotplug_disable(void) { }
+ static inline void cpu_hotplug_enable(void) { }
+-static inline void pin_current_cpu(void) { }
+-static inline void unpin_current_cpu(void) { }
+
+ #endif /* !CONFIG_HOTPLUG_CPU */
+
+diff --git a/include/linux/sched.h b/include/linux/sched.h
+index e567fe2d7058..65069db8923c 100644
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -227,6 +227,8 @@ extern void io_schedule_finish(int token);
+ extern long io_schedule_timeout(long timeout);
+ extern void io_schedule(void);
+
++int cpu_nr_pinned(int cpu);
++
+ /**
+ * struct prev_cputime - snapshot of system and user cputime
+ * @utime: time spent in user mode
+@@ -670,16 +672,13 @@ struct task_struct {
+ cpumask_t cpus_mask;
+ #if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT_BASE)
+ int migrate_disable;
+- int migrate_disable_update;
+- int pinned_on_cpu;
++ bool migrate_disable_scheduled;
+ # ifdef CONFIG_SCHED_DEBUG
+- int migrate_disable_atomic;
++ int pinned_on_cpu;
+ # endif
+-
+ #elif !defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT_BASE)
+ # ifdef CONFIG_SCHED_DEBUG
+ int migrate_disable;
+- int migrate_disable_atomic;
+ # endif
+ #endif
+ #ifdef CONFIG_PREEMPT_RT_FULL
+@@ -2058,4 +2057,6 @@ static inline void rseq_syscall(struct pt_regs *regs)
+
+ #endif
+
++extern struct task_struct *takedown_cpu_task;
++
+ #endif
+diff --git a/init/init_task.c b/init/init_task.c
+index 634becebd713..45b84137c4b3 100644
+--- a/init/init_task.c
++++ b/init/init_task.c
+@@ -80,6 +80,10 @@ struct task_struct init_task
+ .cpus_ptr = &init_task.cpus_mask,
+ .cpus_mask = CPU_MASK_ALL,
+ .nr_cpus_allowed= NR_CPUS,
++#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT_BASE) && \
++ defined(CONFIG_SCHED_DEBUG)
++ .pinned_on_cpu = -1,
++#endif
+ .mm = NULL,
+ .active_mm = &init_mm,
+ .restart_block = {
+diff --git a/kernel/cpu.c b/kernel/cpu.c
+index c84a93f84164..edd4fd8da726 100644
+--- a/kernel/cpu.c
++++ b/kernel/cpu.c
+@@ -77,11 +77,6 @@ static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state) = {
+ .fail = CPUHP_INVALID,
+ };
+
+-#if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PREEMPT_RT_FULL)
+-static DEFINE_PER_CPU(struct rt_rw_lock, cpuhp_pin_lock) = \
+- __RWLOCK_RT_INITIALIZER(cpuhp_pin_lock);
+-#endif
+-
+ #if defined(CONFIG_LOCKDEP) && defined(CONFIG_SMP)
+ static struct lockdep_map cpuhp_state_up_map =
+ STATIC_LOCKDEP_MAP_INIT("cpuhp_state-up", &cpuhp_state_up_map);
+@@ -288,57 +283,6 @@ static int cpu_hotplug_disabled;
+
+ #ifdef CONFIG_HOTPLUG_CPU
+
+-/**
+- * pin_current_cpu - Prevent the current cpu from being unplugged
+- */
+-void pin_current_cpu(void)
+-{
+-#ifdef CONFIG_PREEMPT_RT_FULL
+- struct rt_rw_lock *cpuhp_pin;
+- unsigned int cpu;
+- int ret;
+-
+-again:
+- cpuhp_pin = this_cpu_ptr(&cpuhp_pin_lock);
+- ret = __read_rt_trylock(cpuhp_pin);
+- if (ret) {
+- current->pinned_on_cpu = smp_processor_id();
+- return;
+- }
+- cpu = smp_processor_id();
+- preempt_lazy_enable();
+- preempt_enable();
+-
+- sleeping_lock_inc();
+- __read_rt_lock(cpuhp_pin);
+- sleeping_lock_dec();
+-
+- preempt_disable();
+- preempt_lazy_disable();
+- if (cpu != smp_processor_id()) {
+- __read_rt_unlock(cpuhp_pin);
+- goto again;
+- }
+- current->pinned_on_cpu = cpu;
+-#endif
+-}
+-
+-/**
+- * unpin_current_cpu - Allow unplug of current cpu
+- */
+-void unpin_current_cpu(void)
+-{
+-#ifdef CONFIG_PREEMPT_RT_FULL
+- struct rt_rw_lock *cpuhp_pin = this_cpu_ptr(&cpuhp_pin_lock);
+-
+- if (WARN_ON(current->pinned_on_cpu != smp_processor_id()))
+- cpuhp_pin = per_cpu_ptr(&cpuhp_pin_lock, current->pinned_on_cpu);
+-
+- current->pinned_on_cpu = -1;
+- __read_rt_unlock(cpuhp_pin);
+-#endif
+-}
+-
+ DEFINE_STATIC_PERCPU_RWSEM(cpu_hotplug_lock);
+
+ void cpus_read_lock(void)
+@@ -932,6 +876,15 @@ static int take_cpu_down(void *_param)
+ int err, cpu = smp_processor_id();
+ int ret;
+
++#ifdef CONFIG_PREEMPT_RT_BASE
++ /*
++ * If any tasks disabled migration before we got here,
++ * go back and sleep again.
++ */
++ if (cpu_nr_pinned(cpu))
++ return -EAGAIN;
++#endif
++
+ /* Ensure this CPU doesn't handle any more interrupts. */
+ err = __cpu_disable();
+ if (err < 0)
+@@ -959,11 +912,10 @@ static int take_cpu_down(void *_param)
+ return 0;
+ }
+
++struct task_struct *takedown_cpu_task;
++
+ static int takedown_cpu(unsigned int cpu)
+ {
+-#ifdef CONFIG_PREEMPT_RT_FULL
+- struct rt_rw_lock *cpuhp_pin = per_cpu_ptr(&cpuhp_pin_lock, cpu);
+-#endif
+ struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
+ int err;
+
+@@ -976,17 +928,38 @@ static int takedown_cpu(unsigned int cpu)
+ */
+ irq_lock_sparse();
+
+-#ifdef CONFIG_PREEMPT_RT_FULL
+- __write_rt_lock(cpuhp_pin);
++#ifdef CONFIG_PREEMPT_RT_BASE
++ WARN_ON_ONCE(takedown_cpu_task);
++ takedown_cpu_task = current;
++
++again:
++ /*
++ * If a task pins this CPU after we pass this check, take_cpu_down
++ * will return -EAGAIN.
++ */
++ for (;;) {
++ int nr_pinned;
++
++ set_current_state(TASK_UNINTERRUPTIBLE);
++ nr_pinned = cpu_nr_pinned(cpu);
++ if (nr_pinned == 0)
++ break;
++ schedule();
++ }
++ set_current_state(TASK_RUNNING);
+ #endif
+
+ /*
+ * So now all preempt/rcu users must observe !cpu_active().
+ */
+ err = stop_machine_cpuslocked(take_cpu_down, NULL, cpumask_of(cpu));
++#ifdef CONFIG_PREEMPT_RT_BASE
++ if (err == -EAGAIN)
++ goto again;
++#endif
+ if (err) {
+-#ifdef CONFIG_PREEMPT_RT_FULL
+- __write_rt_unlock(cpuhp_pin);
++#ifdef CONFIG_PREEMPT_RT_BASE
++ takedown_cpu_task = NULL;
+ #endif
+ /* CPU refused to die */
+ irq_unlock_sparse();
+@@ -1006,8 +979,8 @@ static int takedown_cpu(unsigned int cpu)
+ wait_for_ap_thread(st, false);
+ BUG_ON(st->state != CPUHP_AP_IDLE_DEAD);
+
+-#ifdef CONFIG_PREEMPT_RT_FULL
+- __write_rt_unlock(cpuhp_pin);
++#ifdef CONFIG_PREEMPT_RT_BASE
++ takedown_cpu_task = NULL;
+ #endif
+ /* Interrupts are moved away from the dying cpu, reenable alloc/free */
+ irq_unlock_sparse();
+diff --git a/kernel/sched/core.c b/kernel/sched/core.c
+index 7eb3037c0b35..de6514e13e0c 100644
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -1065,7 +1065,8 @@ static int migration_cpu_stop(void *data)
+ void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask)
+ {
+ cpumask_copy(&p->cpus_mask, new_mask);
+- p->nr_cpus_allowed = cpumask_weight(new_mask);
++ if (p->cpus_ptr == &p->cpus_mask)
++ p->nr_cpus_allowed = cpumask_weight(new_mask);
+ }
+
+ #if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT_BASE)
+@@ -1076,8 +1077,7 @@ int __migrate_disabled(struct task_struct *p)
+ EXPORT_SYMBOL_GPL(__migrate_disabled);
+ #endif
+
+-static void __do_set_cpus_allowed_tail(struct task_struct *p,
+- const struct cpumask *new_mask)
++void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
+ {
+ struct rq *rq = task_rq(p);
+ bool queued, running;
+@@ -1106,20 +1106,6 @@ static void __do_set_cpus_allowed_tail(struct task_struct *p,
+ set_curr_task(rq, p);
+ }
+
+-void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
+-{
+-#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT_BASE)
+- if (__migrate_disabled(p)) {
+- lockdep_assert_held(&p->pi_lock);
+-
+- cpumask_copy(&p->cpus_mask, new_mask);
+- p->migrate_disable_update = 1;
+- return;
+- }
+-#endif
+- __do_set_cpus_allowed_tail(p, new_mask);
+-}
+-
+ /*
+ * Change a given task's CPU affinity. Migrate the thread to a
+ * proper CPU and schedule it away if the CPU it's executing on
+@@ -1179,7 +1165,8 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
+ }
+
+ /* Can the task run on the task's current CPU? If so, we're done */
+- if (cpumask_test_cpu(task_cpu(p), new_mask) || __migrate_disabled(p))
++ if (cpumask_test_cpu(task_cpu(p), new_mask) ||
++ p->cpus_ptr != &p->cpus_mask)
+ goto out;
+
+ if (task_running(rq, p) || p->state == TASK_WAKING) {
+@@ -3459,6 +3446,8 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
+ BUG();
+ }
+
++static void migrate_disabled_sched(struct task_struct *p);
++
+ /*
+ * __schedule() is the main scheduler function.
+ *
+@@ -3529,6 +3518,9 @@ static void __sched notrace __schedule(bool preempt)
+ rq_lock(rq, &rf);
+ smp_mb__after_spinlock();
+
++ if (__migrate_disabled(prev))
++ migrate_disabled_sched(prev);
++
+ /* Promote REQ to ACT */
+ rq->clock_update_flags <<= 1;
+ update_rq_clock(rq);
+@@ -5777,6 +5769,8 @@ static void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf)
+ BUG_ON(!next);
+ put_prev_task(rq, next);
+
++ WARN_ON_ONCE(__migrate_disabled(next));
++
+ /*
+ * Rules for changing task_struct::cpus_mask are holding
+ * both pi_lock and rq->lock, such that holding either
+@@ -7241,14 +7235,9 @@ update_nr_migratory(struct task_struct *p, long delta)
+ static inline void
+ migrate_disable_update_cpus_allowed(struct task_struct *p)
+ {
+- struct rq *rq;
+- struct rq_flags rf;
+-
+- rq = task_rq_lock(p, &rf);
+ p->cpus_ptr = cpumask_of(smp_processor_id());
+ update_nr_migratory(p, -1);
+ p->nr_cpus_allowed = 1;
+- task_rq_unlock(rq, p, &rf);
+ }
+
+ static inline void
+@@ -7266,54 +7255,35 @@ migrate_enable_update_cpus_allowed(struct task_struct *p)
+
+ void migrate_disable(void)
+ {
+- struct task_struct *p = current;
++ preempt_disable();
+
+- if (in_atomic() || irqs_disabled()) {
++ if (++current->migrate_disable == 1) {
++ this_rq()->nr_pinned++;
++ preempt_lazy_disable();
+ #ifdef CONFIG_SCHED_DEBUG
+- p->migrate_disable_atomic++;
++ WARN_ON_ONCE(current->pinned_on_cpu >= 0);
++ current->pinned_on_cpu = smp_processor_id();
+ #endif
+- return;
+- }
+-#ifdef CONFIG_SCHED_DEBUG
+- if (unlikely(p->migrate_disable_atomic)) {
+- tracing_off();
+- WARN_ON_ONCE(1);
+ }
+-#endif
+
+- if (p->migrate_disable) {
+- p->migrate_disable++;
+- return;
+- }
++ preempt_enable();
++}
++EXPORT_SYMBOL(migrate_disable);
+
+- preempt_disable();
+- preempt_lazy_disable();
+- pin_current_cpu();
++static void migrate_disabled_sched(struct task_struct *p)
++{
++ if (p->migrate_disable_scheduled)
++ return;
+
+ migrate_disable_update_cpus_allowed(p);
+- p->migrate_disable = 1;
+-
+- preempt_enable();
++ p->migrate_disable_scheduled = 1;
+ }
+-EXPORT_SYMBOL(migrate_disable);
+
+ void migrate_enable(void)
+ {
+ struct task_struct *p = current;
+-
+- if (in_atomic() || irqs_disabled()) {
+-#ifdef CONFIG_SCHED_DEBUG
+- p->migrate_disable_atomic--;
+-#endif
+- return;
+- }
+-
+-#ifdef CONFIG_SCHED_DEBUG
+- if (unlikely(p->migrate_disable_atomic)) {
+- tracing_off();
+- WARN_ON_ONCE(1);
+- }
+-#endif
++ struct rq *rq = this_rq();
++ int cpu = task_cpu(p);
+
+ WARN_ON_ONCE(p->migrate_disable <= 0);
+ if (p->migrate_disable > 1) {
+@@ -7323,67 +7293,69 @@ void migrate_enable(void)
+
+ preempt_disable();
+
++#ifdef CONFIG_SCHED_DEBUG
++ WARN_ON_ONCE(current->pinned_on_cpu != cpu);
++ current->pinned_on_cpu = -1;
++#endif
++
++ WARN_ON_ONCE(rq->nr_pinned < 1);
++
+ p->migrate_disable = 0;
++ rq->nr_pinned--;
++ if (rq->nr_pinned == 0 && unlikely(!cpu_active(cpu)) &&
++ takedown_cpu_task)
++ wake_up_process(takedown_cpu_task);
++
++ if (!p->migrate_disable_scheduled)
++ goto out;
++
++ p->migrate_disable_scheduled = 0;
++
+ migrate_enable_update_cpus_allowed(p);
+
+- if (p->migrate_disable_update) {
+- struct rq *rq;
++ WARN_ON(smp_processor_id() != cpu);
++ if (!is_cpu_allowed(p, cpu)) {
++ struct migration_arg arg = { p };
+ struct rq_flags rf;
+- int cpu = task_cpu(p);
+
+ rq = task_rq_lock(p, &rf);
+ update_rq_clock(rq);
+-
+- __do_set_cpus_allowed_tail(p, &p->cpus_mask);
++ arg.dest_cpu = select_fallback_rq(cpu, p);
+ task_rq_unlock(rq, p, &rf);
+
+- p->migrate_disable_update = 0;
+-
+- WARN_ON(smp_processor_id() != cpu);
+- if (!cpumask_test_cpu(cpu, &p->cpus_mask)) {
+- struct migration_arg arg = { p };
+- struct rq_flags rf;
++ preempt_lazy_enable();
++ preempt_enable();
+
+- rq = task_rq_lock(p, &rf);
+- update_rq_clock(rq);
+- arg.dest_cpu = select_fallback_rq(cpu, p);
+- task_rq_unlock(rq, p, &rf);
+-
+- unpin_current_cpu();
+- preempt_lazy_enable();
+- preempt_enable();
+-
+- sleeping_lock_inc();
+- stop_one_cpu(task_cpu(p), migration_cpu_stop, &arg);
+- sleeping_lock_dec();
+- tlb_migrate_finish(p->mm);
++ sleeping_lock_inc();
++ stop_one_cpu(task_cpu(p), migration_cpu_stop, &arg);
++ sleeping_lock_dec();
++ tlb_migrate_finish(p->mm);
+
+- return;
+- }
++ return;
+ }
+- unpin_current_cpu();
++
++out:
+ preempt_lazy_enable();
+ preempt_enable();
+ }
+ EXPORT_SYMBOL(migrate_enable);
+
+-#elif !defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT_BASE)
+-void migrate_disable(void)
++int cpu_nr_pinned(int cpu)
+ {
+-#ifdef CONFIG_SCHED_DEBUG
+- struct task_struct *p = current;
++ struct rq *rq = cpu_rq(cpu);
+
+- if (in_atomic() || irqs_disabled()) {
+- p->migrate_disable_atomic++;
+- return;
+- }
++ return rq->nr_pinned;
++}
+
+- if (unlikely(p->migrate_disable_atomic)) {
+- tracing_off();
+- WARN_ON_ONCE(1);
+- }
++#elif !defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT_BASE)
++static void migrate_disabled_sched(struct task_struct *p)
++{
++}
+
+- p->migrate_disable++;
++void migrate_disable(void)
++{
++#ifdef CONFIG_SCHED_DEBUG
++ current->migrate_disable++;
+ #endif
+ barrier();
+ }
+@@ -7394,20 +7366,14 @@ void migrate_enable(void)
+ #ifdef CONFIG_SCHED_DEBUG
+ struct task_struct *p = current;
+
+- if (in_atomic() || irqs_disabled()) {
+- p->migrate_disable_atomic--;
+- return;
+- }
+-
+- if (unlikely(p->migrate_disable_atomic)) {
+- tracing_off();
+- WARN_ON_ONCE(1);
+- }
+-
+ WARN_ON_ONCE(p->migrate_disable <= 0);
+ p->migrate_disable--;
+ #endif
+ barrier();
+ }
+ EXPORT_SYMBOL(migrate_enable);
++#else
++static void migrate_disabled_sched(struct task_struct *p)
++{
++}
+ #endif
+diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
+index 87a05bb90124..45b3f135d205 100644
+--- a/kernel/sched/sched.h
++++ b/kernel/sched/sched.h
+@@ -915,6 +915,10 @@ struct rq {
+ /* Must be inspected within a rcu lock section */
+ struct cpuidle_state *idle_state;
+ #endif
++
++#if defined(CONFIG_PREEMPT_RT_BASE) && defined(CONFIG_SMP)
++ int nr_pinned;
++#endif
+ };
+
+ static inline int cpu_of(struct rq *rq)
+diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c
+index b8a8a8db2d75..0c80992aa337 100644
+--- a/lib/smp_processor_id.c
++++ b/lib/smp_processor_id.c
+@@ -22,6 +22,9 @@ notrace static unsigned int check_preemption_disabled(const char *what1,
+ * Kernel threads bound to a single CPU can safely use
+ * smp_processor_id():
+ */
++ if (current->migrate_disable)
++ goto out;
++
+ if (current->nr_cpus_allowed == 1)
+ goto out;
+
+--
+2.36.1
+