summaryrefslogtreecommitdiffstats
path: root/kernel/rcu/tree.c
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-18 17:35:05 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-18 17:39:31 +0000
commit85c675d0d09a45a135bddd15d7b385f8758c32fb (patch)
tree76267dbc9b9a130337be3640948fe397b04ac629 /kernel/rcu/tree.c
parentAdding upstream version 6.6.15. (diff)
downloadlinux-85c675d0d09a45a135bddd15d7b385f8758c32fb.tar.xz
linux-85c675d0d09a45a135bddd15d7b385f8758c32fb.zip
Adding upstream version 6.7.7.upstream/6.7.7
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'kernel/rcu/tree.c')
-rw-r--r--kernel/rcu/tree.c242
1 files changed, 161 insertions, 81 deletions
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 4fe47ed95e..157f3ca2a9 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1298,7 +1298,7 @@ EXPORT_SYMBOL_GPL(rcu_gp_slow_register);
/* Unregister a counter, with NULL for not caring which. */
void rcu_gp_slow_unregister(atomic_t *rgssp)
{
- WARN_ON_ONCE(rgssp && rgssp != rcu_gp_slow_suppress);
+ WARN_ON_ONCE(rgssp && rgssp != rcu_gp_slow_suppress && rcu_gp_slow_suppress != NULL);
WRITE_ONCE(rcu_gp_slow_suppress, NULL);
}
@@ -2185,6 +2185,7 @@ static void rcu_do_batch(struct rcu_data *rdp)
trace_rcu_invoke_callback(rcu_state.name, rhp);
f = rhp->func;
+ debug_rcu_head_callback(rhp);
WRITE_ONCE(rhp->func, (rcu_callback_t)0L);
f(rhp);
@@ -2772,7 +2773,7 @@ __call_rcu_common(struct rcu_head *head, rcu_callback_t func, bool lazy_in)
*/
void call_rcu_hurry(struct rcu_head *head, rcu_callback_t func)
{
- return __call_rcu_common(head, func, false);
+ __call_rcu_common(head, func, false);
}
EXPORT_SYMBOL_GPL(call_rcu_hurry);
#endif
@@ -2823,7 +2824,7 @@ EXPORT_SYMBOL_GPL(call_rcu_hurry);
*/
void call_rcu(struct rcu_head *head, rcu_callback_t func)
{
- return __call_rcu_common(head, func, IS_ENABLED(CONFIG_RCU_LAZY));
+ __call_rcu_common(head, func, IS_ENABLED(CONFIG_RCU_LAZY));
}
EXPORT_SYMBOL_GPL(call_rcu);
@@ -3516,13 +3517,6 @@ kfree_rcu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
return freed == 0 ? SHRINK_STOP : freed;
}
-static struct shrinker kfree_rcu_shrinker = {
- .count_objects = kfree_rcu_shrink_count,
- .scan_objects = kfree_rcu_shrink_scan,
- .batch = 0,
- .seeks = DEFAULT_SEEKS,
-};
-
void __init kfree_rcu_scheduler_running(void)
{
int cpu;
@@ -4150,6 +4144,82 @@ retry:
}
EXPORT_SYMBOL_GPL(rcu_barrier);
+static unsigned long rcu_barrier_last_throttle;
+
+/**
+ * rcu_barrier_throttled - Do rcu_barrier(), but limit to one per second
+ *
+ * This can be thought of as guard rails around rcu_barrier() that
+ * permits unrestricted userspace use, at least assuming the hardware's
+ * try_cmpxchg() is robust. There will be at most one call per second to
+ * rcu_barrier() system-wide from use of this function, which means that
+ * callers might needlessly wait a second or three.
+ *
+ * This is intended for use by test suites to avoid OOM by flushing RCU
+ * callbacks from the previous test before starting the next. See the
+ * rcutree.do_rcu_barrier module parameter for more information.
+ *
+ * Why not simply make rcu_barrier() more scalable? That might be
+ * the eventual endpoint, but let's keep it simple for the time being.
+ * Note that the module parameter infrastructure serializes calls to a
+ * given .set() function, but should concurrent .set() invocation ever be
+ * possible, we are ready!
+ */
+static void rcu_barrier_throttled(void)
+{
+ unsigned long j = jiffies;
+ unsigned long old = READ_ONCE(rcu_barrier_last_throttle);
+ unsigned long s = rcu_seq_snap(&rcu_state.barrier_sequence);
+
+ while (time_in_range(j, old, old + HZ / 16) ||
+ !try_cmpxchg(&rcu_barrier_last_throttle, &old, j)) {
+ schedule_timeout_idle(HZ / 16);
+ if (rcu_seq_done(&rcu_state.barrier_sequence, s)) {
+ smp_mb(); /* caller's subsequent code after above check. */
+ return;
+ }
+ j = jiffies;
+ old = READ_ONCE(rcu_barrier_last_throttle);
+ }
+ rcu_barrier();
+}
+
+/*
+ * Invoke rcu_barrier_throttled() when a rcutree.do_rcu_barrier
+ * request arrives. We insist on a true value to allow for possible
+ * future expansion.
+ */
+static int param_set_do_rcu_barrier(const char *val, const struct kernel_param *kp)
+{
+ bool b;
+ int ret;
+
+ if (rcu_scheduler_active != RCU_SCHEDULER_RUNNING)
+ return -EAGAIN;
+ ret = kstrtobool(val, &b);
+ if (!ret && b) {
+ atomic_inc((atomic_t *)kp->arg);
+ rcu_barrier_throttled();
+ atomic_dec((atomic_t *)kp->arg);
+ }
+ return ret;
+}
+
+/*
+ * Output the number of outstanding rcutree.do_rcu_barrier requests.
+ */
+static int param_get_do_rcu_barrier(char *buffer, const struct kernel_param *kp)
+{
+ return sprintf(buffer, "%d\n", atomic_read((atomic_t *)kp->arg));
+}
+
+static const struct kernel_param_ops do_rcu_barrier_ops = {
+ .set = param_set_do_rcu_barrier,
+ .get = param_get_do_rcu_barrier,
+};
+static atomic_t do_rcu_barrier;
+module_param_cb(do_rcu_barrier, &do_rcu_barrier_ops, &do_rcu_barrier, 0644);
+
/*
* Compute the mask of online CPUs for the specified rcu_node structure.
* This will not be stable unless the rcu_node structure's ->lock is
@@ -4204,7 +4274,7 @@ bool rcu_lockdep_current_cpu_online(void)
rdp = this_cpu_ptr(&rcu_data);
/*
* Strictly, we care here about the case where the current CPU is
- * in rcu_cpu_starting() and thus has an excuse for rdp->grpmask
+ * in rcutree_report_cpu_starting() and thus has an excuse for rdp->grpmask
* not being up to date. So arch_spin_is_locked() might have a
* false positive if it's held by some *other* CPU, but that's
* OK because that just means a false *negative* on the warning.
@@ -4226,25 +4296,6 @@ static bool rcu_init_invoked(void)
}
/*
- * Near the end of the offline process. Trace the fact that this CPU
- * is going offline.
- */
-int rcutree_dying_cpu(unsigned int cpu)
-{
- bool blkd;
- struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
- struct rcu_node *rnp = rdp->mynode;
-
- if (!IS_ENABLED(CONFIG_HOTPLUG_CPU))
- return 0;
-
- blkd = !!(READ_ONCE(rnp->qsmask) & rdp->grpmask);
- trace_rcu_grace_period(rcu_state.name, READ_ONCE(rnp->gp_seq),
- blkd ? TPS("cpuofl-bgp") : TPS("cpuofl"));
- return 0;
-}
-
-/*
* All CPUs for the specified rcu_node structure have gone offline,
* and all tasks that were preempted within an RCU read-side critical
* section while running on one of those CPUs have since exited their RCU
@@ -4290,23 +4341,6 @@ static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf)
}
/*
- * The CPU has been completely removed, and some other CPU is reporting
- * this fact from process context. Do the remainder of the cleanup.
- * There can only be one CPU hotplug operation at a time, so no need for
- * explicit locking.
- */
-int rcutree_dead_cpu(unsigned int cpu)
-{
- if (!IS_ENABLED(CONFIG_HOTPLUG_CPU))
- return 0;
-
- WRITE_ONCE(rcu_state.n_online_cpus, rcu_state.n_online_cpus - 1);
- // Stop-machine done, so allow nohz_full to disable tick.
- tick_dep_clear(TICK_DEP_BIT_RCU);
- return 0;
-}
-
-/*
* Propagate ->qsinitmask bits up the rcu_node tree to account for the
* first CPU in a given leaf rcu_node structure coming online. The caller
* must hold the corresponding leaf rcu_node ->lock with interrupts
@@ -4459,29 +4493,6 @@ int rcutree_online_cpu(unsigned int cpu)
}
/*
- * Near the beginning of the process. The CPU is still very much alive
- * with pretty much all services enabled.
- */
-int rcutree_offline_cpu(unsigned int cpu)
-{
- unsigned long flags;
- struct rcu_data *rdp;
- struct rcu_node *rnp;
-
- rdp = per_cpu_ptr(&rcu_data, cpu);
- rnp = rdp->mynode;
- raw_spin_lock_irqsave_rcu_node(rnp, flags);
- rnp->ffmask &= ~rdp->grpmask;
- raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
-
- rcutree_affinity_setting(cpu, cpu);
-
- // nohz_full CPUs need the tick for stop-machine to work quickly
- tick_dep_set(TICK_DEP_BIT_RCU);
- return 0;
-}
-
-/*
* Mark the specified CPU as being online so that subsequent grace periods
* (both expedited and normal) will wait on it. Note that this means that
* incoming CPUs are not allowed to use RCU read-side critical sections
@@ -4492,8 +4503,10 @@ int rcutree_offline_cpu(unsigned int cpu)
* from the incoming CPU rather than from the cpuhp_step mechanism.
* This is because this function must be invoked at a precise location.
* This incoming CPU must not have enabled interrupts yet.
+ *
+ * This mirrors the effects of rcutree_report_cpu_dead().
*/
-void rcu_cpu_starting(unsigned int cpu)
+void rcutree_report_cpu_starting(unsigned int cpu)
{
unsigned long mask;
struct rcu_data *rdp;
@@ -4547,14 +4560,21 @@ void rcu_cpu_starting(unsigned int cpu)
* Note that this function is special in that it is invoked directly
* from the outgoing CPU rather than from the cpuhp_step mechanism.
* This is because this function must be invoked at a precise location.
+ *
+ * This mirrors the effect of rcutree_report_cpu_starting().
*/
-void rcu_report_dead(unsigned int cpu)
+void rcutree_report_cpu_dead(void)
{
- unsigned long flags, seq_flags;
+ unsigned long flags;
unsigned long mask;
- struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
+ struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rdp & rnp. */
+ /*
+ * IRQS must be disabled from now on and until the CPU dies, or an interrupt
+ * may introduce a new READ-side while it is actually off the QS masks.
+ */
+ lockdep_assert_irqs_disabled();
// Do any dangling deferred wakeups.
do_nocb_deferred_wakeup(rdp);
@@ -4562,7 +4582,6 @@ void rcu_report_dead(unsigned int cpu)
/* Remove outgoing CPU from mask in the leaf rcu_node structure. */
mask = rdp->grpmask;
- local_irq_save(seq_flags);
arch_spin_lock(&rcu_state.ofl_lock);
raw_spin_lock_irqsave_rcu_node(rnp, flags); /* Enforce GP memory-order guarantee. */
rdp->rcu_ofl_gp_seq = READ_ONCE(rcu_state.gp_seq);
@@ -4576,8 +4595,6 @@ void rcu_report_dead(unsigned int cpu)
WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext & ~mask);
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
arch_spin_unlock(&rcu_state.ofl_lock);
- local_irq_restore(seq_flags);
-
rdp->cpu_started = false;
}
@@ -4632,7 +4649,60 @@ void rcutree_migrate_callbacks(int cpu)
cpu, rcu_segcblist_n_cbs(&rdp->cblist),
rcu_segcblist_first_cb(&rdp->cblist));
}
-#endif
+
+/*
+ * The CPU has been completely removed, and some other CPU is reporting
+ * this fact from process context. Do the remainder of the cleanup.
+ * There can only be one CPU hotplug operation at a time, so no need for
+ * explicit locking.
+ */
+int rcutree_dead_cpu(unsigned int cpu)
+{
+ WRITE_ONCE(rcu_state.n_online_cpus, rcu_state.n_online_cpus - 1);
+ // Stop-machine done, so allow nohz_full to disable tick.
+ tick_dep_clear(TICK_DEP_BIT_RCU);
+ return 0;
+}
+
+/*
+ * Near the end of the offline process. Trace the fact that this CPU
+ * is going offline.
+ */
+int rcutree_dying_cpu(unsigned int cpu)
+{
+ bool blkd;
+ struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
+ struct rcu_node *rnp = rdp->mynode;
+
+ blkd = !!(READ_ONCE(rnp->qsmask) & rdp->grpmask);
+ trace_rcu_grace_period(rcu_state.name, READ_ONCE(rnp->gp_seq),
+ blkd ? TPS("cpuofl-bgp") : TPS("cpuofl"));
+ return 0;
+}
+
+/*
+ * Near the beginning of the process. The CPU is still very much alive
+ * with pretty much all services enabled.
+ */
+int rcutree_offline_cpu(unsigned int cpu)
+{
+ unsigned long flags;
+ struct rcu_data *rdp;
+ struct rcu_node *rnp;
+
+ rdp = per_cpu_ptr(&rcu_data, cpu);
+ rnp = rdp->mynode;
+ raw_spin_lock_irqsave_rcu_node(rnp, flags);
+ rnp->ffmask &= ~rdp->grpmask;
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+
+ rcutree_affinity_setting(cpu, cpu);
+
+ // nohz_full CPUs need the tick for stop-machine to work quickly
+ tick_dep_set(TICK_DEP_BIT_RCU);
+ return 0;
+}
+#endif /* #ifdef CONFIG_HOTPLUG_CPU */
/*
* On non-huge systems, use expedited RCU grace periods to make suspend
@@ -5005,6 +5075,7 @@ static void __init kfree_rcu_batch_init(void)
{
int cpu;
int i, j;
+ struct shrinker *kfree_rcu_shrinker;
/* Clamp it to [0:100] seconds interval. */
if (rcu_delay_page_cache_fill_msec < 0 ||
@@ -5036,8 +5107,17 @@ static void __init kfree_rcu_batch_init(void)
INIT_DELAYED_WORK(&krcp->page_cache_work, fill_page_cache_func);
krcp->initialized = true;
}
- if (register_shrinker(&kfree_rcu_shrinker, "rcu-kfree"))
- pr_err("Failed to register kfree_rcu() shrinker!\n");
+
+ kfree_rcu_shrinker = shrinker_alloc(0, "rcu-kfree");
+ if (!kfree_rcu_shrinker) {
+ pr_err("Failed to allocate kfree_rcu() shrinker!\n");
+ return;
+ }
+
+ kfree_rcu_shrinker->count_objects = kfree_rcu_shrink_count;
+ kfree_rcu_shrinker->scan_objects = kfree_rcu_shrink_scan;
+
+ shrinker_register(kfree_rcu_shrinker);
}
void __init rcu_init(void)
@@ -5064,7 +5144,7 @@ void __init rcu_init(void)
pm_notifier(rcu_pm_notify, 0);
WARN_ON(num_online_cpus() > 1); // Only one CPU this early in boot.
rcutree_prepare_cpu(cpu);
- rcu_cpu_starting(cpu);
+ rcutree_report_cpu_starting(cpu);
rcutree_online_cpu(cpu);
/* Create workqueue for Tree SRCU and for expedited GPs. */