summaryrefslogtreecommitdiffstats
path: root/kernel/sched/stats.c
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--kernel/sched/stats.c231
1 files changed, 231 insertions, 0 deletions
diff --git a/kernel/sched/stats.c b/kernel/sched/stats.c
new file mode 100644
index 0000000000..857f837f52
--- /dev/null
+++ b/kernel/sched/stats.c
@@ -0,0 +1,231 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * /proc/schedstat implementation
+ */
+
+void __update_stats_wait_start(struct rq *rq, struct task_struct *p,
+ struct sched_statistics *stats)
+{
+ u64 wait_start, prev_wait_start;
+
+ wait_start = rq_clock(rq);
+ prev_wait_start = schedstat_val(stats->wait_start);
+
+ if (p && likely(wait_start > prev_wait_start))
+ wait_start -= prev_wait_start;
+
+ __schedstat_set(stats->wait_start, wait_start);
+}
+
+void __update_stats_wait_end(struct rq *rq, struct task_struct *p,
+ struct sched_statistics *stats)
+{
+ u64 delta = rq_clock(rq) - schedstat_val(stats->wait_start);
+
+ if (p) {
+ if (task_on_rq_migrating(p)) {
+ /*
+ * Preserve migrating task's wait time so wait_start
+ * time stamp can be adjusted to accumulate wait time
+ * prior to migration.
+ */
+ __schedstat_set(stats->wait_start, delta);
+
+ return;
+ }
+
+ trace_sched_stat_wait(p, delta);
+ }
+
+ __schedstat_set(stats->wait_max,
+ max(schedstat_val(stats->wait_max), delta));
+ __schedstat_inc(stats->wait_count);
+ __schedstat_add(stats->wait_sum, delta);
+ __schedstat_set(stats->wait_start, 0);
+}
+
+void __update_stats_enqueue_sleeper(struct rq *rq, struct task_struct *p,
+ struct sched_statistics *stats)
+{
+ u64 sleep_start, block_start;
+
+ sleep_start = schedstat_val(stats->sleep_start);
+ block_start = schedstat_val(stats->block_start);
+
+ if (sleep_start) {
+ u64 delta = rq_clock(rq) - sleep_start;
+
+ if ((s64)delta < 0)
+ delta = 0;
+
+ if (unlikely(delta > schedstat_val(stats->sleep_max)))
+ __schedstat_set(stats->sleep_max, delta);
+
+ __schedstat_set(stats->sleep_start, 0);
+ __schedstat_add(stats->sum_sleep_runtime, delta);
+
+ if (p) {
+ account_scheduler_latency(p, delta >> 10, 1);
+ trace_sched_stat_sleep(p, delta);
+ }
+ }
+
+ if (block_start) {
+ u64 delta = rq_clock(rq) - block_start;
+
+ if ((s64)delta < 0)
+ delta = 0;
+
+ if (unlikely(delta > schedstat_val(stats->block_max)))
+ __schedstat_set(stats->block_max, delta);
+
+ __schedstat_set(stats->block_start, 0);
+ __schedstat_add(stats->sum_sleep_runtime, delta);
+ __schedstat_add(stats->sum_block_runtime, delta);
+
+ if (p) {
+ if (p->in_iowait) {
+ __schedstat_add(stats->iowait_sum, delta);
+ __schedstat_inc(stats->iowait_count);
+ trace_sched_stat_iowait(p, delta);
+ }
+
+ trace_sched_stat_blocked(p, delta);
+
+ /*
+ * Blocking time is in units of nanosecs, so shift by
+ * 20 to get a milliseconds-range estimation of the
+ * amount of time that the task spent sleeping:
+ */
+ if (unlikely(prof_on == SLEEP_PROFILING)) {
+ profile_hits(SLEEP_PROFILING,
+ (void *)get_wchan(p),
+ delta >> 20);
+ }
+ account_scheduler_latency(p, delta >> 10, 0);
+ }
+ }
+}
+
+/*
+ * Current schedstat API version.
+ *
+ * Bump this up when changing the output format or the meaning of an existing
+ * format, so that tools can adapt (or abort)
+ */
+#define SCHEDSTAT_VERSION 15
+
+static int show_schedstat(struct seq_file *seq, void *v)
+{
+ int cpu;
+
+ if (v == (void *)1) {
+ seq_printf(seq, "version %d\n", SCHEDSTAT_VERSION);
+ seq_printf(seq, "timestamp %lu\n", jiffies);
+ } else {
+ struct rq *rq;
+#ifdef CONFIG_SMP
+ struct sched_domain *sd;
+ int dcount = 0;
+#endif
+ cpu = (unsigned long)(v - 2);
+ rq = cpu_rq(cpu);
+
+ /* runqueue-specific stats */
+ seq_printf(seq,
+ "cpu%d %u 0 %u %u %u %u %llu %llu %lu",
+ cpu, rq->yld_count,
+ rq->sched_count, rq->sched_goidle,
+ rq->ttwu_count, rq->ttwu_local,
+ rq->rq_cpu_time,
+ rq->rq_sched_info.run_delay, rq->rq_sched_info.pcount);
+
+ seq_printf(seq, "\n");
+
+#ifdef CONFIG_SMP
+ /* domain-specific stats */
+ rcu_read_lock();
+ for_each_domain(cpu, sd) {
+ enum cpu_idle_type itype;
+
+ seq_printf(seq, "domain%d %*pb", dcount++,
+ cpumask_pr_args(sched_domain_span(sd)));
+ for (itype = CPU_IDLE; itype < CPU_MAX_IDLE_TYPES;
+ itype++) {
+ seq_printf(seq, " %u %u %u %u %u %u %u %u",
+ sd->lb_count[itype],
+ sd->lb_balanced[itype],
+ sd->lb_failed[itype],
+ sd->lb_imbalance[itype],
+ sd->lb_gained[itype],
+ sd->lb_hot_gained[itype],
+ sd->lb_nobusyq[itype],
+ sd->lb_nobusyg[itype]);
+ }
+ seq_printf(seq,
+ " %u %u %u %u %u %u %u %u %u %u %u %u\n",
+ sd->alb_count, sd->alb_failed, sd->alb_pushed,
+ sd->sbe_count, sd->sbe_balanced, sd->sbe_pushed,
+ sd->sbf_count, sd->sbf_balanced, sd->sbf_pushed,
+ sd->ttwu_wake_remote, sd->ttwu_move_affine,
+ sd->ttwu_move_balance);
+ }
+ rcu_read_unlock();
+#endif
+ }
+ return 0;
+}
+
+/*
+ * This iterator needs some explanation.
+ * It returns 1 for the header position.
+ * This means 2 is cpu 0.
+ * In a hotplugged system some CPUs, including cpu 0, may be missing so we have
+ * to use cpumask_* to iterate over the CPUs.
+ */
+static void *schedstat_start(struct seq_file *file, loff_t *offset)
+{
+ unsigned long n = *offset;
+
+ if (n == 0)
+ return (void *) 1;
+
+ n--;
+
+ if (n > 0)
+ n = cpumask_next(n - 1, cpu_online_mask);
+ else
+ n = cpumask_first(cpu_online_mask);
+
+ *offset = n + 1;
+
+ if (n < nr_cpu_ids)
+ return (void *)(unsigned long)(n + 2);
+
+ return NULL;
+}
+
+static void *schedstat_next(struct seq_file *file, void *data, loff_t *offset)
+{
+ (*offset)++;
+
+ return schedstat_start(file, offset);
+}
+
+static void schedstat_stop(struct seq_file *file, void *data)
+{
+}
+
+static const struct seq_operations schedstat_sops = {
+ .start = schedstat_start,
+ .next = schedstat_next,
+ .stop = schedstat_stop,
+ .show = show_schedstat,
+};
+
+static int __init proc_schedstat_init(void)
+{
+ proc_create_seq("schedstat", 0, NULL, &schedstat_sops);
+ return 0;
+}
+subsys_initcall(proc_schedstat_init);