summaryrefslogtreecommitdiffstats
path: root/kernel/sched/debug.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/sched/debug.c')
-rw-r--r--kernel/sched/debug.c1042
1 files changed, 1042 insertions, 0 deletions
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
new file mode 100644
index 000000000..e7df4f293
--- /dev/null
+++ b/kernel/sched/debug.c
@@ -0,0 +1,1042 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * kernel/sched/debug.c
+ *
+ * Print the CFS rbtree and other debugging details
+ *
+ * Copyright(C) 2007, Red Hat, Inc., Ingo Molnar
+ */
+#include "sched.h"
+
+/*
+ * This allows printing both to /proc/sched_debug and
+ * to the console
+ */
+#define SEQ_printf(m, x...) \
+ do { \
+ if (m) \
+ seq_printf(m, x); \
+ else \
+ pr_cont(x); \
+ } while (0)
+
+/*
+ * Ease the printing of nsec fields:
+ */
+static long long nsec_high(unsigned long long nsec)
+{
+ if ((long long)nsec < 0) {
+ nsec = -nsec;
+ do_div(nsec, 1000000);
+ return -nsec;
+ }
+ do_div(nsec, 1000000);
+
+ return nsec;
+}
+
+static unsigned long nsec_low(unsigned long long nsec)
+{
+ if ((long long)nsec < 0)
+ nsec = -nsec;
+
+ return do_div(nsec, 1000000);
+}
+
+#define SPLIT_NS(x) nsec_high(x), nsec_low(x)
+
+#define SCHED_FEAT(name, enabled) \
+ #name ,
+
+static const char * const sched_feat_names[] = {
+#include "features.h"
+};
+
+#undef SCHED_FEAT
+
+static int sched_feat_show(struct seq_file *m, void *v)
+{
+ int i;
+
+ for (i = 0; i < __SCHED_FEAT_NR; i++) {
+ if (!(sysctl_sched_features & (1UL << i)))
+ seq_puts(m, "NO_");
+ seq_printf(m, "%s ", sched_feat_names[i]);
+ }
+ seq_puts(m, "\n");
+
+ return 0;
+}
+
+#ifdef CONFIG_JUMP_LABEL
+
+#define jump_label_key__true STATIC_KEY_INIT_TRUE
+#define jump_label_key__false STATIC_KEY_INIT_FALSE
+
+#define SCHED_FEAT(name, enabled) \
+ jump_label_key__##enabled ,
+
+struct static_key sched_feat_keys[__SCHED_FEAT_NR] = {
+#include "features.h"
+};
+
+#undef SCHED_FEAT
+
+static void sched_feat_disable(int i)
+{
+ static_key_disable_cpuslocked(&sched_feat_keys[i]);
+}
+
+static void sched_feat_enable(int i)
+{
+ static_key_enable_cpuslocked(&sched_feat_keys[i]);
+}
+#else
+static void sched_feat_disable(int i) { };
+static void sched_feat_enable(int i) { };
+#endif /* CONFIG_JUMP_LABEL */
+
+static int sched_feat_set(char *cmp)
+{
+ int i;
+ int neg = 0;
+
+ if (strncmp(cmp, "NO_", 3) == 0) {
+ neg = 1;
+ cmp += 3;
+ }
+
+ i = match_string(sched_feat_names, __SCHED_FEAT_NR, cmp);
+ if (i < 0)
+ return i;
+
+ if (neg) {
+ sysctl_sched_features &= ~(1UL << i);
+ sched_feat_disable(i);
+ } else {
+ sysctl_sched_features |= (1UL << i);
+ sched_feat_enable(i);
+ }
+
+ return 0;
+}
+
+static ssize_t
+sched_feat_write(struct file *filp, const char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ char buf[64];
+ char *cmp;
+ int ret;
+ struct inode *inode;
+
+ if (cnt > 63)
+ cnt = 63;
+
+ if (copy_from_user(&buf, ubuf, cnt))
+ return -EFAULT;
+
+ buf[cnt] = 0;
+ cmp = strstrip(buf);
+
+ /* Ensure the static_key remains in a consistent state */
+ inode = file_inode(filp);
+ cpus_read_lock();
+ inode_lock(inode);
+ ret = sched_feat_set(cmp);
+ inode_unlock(inode);
+ cpus_read_unlock();
+ if (ret < 0)
+ return ret;
+
+ *ppos += cnt;
+
+ return cnt;
+}
+
+static int sched_feat_open(struct inode *inode, struct file *filp)
+{
+ return single_open(filp, sched_feat_show, NULL);
+}
+
+static const struct file_operations sched_feat_fops = {
+ .open = sched_feat_open,
+ .write = sched_feat_write,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+__read_mostly bool sched_debug_enabled;
+
+static __init int sched_init_debug(void)
+{
+ debugfs_create_file("sched_features", 0644, NULL, NULL,
+ &sched_feat_fops);
+
+ debugfs_create_bool("sched_debug", 0644, NULL,
+ &sched_debug_enabled);
+
+ return 0;
+}
+late_initcall(sched_init_debug);
+
+#ifdef CONFIG_SMP
+
+#ifdef CONFIG_SYSCTL
+
+static struct ctl_table sd_ctl_dir[] = {
+ {
+ .procname = "sched_domain",
+ .mode = 0555,
+ },
+ {}
+};
+
+static struct ctl_table sd_ctl_root[] = {
+ {
+ .procname = "kernel",
+ .mode = 0555,
+ .child = sd_ctl_dir,
+ },
+ {}
+};
+
+static struct ctl_table *sd_alloc_ctl_entry(int n)
+{
+ struct ctl_table *entry =
+ kcalloc(n, sizeof(struct ctl_table), GFP_KERNEL);
+
+ return entry;
+}
+
+static void sd_free_ctl_entry(struct ctl_table **tablep)
+{
+ struct ctl_table *entry;
+
+ /*
+ * In the intermediate directories, both the child directory and
+ * procname are dynamically allocated and could fail but the mode
+ * will always be set. In the lowest directory the names are
+ * static strings and all have proc handlers.
+ */
+ for (entry = *tablep; entry->mode; entry++) {
+ if (entry->child)
+ sd_free_ctl_entry(&entry->child);
+ if (entry->proc_handler == NULL)
+ kfree(entry->procname);
+ }
+
+ kfree(*tablep);
+ *tablep = NULL;
+}
+
+static void
+set_table_entry(struct ctl_table *entry,
+ const char *procname, void *data, int maxlen,
+ umode_t mode, proc_handler *proc_handler)
+{
+ entry->procname = procname;
+ entry->data = data;
+ entry->maxlen = maxlen;
+ entry->mode = mode;
+ entry->proc_handler = proc_handler;
+}
+
+static int sd_ctl_doflags(struct ctl_table *table, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
+{
+ unsigned long flags = *(unsigned long *)table->data;
+ size_t data_size = 0;
+ size_t len = 0;
+ char *tmp, *buf;
+ int idx;
+
+ if (write)
+ return 0;
+
+ for_each_set_bit(idx, &flags, __SD_FLAG_CNT) {
+ char *name = sd_flag_debug[idx].name;
+
+ /* Name plus whitespace */
+ data_size += strlen(name) + 1;
+ }
+
+ if (*ppos > data_size) {
+ *lenp = 0;
+ return 0;
+ }
+
+ buf = kcalloc(data_size + 1, sizeof(*buf), GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ for_each_set_bit(idx, &flags, __SD_FLAG_CNT) {
+ char *name = sd_flag_debug[idx].name;
+
+ len += snprintf(buf + len, strlen(name) + 2, "%s ", name);
+ }
+
+ tmp = buf + *ppos;
+ len -= *ppos;
+
+ if (len > *lenp)
+ len = *lenp;
+ if (len)
+ memcpy(buffer, tmp, len);
+ if (len < *lenp) {
+ ((char *)buffer)[len] = '\n';
+ len++;
+ }
+
+ *lenp = len;
+ *ppos += len;
+
+ kfree(buf);
+
+ return 0;
+}
+
+static struct ctl_table *
+sd_alloc_ctl_domain_table(struct sched_domain *sd)
+{
+ struct ctl_table *table = sd_alloc_ctl_entry(9);
+
+ if (table == NULL)
+ return NULL;
+
+ set_table_entry(&table[0], "min_interval", &sd->min_interval, sizeof(long), 0644, proc_doulongvec_minmax);
+ set_table_entry(&table[1], "max_interval", &sd->max_interval, sizeof(long), 0644, proc_doulongvec_minmax);
+ set_table_entry(&table[2], "busy_factor", &sd->busy_factor, sizeof(int), 0644, proc_dointvec_minmax);
+ set_table_entry(&table[3], "imbalance_pct", &sd->imbalance_pct, sizeof(int), 0644, proc_dointvec_minmax);
+ set_table_entry(&table[4], "cache_nice_tries", &sd->cache_nice_tries, sizeof(int), 0644, proc_dointvec_minmax);
+ set_table_entry(&table[5], "flags", &sd->flags, sizeof(int), 0444, sd_ctl_doflags);
+ set_table_entry(&table[6], "max_newidle_lb_cost", &sd->max_newidle_lb_cost, sizeof(long), 0644, proc_doulongvec_minmax);
+ set_table_entry(&table[7], "name", sd->name, CORENAME_MAX_SIZE, 0444, proc_dostring);
+ /* &table[8] is terminator */
+
+ return table;
+}
+
+static struct ctl_table *sd_alloc_ctl_cpu_table(int cpu)
+{
+ struct ctl_table *entry, *table;
+ struct sched_domain *sd;
+ int domain_num = 0, i;
+ char buf[32];
+
+ for_each_domain(cpu, sd)
+ domain_num++;
+ entry = table = sd_alloc_ctl_entry(domain_num + 1);
+ if (table == NULL)
+ return NULL;
+
+ i = 0;
+ for_each_domain(cpu, sd) {
+ snprintf(buf, 32, "domain%d", i);
+ entry->procname = kstrdup(buf, GFP_KERNEL);
+ entry->mode = 0555;
+ entry->child = sd_alloc_ctl_domain_table(sd);
+ entry++;
+ i++;
+ }
+ return table;
+}
+
+static cpumask_var_t sd_sysctl_cpus;
+static struct ctl_table_header *sd_sysctl_header;
+
+void register_sched_domain_sysctl(void)
+{
+ static struct ctl_table *cpu_entries;
+ static struct ctl_table **cpu_idx;
+ static bool init_done = false;
+ char buf[32];
+ int i;
+
+ if (!cpu_entries) {
+ cpu_entries = sd_alloc_ctl_entry(num_possible_cpus() + 1);
+ if (!cpu_entries)
+ return;
+
+ WARN_ON(sd_ctl_dir[0].child);
+ sd_ctl_dir[0].child = cpu_entries;
+ }
+
+ if (!cpu_idx) {
+ struct ctl_table *e = cpu_entries;
+
+ cpu_idx = kcalloc(nr_cpu_ids, sizeof(struct ctl_table*), GFP_KERNEL);
+ if (!cpu_idx)
+ return;
+
+ /* deal with sparse possible map */
+ for_each_possible_cpu(i) {
+ cpu_idx[i] = e;
+ e++;
+ }
+ }
+
+ if (!cpumask_available(sd_sysctl_cpus)) {
+ if (!alloc_cpumask_var(&sd_sysctl_cpus, GFP_KERNEL))
+ return;
+ }
+
+ if (!init_done) {
+ init_done = true;
+ /* init to possible to not have holes in @cpu_entries */
+ cpumask_copy(sd_sysctl_cpus, cpu_possible_mask);
+ }
+
+ for_each_cpu(i, sd_sysctl_cpus) {
+ struct ctl_table *e = cpu_idx[i];
+
+ if (e->child)
+ sd_free_ctl_entry(&e->child);
+
+ if (!e->procname) {
+ snprintf(buf, 32, "cpu%d", i);
+ e->procname = kstrdup(buf, GFP_KERNEL);
+ }
+ e->mode = 0555;
+ e->child = sd_alloc_ctl_cpu_table(i);
+
+ __cpumask_clear_cpu(i, sd_sysctl_cpus);
+ }
+
+ WARN_ON(sd_sysctl_header);
+ sd_sysctl_header = register_sysctl_table(sd_ctl_root);
+}
+
+void dirty_sched_domain_sysctl(int cpu)
+{
+ if (cpumask_available(sd_sysctl_cpus))
+ __cpumask_set_cpu(cpu, sd_sysctl_cpus);
+}
+
+/* may be called multiple times per register */
+void unregister_sched_domain_sysctl(void)
+{
+ unregister_sysctl_table(sd_sysctl_header);
+ sd_sysctl_header = NULL;
+}
+#endif /* CONFIG_SYSCTL */
+#endif /* CONFIG_SMP */
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group *tg)
+{
+ struct sched_entity *se = tg->se[cpu];
+
+#define P(F) SEQ_printf(m, " .%-30s: %lld\n", #F, (long long)F)
+#define P_SCHEDSTAT(F) SEQ_printf(m, " .%-30s: %lld\n", #F, (long long)schedstat_val(F))
+#define PN(F) SEQ_printf(m, " .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)F))
+#define PN_SCHEDSTAT(F) SEQ_printf(m, " .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)schedstat_val(F)))
+
+ if (!se)
+ return;
+
+ PN(se->exec_start);
+ PN(se->vruntime);
+ PN(se->sum_exec_runtime);
+
+ if (schedstat_enabled()) {
+ PN_SCHEDSTAT(se->statistics.wait_start);
+ PN_SCHEDSTAT(se->statistics.sleep_start);
+ PN_SCHEDSTAT(se->statistics.block_start);
+ PN_SCHEDSTAT(se->statistics.sleep_max);
+ PN_SCHEDSTAT(se->statistics.block_max);
+ PN_SCHEDSTAT(se->statistics.exec_max);
+ PN_SCHEDSTAT(se->statistics.slice_max);
+ PN_SCHEDSTAT(se->statistics.wait_max);
+ PN_SCHEDSTAT(se->statistics.wait_sum);
+ P_SCHEDSTAT(se->statistics.wait_count);
+ }
+
+ P(se->load.weight);
+#ifdef CONFIG_SMP
+ P(se->avg.load_avg);
+ P(se->avg.util_avg);
+ P(se->avg.runnable_avg);
+#endif
+
+#undef PN_SCHEDSTAT
+#undef PN
+#undef P_SCHEDSTAT
+#undef P
+}
+#endif
+
+#ifdef CONFIG_CGROUP_SCHED
+static DEFINE_SPINLOCK(sched_debug_lock);
+static char group_path[PATH_MAX];
+
+static void task_group_path(struct task_group *tg, char *path, int plen)
+{
+ if (autogroup_path(tg, path, plen))
+ return;
+
+ cgroup_path(tg->css.cgroup, path, plen);
+}
+
+/*
+ * Only 1 SEQ_printf_task_group_path() caller can use the full length
+ * group_path[] for cgroup path. Other simultaneous callers will have
+ * to use a shorter stack buffer. A "..." suffix is appended at the end
+ * of the stack buffer so that it will show up in case the output length
+ * matches the given buffer size to indicate possible path name truncation.
+ */
+#define SEQ_printf_task_group_path(m, tg, fmt...) \
+{ \
+ if (spin_trylock(&sched_debug_lock)) { \
+ task_group_path(tg, group_path, sizeof(group_path)); \
+ SEQ_printf(m, fmt, group_path); \
+ spin_unlock(&sched_debug_lock); \
+ } else { \
+ char buf[128]; \
+ char *bufend = buf + sizeof(buf) - 3; \
+ task_group_path(tg, buf, bufend - buf); \
+ strcpy(bufend - 1, "..."); \
+ SEQ_printf(m, fmt, buf); \
+ } \
+}
+#endif
+
+static void
+print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
+{
+ if (rq->curr == p)
+ SEQ_printf(m, ">R");
+ else
+ SEQ_printf(m, " %c", task_state_to_char(p));
+
+ SEQ_printf(m, " %15s %5d %9Ld.%06ld %9Ld %5d ",
+ p->comm, task_pid_nr(p),
+ SPLIT_NS(p->se.vruntime),
+ (long long)(p->nvcsw + p->nivcsw),
+ p->prio);
+
+ SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld",
+ SPLIT_NS(schedstat_val_or_zero(p->se.statistics.wait_sum)),
+ SPLIT_NS(p->se.sum_exec_runtime),
+ SPLIT_NS(schedstat_val_or_zero(p->se.statistics.sum_sleep_runtime)));
+
+#ifdef CONFIG_NUMA_BALANCING
+ SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p));
+#endif
+#ifdef CONFIG_CGROUP_SCHED
+ SEQ_printf_task_group_path(m, task_group(p), " %s")
+#endif
+
+ SEQ_printf(m, "\n");
+}
+
+static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu)
+{
+ struct task_struct *g, *p;
+
+ SEQ_printf(m, "\n");
+ SEQ_printf(m, "runnable tasks:\n");
+ SEQ_printf(m, " S task PID tree-key switches prio"
+ " wait-time sum-exec sum-sleep\n");
+ SEQ_printf(m, "-------------------------------------------------------"
+ "------------------------------------------------------\n");
+
+ rcu_read_lock();
+ for_each_process_thread(g, p) {
+ if (task_cpu(p) != rq_cpu)
+ continue;
+
+ print_task(m, rq, p);
+ }
+ rcu_read_unlock();
+}
+
+void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
+{
+ s64 MIN_vruntime = -1, min_vruntime, max_vruntime = -1,
+ spread, rq0_min_vruntime, spread0;
+ struct rq *rq = cpu_rq(cpu);
+ struct sched_entity *last;
+ unsigned long flags;
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+ SEQ_printf(m, "\n");
+ SEQ_printf_task_group_path(m, cfs_rq->tg, "cfs_rq[%d]:%s\n", cpu);
+#else
+ SEQ_printf(m, "\n");
+ SEQ_printf(m, "cfs_rq[%d]:\n", cpu);
+#endif
+ SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "exec_clock",
+ SPLIT_NS(cfs_rq->exec_clock));
+
+ raw_spin_lock_irqsave(&rq->lock, flags);
+ if (rb_first_cached(&cfs_rq->tasks_timeline))
+ MIN_vruntime = (__pick_first_entity(cfs_rq))->vruntime;
+ last = __pick_last_entity(cfs_rq);
+ if (last)
+ max_vruntime = last->vruntime;
+ min_vruntime = cfs_rq->min_vruntime;
+ rq0_min_vruntime = cpu_rq(0)->cfs.min_vruntime;
+ raw_spin_unlock_irqrestore(&rq->lock, flags);
+ SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "MIN_vruntime",
+ SPLIT_NS(MIN_vruntime));
+ SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "min_vruntime",
+ SPLIT_NS(min_vruntime));
+ SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "max_vruntime",
+ SPLIT_NS(max_vruntime));
+ spread = max_vruntime - MIN_vruntime;
+ SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "spread",
+ SPLIT_NS(spread));
+ spread0 = min_vruntime - rq0_min_vruntime;
+ SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "spread0",
+ SPLIT_NS(spread0));
+ SEQ_printf(m, " .%-30s: %d\n", "nr_spread_over",
+ cfs_rq->nr_spread_over);
+ SEQ_printf(m, " .%-30s: %d\n", "nr_running", cfs_rq->nr_running);
+ SEQ_printf(m, " .%-30s: %ld\n", "load", cfs_rq->load.weight);
+#ifdef CONFIG_SMP
+ SEQ_printf(m, " .%-30s: %lu\n", "load_avg",
+ cfs_rq->avg.load_avg);
+ SEQ_printf(m, " .%-30s: %lu\n", "runnable_avg",
+ cfs_rq->avg.runnable_avg);
+ SEQ_printf(m, " .%-30s: %lu\n", "util_avg",
+ cfs_rq->avg.util_avg);
+ SEQ_printf(m, " .%-30s: %u\n", "util_est_enqueued",
+ cfs_rq->avg.util_est.enqueued);
+ SEQ_printf(m, " .%-30s: %ld\n", "removed.load_avg",
+ cfs_rq->removed.load_avg);
+ SEQ_printf(m, " .%-30s: %ld\n", "removed.util_avg",
+ cfs_rq->removed.util_avg);
+ SEQ_printf(m, " .%-30s: %ld\n", "removed.runnable_avg",
+ cfs_rq->removed.runnable_avg);
+#ifdef CONFIG_FAIR_GROUP_SCHED
+ SEQ_printf(m, " .%-30s: %lu\n", "tg_load_avg_contrib",
+ cfs_rq->tg_load_avg_contrib);
+ SEQ_printf(m, " .%-30s: %ld\n", "tg_load_avg",
+ atomic_long_read(&cfs_rq->tg->load_avg));
+#endif
+#endif
+#ifdef CONFIG_CFS_BANDWIDTH
+ SEQ_printf(m, " .%-30s: %d\n", "throttled",
+ cfs_rq->throttled);
+ SEQ_printf(m, " .%-30s: %d\n", "throttle_count",
+ cfs_rq->throttle_count);
+#endif
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+ print_cfs_group_stats(m, cpu, cfs_rq->tg);
+#endif
+}
+
+void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq)
+{
+#ifdef CONFIG_RT_GROUP_SCHED
+ SEQ_printf(m, "\n");
+ SEQ_printf_task_group_path(m, rt_rq->tg, "rt_rq[%d]:%s\n", cpu);
+#else
+ SEQ_printf(m, "\n");
+ SEQ_printf(m, "rt_rq[%d]:\n", cpu);
+#endif
+
+#define P(x) \
+ SEQ_printf(m, " .%-30s: %Ld\n", #x, (long long)(rt_rq->x))
+#define PU(x) \
+ SEQ_printf(m, " .%-30s: %lu\n", #x, (unsigned long)(rt_rq->x))
+#define PN(x) \
+ SEQ_printf(m, " .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(rt_rq->x))
+
+ PU(rt_nr_running);
+#ifdef CONFIG_SMP
+ PU(rt_nr_migratory);
+#endif
+ P(rt_throttled);
+ PN(rt_time);
+ PN(rt_runtime);
+
+#undef PN
+#undef PU
+#undef P
+}
+
+void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq *dl_rq)
+{
+ struct dl_bw *dl_bw;
+
+ SEQ_printf(m, "\n");
+ SEQ_printf(m, "dl_rq[%d]:\n", cpu);
+
+#define PU(x) \
+ SEQ_printf(m, " .%-30s: %lu\n", #x, (unsigned long)(dl_rq->x))
+
+ PU(dl_nr_running);
+#ifdef CONFIG_SMP
+ PU(dl_nr_migratory);
+ dl_bw = &cpu_rq(cpu)->rd->dl_bw;
+#else
+ dl_bw = &dl_rq->dl_bw;
+#endif
+ SEQ_printf(m, " .%-30s: %lld\n", "dl_bw->bw", dl_bw->bw);
+ SEQ_printf(m, " .%-30s: %lld\n", "dl_bw->total_bw", dl_bw->total_bw);
+
+#undef PU
+}
+
+static void print_cpu(struct seq_file *m, int cpu)
+{
+ struct rq *rq = cpu_rq(cpu);
+
+#ifdef CONFIG_X86
+ {
+ unsigned int freq = cpu_khz ? : 1;
+
+ SEQ_printf(m, "cpu#%d, %u.%03u MHz\n",
+ cpu, freq / 1000, (freq % 1000));
+ }
+#else
+ SEQ_printf(m, "cpu#%d\n", cpu);
+#endif
+
+#define P(x) \
+do { \
+ if (sizeof(rq->x) == 4) \
+ SEQ_printf(m, " .%-30s: %ld\n", #x, (long)(rq->x)); \
+ else \
+ SEQ_printf(m, " .%-30s: %Ld\n", #x, (long long)(rq->x));\
+} while (0)
+
+#define PN(x) \
+ SEQ_printf(m, " .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(rq->x))
+
+ P(nr_running);
+ P(nr_switches);
+ P(nr_uninterruptible);
+ PN(next_balance);
+ SEQ_printf(m, " .%-30s: %ld\n", "curr->pid", (long)(task_pid_nr(rq->curr)));
+ PN(clock);
+ PN(clock_task);
+#undef P
+#undef PN
+
+#ifdef CONFIG_SMP
+#define P64(n) SEQ_printf(m, " .%-30s: %Ld\n", #n, rq->n);
+ P64(avg_idle);
+ P64(max_idle_balance_cost);
+#undef P64
+#endif
+
+#define P(n) SEQ_printf(m, " .%-30s: %d\n", #n, schedstat_val(rq->n));
+ if (schedstat_enabled()) {
+ P(yld_count);
+ P(sched_count);
+ P(sched_goidle);
+ P(ttwu_count);
+ P(ttwu_local);
+ }
+#undef P
+
+ print_cfs_stats(m, cpu);
+ print_rt_stats(m, cpu);
+ print_dl_stats(m, cpu);
+
+ print_rq(m, rq, cpu);
+ SEQ_printf(m, "\n");
+}
+
+static const char *sched_tunable_scaling_names[] = {
+ "none",
+ "logarithmic",
+ "linear"
+};
+
+static void sched_debug_header(struct seq_file *m)
+{
+ u64 ktime, sched_clk, cpu_clk;
+ unsigned long flags;
+
+ local_irq_save(flags);
+ ktime = ktime_to_ns(ktime_get());
+ sched_clk = sched_clock();
+ cpu_clk = local_clock();
+ local_irq_restore(flags);
+
+ SEQ_printf(m, "Sched Debug Version: v0.11, %s %.*s\n",
+ init_utsname()->release,
+ (int)strcspn(init_utsname()->version, " "),
+ init_utsname()->version);
+
+#define P(x) \
+ SEQ_printf(m, "%-40s: %Ld\n", #x, (long long)(x))
+#define PN(x) \
+ SEQ_printf(m, "%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x))
+ PN(ktime);
+ PN(sched_clk);
+ PN(cpu_clk);
+ P(jiffies);
+#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
+ P(sched_clock_stable());
+#endif
+#undef PN
+#undef P
+
+ SEQ_printf(m, "\n");
+ SEQ_printf(m, "sysctl_sched\n");
+
+#define P(x) \
+ SEQ_printf(m, " .%-40s: %Ld\n", #x, (long long)(x))
+#define PN(x) \
+ SEQ_printf(m, " .%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x))
+ PN(sysctl_sched_latency);
+ PN(sysctl_sched_min_granularity);
+ PN(sysctl_sched_wakeup_granularity);
+ P(sysctl_sched_child_runs_first);
+ P(sysctl_sched_features);
+#undef PN
+#undef P
+
+ SEQ_printf(m, " .%-40s: %d (%s)\n",
+ "sysctl_sched_tunable_scaling",
+ sysctl_sched_tunable_scaling,
+ sched_tunable_scaling_names[sysctl_sched_tunable_scaling]);
+ SEQ_printf(m, "\n");
+}
+
+static int sched_debug_show(struct seq_file *m, void *v)
+{
+ int cpu = (unsigned long)(v - 2);
+
+ if (cpu != -1)
+ print_cpu(m, cpu);
+ else
+ sched_debug_header(m);
+
+ return 0;
+}
+
+void sysrq_sched_debug_show(void)
+{
+ int cpu;
+
+ sched_debug_header(NULL);
+ for_each_online_cpu(cpu) {
+ /*
+ * Need to reset softlockup watchdogs on all CPUs, because
+ * another CPU might be blocked waiting for us to process
+ * an IPI or stop_machine.
+ */
+ touch_nmi_watchdog();
+ touch_all_softlockup_watchdogs();
+ print_cpu(NULL, cpu);
+ }
+}
+
+/*
+ * This itererator needs some explanation.
+ * It returns 1 for the header position.
+ * This means 2 is CPU 0.
+ * In a hotplugged system some CPUs, including CPU 0, may be missing so we have
+ * to use cpumask_* to iterate over the CPUs.
+ */
+static void *sched_debug_start(struct seq_file *file, loff_t *offset)
+{
+ unsigned long n = *offset;
+
+ if (n == 0)
+ return (void *) 1;
+
+ n--;
+
+ if (n > 0)
+ n = cpumask_next(n - 1, cpu_online_mask);
+ else
+ n = cpumask_first(cpu_online_mask);
+
+ *offset = n + 1;
+
+ if (n < nr_cpu_ids)
+ return (void *)(unsigned long)(n + 2);
+
+ return NULL;
+}
+
+static void *sched_debug_next(struct seq_file *file, void *data, loff_t *offset)
+{
+ (*offset)++;
+ return sched_debug_start(file, offset);
+}
+
+static void sched_debug_stop(struct seq_file *file, void *data)
+{
+}
+
+static const struct seq_operations sched_debug_sops = {
+ .start = sched_debug_start,
+ .next = sched_debug_next,
+ .stop = sched_debug_stop,
+ .show = sched_debug_show,
+};
+
+static int __init init_sched_debug_procfs(void)
+{
+ if (!proc_create_seq("sched_debug", 0444, NULL, &sched_debug_sops))
+ return -ENOMEM;
+ return 0;
+}
+
+__initcall(init_sched_debug_procfs);
+
+#define __PS(S, F) SEQ_printf(m, "%-45s:%21Ld\n", S, (long long)(F))
+#define __P(F) __PS(#F, F)
+#define P(F) __PS(#F, p->F)
+#define PM(F, M) __PS(#F, p->F & (M))
+#define __PSN(S, F) SEQ_printf(m, "%-45s:%14Ld.%06ld\n", S, SPLIT_NS((long long)(F)))
+#define __PN(F) __PSN(#F, F)
+#define PN(F) __PSN(#F, p->F)
+
+
+#ifdef CONFIG_NUMA_BALANCING
+void print_numa_stats(struct seq_file *m, int node, unsigned long tsf,
+ unsigned long tpf, unsigned long gsf, unsigned long gpf)
+{
+ SEQ_printf(m, "numa_faults node=%d ", node);
+ SEQ_printf(m, "task_private=%lu task_shared=%lu ", tpf, tsf);
+ SEQ_printf(m, "group_private=%lu group_shared=%lu\n", gpf, gsf);
+}
+#endif
+
+
+static void sched_show_numa(struct task_struct *p, struct seq_file *m)
+{
+#ifdef CONFIG_NUMA_BALANCING
+ if (p->mm)
+ P(mm->numa_scan_seq);
+
+ P(numa_pages_migrated);
+ P(numa_preferred_nid);
+ P(total_numa_faults);
+ SEQ_printf(m, "current_node=%d, numa_group_id=%d\n",
+ task_node(p), task_numa_group_id(p));
+ show_numa_stats(p, m);
+#endif
+}
+
+void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
+ struct seq_file *m)
+{
+ unsigned long nr_switches;
+
+ SEQ_printf(m, "%s (%d, #threads: %d)\n", p->comm, task_pid_nr_ns(p, ns),
+ get_nr_threads(p));
+ SEQ_printf(m,
+ "---------------------------------------------------------"
+ "----------\n");
+
+#define P_SCHEDSTAT(F) __PS(#F, schedstat_val(p->F))
+#define PN_SCHEDSTAT(F) __PSN(#F, schedstat_val(p->F))
+
+ PN(se.exec_start);
+ PN(se.vruntime);
+ PN(se.sum_exec_runtime);
+
+ nr_switches = p->nvcsw + p->nivcsw;
+
+ P(se.nr_migrations);
+
+ if (schedstat_enabled()) {
+ u64 avg_atom, avg_per_cpu;
+
+ PN_SCHEDSTAT(se.statistics.sum_sleep_runtime);
+ PN_SCHEDSTAT(se.statistics.wait_start);
+ PN_SCHEDSTAT(se.statistics.sleep_start);
+ PN_SCHEDSTAT(se.statistics.block_start);
+ PN_SCHEDSTAT(se.statistics.sleep_max);
+ PN_SCHEDSTAT(se.statistics.block_max);
+ PN_SCHEDSTAT(se.statistics.exec_max);
+ PN_SCHEDSTAT(se.statistics.slice_max);
+ PN_SCHEDSTAT(se.statistics.wait_max);
+ PN_SCHEDSTAT(se.statistics.wait_sum);
+ P_SCHEDSTAT(se.statistics.wait_count);
+ PN_SCHEDSTAT(se.statistics.iowait_sum);
+ P_SCHEDSTAT(se.statistics.iowait_count);
+ P_SCHEDSTAT(se.statistics.nr_migrations_cold);
+ P_SCHEDSTAT(se.statistics.nr_failed_migrations_affine);
+ P_SCHEDSTAT(se.statistics.nr_failed_migrations_running);
+ P_SCHEDSTAT(se.statistics.nr_failed_migrations_hot);
+ P_SCHEDSTAT(se.statistics.nr_forced_migrations);
+ P_SCHEDSTAT(se.statistics.nr_wakeups);
+ P_SCHEDSTAT(se.statistics.nr_wakeups_sync);
+ P_SCHEDSTAT(se.statistics.nr_wakeups_migrate);
+ P_SCHEDSTAT(se.statistics.nr_wakeups_local);
+ P_SCHEDSTAT(se.statistics.nr_wakeups_remote);
+ P_SCHEDSTAT(se.statistics.nr_wakeups_affine);
+ P_SCHEDSTAT(se.statistics.nr_wakeups_affine_attempts);
+ P_SCHEDSTAT(se.statistics.nr_wakeups_passive);
+ P_SCHEDSTAT(se.statistics.nr_wakeups_idle);
+
+ avg_atom = p->se.sum_exec_runtime;
+ if (nr_switches)
+ avg_atom = div64_ul(avg_atom, nr_switches);
+ else
+ avg_atom = -1LL;
+
+ avg_per_cpu = p->se.sum_exec_runtime;
+ if (p->se.nr_migrations) {
+ avg_per_cpu = div64_u64(avg_per_cpu,
+ p->se.nr_migrations);
+ } else {
+ avg_per_cpu = -1LL;
+ }
+
+ __PN(avg_atom);
+ __PN(avg_per_cpu);
+ }
+
+ __P(nr_switches);
+ __PS("nr_voluntary_switches", p->nvcsw);
+ __PS("nr_involuntary_switches", p->nivcsw);
+
+ P(se.load.weight);
+#ifdef CONFIG_SMP
+ P(se.avg.load_sum);
+ P(se.avg.runnable_sum);
+ P(se.avg.util_sum);
+ P(se.avg.load_avg);
+ P(se.avg.runnable_avg);
+ P(se.avg.util_avg);
+ P(se.avg.last_update_time);
+ P(se.avg.util_est.ewma);
+ PM(se.avg.util_est.enqueued, ~UTIL_AVG_UNCHANGED);
+#endif
+#ifdef CONFIG_UCLAMP_TASK
+ __PS("uclamp.min", p->uclamp_req[UCLAMP_MIN].value);
+ __PS("uclamp.max", p->uclamp_req[UCLAMP_MAX].value);
+ __PS("effective uclamp.min", uclamp_eff_value(p, UCLAMP_MIN));
+ __PS("effective uclamp.max", uclamp_eff_value(p, UCLAMP_MAX));
+#endif
+ P(policy);
+ P(prio);
+ if (task_has_dl_policy(p)) {
+ P(dl.runtime);
+ P(dl.deadline);
+ }
+#undef PN_SCHEDSTAT
+#undef P_SCHEDSTAT
+
+ {
+ unsigned int this_cpu = raw_smp_processor_id();
+ u64 t0, t1;
+
+ t0 = cpu_clock(this_cpu);
+ t1 = cpu_clock(this_cpu);
+ __PS("clock-delta", t1-t0);
+ }
+
+ sched_show_numa(p, m);
+}
+
+void proc_sched_set_task(struct task_struct *p)
+{
+#ifdef CONFIG_SCHEDSTATS
+ memset(&p->se.statistics, 0, sizeof(p->se.statistics));
+#endif
+}