summaryrefslogtreecommitdiffstats
path: root/debian/patches-rt/0258-sched-Add-support-for-lazy-preemption.patch
diff options
context:
space:
mode:
Diffstat (limited to 'debian/patches-rt/0258-sched-Add-support-for-lazy-preemption.patch')
-rw-r--r--debian/patches-rt/0258-sched-Add-support-for-lazy-preemption.patch691
1 files changed, 691 insertions, 0 deletions
diff --git a/debian/patches-rt/0258-sched-Add-support-for-lazy-preemption.patch b/debian/patches-rt/0258-sched-Add-support-for-lazy-preemption.patch
new file mode 100644
index 000000000..12d064860
--- /dev/null
+++ b/debian/patches-rt/0258-sched-Add-support-for-lazy-preemption.patch
@@ -0,0 +1,691 @@
+From e8b4a64bf2eb6ff6330544895c0f69696059ffda Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Fri, 26 Oct 2012 18:50:54 +0100
+Subject: [PATCH 258/323] sched: Add support for lazy preemption
+Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.10/older/patches-5.10.204-rt100.tar.xz
+
+It has become an obsession to mitigate the determinism vs. throughput
+loss of RT. Looking at the mainline semantics of preemption points
+gives a hint why RT sucks throughput wise for ordinary SCHED_OTHER
+tasks. One major issue is the wakeup of tasks which are right away
+preempting the waking task while the waking task holds a lock on which
+the woken task will block right after having preempted the wakee. In
+mainline this is prevented due to the implicit preemption disable of
+spin/rw_lock held regions. On RT this is not possible due to the fully
+preemptible nature of sleeping spinlocks.
+
+Though for a SCHED_OTHER task preempting another SCHED_OTHER task this
+is really not a correctness issue. RT folks are concerned about
+SCHED_FIFO/RR tasks preemption and not about the purely fairness
+driven SCHED_OTHER preemption latencies.
+
+So I introduced a lazy preemption mechanism which only applies to
+SCHED_OTHER tasks preempting another SCHED_OTHER task. Aside of the
+existing preempt_count each tasks sports now a preempt_lazy_count
+which is manipulated on lock acquiry and release. This is slightly
+incorrect as for lazyness reasons I coupled this on
+migrate_disable/enable so some other mechanisms get the same treatment
+(e.g. get_cpu_light).
+
+Now on the scheduler side instead of setting NEED_RESCHED this sets
+NEED_RESCHED_LAZY in case of a SCHED_OTHER/SCHED_OTHER preemption and
+therefor allows to exit the waking task the lock held region before
+the woken task preempts. That also works better for cross CPU wakeups
+as the other side can stay in the adaptive spinning loop.
+
+For RT class preemption there is no change. This simply sets
+NEED_RESCHED and forgoes the lazy preemption counter.
+
+ Initial test do not expose any observable latency increasement, but
+history shows that I've been proven wrong before :)
+
+The lazy preemption mode is per default on, but with
+CONFIG_SCHED_DEBUG enabled it can be disabled via:
+
+ # echo NO_PREEMPT_LAZY >/sys/kernel/debug/sched_features
+
+and reenabled via
+
+ # echo PREEMPT_LAZY >/sys/kernel/debug/sched_features
+
+The test results so far are very machine and workload dependent, but
+there is a clear trend that it enhances the non RT workload
+performance.
+
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+---
+ include/linux/preempt.h | 54 ++++++++++++++++++++++--
+ include/linux/sched.h | 38 +++++++++++++++++
+ include/linux/thread_info.h | 12 +++++-
+ include/linux/trace_events.h | 5 ++-
+ kernel/Kconfig.preempt | 6 +++
+ kernel/sched/core.c | 82 +++++++++++++++++++++++++++++++++++-
+ kernel/sched/fair.c | 16 +++----
+ kernel/sched/features.h | 3 ++
+ kernel/sched/sched.h | 9 ++++
+ kernel/trace/trace.c | 50 +++++++++++++---------
+ kernel/trace/trace_events.c | 1 +
+ kernel/trace/trace_output.c | 14 +++++-
+ 12 files changed, 254 insertions(+), 36 deletions(-)
+
+diff --git a/include/linux/preempt.h b/include/linux/preempt.h
+index fb140e00f74d..af39859f02ee 100644
+--- a/include/linux/preempt.h
++++ b/include/linux/preempt.h
+@@ -174,6 +174,20 @@ extern void preempt_count_sub(int val);
+ #define preempt_count_inc() preempt_count_add(1)
+ #define preempt_count_dec() preempt_count_sub(1)
+
++#ifdef CONFIG_PREEMPT_LAZY
++#define add_preempt_lazy_count(val) do { preempt_lazy_count() += (val); } while (0)
++#define sub_preempt_lazy_count(val) do { preempt_lazy_count() -= (val); } while (0)
++#define inc_preempt_lazy_count() add_preempt_lazy_count(1)
++#define dec_preempt_lazy_count() sub_preempt_lazy_count(1)
++#define preempt_lazy_count() (current_thread_info()->preempt_lazy_count)
++#else
++#define add_preempt_lazy_count(val) do { } while (0)
++#define sub_preempt_lazy_count(val) do { } while (0)
++#define inc_preempt_lazy_count() do { } while (0)
++#define dec_preempt_lazy_count() do { } while (0)
++#define preempt_lazy_count() (0)
++#endif
++
+ #ifdef CONFIG_PREEMPT_COUNT
+
+ #define preempt_disable() \
+@@ -182,6 +196,12 @@ do { \
+ barrier(); \
+ } while (0)
+
++#define preempt_lazy_disable() \
++do { \
++ inc_preempt_lazy_count(); \
++ barrier(); \
++} while (0)
++
+ #define sched_preempt_enable_no_resched() \
+ do { \
+ barrier(); \
+@@ -219,6 +239,18 @@ do { \
+ __preempt_schedule(); \
+ } while (0)
+
++/*
++ * open code preempt_check_resched() because it is not exported to modules and
++ * used by local_unlock() or bpf_enable_instrumentation().
++ */
++#define preempt_lazy_enable() \
++do { \
++ dec_preempt_lazy_count(); \
++ barrier(); \
++ if (should_resched(0)) \
++ __preempt_schedule(); \
++} while (0)
++
+ #else /* !CONFIG_PREEMPTION */
+ #define preempt_enable() \
+ do { \
+@@ -226,6 +258,12 @@ do { \
+ preempt_count_dec(); \
+ } while (0)
+
++#define preempt_lazy_enable() \
++do { \
++ dec_preempt_lazy_count(); \
++ barrier(); \
++} while (0)
++
+ #define preempt_enable_notrace() \
+ do { \
+ barrier(); \
+@@ -267,6 +305,9 @@ do { \
+ #define preempt_check_resched_rt() barrier()
+ #define preemptible() 0
+
++#define preempt_lazy_disable() barrier()
++#define preempt_lazy_enable() barrier()
++
+ #endif /* CONFIG_PREEMPT_COUNT */
+
+ #ifdef MODULE
+@@ -285,7 +326,7 @@ do { \
+ } while (0)
+ #define preempt_fold_need_resched() \
+ do { \
+- if (tif_need_resched()) \
++ if (tif_need_resched_now()) \
+ set_preempt_need_resched(); \
+ } while (0)
+
+@@ -413,8 +454,15 @@ extern void migrate_enable(void);
+
+ #else
+
+-static inline void migrate_disable(void) { }
+-static inline void migrate_enable(void) { }
++static inline void migrate_disable(void)
++{
++ preempt_lazy_disable();
++}
++
++static inline void migrate_enable(void)
++{
++ preempt_lazy_enable();
++}
+
+ #endif /* CONFIG_SMP */
+
+diff --git a/include/linux/sched.h b/include/linux/sched.h
+index bd0c9c633438..665a17e4f69b 100644
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -1882,6 +1882,44 @@ static inline int test_tsk_need_resched(struct task_struct *tsk)
+ return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED));
+ }
+
++#ifdef CONFIG_PREEMPT_LAZY
++static inline void set_tsk_need_resched_lazy(struct task_struct *tsk)
++{
++ set_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY);
++}
++
++static inline void clear_tsk_need_resched_lazy(struct task_struct *tsk)
++{
++ clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY);
++}
++
++static inline int test_tsk_need_resched_lazy(struct task_struct *tsk)
++{
++ return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY));
++}
++
++static inline int need_resched_lazy(void)
++{
++ return test_thread_flag(TIF_NEED_RESCHED_LAZY);
++}
++
++static inline int need_resched_now(void)
++{
++ return test_thread_flag(TIF_NEED_RESCHED);
++}
++
++#else
++static inline void clear_tsk_need_resched_lazy(struct task_struct *tsk) { }
++static inline int need_resched_lazy(void) { return 0; }
++
++static inline int need_resched_now(void)
++{
++ return test_thread_flag(TIF_NEED_RESCHED);
++}
++
++#endif
++
++
+ static inline bool __task_is_stopped_or_traced(struct task_struct *task)
+ {
+ if (task->state & (__TASK_STOPPED | __TASK_TRACED))
+diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
+index f3040b0b4b23..3cb02ced141b 100644
+--- a/include/linux/thread_info.h
++++ b/include/linux/thread_info.h
+@@ -110,7 +110,17 @@ static inline int test_ti_thread_flag(struct thread_info *ti, int flag)
+ #define test_thread_flag(flag) \
+ test_ti_thread_flag(current_thread_info(), flag)
+
+-#define tif_need_resched() test_thread_flag(TIF_NEED_RESCHED)
++#ifdef CONFIG_PREEMPT_LAZY
++#define tif_need_resched() (test_thread_flag(TIF_NEED_RESCHED) || \
++ test_thread_flag(TIF_NEED_RESCHED_LAZY))
++#define tif_need_resched_now() (test_thread_flag(TIF_NEED_RESCHED))
++#define tif_need_resched_lazy() test_thread_flag(TIF_NEED_RESCHED_LAZY))
++
++#else
++#define tif_need_resched() test_thread_flag(TIF_NEED_RESCHED)
++#define tif_need_resched_now() test_thread_flag(TIF_NEED_RESCHED)
++#define tif_need_resched_lazy() 0
++#endif
+
+ #ifndef CONFIG_HAVE_ARCH_WITHIN_STACK_FRAMES
+ static inline int arch_within_stack_frames(const void * const stack,
+diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
+index 2151524a10f0..e7afd9fe35e5 100644
+--- a/include/linux/trace_events.h
++++ b/include/linux/trace_events.h
+@@ -70,6 +70,7 @@ struct trace_entry {
+ unsigned char preempt_count;
+ int pid;
+ unsigned char migrate_disable;
++ unsigned char preempt_lazy_count;
+ };
+
+ #define TRACE_EVENT_TYPE_MAX \
+@@ -159,9 +160,10 @@ static inline void tracing_generic_entry_update(struct trace_entry *entry,
+ {
+ entry->preempt_count = trace_ctx & 0xff;
+ entry->migrate_disable = (trace_ctx >> 8) & 0xff;
++ entry->preempt_lazy_count = (trace_ctx >> 16) & 0xff;
+ entry->pid = current->pid;
+ entry->type = type;
+- entry->flags = trace_ctx >> 16;
++ entry->flags = trace_ctx >> 24;
+ }
+
+ unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status);
+@@ -174,6 +176,7 @@ enum trace_flag_type {
+ TRACE_FLAG_SOFTIRQ = 0x10,
+ TRACE_FLAG_PREEMPT_RESCHED = 0x20,
+ TRACE_FLAG_NMI = 0x40,
++ TRACE_FLAG_NEED_RESCHED_LAZY = 0x80,
+ };
+
+ #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
+diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt
+index cbe3aa495519..b5cd1e278eb5 100644
+--- a/kernel/Kconfig.preempt
++++ b/kernel/Kconfig.preempt
+@@ -1,5 +1,11 @@
+ # SPDX-License-Identifier: GPL-2.0-only
+
++config HAVE_PREEMPT_LAZY
++ bool
++
++config PREEMPT_LAZY
++ def_bool y if HAVE_PREEMPT_LAZY && PREEMPT_RT
++
+ choice
+ prompt "Preemption Model"
+ default PREEMPT_NONE
+diff --git a/kernel/sched/core.c b/kernel/sched/core.c
+index 8ac1f0526476..c847d17e3b04 100644
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -656,6 +656,48 @@ void resched_curr(struct rq *rq)
+ trace_sched_wake_idle_without_ipi(cpu);
+ }
+
++#ifdef CONFIG_PREEMPT_LAZY
++
++static int tsk_is_polling(struct task_struct *p)
++{
++#ifdef TIF_POLLING_NRFLAG
++ return test_tsk_thread_flag(p, TIF_POLLING_NRFLAG);
++#else
++ return 0;
++#endif
++}
++
++void resched_curr_lazy(struct rq *rq)
++{
++ struct task_struct *curr = rq->curr;
++ int cpu;
++
++ if (!sched_feat(PREEMPT_LAZY)) {
++ resched_curr(rq);
++ return;
++ }
++
++ lockdep_assert_held(&rq->lock);
++
++ if (test_tsk_need_resched(curr))
++ return;
++
++ if (test_tsk_need_resched_lazy(curr))
++ return;
++
++ set_tsk_need_resched_lazy(curr);
++
++ cpu = cpu_of(rq);
++ if (cpu == smp_processor_id())
++ return;
++
++ /* NEED_RESCHED_LAZY must be visible before we test polling */
++ smp_mb();
++ if (!tsk_is_polling(curr))
++ smp_send_reschedule(cpu);
++}
++#endif
++
+ void resched_cpu(int cpu)
+ {
+ struct rq *rq = cpu_rq(cpu);
+@@ -1772,6 +1814,7 @@ void migrate_disable(void)
+ preempt_disable();
+ this_rq()->nr_pinned++;
+ p->migration_disabled = 1;
++ preempt_lazy_disable();
+ preempt_enable();
+ }
+ EXPORT_SYMBOL_GPL(migrate_disable);
+@@ -1800,6 +1843,7 @@ void migrate_enable(void)
+ barrier();
+ p->migration_disabled = 0;
+ this_rq()->nr_pinned--;
++ preempt_lazy_enable();
+ preempt_enable();
+
+ trace_sched_migrate_enable_tp(p);
+@@ -3822,6 +3866,9 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
+ p->on_cpu = 0;
+ #endif
+ init_task_preempt_count(p);
++#ifdef CONFIG_HAVE_PREEMPT_LAZY
++ task_thread_info(p)->preempt_lazy_count = 0;
++#endif
+ #ifdef CONFIG_SMP
+ plist_node_init(&p->pushable_tasks, MAX_PRIO);
+ RB_CLEAR_NODE(&p->pushable_dl_tasks);
+@@ -5101,6 +5148,7 @@ static void __sched notrace __schedule(bool preempt, bool spinning_lock)
+
+ next = pick_next_task(rq, prev, &rf);
+ clear_tsk_need_resched(prev);
++ clear_tsk_need_resched_lazy(prev);
+ clear_preempt_need_resched();
+
+ if (likely(prev != next)) {
+@@ -5300,6 +5348,30 @@ static void __sched notrace preempt_schedule_common(void)
+ } while (need_resched());
+ }
+
++#ifdef CONFIG_PREEMPT_LAZY
++/*
++ * If TIF_NEED_RESCHED is then we allow to be scheduled away since this is
++ * set by a RT task. Oterwise we try to avoid beeing scheduled out as long as
++ * preempt_lazy_count counter >0.
++ */
++static __always_inline int preemptible_lazy(void)
++{
++ if (test_thread_flag(TIF_NEED_RESCHED))
++ return 1;
++ if (current_thread_info()->preempt_lazy_count)
++ return 0;
++ return 1;
++}
++
++#else
++
++static inline int preemptible_lazy(void)
++{
++ return 1;
++}
++
++#endif
++
+ #ifdef CONFIG_PREEMPTION
+ /*
+ * This is the entry point to schedule() from in-kernel preemption
+@@ -5313,7 +5385,8 @@ asmlinkage __visible void __sched notrace preempt_schedule(void)
+ */
+ if (likely(!preemptible()))
+ return;
+-
++ if (!preemptible_lazy())
++ return;
+ preempt_schedule_common();
+ }
+ NOKPROBE_SYMBOL(preempt_schedule);
+@@ -5353,6 +5426,9 @@ asmlinkage __visible void __sched notrace preempt_schedule_notrace(void)
+ if (likely(!preemptible()))
+ return;
+
++ if (!preemptible_lazy())
++ return;
++
+ do {
+ /*
+ * Because the function tracer can trace preempt_count_sub()
+@@ -7175,7 +7251,9 @@ void __init init_idle(struct task_struct *idle, int cpu)
+
+ /* Set the preempt count _outside_ the spinlocks! */
+ init_idle_preempt_count(idle, cpu);
+-
++#ifdef CONFIG_HAVE_PREEMPT_LAZY
++ task_thread_info(idle)->preempt_lazy_count = 0;
++#endif
+ /*
+ * The idle tasks have their own, simple scheduling class:
+ */
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index 73a89fbd81be..f4928e5b6611 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -4570,7 +4570,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
+ ideal_runtime = sched_slice(cfs_rq, curr);
+ delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime;
+ if (delta_exec > ideal_runtime) {
+- resched_curr(rq_of(cfs_rq));
++ resched_curr_lazy(rq_of(cfs_rq));
+ /*
+ * The current task ran long enough, ensure it doesn't get
+ * re-elected due to buddy favours.
+@@ -4594,7 +4594,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
+ return;
+
+ if (delta > ideal_runtime)
+- resched_curr(rq_of(cfs_rq));
++ resched_curr_lazy(rq_of(cfs_rq));
+ }
+
+ static void
+@@ -4737,7 +4737,7 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
+ * validating it and just reschedule.
+ */
+ if (queued) {
+- resched_curr(rq_of(cfs_rq));
++ resched_curr_lazy(rq_of(cfs_rq));
+ return;
+ }
+ /*
+@@ -4874,7 +4874,7 @@ static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec)
+ * hierarchy can be throttled
+ */
+ if (!assign_cfs_rq_runtime(cfs_rq) && likely(cfs_rq->curr))
+- resched_curr(rq_of(cfs_rq));
++ resched_curr_lazy(rq_of(cfs_rq));
+ }
+
+ static __always_inline
+@@ -5609,7 +5609,7 @@ static void hrtick_start_fair(struct rq *rq, struct task_struct *p)
+
+ if (delta < 0) {
+ if (rq->curr == p)
+- resched_curr(rq);
++ resched_curr_lazy(rq);
+ return;
+ }
+ hrtick_start(rq, delta);
+@@ -7218,7 +7218,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
+ return;
+
+ preempt:
+- resched_curr(rq);
++ resched_curr_lazy(rq);
+ /*
+ * Only set the backward buddy when the current task is still
+ * on the rq. This can happen when a wakeup gets interleaved
+@@ -11059,7 +11059,7 @@ static void task_fork_fair(struct task_struct *p)
+ * 'current' within the tree based on its new key value.
+ */
+ swap(curr->vruntime, se->vruntime);
+- resched_curr(rq);
++ resched_curr_lazy(rq);
+ }
+
+ se->vruntime -= cfs_rq->min_vruntime;
+@@ -11086,7 +11086,7 @@ prio_changed_fair(struct rq *rq, struct task_struct *p, int oldprio)
+ */
+ if (rq->curr == p) {
+ if (p->prio > oldprio)
+- resched_curr(rq);
++ resched_curr_lazy(rq);
+ } else
+ check_preempt_curr(rq, p, 0);
+ }
+diff --git a/kernel/sched/features.h b/kernel/sched/features.h
+index 402fd37fb340..bc2466af142e 100644
+--- a/kernel/sched/features.h
++++ b/kernel/sched/features.h
+@@ -47,6 +47,9 @@ SCHED_FEAT(NONTASK_CAPACITY, true)
+
+ #ifdef CONFIG_PREEMPT_RT
+ SCHED_FEAT(TTWU_QUEUE, false)
++# ifdef CONFIG_PREEMPT_LAZY
++SCHED_FEAT(PREEMPT_LAZY, true)
++# endif
+ #else
+
+ /*
+diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
+index d4bfc51358d3..ad854a670701 100644
+--- a/kernel/sched/sched.h
++++ b/kernel/sched/sched.h
+@@ -1997,6 +1997,15 @@ extern void reweight_task(struct task_struct *p, int prio);
+ extern void resched_curr(struct rq *rq);
+ extern void resched_cpu(int cpu);
+
++#ifdef CONFIG_PREEMPT_LAZY
++extern void resched_curr_lazy(struct rq *rq);
++#else
++static inline void resched_curr_lazy(struct rq *rq)
++{
++ resched_curr(rq);
++}
++#endif
++
+ extern struct rt_bandwidth def_rt_bandwidth;
+ extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime);
+
+diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
+index 02dffb1862b8..7caae85af03d 100644
+--- a/kernel/trace/trace.c
++++ b/kernel/trace/trace.c
+@@ -2602,8 +2602,16 @@ unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
+ trace_flags |= TRACE_FLAG_NEED_RESCHED;
+ if (test_preempt_need_resched())
+ trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
+- return (trace_flags << 16) | (pc & 0xff) |
+- (migration_disable_value() & 0xff) << 8;
++
++#ifdef CONFIG_PREEMPT_LAZY
++ if (need_resched_lazy())
++ trace_flags |= TRACE_FLAG_NEED_RESCHED_LAZY;
++#endif
++
++ return (pc & 0xff) |
++ (migration_disable_value() & 0xff) << 8 |
++ (preempt_lazy_count() & 0xff) << 16 |
++ (trace_flags << 24);
+ }
+
+ struct ring_buffer_event *
+@@ -3861,15 +3869,17 @@ unsigned long trace_total_entries(struct trace_array *tr)
+
+ static void print_lat_help_header(struct seq_file *m)
+ {
+- seq_puts(m, "# _------=> CPU# \n"
+- "# / _-----=> irqs-off \n"
+- "# | / _----=> need-resched \n"
+- "# || / _---=> hardirq/softirq \n"
+- "# ||| / _--=> preempt-depth \n"
+- "# |||| / _-=> migrate-disable \n"
+- "# ||||| / delay \n"
+- "# cmd pid |||||| time | caller \n"
+- "# \\ / |||||| \\ | / \n");
++ seq_puts(m, "# _--------=> CPU# \n"
++ "# / _-------=> irqs-off \n"
++ "# | / _------=> need-resched \n"
++ "# || / _-----=> need-resched-lazy\n"
++ "# ||| / _----=> hardirq/softirq \n"
++ "# |||| / _---=> preempt-depth \n"
++ "# ||||| / _--=> preempt-lazy-depth\n"
++ "# |||||| / _-=> migrate-disable \n"
++ "# ||||||| / delay \n"
++ "# cmd pid |||||||| time | caller \n"
++ "# \\ / |||||||| \\ | / \n");
+ }
+
+ static void print_event_info(struct array_buffer *buf, struct seq_file *m)
+@@ -3903,14 +3913,16 @@ static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file
+
+ print_event_info(buf, m);
+
+- seq_printf(m, "# %.*s _-----=> irqs-off\n", prec, space);
+- seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space);
+- seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space);
+- seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space);
+- seq_printf(m, "# %.*s||| / _-=> migrate-disable\n", prec, space);
+- seq_printf(m, "# %.*s|||| / delay\n", prec, space);
+- seq_printf(m, "# TASK-PID %.*s CPU# ||||| TIMESTAMP FUNCTION\n", prec, " TGID ");
+- seq_printf(m, "# | | %.*s | ||||| | |\n", prec, " | ");
++ seq_printf(m, "# %.*s _-------=> irqs-off\n", prec, space);
++ seq_printf(m, "# %.*s / _------=> need-resched\n", prec, space);
++ seq_printf(m, "# %.*s| / _-----=> need-resched-lazy\n", prec, space);
++ seq_printf(m, "# %.*s|| / _----=> hardirq/softirq\n", prec, space);
++ seq_printf(m, "# %.*s||| / _---=> preempt-depth\n", prec, space);
++ seq_printf(m, "# %.*s|||| / _--=> preempt-lazy-depth\n", prec, space);
++ seq_printf(m, "# %.*s||||| / _-=> migrate-disable\n", prec, space);
++ seq_printf(m, "# %.*s|||||| / delay\n", prec, space);
++ seq_printf(m, "# TASK-PID %.*s CPU# ||||||| TIMESTAMP FUNCTION\n", prec, " TGID ");
++ seq_printf(m, "# | | %.*s | ||||||| | |\n", prec, " | ");
+ }
+
+ void
+diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
+index 636fb7df3714..245b8289ecdf 100644
+--- a/kernel/trace/trace_events.c
++++ b/kernel/trace/trace_events.c
+@@ -185,6 +185,7 @@ static int trace_define_common_fields(void)
+ __common_field(unsigned char, preempt_count);
+ __common_field(int, pid);
+ __common_field(unsigned char, migrate_disable);
++ __common_field(unsigned char, preempt_lazy_count);
+
+ return ret;
+ }
+diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
+index b3619b21217c..5a71964ade3a 100644
+--- a/kernel/trace/trace_output.c
++++ b/kernel/trace/trace_output.c
+@@ -451,6 +451,7 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry)
+ {
+ char hardsoft_irq;
+ char need_resched;
++ char need_resched_lazy;
+ char irqs_off;
+ int hardirq;
+ int softirq;
+@@ -481,6 +482,9 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry)
+ break;
+ }
+
++ need_resched_lazy =
++ (entry->flags & TRACE_FLAG_NEED_RESCHED_LAZY) ? 'L' : '.';
++
+ hardsoft_irq =
+ (nmi && hardirq) ? 'Z' :
+ nmi ? 'z' :
+@@ -489,14 +493,20 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry)
+ softirq ? 's' :
+ '.' ;
+
+- trace_seq_printf(s, "%c%c%c",
+- irqs_off, need_resched, hardsoft_irq);
++ trace_seq_printf(s, "%c%c%c%c",
++ irqs_off, need_resched, need_resched_lazy,
++ hardsoft_irq);
+
+ if (entry->preempt_count)
+ trace_seq_printf(s, "%x", entry->preempt_count);
+ else
+ trace_seq_putc(s, '.');
+
++ if (entry->preempt_lazy_count)
++ trace_seq_printf(s, "%x", entry->preempt_lazy_count);
++ else
++ trace_seq_putc(s, '.');
++
+ if (entry->migrate_disable)
+ trace_seq_printf(s, "%x", entry->migrate_disable);
+ else
+--
+2.43.0
+