diff options
Diffstat (limited to '')
63 files changed, 10184 insertions, 0 deletions
diff --git a/debian/patches-rt/0001-vduse-Remove-include-of-rwlock.h.patch b/debian/patches-rt/0001-vduse-Remove-include-of-rwlock.h.patch new file mode 100644 index 000000000..891081a3a --- /dev/null +++ b/debian/patches-rt/0001-vduse-Remove-include-of-rwlock.h.patch @@ -0,0 +1,33 @@ +From 52072a197524e62baa4ac9a5f33d15cd8b27fb17 Mon Sep 17 00:00:00 2001 +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Tue, 16 Aug 2022 09:45:22 +0200 +Subject: [PATCH 01/62] vduse: Remove include of rwlock.h +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.1/older/patches-6.1.69-rt21.tar.xz + +rwlock.h should not be included directly. Instead linux/splinlock.h +should be included. Including it directly will break the RT build. + +Remove the rwlock.h include. + +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Acked-by: Michael S. Tsirkin <mst@redhat.com> +Link: https://lkml.kernel.org/r/20221026134407.711768-1-bigeasy@linutronix.de +--- + drivers/vdpa/vdpa_user/iova_domain.h | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/drivers/vdpa/vdpa_user/iova_domain.h b/drivers/vdpa/vdpa_user/iova_domain.h +index 4e0e50e7ac15..173e979b84a9 100644 +--- a/drivers/vdpa/vdpa_user/iova_domain.h ++++ b/drivers/vdpa/vdpa_user/iova_domain.h +@@ -14,7 +14,6 @@ + #include <linux/iova.h> + #include <linux/dma-mapping.h> + #include <linux/vhost_iotlb.h> +-#include <linux/rwlock.h> + + #define IOVA_START_PFN 1 + +-- +2.43.0 + diff --git a/debian/patches-rt/0002-signal-Don-t-disable-preemption-in-ptrace_stop-on-PR.patch b/debian/patches-rt/0002-signal-Don-t-disable-preemption-in-ptrace_stop-on-PR.patch new file mode 100644 index 000000000..05ec116ad --- /dev/null +++ b/debian/patches-rt/0002-signal-Don-t-disable-preemption-in-ptrace_stop-on-PR.patch @@ -0,0 +1,66 @@ +From d5541b6ef4eccee650abfe3095b9e7365773494c Mon Sep 17 00:00:00 2001 +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Wed, 22 Jun 2022 11:36:17 +0200 +Subject: [PATCH 02/62] signal: Don't disable preemption in ptrace_stop() on + PREEMPT_RT. +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.1/older/patches-6.1.69-rt21.tar.xz + +Commit + 53da1d9456fe7 ("fix ptrace slowness") + +is just band aid around the problem. +The invocation of do_notify_parent_cldstop() wakes the parent and makes +it runnable. The scheduler then wants to replace this still running task +with the parent. With the read_lock() acquired this is not possible +because preemption is disabled and so this is deferred until read_unlock(). +This scheduling point is undesired and is avoided by disabling preemption +around the unlock operation enabled again before the schedule() invocation +without a preemption point. +This is only undesired because the parent sleeps a cycle in +wait_task_inactive() until the traced task leaves the run-queue in +schedule(). It is not a correctness issue, it is just band aid to avoid the +visbile delay which sums up over multiple invocations. +The task can still be preempted if an interrupt occurs between +preempt_enable_no_resched() and freezable_schedule() because on the IRQ-exit +path of the interrupt scheduling _will_ happen. This is ignored since it does +not happen very often. + +On PREEMPT_RT keeping preemption disabled during the invocation of +cgroup_enter_frozen() becomes a problem because the function acquires +css_set_lock which is a sleeping lock on PREEMPT_RT and must not be +acquired with disabled preemption. + +Don't disable preemption on PREEMPT_RT. Remove the TODO regarding adding +read_unlock_no_resched() as there is no need for it and will cause harm. + +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Link: https://lkml.kernel.org/r/20220720154435.232749-2-bigeasy@linutronix.de +--- + kernel/signal.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/kernel/signal.c b/kernel/signal.c +index 5d45f5da2b36..58e919c7c936 100644 +--- a/kernel/signal.c ++++ b/kernel/signal.c +@@ -2302,13 +2302,13 @@ static int ptrace_stop(int exit_code, int why, unsigned long message, + /* + * Don't want to allow preemption here, because + * sys_ptrace() needs this task to be inactive. +- * +- * XXX: implement read_unlock_no_resched(). + */ +- preempt_disable(); ++ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) ++ preempt_disable(); + read_unlock(&tasklist_lock); + cgroup_enter_frozen(); +- preempt_enable_no_resched(); ++ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) ++ preempt_enable_no_resched(); + schedule(); + cgroup_leave_frozen(true); + +-- +2.43.0 + diff --git a/debian/patches-rt/0003-sched-Consider-task_struct-saved_state-in-wait_task_.patch b/debian/patches-rt/0003-sched-Consider-task_struct-saved_state-in-wait_task_.patch new file mode 100644 index 000000000..933385ab9 --- /dev/null +++ b/debian/patches-rt/0003-sched-Consider-task_struct-saved_state-in-wait_task_.patch @@ -0,0 +1,152 @@ +From e4742fc784660e012dc23090a72614bf1f9a0ca1 Mon Sep 17 00:00:00 2001 +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Wed, 22 Jun 2022 12:27:05 +0200 +Subject: [PATCH 03/62] sched: Consider task_struct::saved_state in + wait_task_inactive(). +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.1/older/patches-6.1.69-rt21.tar.xz + +Ptrace is using wait_task_inactive() to wait for the tracee to reach a +certain task state. On PREEMPT_RT that state may be stored in +task_struct::saved_state while the tracee blocks on a sleeping lock and +task_struct::__state is set to TASK_RTLOCK_WAIT. +It is not possible to check only for TASK_RTLOCK_WAIT to be sure that the task +is blocked on a sleeping lock because during wake up (after the sleeping lock +has been acquired) the task state is set TASK_RUNNING. After the task in on CPU +and acquired the pi_lock it will reset the state accordingly but until then +TASK_RUNNING will be observed (with the desired state saved in saved_state). + +Check also for task_struct::saved_state if the desired match was not found in +task_struct::__state on PREEMPT_RT. If the state was found in saved_state, wait +until the task is idle and state is visible in task_struct::__state. + +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Reviewed-by: Valentin Schneider <vschneid@redhat.com> +Link: https://lkml.kernel.org/r/Yt%2FpQAFQ1xKNK0RY@linutronix.de +--- + kernel/sched/core.c | 81 ++++++++++++++++++++++++++++++++++++++++++--- + 1 file changed, 76 insertions(+), 5 deletions(-) + +diff --git a/kernel/sched/core.c b/kernel/sched/core.c +index 18a4f8f28a25..6bd06122850a 100644 +--- a/kernel/sched/core.c ++++ b/kernel/sched/core.c +@@ -3281,6 +3281,76 @@ int migrate_swap(struct task_struct *cur, struct task_struct *p, + } + #endif /* CONFIG_NUMA_BALANCING */ + ++#ifdef CONFIG_PREEMPT_RT ++ ++/* ++ * Consider: ++ * ++ * set_special_state(X); ++ * ++ * do_things() ++ * // Somewhere in there is an rtlock that can be contended: ++ * current_save_and_set_rtlock_wait_state(); ++ * [...] ++ * schedule_rtlock(); (A) ++ * [...] ++ * current_restore_rtlock_saved_state(); ++ * ++ * schedule(); (B) ++ * ++ * If p->saved_state is anything else than TASK_RUNNING, then p blocked on an ++ * rtlock (A) *before* voluntarily calling into schedule() (B) after setting its ++ * state to X. For things like ptrace (X=TASK_TRACED), the task could have more ++ * work to do upon acquiring the lock in do_things() before whoever called ++ * wait_task_inactive() should return. IOW, we have to wait for: ++ * ++ * p.saved_state = TASK_RUNNING ++ * p.__state = X ++ * ++ * which implies the task isn't blocked on an RT lock and got to schedule() (B). ++ * ++ * Also see comments in ttwu_state_match(). ++ */ ++ ++static __always_inline bool state_mismatch(struct task_struct *p, unsigned int match_state) ++{ ++ unsigned long flags; ++ bool mismatch; ++ ++ raw_spin_lock_irqsave(&p->pi_lock, flags); ++ if (READ_ONCE(p->__state) & match_state) ++ mismatch = false; ++ else if (READ_ONCE(p->saved_state) & match_state) ++ mismatch = false; ++ else ++ mismatch = true; ++ ++ raw_spin_unlock_irqrestore(&p->pi_lock, flags); ++ return mismatch; ++} ++static __always_inline bool state_match(struct task_struct *p, unsigned int match_state, ++ bool *wait) ++{ ++ if (READ_ONCE(p->__state) & match_state) ++ return true; ++ if (READ_ONCE(p->saved_state) & match_state) { ++ *wait = true; ++ return true; ++ } ++ return false; ++} ++#else ++static __always_inline bool state_mismatch(struct task_struct *p, unsigned int match_state) ++{ ++ return !(READ_ONCE(p->__state) & match_state); ++} ++static __always_inline bool state_match(struct task_struct *p, unsigned int match_state, ++ bool *wait) ++{ ++ return (READ_ONCE(p->__state) & match_state); ++} ++#endif ++ + /* + * wait_task_inactive - wait for a thread to unschedule. + * +@@ -3299,7 +3369,7 @@ int migrate_swap(struct task_struct *cur, struct task_struct *p, + */ + unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state) + { +- int running, queued; ++ bool running, wait; + struct rq_flags rf; + unsigned long ncsw; + struct rq *rq; +@@ -3325,7 +3395,7 @@ unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state + * is actually now running somewhere else! + */ + while (task_on_cpu(rq, p)) { +- if (!(READ_ONCE(p->__state) & match_state)) ++ if (state_mismatch(p, match_state)) + return 0; + cpu_relax(); + } +@@ -3338,9 +3408,10 @@ unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state + rq = task_rq_lock(p, &rf); + trace_sched_wait_task(p); + running = task_on_cpu(rq, p); +- queued = task_on_rq_queued(p); ++ wait = task_on_rq_queued(p); + ncsw = 0; +- if (READ_ONCE(p->__state) & match_state) ++ ++ if (state_match(p, match_state, &wait)) + ncsw = p->nvcsw | LONG_MIN; /* sets MSB */ + task_rq_unlock(rq, p, &rf); + +@@ -3370,7 +3441,7 @@ unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state + * running right now), it's preempted, and we should + * yield - it could be a while. + */ +- if (unlikely(queued)) { ++ if (unlikely(wait)) { + ktime_t to = NSEC_PER_SEC / HZ; + + set_current_state(TASK_UNINTERRUPTIBLE); +-- +2.43.0 + diff --git a/debian/patches-rt/0004-spi-Remove-the-obsolte-u64_stats_fetch_-_irq-users.patch b/debian/patches-rt/0004-spi-Remove-the-obsolte-u64_stats_fetch_-_irq-users.patch new file mode 100644 index 000000000..0f1b0f221 --- /dev/null +++ b/debian/patches-rt/0004-spi-Remove-the-obsolte-u64_stats_fetch_-_irq-users.patch @@ -0,0 +1,40 @@ +From 638117350cb3452dd5043156c7e394befe7d6eb9 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner <tglx@linutronix.de> +Date: Thu, 25 Aug 2022 16:15:32 +0200 +Subject: [PATCH 04/62] spi: Remove the obsolte u64_stats_fetch_*_irq() users. +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.1/older/patches-6.1.69-rt21.tar.xz + +Now that the 32bit UP oddity is gone and 32bit uses always a sequence +count, there is no need for the fetch_irq() variants anymore. + +Convert to the regular interface. + +Cc: Mark Brown <broonie@kernel.org> +Cc: linux-spi@vger.kernel.org +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> +--- + drivers/spi/spi.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c +index 5d046be8b2dd..716e6d6ecf98 100644 +--- a/drivers/spi/spi.c ++++ b/drivers/spi/spi.c +@@ -127,10 +127,10 @@ do { \ + unsigned int start; \ + pcpu_stats = per_cpu_ptr(in, i); \ + do { \ +- start = u64_stats_fetch_begin_irq( \ ++ start = u64_stats_fetch_begin( \ + &pcpu_stats->syncp); \ + inc = u64_stats_read(&pcpu_stats->field); \ +- } while (u64_stats_fetch_retry_irq( \ ++ } while (u64_stats_fetch_retry( \ + &pcpu_stats->syncp, start)); \ + ret += inc; \ + } \ +-- +2.43.0 + diff --git a/debian/patches-rt/0005-net-Remove-the-obsolte-u64_stats_fetch_-_irq-users-d.patch b/debian/patches-rt/0005-net-Remove-the-obsolte-u64_stats_fetch_-_irq-users-d.patch new file mode 100644 index 000000000..a4d96ec61 --- /dev/null +++ b/debian/patches-rt/0005-net-Remove-the-obsolte-u64_stats_fetch_-_irq-users-d.patch @@ -0,0 +1,2087 @@ +From d1c3fb886c8b630c3a70f2f7192d53f545798283 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner <tglx@linutronix.de> +Date: Thu, 25 Aug 2022 16:15:44 +0200 +Subject: [PATCH 05/62] net: Remove the obsolte u64_stats_fetch_*_irq() users + (drivers). +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.1/older/patches-6.1.69-rt21.tar.xz + +Now that the 32bit UP oddity is gone and 32bit uses always a sequence +count, there is no need for the fetch_irq() variants anymore. + +Convert to the regular interface. + +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> +--- + drivers/net/ethernet/alacritech/slic.h | 12 +++---- + drivers/net/ethernet/amazon/ena/ena_ethtool.c | 4 +-- + drivers/net/ethernet/amazon/ena/ena_netdev.c | 12 +++---- + .../net/ethernet/aquantia/atlantic/aq_ring.c | 8 ++--- + drivers/net/ethernet/asix/ax88796c_main.c | 4 +-- + drivers/net/ethernet/broadcom/b44.c | 8 ++--- + drivers/net/ethernet/broadcom/bcmsysport.c | 12 +++---- + drivers/net/ethernet/cortina/gemini.c | 24 +++++++------- + .../net/ethernet/emulex/benet/be_ethtool.c | 12 +++---- + drivers/net/ethernet/emulex/benet/be_main.c | 16 +++++----- + .../ethernet/fungible/funeth/funeth_txrx.h | 4 +-- + drivers/net/ethernet/google/gve/gve_ethtool.c | 16 +++++----- + drivers/net/ethernet/google/gve/gve_main.c | 12 +++---- + .../net/ethernet/hisilicon/hns3/hns3_enet.c | 4 +-- + drivers/net/ethernet/huawei/hinic/hinic_rx.c | 4 +-- + drivers/net/ethernet/huawei/hinic/hinic_tx.c | 4 +-- + .../net/ethernet/intel/fm10k/fm10k_netdev.c | 8 ++--- + .../net/ethernet/intel/i40e/i40e_ethtool.c | 8 ++--- + drivers/net/ethernet/intel/i40e/i40e_main.c | 20 ++++++------ + .../net/ethernet/intel/iavf/iavf_ethtool.c | 8 ++--- + drivers/net/ethernet/intel/ice/ice_main.c | 4 +-- + drivers/net/ethernet/intel/igb/igb_ethtool.c | 12 +++---- + drivers/net/ethernet/intel/igb/igb_main.c | 8 ++--- + drivers/net/ethernet/intel/igc/igc_ethtool.c | 12 +++---- + drivers/net/ethernet/intel/igc/igc_main.c | 8 ++--- + .../net/ethernet/intel/ixgbe/ixgbe_ethtool.c | 8 ++--- + drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 8 ++--- + drivers/net/ethernet/intel/ixgbevf/ethtool.c | 12 +++---- + .../net/ethernet/intel/ixgbevf/ixgbevf_main.c | 8 ++--- + drivers/net/ethernet/marvell/mvneta.c | 8 ++--- + .../net/ethernet/marvell/mvpp2/mvpp2_main.c | 8 ++--- + drivers/net/ethernet/marvell/sky2.c | 8 ++--- + drivers/net/ethernet/mediatek/mtk_eth_soc.c | 8 ++--- + .../net/ethernet/mellanox/mlxsw/spectrum.c | 4 +-- + drivers/net/ethernet/microsoft/mana/mana_en.c | 8 ++--- + .../ethernet/microsoft/mana/mana_ethtool.c | 8 ++--- + .../ethernet/netronome/nfp/nfp_net_common.c | 8 ++--- + .../ethernet/netronome/nfp/nfp_net_ethtool.c | 8 ++--- + .../net/ethernet/netronome/nfp/nfp_net_repr.c | 4 +-- + drivers/net/ethernet/nvidia/forcedeth.c | 8 ++--- + .../net/ethernet/qualcomm/rmnet/rmnet_vnd.c | 4 +-- + drivers/net/ethernet/realtek/8139too.c | 8 ++--- + drivers/net/ethernet/socionext/sni_ave.c | 8 ++--- + drivers/net/ethernet/ti/am65-cpsw-nuss.c | 4 +-- + drivers/net/ethernet/ti/netcp_core.c | 8 ++--- + drivers/net/ethernet/via/via-rhine.c | 8 ++--- + .../net/ethernet/xilinx/xilinx_axienet_main.c | 8 ++--- + drivers/net/hyperv/netvsc_drv.c | 32 +++++++++---------- + drivers/net/ifb.c | 12 +++---- + drivers/net/ipvlan/ipvlan_main.c | 4 +-- + drivers/net/loopback.c | 4 +-- + drivers/net/macsec.c | 12 +++---- + drivers/net/macvlan.c | 4 +-- + drivers/net/mhi_net.c | 8 ++--- + drivers/net/netdevsim/netdev.c | 4 +-- + drivers/net/team/team.c | 4 +-- + drivers/net/team/team_mode_loadbalance.c | 4 +-- + drivers/net/veth.c | 12 +++---- + drivers/net/virtio_net.c | 16 +++++----- + drivers/net/vrf.c | 4 +-- + drivers/net/vxlan/vxlan_vnifilter.c | 4 +-- + drivers/net/wwan/mhi_wwan_mbim.c | 8 ++--- + drivers/net/xen-netfront.c | 8 ++--- + 63 files changed, 274 insertions(+), 274 deletions(-) + +diff --git a/drivers/net/ethernet/alacritech/slic.h b/drivers/net/ethernet/alacritech/slic.h +index 4eecbdfff3ff..82071d0e5f7f 100644 +--- a/drivers/net/ethernet/alacritech/slic.h ++++ b/drivers/net/ethernet/alacritech/slic.h +@@ -288,13 +288,13 @@ do { \ + u64_stats_update_end(&(st)->syncp); \ + } while (0) + +-#define SLIC_GET_STATS_COUNTER(newst, st, counter) \ +-{ \ +- unsigned int start; \ ++#define SLIC_GET_STATS_COUNTER(newst, st, counter) \ ++{ \ ++ unsigned int start; \ + do { \ +- start = u64_stats_fetch_begin_irq(&(st)->syncp); \ +- newst = (st)->counter; \ +- } while (u64_stats_fetch_retry_irq(&(st)->syncp, start)); \ ++ start = u64_stats_fetch_begin(&(st)->syncp); \ ++ newst = (st)->counter; \ ++ } while (u64_stats_fetch_retry(&(st)->syncp, start)); \ + } + + struct slic_upr { +diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c +index 444ccef76da2..8da79eedc057 100644 +--- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c ++++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c +@@ -118,9 +118,9 @@ static void ena_safe_update_stat(u64 *src, u64 *dst, + unsigned int start; + + do { +- start = u64_stats_fetch_begin_irq(syncp); ++ start = u64_stats_fetch_begin(syncp); + *(dst) = *src; +- } while (u64_stats_fetch_retry_irq(syncp, start)); ++ } while (u64_stats_fetch_retry(syncp, start)); + } + + static void ena_queue_stats(struct ena_adapter *adapter, u64 **data) +diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c +index 044b8afde69a..e296546f03cd 100644 +--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c ++++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c +@@ -3312,10 +3312,10 @@ static void ena_get_stats64(struct net_device *netdev, + tx_ring = &adapter->tx_ring[i]; + + do { +- start = u64_stats_fetch_begin_irq(&tx_ring->syncp); ++ start = u64_stats_fetch_begin(&tx_ring->syncp); + packets = tx_ring->tx_stats.cnt; + bytes = tx_ring->tx_stats.bytes; +- } while (u64_stats_fetch_retry_irq(&tx_ring->syncp, start)); ++ } while (u64_stats_fetch_retry(&tx_ring->syncp, start)); + + stats->tx_packets += packets; + stats->tx_bytes += bytes; +@@ -3323,20 +3323,20 @@ static void ena_get_stats64(struct net_device *netdev, + rx_ring = &adapter->rx_ring[i]; + + do { +- start = u64_stats_fetch_begin_irq(&rx_ring->syncp); ++ start = u64_stats_fetch_begin(&rx_ring->syncp); + packets = rx_ring->rx_stats.cnt; + bytes = rx_ring->rx_stats.bytes; +- } while (u64_stats_fetch_retry_irq(&rx_ring->syncp, start)); ++ } while (u64_stats_fetch_retry(&rx_ring->syncp, start)); + + stats->rx_packets += packets; + stats->rx_bytes += bytes; + } + + do { +- start = u64_stats_fetch_begin_irq(&adapter->syncp); ++ start = u64_stats_fetch_begin(&adapter->syncp); + rx_drops = adapter->dev_stats.rx_drops; + tx_drops = adapter->dev_stats.tx_drops; +- } while (u64_stats_fetch_retry_irq(&adapter->syncp, start)); ++ } while (u64_stats_fetch_retry(&adapter->syncp, start)); + + stats->rx_dropped = rx_drops; + stats->tx_dropped = tx_drops; +diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c +index 4d9d7d1edb9b..697ce83eeae1 100644 +--- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c ++++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c +@@ -957,7 +957,7 @@ unsigned int aq_ring_fill_stats_data(struct aq_ring_s *self, u64 *data) + /* This data should mimic aq_ethtool_queue_rx_stat_names structure */ + do { + count = 0; +- start = u64_stats_fetch_begin_irq(&self->stats.rx.syncp); ++ start = u64_stats_fetch_begin(&self->stats.rx.syncp); + data[count] = self->stats.rx.packets; + data[++count] = self->stats.rx.jumbo_packets; + data[++count] = self->stats.rx.lro_packets; +@@ -974,15 +974,15 @@ unsigned int aq_ring_fill_stats_data(struct aq_ring_s *self, u64 *data) + data[++count] = self->stats.rx.xdp_tx; + data[++count] = self->stats.rx.xdp_invalid; + data[++count] = self->stats.rx.xdp_redirect; +- } while (u64_stats_fetch_retry_irq(&self->stats.rx.syncp, start)); ++ } while (u64_stats_fetch_retry(&self->stats.rx.syncp, start)); + } else { + /* This data should mimic aq_ethtool_queue_tx_stat_names structure */ + do { + count = 0; +- start = u64_stats_fetch_begin_irq(&self->stats.tx.syncp); ++ start = u64_stats_fetch_begin(&self->stats.tx.syncp); + data[count] = self->stats.tx.packets; + data[++count] = self->stats.tx.queue_restarts; +- } while (u64_stats_fetch_retry_irq(&self->stats.tx.syncp, start)); ++ } while (u64_stats_fetch_retry(&self->stats.tx.syncp, start)); + } + + return ++count; +diff --git a/drivers/net/ethernet/asix/ax88796c_main.c b/drivers/net/ethernet/asix/ax88796c_main.c +index 8b7cdf015a16..21376c79f671 100644 +--- a/drivers/net/ethernet/asix/ax88796c_main.c ++++ b/drivers/net/ethernet/asix/ax88796c_main.c +@@ -662,12 +662,12 @@ static void ax88796c_get_stats64(struct net_device *ndev, + s = per_cpu_ptr(ax_local->stats, cpu); + + do { +- start = u64_stats_fetch_begin_irq(&s->syncp); ++ start = u64_stats_fetch_begin(&s->syncp); + rx_packets = u64_stats_read(&s->rx_packets); + rx_bytes = u64_stats_read(&s->rx_bytes); + tx_packets = u64_stats_read(&s->tx_packets); + tx_bytes = u64_stats_read(&s->tx_bytes); +- } while (u64_stats_fetch_retry_irq(&s->syncp, start)); ++ } while (u64_stats_fetch_retry(&s->syncp, start)); + + stats->rx_packets += rx_packets; + stats->rx_bytes += rx_bytes; +diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c +index 7f876721596c..b751dc8486dc 100644 +--- a/drivers/net/ethernet/broadcom/b44.c ++++ b/drivers/net/ethernet/broadcom/b44.c +@@ -1680,7 +1680,7 @@ static void b44_get_stats64(struct net_device *dev, + unsigned int start; + + do { +- start = u64_stats_fetch_begin_irq(&hwstat->syncp); ++ start = u64_stats_fetch_begin(&hwstat->syncp); + + /* Convert HW stats into rtnl_link_stats64 stats. */ + nstat->rx_packets = hwstat->rx_pkts; +@@ -1714,7 +1714,7 @@ static void b44_get_stats64(struct net_device *dev, + /* Carrier lost counter seems to be broken for some devices */ + nstat->tx_carrier_errors = hwstat->tx_carrier_lost; + #endif +- } while (u64_stats_fetch_retry_irq(&hwstat->syncp, start)); ++ } while (u64_stats_fetch_retry(&hwstat->syncp, start)); + + } + +@@ -2082,12 +2082,12 @@ static void b44_get_ethtool_stats(struct net_device *dev, + do { + data_src = &hwstat->tx_good_octets; + data_dst = data; +- start = u64_stats_fetch_begin_irq(&hwstat->syncp); ++ start = u64_stats_fetch_begin(&hwstat->syncp); + + for (i = 0; i < ARRAY_SIZE(b44_gstrings); i++) + *data_dst++ = *data_src++; + +- } while (u64_stats_fetch_retry_irq(&hwstat->syncp, start)); ++ } while (u64_stats_fetch_retry(&hwstat->syncp, start)); + } + + static void b44_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol) +diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c +index 425d6ccd5413..f8b1adc389b3 100644 +--- a/drivers/net/ethernet/broadcom/bcmsysport.c ++++ b/drivers/net/ethernet/broadcom/bcmsysport.c +@@ -457,10 +457,10 @@ static void bcm_sysport_update_tx_stats(struct bcm_sysport_priv *priv, + for (q = 0; q < priv->netdev->num_tx_queues; q++) { + ring = &priv->tx_rings[q]; + do { +- start = u64_stats_fetch_begin_irq(&priv->syncp); ++ start = u64_stats_fetch_begin(&priv->syncp); + bytes = ring->bytes; + packets = ring->packets; +- } while (u64_stats_fetch_retry_irq(&priv->syncp, start)); ++ } while (u64_stats_fetch_retry(&priv->syncp, start)); + + *tx_bytes += bytes; + *tx_packets += packets; +@@ -504,9 +504,9 @@ static void bcm_sysport_get_stats(struct net_device *dev, + if (s->stat_sizeof == sizeof(u64) && + s->type == BCM_SYSPORT_STAT_NETDEV64) { + do { +- start = u64_stats_fetch_begin_irq(syncp); ++ start = u64_stats_fetch_begin(syncp); + data[i] = *(u64 *)p; +- } while (u64_stats_fetch_retry_irq(syncp, start)); ++ } while (u64_stats_fetch_retry(syncp, start)); + } else + data[i] = *(u32 *)p; + j++; +@@ -1878,10 +1878,10 @@ static void bcm_sysport_get_stats64(struct net_device *dev, + &stats->tx_packets); + + do { +- start = u64_stats_fetch_begin_irq(&priv->syncp); ++ start = u64_stats_fetch_begin(&priv->syncp); + stats->rx_packets = stats64->rx_packets; + stats->rx_bytes = stats64->rx_bytes; +- } while (u64_stats_fetch_retry_irq(&priv->syncp, start)); ++ } while (u64_stats_fetch_retry(&priv->syncp, start)); + } + + static void bcm_sysport_netif_start(struct net_device *dev) +diff --git a/drivers/net/ethernet/cortina/gemini.c b/drivers/net/ethernet/cortina/gemini.c +index 7c0b0bc033c9..e7137b468f5b 100644 +--- a/drivers/net/ethernet/cortina/gemini.c ++++ b/drivers/net/ethernet/cortina/gemini.c +@@ -1941,7 +1941,7 @@ static void gmac_get_stats64(struct net_device *netdev, + + /* Racing with RX NAPI */ + do { +- start = u64_stats_fetch_begin_irq(&port->rx_stats_syncp); ++ start = u64_stats_fetch_begin(&port->rx_stats_syncp); + + stats->rx_packets = port->stats.rx_packets; + stats->rx_bytes = port->stats.rx_bytes; +@@ -1953,11 +1953,11 @@ static void gmac_get_stats64(struct net_device *netdev, + stats->rx_crc_errors = port->stats.rx_crc_errors; + stats->rx_frame_errors = port->stats.rx_frame_errors; + +- } while (u64_stats_fetch_retry_irq(&port->rx_stats_syncp, start)); ++ } while (u64_stats_fetch_retry(&port->rx_stats_syncp, start)); + + /* Racing with MIB and TX completion interrupts */ + do { +- start = u64_stats_fetch_begin_irq(&port->ir_stats_syncp); ++ start = u64_stats_fetch_begin(&port->ir_stats_syncp); + + stats->tx_errors = port->stats.tx_errors; + stats->tx_packets = port->stats.tx_packets; +@@ -1967,15 +1967,15 @@ static void gmac_get_stats64(struct net_device *netdev, + stats->rx_missed_errors = port->stats.rx_missed_errors; + stats->rx_fifo_errors = port->stats.rx_fifo_errors; + +- } while (u64_stats_fetch_retry_irq(&port->ir_stats_syncp, start)); ++ } while (u64_stats_fetch_retry(&port->ir_stats_syncp, start)); + + /* Racing with hard_start_xmit */ + do { +- start = u64_stats_fetch_begin_irq(&port->tx_stats_syncp); ++ start = u64_stats_fetch_begin(&port->tx_stats_syncp); + + stats->tx_dropped = port->stats.tx_dropped; + +- } while (u64_stats_fetch_retry_irq(&port->tx_stats_syncp, start)); ++ } while (u64_stats_fetch_retry(&port->tx_stats_syncp, start)); + + stats->rx_dropped += stats->rx_missed_errors; + } +@@ -2044,18 +2044,18 @@ static void gmac_get_ethtool_stats(struct net_device *netdev, + /* Racing with MIB interrupt */ + do { + p = values; +- start = u64_stats_fetch_begin_irq(&port->ir_stats_syncp); ++ start = u64_stats_fetch_begin(&port->ir_stats_syncp); + + for (i = 0; i < RX_STATS_NUM; i++) + *p++ = port->hw_stats[i]; + +- } while (u64_stats_fetch_retry_irq(&port->ir_stats_syncp, start)); ++ } while (u64_stats_fetch_retry(&port->ir_stats_syncp, start)); + values = p; + + /* Racing with RX NAPI */ + do { + p = values; +- start = u64_stats_fetch_begin_irq(&port->rx_stats_syncp); ++ start = u64_stats_fetch_begin(&port->rx_stats_syncp); + + for (i = 0; i < RX_STATUS_NUM; i++) + *p++ = port->rx_stats[i]; +@@ -2063,13 +2063,13 @@ static void gmac_get_ethtool_stats(struct net_device *netdev, + *p++ = port->rx_csum_stats[i]; + *p++ = port->rx_napi_exits; + +- } while (u64_stats_fetch_retry_irq(&port->rx_stats_syncp, start)); ++ } while (u64_stats_fetch_retry(&port->rx_stats_syncp, start)); + values = p; + + /* Racing with TX start_xmit */ + do { + p = values; +- start = u64_stats_fetch_begin_irq(&port->tx_stats_syncp); ++ start = u64_stats_fetch_begin(&port->tx_stats_syncp); + + for (i = 0; i < TX_MAX_FRAGS; i++) { + *values++ = port->tx_frag_stats[i]; +@@ -2078,7 +2078,7 @@ static void gmac_get_ethtool_stats(struct net_device *netdev, + *values++ = port->tx_frags_linearized; + *values++ = port->tx_hw_csummed; + +- } while (u64_stats_fetch_retry_irq(&port->tx_stats_syncp, start)); ++ } while (u64_stats_fetch_retry(&port->tx_stats_syncp, start)); + } + + static int gmac_get_ksettings(struct net_device *netdev, +diff --git a/drivers/net/ethernet/emulex/benet/be_ethtool.c b/drivers/net/ethernet/emulex/benet/be_ethtool.c +index 77edc3d9b505..a29de29bdf23 100644 +--- a/drivers/net/ethernet/emulex/benet/be_ethtool.c ++++ b/drivers/net/ethernet/emulex/benet/be_ethtool.c +@@ -389,10 +389,10 @@ static void be_get_ethtool_stats(struct net_device *netdev, + struct be_rx_stats *stats = rx_stats(rxo); + + do { +- start = u64_stats_fetch_begin_irq(&stats->sync); ++ start = u64_stats_fetch_begin(&stats->sync); + data[base] = stats->rx_bytes; + data[base + 1] = stats->rx_pkts; +- } while (u64_stats_fetch_retry_irq(&stats->sync, start)); ++ } while (u64_stats_fetch_retry(&stats->sync, start)); + + for (i = 2; i < ETHTOOL_RXSTATS_NUM; i++) { + p = (u8 *)stats + et_rx_stats[i].offset; +@@ -405,19 +405,19 @@ static void be_get_ethtool_stats(struct net_device *netdev, + struct be_tx_stats *stats = tx_stats(txo); + + do { +- start = u64_stats_fetch_begin_irq(&stats->sync_compl); ++ start = u64_stats_fetch_begin(&stats->sync_compl); + data[base] = stats->tx_compl; +- } while (u64_stats_fetch_retry_irq(&stats->sync_compl, start)); ++ } while (u64_stats_fetch_retry(&stats->sync_compl, start)); + + do { +- start = u64_stats_fetch_begin_irq(&stats->sync); ++ start = u64_stats_fetch_begin(&stats->sync); + for (i = 1; i < ETHTOOL_TXSTATS_NUM; i++) { + p = (u8 *)stats + et_tx_stats[i].offset; + data[base + i] = + (et_tx_stats[i].size == sizeof(u64)) ? + *(u64 *)p : *(u32 *)p; + } +- } while (u64_stats_fetch_retry_irq(&stats->sync, start)); ++ } while (u64_stats_fetch_retry(&stats->sync, start)); + base += ETHTOOL_TXSTATS_NUM; + } + } +diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c +index a9e4e6464a04..968fecfc03bd 100644 +--- a/drivers/net/ethernet/emulex/benet/be_main.c ++++ b/drivers/net/ethernet/emulex/benet/be_main.c +@@ -665,10 +665,10 @@ static void be_get_stats64(struct net_device *netdev, + const struct be_rx_stats *rx_stats = rx_stats(rxo); + + do { +- start = u64_stats_fetch_begin_irq(&rx_stats->sync); ++ start = u64_stats_fetch_begin(&rx_stats->sync); + pkts = rx_stats(rxo)->rx_pkts; + bytes = rx_stats(rxo)->rx_bytes; +- } while (u64_stats_fetch_retry_irq(&rx_stats->sync, start)); ++ } while (u64_stats_fetch_retry(&rx_stats->sync, start)); + stats->rx_packets += pkts; + stats->rx_bytes += bytes; + stats->multicast += rx_stats(rxo)->rx_mcast_pkts; +@@ -680,10 +680,10 @@ static void be_get_stats64(struct net_device *netdev, + const struct be_tx_stats *tx_stats = tx_stats(txo); + + do { +- start = u64_stats_fetch_begin_irq(&tx_stats->sync); ++ start = u64_stats_fetch_begin(&tx_stats->sync); + pkts = tx_stats(txo)->tx_pkts; + bytes = tx_stats(txo)->tx_bytes; +- } while (u64_stats_fetch_retry_irq(&tx_stats->sync, start)); ++ } while (u64_stats_fetch_retry(&tx_stats->sync, start)); + stats->tx_packets += pkts; + stats->tx_bytes += bytes; + } +@@ -2156,16 +2156,16 @@ static int be_get_new_eqd(struct be_eq_obj *eqo) + + for_all_rx_queues_on_eq(adapter, eqo, rxo, i) { + do { +- start = u64_stats_fetch_begin_irq(&rxo->stats.sync); ++ start = u64_stats_fetch_begin(&rxo->stats.sync); + rx_pkts += rxo->stats.rx_pkts; +- } while (u64_stats_fetch_retry_irq(&rxo->stats.sync, start)); ++ } while (u64_stats_fetch_retry(&rxo->stats.sync, start)); + } + + for_all_tx_queues_on_eq(adapter, eqo, txo, i) { + do { +- start = u64_stats_fetch_begin_irq(&txo->stats.sync); ++ start = u64_stats_fetch_begin(&txo->stats.sync); + tx_pkts += txo->stats.tx_reqs; +- } while (u64_stats_fetch_retry_irq(&txo->stats.sync, start)); ++ } while (u64_stats_fetch_retry(&txo->stats.sync, start)); + } + + /* Skip, if wrapped around or first calculation */ +diff --git a/drivers/net/ethernet/fungible/funeth/funeth_txrx.h b/drivers/net/ethernet/fungible/funeth/funeth_txrx.h +index 671f51135c26..53b7e95213a8 100644 +--- a/drivers/net/ethernet/fungible/funeth/funeth_txrx.h ++++ b/drivers/net/ethernet/fungible/funeth/funeth_txrx.h +@@ -206,9 +206,9 @@ struct funeth_rxq { + + #define FUN_QSTAT_READ(q, seq, stats_copy) \ + do { \ +- seq = u64_stats_fetch_begin_irq(&(q)->syncp); \ ++ seq = u64_stats_fetch_begin(&(q)->syncp); \ + stats_copy = (q)->stats; \ +- } while (u64_stats_fetch_retry_irq(&(q)->syncp, (seq))) ++ } while (u64_stats_fetch_retry(&(q)->syncp, (seq))) + + #define FUN_INT_NAME_LEN (IFNAMSIZ + 16) + +diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c +index 033f17cb96be..0a5953089a24 100644 +--- a/drivers/net/ethernet/google/gve/gve_ethtool.c ++++ b/drivers/net/ethernet/google/gve/gve_ethtool.c +@@ -177,14 +177,14 @@ gve_get_ethtool_stats(struct net_device *netdev, + struct gve_rx_ring *rx = &priv->rx[ring]; + + start = +- u64_stats_fetch_begin_irq(&priv->rx[ring].statss); ++ u64_stats_fetch_begin(&priv->rx[ring].statss); + tmp_rx_pkts = rx->rpackets; + tmp_rx_bytes = rx->rbytes; + tmp_rx_skb_alloc_fail = rx->rx_skb_alloc_fail; + tmp_rx_buf_alloc_fail = rx->rx_buf_alloc_fail; + tmp_rx_desc_err_dropped_pkt = + rx->rx_desc_err_dropped_pkt; +- } while (u64_stats_fetch_retry_irq(&priv->rx[ring].statss, ++ } while (u64_stats_fetch_retry(&priv->rx[ring].statss, + start)); + rx_pkts += tmp_rx_pkts; + rx_bytes += tmp_rx_bytes; +@@ -198,10 +198,10 @@ gve_get_ethtool_stats(struct net_device *netdev, + if (priv->tx) { + do { + start = +- u64_stats_fetch_begin_irq(&priv->tx[ring].statss); ++ u64_stats_fetch_begin(&priv->tx[ring].statss); + tmp_tx_pkts = priv->tx[ring].pkt_done; + tmp_tx_bytes = priv->tx[ring].bytes_done; +- } while (u64_stats_fetch_retry_irq(&priv->tx[ring].statss, ++ } while (u64_stats_fetch_retry(&priv->tx[ring].statss, + start)); + tx_pkts += tmp_tx_pkts; + tx_bytes += tmp_tx_bytes; +@@ -259,13 +259,13 @@ gve_get_ethtool_stats(struct net_device *netdev, + data[i++] = rx->fill_cnt - rx->cnt; + do { + start = +- u64_stats_fetch_begin_irq(&priv->rx[ring].statss); ++ u64_stats_fetch_begin(&priv->rx[ring].statss); + tmp_rx_bytes = rx->rbytes; + tmp_rx_skb_alloc_fail = rx->rx_skb_alloc_fail; + tmp_rx_buf_alloc_fail = rx->rx_buf_alloc_fail; + tmp_rx_desc_err_dropped_pkt = + rx->rx_desc_err_dropped_pkt; +- } while (u64_stats_fetch_retry_irq(&priv->rx[ring].statss, ++ } while (u64_stats_fetch_retry(&priv->rx[ring].statss, + start)); + data[i++] = tmp_rx_bytes; + data[i++] = rx->rx_cont_packet_cnt; +@@ -331,9 +331,9 @@ gve_get_ethtool_stats(struct net_device *netdev, + } + do { + start = +- u64_stats_fetch_begin_irq(&priv->tx[ring].statss); ++ u64_stats_fetch_begin(&priv->tx[ring].statss); + tmp_tx_bytes = tx->bytes_done; +- } while (u64_stats_fetch_retry_irq(&priv->tx[ring].statss, ++ } while (u64_stats_fetch_retry(&priv->tx[ring].statss, + start)); + data[i++] = tmp_tx_bytes; + data[i++] = tx->wake_queue; +diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c +index d3f6ad586ba1..1c2cd3ee1956 100644 +--- a/drivers/net/ethernet/google/gve/gve_main.c ++++ b/drivers/net/ethernet/google/gve/gve_main.c +@@ -51,10 +51,10 @@ static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s) + for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) { + do { + start = +- u64_stats_fetch_begin_irq(&priv->rx[ring].statss); ++ u64_stats_fetch_begin(&priv->rx[ring].statss); + packets = priv->rx[ring].rpackets; + bytes = priv->rx[ring].rbytes; +- } while (u64_stats_fetch_retry_irq(&priv->rx[ring].statss, ++ } while (u64_stats_fetch_retry(&priv->rx[ring].statss, + start)); + s->rx_packets += packets; + s->rx_bytes += bytes; +@@ -64,10 +64,10 @@ static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s) + for (ring = 0; ring < priv->tx_cfg.num_queues; ring++) { + do { + start = +- u64_stats_fetch_begin_irq(&priv->tx[ring].statss); ++ u64_stats_fetch_begin(&priv->tx[ring].statss); + packets = priv->tx[ring].pkt_done; + bytes = priv->tx[ring].bytes_done; +- } while (u64_stats_fetch_retry_irq(&priv->tx[ring].statss, ++ } while (u64_stats_fetch_retry(&priv->tx[ring].statss, + start)); + s->tx_packets += packets; + s->tx_bytes += bytes; +@@ -1260,9 +1260,9 @@ void gve_handle_report_stats(struct gve_priv *priv) + } + + do { +- start = u64_stats_fetch_begin_irq(&priv->tx[idx].statss); ++ start = u64_stats_fetch_begin(&priv->tx[idx].statss); + tx_bytes = priv->tx[idx].bytes_done; +- } while (u64_stats_fetch_retry_irq(&priv->tx[idx].statss, start)); ++ } while (u64_stats_fetch_retry(&priv->tx[idx].statss, start)); + stats[stats_idx++] = (struct stats) { + .stat_name = cpu_to_be32(TX_WAKE_CNT), + .value = cpu_to_be64(priv->tx[idx].wake_queue), +diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +index 78d6752fe051..5bf81dca14fa 100644 +--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c ++++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +@@ -2496,7 +2496,7 @@ static void hns3_fetch_stats(struct rtnl_link_stats64 *stats, + unsigned int start; + + do { +- start = u64_stats_fetch_begin_irq(&ring->syncp); ++ start = u64_stats_fetch_begin(&ring->syncp); + if (is_tx) { + stats->tx_bytes += ring->stats.tx_bytes; + stats->tx_packets += ring->stats.tx_pkts; +@@ -2530,7 +2530,7 @@ static void hns3_fetch_stats(struct rtnl_link_stats64 *stats, + stats->multicast += ring->stats.rx_multicast; + stats->rx_length_errors += ring->stats.err_pkt_len; + } +- } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); ++ } while (u64_stats_fetch_retry(&ring->syncp, start)); + } + + static void hns3_nic_get_stats64(struct net_device *netdev, +diff --git a/drivers/net/ethernet/huawei/hinic/hinic_rx.c b/drivers/net/ethernet/huawei/hinic/hinic_rx.c +index d649c6e323c8..ceec8be2a73b 100644 +--- a/drivers/net/ethernet/huawei/hinic/hinic_rx.c ++++ b/drivers/net/ethernet/huawei/hinic/hinic_rx.c +@@ -74,14 +74,14 @@ void hinic_rxq_get_stats(struct hinic_rxq *rxq, struct hinic_rxq_stats *stats) + unsigned int start; + + do { +- start = u64_stats_fetch_begin_irq(&rxq_stats->syncp); ++ start = u64_stats_fetch_begin(&rxq_stats->syncp); + stats->pkts = rxq_stats->pkts; + stats->bytes = rxq_stats->bytes; + stats->errors = rxq_stats->csum_errors + + rxq_stats->other_errors; + stats->csum_errors = rxq_stats->csum_errors; + stats->other_errors = rxq_stats->other_errors; +- } while (u64_stats_fetch_retry_irq(&rxq_stats->syncp, start)); ++ } while (u64_stats_fetch_retry(&rxq_stats->syncp, start)); + } + + /** +diff --git a/drivers/net/ethernet/huawei/hinic/hinic_tx.c b/drivers/net/ethernet/huawei/hinic/hinic_tx.c +index e91476c8ff8b..ad47ac51a139 100644 +--- a/drivers/net/ethernet/huawei/hinic/hinic_tx.c ++++ b/drivers/net/ethernet/huawei/hinic/hinic_tx.c +@@ -99,14 +99,14 @@ void hinic_txq_get_stats(struct hinic_txq *txq, struct hinic_txq_stats *stats) + unsigned int start; + + do { +- start = u64_stats_fetch_begin_irq(&txq_stats->syncp); ++ start = u64_stats_fetch_begin(&txq_stats->syncp); + stats->pkts = txq_stats->pkts; + stats->bytes = txq_stats->bytes; + stats->tx_busy = txq_stats->tx_busy; + stats->tx_wake = txq_stats->tx_wake; + stats->tx_dropped = txq_stats->tx_dropped; + stats->big_frags_pkts = txq_stats->big_frags_pkts; +- } while (u64_stats_fetch_retry_irq(&txq_stats->syncp, start)); ++ } while (u64_stats_fetch_retry(&txq_stats->syncp, start)); + } + + /** +diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c +index 2cca9e84e31e..34ab5ff9823b 100644 +--- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c ++++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c +@@ -1229,10 +1229,10 @@ static void fm10k_get_stats64(struct net_device *netdev, + continue; + + do { +- start = u64_stats_fetch_begin_irq(&ring->syncp); ++ start = u64_stats_fetch_begin(&ring->syncp); + packets = ring->stats.packets; + bytes = ring->stats.bytes; +- } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); ++ } while (u64_stats_fetch_retry(&ring->syncp, start)); + + stats->rx_packets += packets; + stats->rx_bytes += bytes; +@@ -1245,10 +1245,10 @@ static void fm10k_get_stats64(struct net_device *netdev, + continue; + + do { +- start = u64_stats_fetch_begin_irq(&ring->syncp); ++ start = u64_stats_fetch_begin(&ring->syncp); + packets = ring->stats.packets; + bytes = ring->stats.bytes; +- } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); ++ } while (u64_stats_fetch_retry(&ring->syncp, start)); + + stats->tx_packets += packets; + stats->tx_bytes += bytes; +diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +index 107bcca7db8c..8f36fe90180f 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c ++++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +@@ -154,7 +154,7 @@ __i40e_add_ethtool_stats(u64 **data, void *pointer, + * @ring: the ring to copy + * + * Queue statistics must be copied while protected by +- * u64_stats_fetch_begin_irq, so we can't directly use i40e_add_ethtool_stats. ++ * u64_stats_fetch_begin, so we can't directly use i40e_add_ethtool_stats. + * Assumes that queue stats are defined in i40e_gstrings_queue_stats. If the + * ring pointer is null, zero out the queue stat values and update the data + * pointer. Otherwise safely copy the stats from the ring into the supplied +@@ -172,16 +172,16 @@ i40e_add_queue_stats(u64 **data, struct i40e_ring *ring) + + /* To avoid invalid statistics values, ensure that we keep retrying + * the copy until we get a consistent value according to +- * u64_stats_fetch_retry_irq. But first, make sure our ring is ++ * u64_stats_fetch_retry. But first, make sure our ring is + * non-null before attempting to access its syncp. + */ + do { +- start = !ring ? 0 : u64_stats_fetch_begin_irq(&ring->syncp); ++ start = !ring ? 0 : u64_stats_fetch_begin(&ring->syncp); + for (i = 0; i < size; i++) { + i40e_add_one_ethtool_stat(&(*data)[i], ring, + &stats[i]); + } +- } while (ring && u64_stats_fetch_retry_irq(&ring->syncp, start)); ++ } while (ring && u64_stats_fetch_retry(&ring->syncp, start)); + + /* Once we successfully copy the stats in, update the data pointer */ + *data += size; +diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c +index b4157ff370a3..3b1f912c4c2b 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_main.c ++++ b/drivers/net/ethernet/intel/i40e/i40e_main.c +@@ -419,10 +419,10 @@ static void i40e_get_netdev_stats_struct_tx(struct i40e_ring *ring, + unsigned int start; + + do { +- start = u64_stats_fetch_begin_irq(&ring->syncp); ++ start = u64_stats_fetch_begin(&ring->syncp); + packets = ring->stats.packets; + bytes = ring->stats.bytes; +- } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); ++ } while (u64_stats_fetch_retry(&ring->syncp, start)); + + stats->tx_packets += packets; + stats->tx_bytes += bytes; +@@ -472,10 +472,10 @@ static void i40e_get_netdev_stats_struct(struct net_device *netdev, + if (!ring) + continue; + do { +- start = u64_stats_fetch_begin_irq(&ring->syncp); ++ start = u64_stats_fetch_begin(&ring->syncp); + packets = ring->stats.packets; + bytes = ring->stats.bytes; +- } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); ++ } while (u64_stats_fetch_retry(&ring->syncp, start)); + + stats->rx_packets += packets; + stats->rx_bytes += bytes; +@@ -897,10 +897,10 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi) + continue; + + do { +- start = u64_stats_fetch_begin_irq(&p->syncp); ++ start = u64_stats_fetch_begin(&p->syncp); + packets = p->stats.packets; + bytes = p->stats.bytes; +- } while (u64_stats_fetch_retry_irq(&p->syncp, start)); ++ } while (u64_stats_fetch_retry(&p->syncp, start)); + tx_b += bytes; + tx_p += packets; + tx_restart += p->tx_stats.restart_queue; +@@ -915,10 +915,10 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi) + continue; + + do { +- start = u64_stats_fetch_begin_irq(&p->syncp); ++ start = u64_stats_fetch_begin(&p->syncp); + packets = p->stats.packets; + bytes = p->stats.bytes; +- } while (u64_stats_fetch_retry_irq(&p->syncp, start)); ++ } while (u64_stats_fetch_retry(&p->syncp, start)); + rx_b += bytes; + rx_p += packets; + rx_buf += p->rx_stats.alloc_buff_failed; +@@ -935,10 +935,10 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi) + continue; + + do { +- start = u64_stats_fetch_begin_irq(&p->syncp); ++ start = u64_stats_fetch_begin(&p->syncp); + packets = p->stats.packets; + bytes = p->stats.bytes; +- } while (u64_stats_fetch_retry_irq(&p->syncp, start)); ++ } while (u64_stats_fetch_retry(&p->syncp, start)); + tx_b += bytes; + tx_p += packets; + tx_restart += p->tx_stats.restart_queue; +diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c +index f4ac2b164b3e..892c6a4f03bb 100644 +--- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c ++++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c +@@ -147,7 +147,7 @@ __iavf_add_ethtool_stats(u64 **data, void *pointer, + * @ring: the ring to copy + * + * Queue statistics must be copied while protected by +- * u64_stats_fetch_begin_irq, so we can't directly use iavf_add_ethtool_stats. ++ * u64_stats_fetch_begin, so we can't directly use iavf_add_ethtool_stats. + * Assumes that queue stats are defined in iavf_gstrings_queue_stats. If the + * ring pointer is null, zero out the queue stat values and update the data + * pointer. Otherwise safely copy the stats from the ring into the supplied +@@ -165,14 +165,14 @@ iavf_add_queue_stats(u64 **data, struct iavf_ring *ring) + + /* To avoid invalid statistics values, ensure that we keep retrying + * the copy until we get a consistent value according to +- * u64_stats_fetch_retry_irq. But first, make sure our ring is ++ * u64_stats_fetch_retry. But first, make sure our ring is + * non-null before attempting to access its syncp. + */ + do { +- start = !ring ? 0 : u64_stats_fetch_begin_irq(&ring->syncp); ++ start = !ring ? 0 : u64_stats_fetch_begin(&ring->syncp); + for (i = 0; i < size; i++) + iavf_add_one_ethtool_stat(&(*data)[i], ring, &stats[i]); +- } while (ring && u64_stats_fetch_retry_irq(&ring->syncp, start)); ++ } while (ring && u64_stats_fetch_retry(&ring->syncp, start)); + + /* Once we successfully copy the stats in, update the data pointer */ + *data += size; +diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c +index f0f39364819a..f8d722339e89 100644 +--- a/drivers/net/ethernet/intel/ice/ice_main.c ++++ b/drivers/net/ethernet/intel/ice/ice_main.c +@@ -6419,10 +6419,10 @@ ice_fetch_u64_stats_per_ring(struct u64_stats_sync *syncp, + unsigned int start; + + do { +- start = u64_stats_fetch_begin_irq(syncp); ++ start = u64_stats_fetch_begin(syncp); + *pkts = stats.pkts; + *bytes = stats.bytes; +- } while (u64_stats_fetch_retry_irq(syncp, start)); ++ } while (u64_stats_fetch_retry(syncp, start)); + } + + /** +diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c +index ceff537d9d22..4ee849985e2b 100644 +--- a/drivers/net/ethernet/intel/igb/igb_ethtool.c ++++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c +@@ -2316,15 +2316,15 @@ static void igb_get_ethtool_stats(struct net_device *netdev, + + ring = adapter->tx_ring[j]; + do { +- start = u64_stats_fetch_begin_irq(&ring->tx_syncp); ++ start = u64_stats_fetch_begin(&ring->tx_syncp); + data[i] = ring->tx_stats.packets; + data[i+1] = ring->tx_stats.bytes; + data[i+2] = ring->tx_stats.restart_queue; +- } while (u64_stats_fetch_retry_irq(&ring->tx_syncp, start)); ++ } while (u64_stats_fetch_retry(&ring->tx_syncp, start)); + do { +- start = u64_stats_fetch_begin_irq(&ring->tx_syncp2); ++ start = u64_stats_fetch_begin(&ring->tx_syncp2); + restart2 = ring->tx_stats.restart_queue2; +- } while (u64_stats_fetch_retry_irq(&ring->tx_syncp2, start)); ++ } while (u64_stats_fetch_retry(&ring->tx_syncp2, start)); + data[i+2] += restart2; + + i += IGB_TX_QUEUE_STATS_LEN; +@@ -2332,13 +2332,13 @@ static void igb_get_ethtool_stats(struct net_device *netdev, + for (j = 0; j < adapter->num_rx_queues; j++) { + ring = adapter->rx_ring[j]; + do { +- start = u64_stats_fetch_begin_irq(&ring->rx_syncp); ++ start = u64_stats_fetch_begin(&ring->rx_syncp); + data[i] = ring->rx_stats.packets; + data[i+1] = ring->rx_stats.bytes; + data[i+2] = ring->rx_stats.drops; + data[i+3] = ring->rx_stats.csum_err; + data[i+4] = ring->rx_stats.alloc_failed; +- } while (u64_stats_fetch_retry_irq(&ring->rx_syncp, start)); ++ } while (u64_stats_fetch_retry(&ring->rx_syncp, start)); + i += IGB_RX_QUEUE_STATS_LEN; + } + spin_unlock(&adapter->stats64_lock); +diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c +index 45ce4ed16146..9824f7cfaca4 100644 +--- a/drivers/net/ethernet/intel/igb/igb_main.c ++++ b/drivers/net/ethernet/intel/igb/igb_main.c +@@ -6660,10 +6660,10 @@ void igb_update_stats(struct igb_adapter *adapter) + } + + do { +- start = u64_stats_fetch_begin_irq(&ring->rx_syncp); ++ start = u64_stats_fetch_begin(&ring->rx_syncp); + _bytes = ring->rx_stats.bytes; + _packets = ring->rx_stats.packets; +- } while (u64_stats_fetch_retry_irq(&ring->rx_syncp, start)); ++ } while (u64_stats_fetch_retry(&ring->rx_syncp, start)); + bytes += _bytes; + packets += _packets; + } +@@ -6676,10 +6676,10 @@ void igb_update_stats(struct igb_adapter *adapter) + for (i = 0; i < adapter->num_tx_queues; i++) { + struct igb_ring *ring = adapter->tx_ring[i]; + do { +- start = u64_stats_fetch_begin_irq(&ring->tx_syncp); ++ start = u64_stats_fetch_begin(&ring->tx_syncp); + _bytes = ring->tx_stats.bytes; + _packets = ring->tx_stats.packets; +- } while (u64_stats_fetch_retry_irq(&ring->tx_syncp, start)); ++ } while (u64_stats_fetch_retry(&ring->tx_syncp, start)); + bytes += _bytes; + packets += _packets; + } +diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c +index 81897f7a90a9..dd8a9d27a167 100644 +--- a/drivers/net/ethernet/intel/igc/igc_ethtool.c ++++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c +@@ -840,15 +840,15 @@ static void igc_ethtool_get_stats(struct net_device *netdev, + + ring = adapter->tx_ring[j]; + do { +- start = u64_stats_fetch_begin_irq(&ring->tx_syncp); ++ start = u64_stats_fetch_begin(&ring->tx_syncp); + data[i] = ring->tx_stats.packets; + data[i + 1] = ring->tx_stats.bytes; + data[i + 2] = ring->tx_stats.restart_queue; +- } while (u64_stats_fetch_retry_irq(&ring->tx_syncp, start)); ++ } while (u64_stats_fetch_retry(&ring->tx_syncp, start)); + do { +- start = u64_stats_fetch_begin_irq(&ring->tx_syncp2); ++ start = u64_stats_fetch_begin(&ring->tx_syncp2); + restart2 = ring->tx_stats.restart_queue2; +- } while (u64_stats_fetch_retry_irq(&ring->tx_syncp2, start)); ++ } while (u64_stats_fetch_retry(&ring->tx_syncp2, start)); + data[i + 2] += restart2; + + i += IGC_TX_QUEUE_STATS_LEN; +@@ -856,13 +856,13 @@ static void igc_ethtool_get_stats(struct net_device *netdev, + for (j = 0; j < adapter->num_rx_queues; j++) { + ring = adapter->rx_ring[j]; + do { +- start = u64_stats_fetch_begin_irq(&ring->rx_syncp); ++ start = u64_stats_fetch_begin(&ring->rx_syncp); + data[i] = ring->rx_stats.packets; + data[i + 1] = ring->rx_stats.bytes; + data[i + 2] = ring->rx_stats.drops; + data[i + 3] = ring->rx_stats.csum_err; + data[i + 4] = ring->rx_stats.alloc_failed; +- } while (u64_stats_fetch_retry_irq(&ring->rx_syncp, start)); ++ } while (u64_stats_fetch_retry(&ring->rx_syncp, start)); + i += IGC_RX_QUEUE_STATS_LEN; + } + spin_unlock(&adapter->stats64_lock); +diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c +index 4b6f882b380d..c8c5c9d96ba2 100644 +--- a/drivers/net/ethernet/intel/igc/igc_main.c ++++ b/drivers/net/ethernet/intel/igc/igc_main.c +@@ -4868,10 +4868,10 @@ void igc_update_stats(struct igc_adapter *adapter) + } + + do { +- start = u64_stats_fetch_begin_irq(&ring->rx_syncp); ++ start = u64_stats_fetch_begin(&ring->rx_syncp); + _bytes = ring->rx_stats.bytes; + _packets = ring->rx_stats.packets; +- } while (u64_stats_fetch_retry_irq(&ring->rx_syncp, start)); ++ } while (u64_stats_fetch_retry(&ring->rx_syncp, start)); + bytes += _bytes; + packets += _packets; + } +@@ -4885,10 +4885,10 @@ void igc_update_stats(struct igc_adapter *adapter) + struct igc_ring *ring = adapter->tx_ring[i]; + + do { +- start = u64_stats_fetch_begin_irq(&ring->tx_syncp); ++ start = u64_stats_fetch_begin(&ring->tx_syncp); + _bytes = ring->tx_stats.bytes; + _packets = ring->tx_stats.packets; +- } while (u64_stats_fetch_retry_irq(&ring->tx_syncp, start)); ++ } while (u64_stats_fetch_retry(&ring->tx_syncp, start)); + bytes += _bytes; + packets += _packets; + } +diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +index 0051aa676e19..1c22ff2dba9b 100644 +--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c ++++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +@@ -1335,10 +1335,10 @@ static void ixgbe_get_ethtool_stats(struct net_device *netdev, + } + + do { +- start = u64_stats_fetch_begin_irq(&ring->syncp); ++ start = u64_stats_fetch_begin(&ring->syncp); + data[i] = ring->stats.packets; + data[i+1] = ring->stats.bytes; +- } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); ++ } while (u64_stats_fetch_retry(&ring->syncp, start)); + i += 2; + } + for (j = 0; j < IXGBE_NUM_RX_QUEUES; j++) { +@@ -1351,10 +1351,10 @@ static void ixgbe_get_ethtool_stats(struct net_device *netdev, + } + + do { +- start = u64_stats_fetch_begin_irq(&ring->syncp); ++ start = u64_stats_fetch_begin(&ring->syncp); + data[i] = ring->stats.packets; + data[i+1] = ring->stats.bytes; +- } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); ++ } while (u64_stats_fetch_retry(&ring->syncp, start)); + i += 2; + } + +diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +index 9e0e13638c46..ec86b61a8db8 100644 +--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c ++++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +@@ -9051,10 +9051,10 @@ static void ixgbe_get_ring_stats64(struct rtnl_link_stats64 *stats, + + if (ring) { + do { +- start = u64_stats_fetch_begin_irq(&ring->syncp); ++ start = u64_stats_fetch_begin(&ring->syncp); + packets = ring->stats.packets; + bytes = ring->stats.bytes; +- } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); ++ } while (u64_stats_fetch_retry(&ring->syncp, start)); + stats->tx_packets += packets; + stats->tx_bytes += bytes; + } +@@ -9074,10 +9074,10 @@ static void ixgbe_get_stats64(struct net_device *netdev, + + if (ring) { + do { +- start = u64_stats_fetch_begin_irq(&ring->syncp); ++ start = u64_stats_fetch_begin(&ring->syncp); + packets = ring->stats.packets; + bytes = ring->stats.bytes; +- } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); ++ } while (u64_stats_fetch_retry(&ring->syncp, start)); + stats->rx_packets += packets; + stats->rx_bytes += bytes; + } +diff --git a/drivers/net/ethernet/intel/ixgbevf/ethtool.c b/drivers/net/ethernet/intel/ixgbevf/ethtool.c +index ccfa6b91aac6..296915414a7c 100644 +--- a/drivers/net/ethernet/intel/ixgbevf/ethtool.c ++++ b/drivers/net/ethernet/intel/ixgbevf/ethtool.c +@@ -458,10 +458,10 @@ static void ixgbevf_get_ethtool_stats(struct net_device *netdev, + } + + do { +- start = u64_stats_fetch_begin_irq(&ring->syncp); ++ start = u64_stats_fetch_begin(&ring->syncp); + data[i] = ring->stats.packets; + data[i + 1] = ring->stats.bytes; +- } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); ++ } while (u64_stats_fetch_retry(&ring->syncp, start)); + i += 2; + } + +@@ -475,10 +475,10 @@ static void ixgbevf_get_ethtool_stats(struct net_device *netdev, + } + + do { +- start = u64_stats_fetch_begin_irq(&ring->syncp); ++ start = u64_stats_fetch_begin(&ring->syncp); + data[i] = ring->stats.packets; + data[i + 1] = ring->stats.bytes; +- } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); ++ } while (u64_stats_fetch_retry(&ring->syncp, start)); + i += 2; + } + +@@ -492,10 +492,10 @@ static void ixgbevf_get_ethtool_stats(struct net_device *netdev, + } + + do { +- start = u64_stats_fetch_begin_irq(&ring->syncp); ++ start = u64_stats_fetch_begin(&ring->syncp); + data[i] = ring->stats.packets; + data[i + 1] = ring->stats.bytes; +- } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); ++ } while (u64_stats_fetch_retry(&ring->syncp, start)); + i += 2; + } + } +diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +index e338fa572793..a9479ddf68eb 100644 +--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c ++++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +@@ -4350,10 +4350,10 @@ static void ixgbevf_get_tx_ring_stats(struct rtnl_link_stats64 *stats, + + if (ring) { + do { +- start = u64_stats_fetch_begin_irq(&ring->syncp); ++ start = u64_stats_fetch_begin(&ring->syncp); + bytes = ring->stats.bytes; + packets = ring->stats.packets; +- } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); ++ } while (u64_stats_fetch_retry(&ring->syncp, start)); + stats->tx_bytes += bytes; + stats->tx_packets += packets; + } +@@ -4376,10 +4376,10 @@ static void ixgbevf_get_stats(struct net_device *netdev, + for (i = 0; i < adapter->num_rx_queues; i++) { + ring = adapter->rx_ring[i]; + do { +- start = u64_stats_fetch_begin_irq(&ring->syncp); ++ start = u64_stats_fetch_begin(&ring->syncp); + bytes = ring->stats.bytes; + packets = ring->stats.packets; +- } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); ++ } while (u64_stats_fetch_retry(&ring->syncp, start)); + stats->rx_bytes += bytes; + stats->rx_packets += packets; + } +diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c +index eb4ebaa1c92f..327f03f80836 100644 +--- a/drivers/net/ethernet/marvell/mvneta.c ++++ b/drivers/net/ethernet/marvell/mvneta.c +@@ -813,14 +813,14 @@ mvneta_get_stats64(struct net_device *dev, + + cpu_stats = per_cpu_ptr(pp->stats, cpu); + do { +- start = u64_stats_fetch_begin_irq(&cpu_stats->syncp); ++ start = u64_stats_fetch_begin(&cpu_stats->syncp); + rx_packets = cpu_stats->es.ps.rx_packets; + rx_bytes = cpu_stats->es.ps.rx_bytes; + rx_dropped = cpu_stats->rx_dropped; + rx_errors = cpu_stats->rx_errors; + tx_packets = cpu_stats->es.ps.tx_packets; + tx_bytes = cpu_stats->es.ps.tx_bytes; +- } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start)); ++ } while (u64_stats_fetch_retry(&cpu_stats->syncp, start)); + + stats->rx_packets += rx_packets; + stats->rx_bytes += rx_bytes; +@@ -4765,7 +4765,7 @@ mvneta_ethtool_update_pcpu_stats(struct mvneta_port *pp, + + stats = per_cpu_ptr(pp->stats, cpu); + do { +- start = u64_stats_fetch_begin_irq(&stats->syncp); ++ start = u64_stats_fetch_begin(&stats->syncp); + skb_alloc_error = stats->es.skb_alloc_error; + refill_error = stats->es.refill_error; + xdp_redirect = stats->es.ps.xdp_redirect; +@@ -4775,7 +4775,7 @@ mvneta_ethtool_update_pcpu_stats(struct mvneta_port *pp, + xdp_xmit_err = stats->es.ps.xdp_xmit_err; + xdp_tx = stats->es.ps.xdp_tx; + xdp_tx_err = stats->es.ps.xdp_tx_err; +- } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); ++ } while (u64_stats_fetch_retry(&stats->syncp, start)); + + es->skb_alloc_error += skb_alloc_error; + es->refill_error += refill_error; +diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +index f936640cca4e..8c7470ab4985 100644 +--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c ++++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +@@ -2008,7 +2008,7 @@ mvpp2_get_xdp_stats(struct mvpp2_port *port, struct mvpp2_pcpu_stats *xdp_stats) + + cpu_stats = per_cpu_ptr(port->stats, cpu); + do { +- start = u64_stats_fetch_begin_irq(&cpu_stats->syncp); ++ start = u64_stats_fetch_begin(&cpu_stats->syncp); + xdp_redirect = cpu_stats->xdp_redirect; + xdp_pass = cpu_stats->xdp_pass; + xdp_drop = cpu_stats->xdp_drop; +@@ -2016,7 +2016,7 @@ mvpp2_get_xdp_stats(struct mvpp2_port *port, struct mvpp2_pcpu_stats *xdp_stats) + xdp_xmit_err = cpu_stats->xdp_xmit_err; + xdp_tx = cpu_stats->xdp_tx; + xdp_tx_err = cpu_stats->xdp_tx_err; +- } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start)); ++ } while (u64_stats_fetch_retry(&cpu_stats->syncp, start)); + + xdp_stats->xdp_redirect += xdp_redirect; + xdp_stats->xdp_pass += xdp_pass; +@@ -5115,12 +5115,12 @@ mvpp2_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) + + cpu_stats = per_cpu_ptr(port->stats, cpu); + do { +- start = u64_stats_fetch_begin_irq(&cpu_stats->syncp); ++ start = u64_stats_fetch_begin(&cpu_stats->syncp); + rx_packets = cpu_stats->rx_packets; + rx_bytes = cpu_stats->rx_bytes; + tx_packets = cpu_stats->tx_packets; + tx_bytes = cpu_stats->tx_bytes; +- } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start)); ++ } while (u64_stats_fetch_retry(&cpu_stats->syncp, start)); + + stats->rx_packets += rx_packets; + stats->rx_bytes += rx_bytes; +diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c +index ab33ba1c3023..ff97b140886a 100644 +--- a/drivers/net/ethernet/marvell/sky2.c ++++ b/drivers/net/ethernet/marvell/sky2.c +@@ -3894,19 +3894,19 @@ static void sky2_get_stats(struct net_device *dev, + u64 _bytes, _packets; + + do { +- start = u64_stats_fetch_begin_irq(&sky2->rx_stats.syncp); ++ start = u64_stats_fetch_begin(&sky2->rx_stats.syncp); + _bytes = sky2->rx_stats.bytes; + _packets = sky2->rx_stats.packets; +- } while (u64_stats_fetch_retry_irq(&sky2->rx_stats.syncp, start)); ++ } while (u64_stats_fetch_retry(&sky2->rx_stats.syncp, start)); + + stats->rx_packets = _packets; + stats->rx_bytes = _bytes; + + do { +- start = u64_stats_fetch_begin_irq(&sky2->tx_stats.syncp); ++ start = u64_stats_fetch_begin(&sky2->tx_stats.syncp); + _bytes = sky2->tx_stats.bytes; + _packets = sky2->tx_stats.packets; +- } while (u64_stats_fetch_retry_irq(&sky2->tx_stats.syncp, start)); ++ } while (u64_stats_fetch_retry(&sky2->tx_stats.syncp, start)); + + stats->tx_packets = _packets; + stats->tx_bytes = _bytes; +diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c +index 17e6ac4445af..f737e2b9a29e 100644 +--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c ++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c +@@ -866,7 +866,7 @@ static void mtk_get_stats64(struct net_device *dev, + } + + do { +- start = u64_stats_fetch_begin_irq(&hw_stats->syncp); ++ start = u64_stats_fetch_begin(&hw_stats->syncp); + storage->rx_packets = hw_stats->rx_packets; + storage->tx_packets = hw_stats->tx_packets; + storage->rx_bytes = hw_stats->rx_bytes; +@@ -878,7 +878,7 @@ static void mtk_get_stats64(struct net_device *dev, + storage->rx_crc_errors = hw_stats->rx_fcs_errors; + storage->rx_errors = hw_stats->rx_checksum_errors; + storage->tx_aborted_errors = hw_stats->tx_skip; +- } while (u64_stats_fetch_retry_irq(&hw_stats->syncp, start)); ++ } while (u64_stats_fetch_retry(&hw_stats->syncp, start)); + + storage->tx_errors = dev->stats.tx_errors; + storage->rx_dropped = dev->stats.rx_dropped; +@@ -3694,13 +3694,13 @@ static void mtk_get_ethtool_stats(struct net_device *dev, + + do { + data_dst = data; +- start = u64_stats_fetch_begin_irq(&hwstats->syncp); ++ start = u64_stats_fetch_begin(&hwstats->syncp); + + for (i = 0; i < ARRAY_SIZE(mtk_ethtool_stats); i++) + *data_dst++ = *(data_src + mtk_ethtool_stats[i].offset); + if (mtk_page_pool_enabled(mac->hw)) + mtk_ethtool_pp_stats(mac->hw, data_dst); +- } while (u64_stats_fetch_retry_irq(&hwstats->syncp, start)); ++ } while (u64_stats_fetch_retry(&hwstats->syncp, start)); + } + + static int mtk_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd, +diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +index 67ecdb9e708f..8345499563a4 100644 +--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c ++++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +@@ -827,12 +827,12 @@ mlxsw_sp_port_get_sw_stats64(const struct net_device *dev, + for_each_possible_cpu(i) { + p = per_cpu_ptr(mlxsw_sp_port->pcpu_stats, i); + do { +- start = u64_stats_fetch_begin_irq(&p->syncp); ++ start = u64_stats_fetch_begin(&p->syncp); + rx_packets = p->rx_packets; + rx_bytes = p->rx_bytes; + tx_packets = p->tx_packets; + tx_bytes = p->tx_bytes; +- } while (u64_stats_fetch_retry_irq(&p->syncp, start)); ++ } while (u64_stats_fetch_retry(&p->syncp, start)); + + stats->rx_packets += rx_packets; + stats->rx_bytes += rx_bytes; +diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +index 27f4786ace4f..a5ca5c4a7896 100644 +--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c ++++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +@@ -1631,21 +1631,21 @@ static void nfp_net_stat64(struct net_device *netdev, + unsigned int start; + + do { +- start = u64_stats_fetch_begin_irq(&r_vec->rx_sync); ++ start = u64_stats_fetch_begin(&r_vec->rx_sync); + data[0] = r_vec->rx_pkts; + data[1] = r_vec->rx_bytes; + data[2] = r_vec->rx_drops; +- } while (u64_stats_fetch_retry_irq(&r_vec->rx_sync, start)); ++ } while (u64_stats_fetch_retry(&r_vec->rx_sync, start)); + stats->rx_packets += data[0]; + stats->rx_bytes += data[1]; + stats->rx_dropped += data[2]; + + do { +- start = u64_stats_fetch_begin_irq(&r_vec->tx_sync); ++ start = u64_stats_fetch_begin(&r_vec->tx_sync); + data[0] = r_vec->tx_pkts; + data[1] = r_vec->tx_bytes; + data[2] = r_vec->tx_errors; +- } while (u64_stats_fetch_retry_irq(&r_vec->tx_sync, start)); ++ } while (u64_stats_fetch_retry(&r_vec->tx_sync, start)); + stats->tx_packets += data[0]; + stats->tx_bytes += data[1]; + stats->tx_errors += data[2]; +diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c +index af376b900067..cc97b3d00414 100644 +--- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c ++++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c +@@ -881,7 +881,7 @@ static u64 *nfp_vnic_get_sw_stats(struct net_device *netdev, u64 *data) + unsigned int start; + + do { +- start = u64_stats_fetch_begin_irq(&nn->r_vecs[i].rx_sync); ++ start = u64_stats_fetch_begin(&nn->r_vecs[i].rx_sync); + data[0] = nn->r_vecs[i].rx_pkts; + tmp[0] = nn->r_vecs[i].hw_csum_rx_ok; + tmp[1] = nn->r_vecs[i].hw_csum_rx_inner_ok; +@@ -889,10 +889,10 @@ static u64 *nfp_vnic_get_sw_stats(struct net_device *netdev, u64 *data) + tmp[3] = nn->r_vecs[i].hw_csum_rx_error; + tmp[4] = nn->r_vecs[i].rx_replace_buf_alloc_fail; + tmp[5] = nn->r_vecs[i].hw_tls_rx; +- } while (u64_stats_fetch_retry_irq(&nn->r_vecs[i].rx_sync, start)); ++ } while (u64_stats_fetch_retry(&nn->r_vecs[i].rx_sync, start)); + + do { +- start = u64_stats_fetch_begin_irq(&nn->r_vecs[i].tx_sync); ++ start = u64_stats_fetch_begin(&nn->r_vecs[i].tx_sync); + data[1] = nn->r_vecs[i].tx_pkts; + data[2] = nn->r_vecs[i].tx_busy; + tmp[6] = nn->r_vecs[i].hw_csum_tx; +@@ -902,7 +902,7 @@ static u64 *nfp_vnic_get_sw_stats(struct net_device *netdev, u64 *data) + tmp[10] = nn->r_vecs[i].hw_tls_tx; + tmp[11] = nn->r_vecs[i].tls_tx_fallback; + tmp[12] = nn->r_vecs[i].tls_tx_no_fallback; +- } while (u64_stats_fetch_retry_irq(&nn->r_vecs[i].tx_sync, start)); ++ } while (u64_stats_fetch_retry(&nn->r_vecs[i].tx_sync, start)); + + data += NN_RVEC_PER_Q_STATS; + +diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c +index 8b77582bdfa0..a6b6ca1fd55e 100644 +--- a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c ++++ b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c +@@ -134,13 +134,13 @@ nfp_repr_get_host_stats64(const struct net_device *netdev, + + repr_stats = per_cpu_ptr(repr->stats, i); + do { +- start = u64_stats_fetch_begin_irq(&repr_stats->syncp); ++ start = u64_stats_fetch_begin(&repr_stats->syncp); + tbytes = repr_stats->tx_bytes; + tpkts = repr_stats->tx_packets; + tdrops = repr_stats->tx_drops; + rbytes = repr_stats->rx_bytes; + rpkts = repr_stats->rx_packets; +- } while (u64_stats_fetch_retry_irq(&repr_stats->syncp, start)); ++ } while (u64_stats_fetch_retry(&repr_stats->syncp, start)); + + stats->tx_bytes += tbytes; + stats->tx_packets += tpkts; +diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c +index 486cbc8ab224..7a549b834e97 100644 +--- a/drivers/net/ethernet/nvidia/forcedeth.c ++++ b/drivers/net/ethernet/nvidia/forcedeth.c +@@ -1734,12 +1734,12 @@ static void nv_get_stats(int cpu, struct fe_priv *np, + u64 tx_packets, tx_bytes, tx_dropped; + + do { +- syncp_start = u64_stats_fetch_begin_irq(&np->swstats_rx_syncp); ++ syncp_start = u64_stats_fetch_begin(&np->swstats_rx_syncp); + rx_packets = src->stat_rx_packets; + rx_bytes = src->stat_rx_bytes; + rx_dropped = src->stat_rx_dropped; + rx_missed_errors = src->stat_rx_missed_errors; +- } while (u64_stats_fetch_retry_irq(&np->swstats_rx_syncp, syncp_start)); ++ } while (u64_stats_fetch_retry(&np->swstats_rx_syncp, syncp_start)); + + storage->rx_packets += rx_packets; + storage->rx_bytes += rx_bytes; +@@ -1747,11 +1747,11 @@ static void nv_get_stats(int cpu, struct fe_priv *np, + storage->rx_missed_errors += rx_missed_errors; + + do { +- syncp_start = u64_stats_fetch_begin_irq(&np->swstats_tx_syncp); ++ syncp_start = u64_stats_fetch_begin(&np->swstats_tx_syncp); + tx_packets = src->stat_tx_packets; + tx_bytes = src->stat_tx_bytes; + tx_dropped = src->stat_tx_dropped; +- } while (u64_stats_fetch_retry_irq(&np->swstats_tx_syncp, syncp_start)); ++ } while (u64_stats_fetch_retry(&np->swstats_tx_syncp, syncp_start)); + + storage->tx_packets += tx_packets; + storage->tx_bytes += tx_bytes; +diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c +index 1b2119b1d48a..3f5e6572d20e 100644 +--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c ++++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c +@@ -135,9 +135,9 @@ static void rmnet_get_stats64(struct net_device *dev, + pcpu_ptr = per_cpu_ptr(priv->pcpu_stats, cpu); + + do { +- start = u64_stats_fetch_begin_irq(&pcpu_ptr->syncp); ++ start = u64_stats_fetch_begin(&pcpu_ptr->syncp); + snapshot = pcpu_ptr->stats; /* struct assignment */ +- } while (u64_stats_fetch_retry_irq(&pcpu_ptr->syncp, start)); ++ } while (u64_stats_fetch_retry(&pcpu_ptr->syncp, start)); + + total_stats.rx_pkts += snapshot.rx_pkts; + total_stats.rx_bytes += snapshot.rx_bytes; +diff --git a/drivers/net/ethernet/realtek/8139too.c b/drivers/net/ethernet/realtek/8139too.c +index 469e2e229c6e..9ce0e8a64ba8 100644 +--- a/drivers/net/ethernet/realtek/8139too.c ++++ b/drivers/net/ethernet/realtek/8139too.c +@@ -2532,16 +2532,16 @@ rtl8139_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) + netdev_stats_to_stats64(stats, &dev->stats); + + do { +- start = u64_stats_fetch_begin_irq(&tp->rx_stats.syncp); ++ start = u64_stats_fetch_begin(&tp->rx_stats.syncp); + stats->rx_packets = tp->rx_stats.packets; + stats->rx_bytes = tp->rx_stats.bytes; +- } while (u64_stats_fetch_retry_irq(&tp->rx_stats.syncp, start)); ++ } while (u64_stats_fetch_retry(&tp->rx_stats.syncp, start)); + + do { +- start = u64_stats_fetch_begin_irq(&tp->tx_stats.syncp); ++ start = u64_stats_fetch_begin(&tp->tx_stats.syncp); + stats->tx_packets = tp->tx_stats.packets; + stats->tx_bytes = tp->tx_stats.bytes; +- } while (u64_stats_fetch_retry_irq(&tp->tx_stats.syncp, start)); ++ } while (u64_stats_fetch_retry(&tp->tx_stats.syncp, start)); + } + + /* Set or clear the multicast filter for this adaptor. +diff --git a/drivers/net/ethernet/socionext/sni_ave.c b/drivers/net/ethernet/socionext/sni_ave.c +index d2c6a5dfdc0e..b7e24ae92525 100644 +--- a/drivers/net/ethernet/socionext/sni_ave.c ++++ b/drivers/net/ethernet/socionext/sni_ave.c +@@ -1508,16 +1508,16 @@ static void ave_get_stats64(struct net_device *ndev, + unsigned int start; + + do { +- start = u64_stats_fetch_begin_irq(&priv->stats_rx.syncp); ++ start = u64_stats_fetch_begin(&priv->stats_rx.syncp); + stats->rx_packets = priv->stats_rx.packets; + stats->rx_bytes = priv->stats_rx.bytes; +- } while (u64_stats_fetch_retry_irq(&priv->stats_rx.syncp, start)); ++ } while (u64_stats_fetch_retry(&priv->stats_rx.syncp, start)); + + do { +- start = u64_stats_fetch_begin_irq(&priv->stats_tx.syncp); ++ start = u64_stats_fetch_begin(&priv->stats_tx.syncp); + stats->tx_packets = priv->stats_tx.packets; + stats->tx_bytes = priv->stats_tx.bytes; +- } while (u64_stats_fetch_retry_irq(&priv->stats_tx.syncp, start)); ++ } while (u64_stats_fetch_retry(&priv->stats_tx.syncp, start)); + + stats->rx_errors = priv->stats_rx.errors; + stats->tx_errors = priv->stats_tx.errors; +diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c +index 9f2553799895..1085b0642c28 100644 +--- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c ++++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c +@@ -1376,12 +1376,12 @@ static void am65_cpsw_nuss_ndo_get_stats(struct net_device *dev, + + cpu_stats = per_cpu_ptr(ndev_priv->stats, cpu); + do { +- start = u64_stats_fetch_begin_irq(&cpu_stats->syncp); ++ start = u64_stats_fetch_begin(&cpu_stats->syncp); + rx_packets = cpu_stats->rx_packets; + rx_bytes = cpu_stats->rx_bytes; + tx_packets = cpu_stats->tx_packets; + tx_bytes = cpu_stats->tx_bytes; +- } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start)); ++ } while (u64_stats_fetch_retry(&cpu_stats->syncp, start)); + + stats->rx_packets += rx_packets; + stats->rx_bytes += rx_bytes; +diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c +index 9eb9eaff4dc9..1bb596a9d8a2 100644 +--- a/drivers/net/ethernet/ti/netcp_core.c ++++ b/drivers/net/ethernet/ti/netcp_core.c +@@ -1916,16 +1916,16 @@ netcp_get_stats(struct net_device *ndev, struct rtnl_link_stats64 *stats) + unsigned int start; + + do { +- start = u64_stats_fetch_begin_irq(&p->syncp_rx); ++ start = u64_stats_fetch_begin(&p->syncp_rx); + rxpackets = p->rx_packets; + rxbytes = p->rx_bytes; +- } while (u64_stats_fetch_retry_irq(&p->syncp_rx, start)); ++ } while (u64_stats_fetch_retry(&p->syncp_rx, start)); + + do { +- start = u64_stats_fetch_begin_irq(&p->syncp_tx); ++ start = u64_stats_fetch_begin(&p->syncp_tx); + txpackets = p->tx_packets; + txbytes = p->tx_bytes; +- } while (u64_stats_fetch_retry_irq(&p->syncp_tx, start)); ++ } while (u64_stats_fetch_retry(&p->syncp_tx, start)); + + stats->rx_packets = rxpackets; + stats->rx_bytes = rxbytes; +diff --git a/drivers/net/ethernet/via/via-rhine.c b/drivers/net/ethernet/via/via-rhine.c +index 0fb15a17b547..d716e6fe26e1 100644 +--- a/drivers/net/ethernet/via/via-rhine.c ++++ b/drivers/net/ethernet/via/via-rhine.c +@@ -2217,16 +2217,16 @@ rhine_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) + netdev_stats_to_stats64(stats, &dev->stats); + + do { +- start = u64_stats_fetch_begin_irq(&rp->rx_stats.syncp); ++ start = u64_stats_fetch_begin(&rp->rx_stats.syncp); + stats->rx_packets = rp->rx_stats.packets; + stats->rx_bytes = rp->rx_stats.bytes; +- } while (u64_stats_fetch_retry_irq(&rp->rx_stats.syncp, start)); ++ } while (u64_stats_fetch_retry(&rp->rx_stats.syncp, start)); + + do { +- start = u64_stats_fetch_begin_irq(&rp->tx_stats.syncp); ++ start = u64_stats_fetch_begin(&rp->tx_stats.syncp); + stats->tx_packets = rp->tx_stats.packets; + stats->tx_bytes = rp->tx_stats.bytes; +- } while (u64_stats_fetch_retry_irq(&rp->tx_stats.syncp, start)); ++ } while (u64_stats_fetch_retry(&rp->tx_stats.syncp, start)); + } + + static void rhine_set_rx_mode(struct net_device *dev) +diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +index 5ea9dc251dd9..c678876a7826 100644 +--- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c ++++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +@@ -1305,16 +1305,16 @@ axienet_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) + netdev_stats_to_stats64(stats, &dev->stats); + + do { +- start = u64_stats_fetch_begin_irq(&lp->rx_stat_sync); ++ start = u64_stats_fetch_begin(&lp->rx_stat_sync); + stats->rx_packets = u64_stats_read(&lp->rx_packets); + stats->rx_bytes = u64_stats_read(&lp->rx_bytes); +- } while (u64_stats_fetch_retry_irq(&lp->rx_stat_sync, start)); ++ } while (u64_stats_fetch_retry(&lp->rx_stat_sync, start)); + + do { +- start = u64_stats_fetch_begin_irq(&lp->tx_stat_sync); ++ start = u64_stats_fetch_begin(&lp->tx_stat_sync); + stats->tx_packets = u64_stats_read(&lp->tx_packets); + stats->tx_bytes = u64_stats_read(&lp->tx_bytes); +- } while (u64_stats_fetch_retry_irq(&lp->tx_stat_sync, start)); ++ } while (u64_stats_fetch_retry(&lp->tx_stat_sync, start)); + } + + static const struct net_device_ops axienet_netdev_ops = { +diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c +index 0285894c892a..cf87d7ed3779 100644 +--- a/drivers/net/hyperv/netvsc_drv.c ++++ b/drivers/net/hyperv/netvsc_drv.c +@@ -1264,12 +1264,12 @@ static void netvsc_get_vf_stats(struct net_device *net, + unsigned int start; + + do { +- start = u64_stats_fetch_begin_irq(&stats->syncp); ++ start = u64_stats_fetch_begin(&stats->syncp); + rx_packets = stats->rx_packets; + tx_packets = stats->tx_packets; + rx_bytes = stats->rx_bytes; + tx_bytes = stats->tx_bytes; +- } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); ++ } while (u64_stats_fetch_retry(&stats->syncp, start)); + + tot->rx_packets += rx_packets; + tot->tx_packets += tx_packets; +@@ -1294,12 +1294,12 @@ static void netvsc_get_pcpu_stats(struct net_device *net, + unsigned int start; + + do { +- start = u64_stats_fetch_begin_irq(&stats->syncp); ++ start = u64_stats_fetch_begin(&stats->syncp); + this_tot->vf_rx_packets = stats->rx_packets; + this_tot->vf_tx_packets = stats->tx_packets; + this_tot->vf_rx_bytes = stats->rx_bytes; + this_tot->vf_tx_bytes = stats->tx_bytes; +- } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); ++ } while (u64_stats_fetch_retry(&stats->syncp, start)); + this_tot->rx_packets = this_tot->vf_rx_packets; + this_tot->tx_packets = this_tot->vf_tx_packets; + this_tot->rx_bytes = this_tot->vf_rx_bytes; +@@ -1318,20 +1318,20 @@ static void netvsc_get_pcpu_stats(struct net_device *net, + + tx_stats = &nvchan->tx_stats; + do { +- start = u64_stats_fetch_begin_irq(&tx_stats->syncp); ++ start = u64_stats_fetch_begin(&tx_stats->syncp); + packets = tx_stats->packets; + bytes = tx_stats->bytes; +- } while (u64_stats_fetch_retry_irq(&tx_stats->syncp, start)); ++ } while (u64_stats_fetch_retry(&tx_stats->syncp, start)); + + this_tot->tx_bytes += bytes; + this_tot->tx_packets += packets; + + rx_stats = &nvchan->rx_stats; + do { +- start = u64_stats_fetch_begin_irq(&rx_stats->syncp); ++ start = u64_stats_fetch_begin(&rx_stats->syncp); + packets = rx_stats->packets; + bytes = rx_stats->bytes; +- } while (u64_stats_fetch_retry_irq(&rx_stats->syncp, start)); ++ } while (u64_stats_fetch_retry(&rx_stats->syncp, start)); + + this_tot->rx_bytes += bytes; + this_tot->rx_packets += packets; +@@ -1370,21 +1370,21 @@ static void netvsc_get_stats64(struct net_device *net, + + tx_stats = &nvchan->tx_stats; + do { +- start = u64_stats_fetch_begin_irq(&tx_stats->syncp); ++ start = u64_stats_fetch_begin(&tx_stats->syncp); + packets = tx_stats->packets; + bytes = tx_stats->bytes; +- } while (u64_stats_fetch_retry_irq(&tx_stats->syncp, start)); ++ } while (u64_stats_fetch_retry(&tx_stats->syncp, start)); + + t->tx_bytes += bytes; + t->tx_packets += packets; + + rx_stats = &nvchan->rx_stats; + do { +- start = u64_stats_fetch_begin_irq(&rx_stats->syncp); ++ start = u64_stats_fetch_begin(&rx_stats->syncp); + packets = rx_stats->packets; + bytes = rx_stats->bytes; + multicast = rx_stats->multicast + rx_stats->broadcast; +- } while (u64_stats_fetch_retry_irq(&rx_stats->syncp, start)); ++ } while (u64_stats_fetch_retry(&rx_stats->syncp, start)); + + t->rx_bytes += bytes; + t->rx_packets += packets; +@@ -1527,24 +1527,24 @@ static void netvsc_get_ethtool_stats(struct net_device *dev, + tx_stats = &nvdev->chan_table[j].tx_stats; + + do { +- start = u64_stats_fetch_begin_irq(&tx_stats->syncp); ++ start = u64_stats_fetch_begin(&tx_stats->syncp); + packets = tx_stats->packets; + bytes = tx_stats->bytes; + xdp_xmit = tx_stats->xdp_xmit; +- } while (u64_stats_fetch_retry_irq(&tx_stats->syncp, start)); ++ } while (u64_stats_fetch_retry(&tx_stats->syncp, start)); + data[i++] = packets; + data[i++] = bytes; + data[i++] = xdp_xmit; + + rx_stats = &nvdev->chan_table[j].rx_stats; + do { +- start = u64_stats_fetch_begin_irq(&rx_stats->syncp); ++ start = u64_stats_fetch_begin(&rx_stats->syncp); + packets = rx_stats->packets; + bytes = rx_stats->bytes; + xdp_drop = rx_stats->xdp_drop; + xdp_redirect = rx_stats->xdp_redirect; + xdp_tx = rx_stats->xdp_tx; +- } while (u64_stats_fetch_retry_irq(&rx_stats->syncp, start)); ++ } while (u64_stats_fetch_retry(&rx_stats->syncp, start)); + data[i++] = packets; + data[i++] = bytes; + data[i++] = xdp_drop; +diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c +index 1c64d5347b8e..78253ad57b2e 100644 +--- a/drivers/net/ifb.c ++++ b/drivers/net/ifb.c +@@ -162,18 +162,18 @@ static void ifb_stats64(struct net_device *dev, + + for (i = 0; i < dev->num_tx_queues; i++,txp++) { + do { +- start = u64_stats_fetch_begin_irq(&txp->rx_stats.sync); ++ start = u64_stats_fetch_begin(&txp->rx_stats.sync); + packets = txp->rx_stats.packets; + bytes = txp->rx_stats.bytes; +- } while (u64_stats_fetch_retry_irq(&txp->rx_stats.sync, start)); ++ } while (u64_stats_fetch_retry(&txp->rx_stats.sync, start)); + stats->rx_packets += packets; + stats->rx_bytes += bytes; + + do { +- start = u64_stats_fetch_begin_irq(&txp->tx_stats.sync); ++ start = u64_stats_fetch_begin(&txp->tx_stats.sync); + packets = txp->tx_stats.packets; + bytes = txp->tx_stats.bytes; +- } while (u64_stats_fetch_retry_irq(&txp->tx_stats.sync, start)); ++ } while (u64_stats_fetch_retry(&txp->tx_stats.sync, start)); + stats->tx_packets += packets; + stats->tx_bytes += bytes; + } +@@ -245,12 +245,12 @@ static void ifb_fill_stats_data(u64 **data, + int j; + + do { +- start = u64_stats_fetch_begin_irq(&q_stats->sync); ++ start = u64_stats_fetch_begin(&q_stats->sync); + for (j = 0; j < IFB_Q_STATS_LEN; j++) { + offset = ifb_q_stats_desc[j].offset; + (*data)[j] = *(u64 *)(stats_base + offset); + } +- } while (u64_stats_fetch_retry_irq(&q_stats->sync, start)); ++ } while (u64_stats_fetch_retry(&q_stats->sync, start)); + + *data += IFB_Q_STATS_LEN; + } +diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c +index fbf2d5b67aaf..57c79f5f2991 100644 +--- a/drivers/net/ipvlan/ipvlan_main.c ++++ b/drivers/net/ipvlan/ipvlan_main.c +@@ -301,13 +301,13 @@ static void ipvlan_get_stats64(struct net_device *dev, + for_each_possible_cpu(idx) { + pcptr = per_cpu_ptr(ipvlan->pcpu_stats, idx); + do { +- strt= u64_stats_fetch_begin_irq(&pcptr->syncp); ++ strt = u64_stats_fetch_begin(&pcptr->syncp); + rx_pkts = u64_stats_read(&pcptr->rx_pkts); + rx_bytes = u64_stats_read(&pcptr->rx_bytes); + rx_mcast = u64_stats_read(&pcptr->rx_mcast); + tx_pkts = u64_stats_read(&pcptr->tx_pkts); + tx_bytes = u64_stats_read(&pcptr->tx_bytes); +- } while (u64_stats_fetch_retry_irq(&pcptr->syncp, ++ } while (u64_stats_fetch_retry(&pcptr->syncp, + strt)); + + s->rx_packets += rx_pkts; +diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c +index 2e9742952c4e..f6d53e63ef4e 100644 +--- a/drivers/net/loopback.c ++++ b/drivers/net/loopback.c +@@ -106,10 +106,10 @@ void dev_lstats_read(struct net_device *dev, u64 *packets, u64 *bytes) + + lb_stats = per_cpu_ptr(dev->lstats, i); + do { +- start = u64_stats_fetch_begin_irq(&lb_stats->syncp); ++ start = u64_stats_fetch_begin(&lb_stats->syncp); + tpackets = u64_stats_read(&lb_stats->packets); + tbytes = u64_stats_read(&lb_stats->bytes); +- } while (u64_stats_fetch_retry_irq(&lb_stats->syncp, start)); ++ } while (u64_stats_fetch_retry(&lb_stats->syncp, start)); + *bytes += tbytes; + *packets += tpackets; + } +diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c +index 209ee9f35275..f9f25b5f1745 100644 +--- a/drivers/net/macsec.c ++++ b/drivers/net/macsec.c +@@ -2802,9 +2802,9 @@ static void get_rx_sc_stats(struct net_device *dev, + + stats = per_cpu_ptr(rx_sc->stats, cpu); + do { +- start = u64_stats_fetch_begin_irq(&stats->syncp); ++ start = u64_stats_fetch_begin(&stats->syncp); + memcpy(&tmp, &stats->stats, sizeof(tmp)); +- } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); ++ } while (u64_stats_fetch_retry(&stats->syncp, start)); + + sum->InOctetsValidated += tmp.InOctetsValidated; + sum->InOctetsDecrypted += tmp.InOctetsDecrypted; +@@ -2883,9 +2883,9 @@ static void get_tx_sc_stats(struct net_device *dev, + + stats = per_cpu_ptr(macsec_priv(dev)->secy.tx_sc.stats, cpu); + do { +- start = u64_stats_fetch_begin_irq(&stats->syncp); ++ start = u64_stats_fetch_begin(&stats->syncp); + memcpy(&tmp, &stats->stats, sizeof(tmp)); +- } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); ++ } while (u64_stats_fetch_retry(&stats->syncp, start)); + + sum->OutPktsProtected += tmp.OutPktsProtected; + sum->OutPktsEncrypted += tmp.OutPktsEncrypted; +@@ -2939,9 +2939,9 @@ static void get_secy_stats(struct net_device *dev, struct macsec_dev_stats *sum) + + stats = per_cpu_ptr(macsec_priv(dev)->stats, cpu); + do { +- start = u64_stats_fetch_begin_irq(&stats->syncp); ++ start = u64_stats_fetch_begin(&stats->syncp); + memcpy(&tmp, &stats->stats, sizeof(tmp)); +- } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); ++ } while (u64_stats_fetch_retry(&stats->syncp, start)); + + sum->OutPktsUntagged += tmp.OutPktsUntagged; + sum->InPktsUntagged += tmp.InPktsUntagged; +diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c +index 012830d12fde..9bea3f1b0a8a 100644 +--- a/drivers/net/macvlan.c ++++ b/drivers/net/macvlan.c +@@ -948,13 +948,13 @@ static void macvlan_dev_get_stats64(struct net_device *dev, + for_each_possible_cpu(i) { + p = per_cpu_ptr(vlan->pcpu_stats, i); + do { +- start = u64_stats_fetch_begin_irq(&p->syncp); ++ start = u64_stats_fetch_begin(&p->syncp); + rx_packets = u64_stats_read(&p->rx_packets); + rx_bytes = u64_stats_read(&p->rx_bytes); + rx_multicast = u64_stats_read(&p->rx_multicast); + tx_packets = u64_stats_read(&p->tx_packets); + tx_bytes = u64_stats_read(&p->tx_bytes); +- } while (u64_stats_fetch_retry_irq(&p->syncp, start)); ++ } while (u64_stats_fetch_retry(&p->syncp, start)); + + stats->rx_packets += rx_packets; + stats->rx_bytes += rx_bytes; +diff --git a/drivers/net/mhi_net.c b/drivers/net/mhi_net.c +index 0b9d37979133..3d322ac4f6a5 100644 +--- a/drivers/net/mhi_net.c ++++ b/drivers/net/mhi_net.c +@@ -104,19 +104,19 @@ static void mhi_ndo_get_stats64(struct net_device *ndev, + unsigned int start; + + do { +- start = u64_stats_fetch_begin_irq(&mhi_netdev->stats.rx_syncp); ++ start = u64_stats_fetch_begin(&mhi_netdev->stats.rx_syncp); + stats->rx_packets = u64_stats_read(&mhi_netdev->stats.rx_packets); + stats->rx_bytes = u64_stats_read(&mhi_netdev->stats.rx_bytes); + stats->rx_errors = u64_stats_read(&mhi_netdev->stats.rx_errors); +- } while (u64_stats_fetch_retry_irq(&mhi_netdev->stats.rx_syncp, start)); ++ } while (u64_stats_fetch_retry(&mhi_netdev->stats.rx_syncp, start)); + + do { +- start = u64_stats_fetch_begin_irq(&mhi_netdev->stats.tx_syncp); ++ start = u64_stats_fetch_begin(&mhi_netdev->stats.tx_syncp); + stats->tx_packets = u64_stats_read(&mhi_netdev->stats.tx_packets); + stats->tx_bytes = u64_stats_read(&mhi_netdev->stats.tx_bytes); + stats->tx_errors = u64_stats_read(&mhi_netdev->stats.tx_errors); + stats->tx_dropped = u64_stats_read(&mhi_netdev->stats.tx_dropped); +- } while (u64_stats_fetch_retry_irq(&mhi_netdev->stats.tx_syncp, start)); ++ } while (u64_stats_fetch_retry(&mhi_netdev->stats.tx_syncp, start)); + } + + static const struct net_device_ops mhi_netdev_ops = { +diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c +index 9a1a5b203624..e470e3398abc 100644 +--- a/drivers/net/netdevsim/netdev.c ++++ b/drivers/net/netdevsim/netdev.c +@@ -67,10 +67,10 @@ nsim_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) + unsigned int start; + + do { +- start = u64_stats_fetch_begin_irq(&ns->syncp); ++ start = u64_stats_fetch_begin(&ns->syncp); + stats->tx_bytes = ns->tx_bytes; + stats->tx_packets = ns->tx_packets; +- } while (u64_stats_fetch_retry_irq(&ns->syncp, start)); ++ } while (u64_stats_fetch_retry(&ns->syncp, start)); + } + + static int +diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c +index 293eaf6b3ec9..eccf9df0c88c 100644 +--- a/drivers/net/team/team.c ++++ b/drivers/net/team/team.c +@@ -1868,13 +1868,13 @@ team_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) + for_each_possible_cpu(i) { + p = per_cpu_ptr(team->pcpu_stats, i); + do { +- start = u64_stats_fetch_begin_irq(&p->syncp); ++ start = u64_stats_fetch_begin(&p->syncp); + rx_packets = u64_stats_read(&p->rx_packets); + rx_bytes = u64_stats_read(&p->rx_bytes); + rx_multicast = u64_stats_read(&p->rx_multicast); + tx_packets = u64_stats_read(&p->tx_packets); + tx_bytes = u64_stats_read(&p->tx_bytes); +- } while (u64_stats_fetch_retry_irq(&p->syncp, start)); ++ } while (u64_stats_fetch_retry(&p->syncp, start)); + + stats->rx_packets += rx_packets; + stats->rx_bytes += rx_bytes; +diff --git a/drivers/net/team/team_mode_loadbalance.c b/drivers/net/team/team_mode_loadbalance.c +index b095a4b4957b..18d99fda997c 100644 +--- a/drivers/net/team/team_mode_loadbalance.c ++++ b/drivers/net/team/team_mode_loadbalance.c +@@ -466,9 +466,9 @@ static void __lb_one_cpu_stats_add(struct lb_stats *acc_stats, + struct lb_stats tmp; + + do { +- start = u64_stats_fetch_begin_irq(syncp); ++ start = u64_stats_fetch_begin(syncp); + tmp.tx_bytes = cpu_stats->tx_bytes; +- } while (u64_stats_fetch_retry_irq(syncp, start)); ++ } while (u64_stats_fetch_retry(syncp, start)); + acc_stats->tx_bytes += tmp.tx_bytes; + } + +diff --git a/drivers/net/veth.c b/drivers/net/veth.c +index 36c5a41f84e4..605f511a886c 100644 +--- a/drivers/net/veth.c ++++ b/drivers/net/veth.c +@@ -182,12 +182,12 @@ static void veth_get_ethtool_stats(struct net_device *dev, + size_t offset; + + do { +- start = u64_stats_fetch_begin_irq(&rq_stats->syncp); ++ start = u64_stats_fetch_begin(&rq_stats->syncp); + for (j = 0; j < VETH_RQ_STATS_LEN; j++) { + offset = veth_rq_stats_desc[j].offset; + data[idx + j] = *(u64 *)(stats_base + offset); + } +- } while (u64_stats_fetch_retry_irq(&rq_stats->syncp, start)); ++ } while (u64_stats_fetch_retry(&rq_stats->syncp, start)); + idx += VETH_RQ_STATS_LEN; + } + +@@ -203,12 +203,12 @@ static void veth_get_ethtool_stats(struct net_device *dev, + + tx_idx += (i % dev->real_num_tx_queues) * VETH_TQ_STATS_LEN; + do { +- start = u64_stats_fetch_begin_irq(&rq_stats->syncp); ++ start = u64_stats_fetch_begin(&rq_stats->syncp); + for (j = 0; j < VETH_TQ_STATS_LEN; j++) { + offset = veth_tq_stats_desc[j].offset; + data[tx_idx + j] += *(u64 *)(base + offset); + } +- } while (u64_stats_fetch_retry_irq(&rq_stats->syncp, start)); ++ } while (u64_stats_fetch_retry(&rq_stats->syncp, start)); + } + } + +@@ -381,13 +381,13 @@ static void veth_stats_rx(struct veth_stats *result, struct net_device *dev) + unsigned int start; + + do { +- start = u64_stats_fetch_begin_irq(&stats->syncp); ++ start = u64_stats_fetch_begin(&stats->syncp); + peer_tq_xdp_xmit_err = stats->vs.peer_tq_xdp_xmit_err; + xdp_tx_err = stats->vs.xdp_tx_err; + packets = stats->vs.xdp_packets; + bytes = stats->vs.xdp_bytes; + drops = stats->vs.rx_drops; +- } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); ++ } while (u64_stats_fetch_retry(&stats->syncp, start)); + result->peer_tq_xdp_xmit_err += peer_tq_xdp_xmit_err; + result->xdp_tx_err += xdp_tx_err; + result->xdp_packets += packets; +diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c +index 21d3461fb5d1..666622ae4b9d 100644 +--- a/drivers/net/virtio_net.c ++++ b/drivers/net/virtio_net.c +@@ -2107,18 +2107,18 @@ static void virtnet_stats(struct net_device *dev, + struct send_queue *sq = &vi->sq[i]; + + do { +- start = u64_stats_fetch_begin_irq(&sq->stats.syncp); ++ start = u64_stats_fetch_begin(&sq->stats.syncp); + tpackets = sq->stats.packets; + tbytes = sq->stats.bytes; + terrors = sq->stats.tx_timeouts; +- } while (u64_stats_fetch_retry_irq(&sq->stats.syncp, start)); ++ } while (u64_stats_fetch_retry(&sq->stats.syncp, start)); + + do { +- start = u64_stats_fetch_begin_irq(&rq->stats.syncp); ++ start = u64_stats_fetch_begin(&rq->stats.syncp); + rpackets = rq->stats.packets; + rbytes = rq->stats.bytes; + rdrops = rq->stats.drops; +- } while (u64_stats_fetch_retry_irq(&rq->stats.syncp, start)); ++ } while (u64_stats_fetch_retry(&rq->stats.syncp, start)); + + tot->rx_packets += rpackets; + tot->tx_packets += tpackets; +@@ -2726,12 +2726,12 @@ static void virtnet_get_ethtool_stats(struct net_device *dev, + + stats_base = (u8 *)&rq->stats; + do { +- start = u64_stats_fetch_begin_irq(&rq->stats.syncp); ++ start = u64_stats_fetch_begin(&rq->stats.syncp); + for (j = 0; j < VIRTNET_RQ_STATS_LEN; j++) { + offset = virtnet_rq_stats_desc[j].offset; + data[idx + j] = *(u64 *)(stats_base + offset); + } +- } while (u64_stats_fetch_retry_irq(&rq->stats.syncp, start)); ++ } while (u64_stats_fetch_retry(&rq->stats.syncp, start)); + idx += VIRTNET_RQ_STATS_LEN; + } + +@@ -2740,12 +2740,12 @@ static void virtnet_get_ethtool_stats(struct net_device *dev, + + stats_base = (u8 *)&sq->stats; + do { +- start = u64_stats_fetch_begin_irq(&sq->stats.syncp); ++ start = u64_stats_fetch_begin(&sq->stats.syncp); + for (j = 0; j < VIRTNET_SQ_STATS_LEN; j++) { + offset = virtnet_sq_stats_desc[j].offset; + data[idx + j] = *(u64 *)(stats_base + offset); + } +- } while (u64_stats_fetch_retry_irq(&sq->stats.syncp, start)); ++ } while (u64_stats_fetch_retry(&sq->stats.syncp, start)); + idx += VIRTNET_SQ_STATS_LEN; + } + } +diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c +index 208df4d41939..6043e63b42f9 100644 +--- a/drivers/net/vrf.c ++++ b/drivers/net/vrf.c +@@ -159,13 +159,13 @@ static void vrf_get_stats64(struct net_device *dev, + + dstats = per_cpu_ptr(dev->dstats, i); + do { +- start = u64_stats_fetch_begin_irq(&dstats->syncp); ++ start = u64_stats_fetch_begin(&dstats->syncp); + tbytes = dstats->tx_bytes; + tpkts = dstats->tx_pkts; + tdrops = dstats->tx_drps; + rbytes = dstats->rx_bytes; + rpkts = dstats->rx_pkts; +- } while (u64_stats_fetch_retry_irq(&dstats->syncp, start)); ++ } while (u64_stats_fetch_retry(&dstats->syncp, start)); + stats->tx_bytes += tbytes; + stats->tx_packets += tpkts; + stats->tx_dropped += tdrops; +diff --git a/drivers/net/vxlan/vxlan_vnifilter.c b/drivers/net/vxlan/vxlan_vnifilter.c +index c5cf55030158..c3ff30ab782e 100644 +--- a/drivers/net/vxlan/vxlan_vnifilter.c ++++ b/drivers/net/vxlan/vxlan_vnifilter.c +@@ -129,9 +129,9 @@ static void vxlan_vnifilter_stats_get(const struct vxlan_vni_node *vninode, + + pstats = per_cpu_ptr(vninode->stats, i); + do { +- start = u64_stats_fetch_begin_irq(&pstats->syncp); ++ start = u64_stats_fetch_begin(&pstats->syncp); + memcpy(&temp, &pstats->stats, sizeof(temp)); +- } while (u64_stats_fetch_retry_irq(&pstats->syncp, start)); ++ } while (u64_stats_fetch_retry(&pstats->syncp, start)); + + dest->rx_packets += temp.rx_packets; + dest->rx_bytes += temp.rx_bytes; +diff --git a/drivers/net/wwan/mhi_wwan_mbim.c b/drivers/net/wwan/mhi_wwan_mbim.c +index ef70bb7c88ad..3f72ae943b29 100644 +--- a/drivers/net/wwan/mhi_wwan_mbim.c ++++ b/drivers/net/wwan/mhi_wwan_mbim.c +@@ -456,19 +456,19 @@ static void mhi_mbim_ndo_get_stats64(struct net_device *ndev, + unsigned int start; + + do { +- start = u64_stats_fetch_begin_irq(&link->rx_syncp); ++ start = u64_stats_fetch_begin(&link->rx_syncp); + stats->rx_packets = u64_stats_read(&link->rx_packets); + stats->rx_bytes = u64_stats_read(&link->rx_bytes); + stats->rx_errors = u64_stats_read(&link->rx_errors); +- } while (u64_stats_fetch_retry_irq(&link->rx_syncp, start)); ++ } while (u64_stats_fetch_retry(&link->rx_syncp, start)); + + do { +- start = u64_stats_fetch_begin_irq(&link->tx_syncp); ++ start = u64_stats_fetch_begin(&link->tx_syncp); + stats->tx_packets = u64_stats_read(&link->tx_packets); + stats->tx_bytes = u64_stats_read(&link->tx_bytes); + stats->tx_errors = u64_stats_read(&link->tx_errors); + stats->tx_dropped = u64_stats_read(&link->tx_dropped); +- } while (u64_stats_fetch_retry_irq(&link->tx_syncp, start)); ++ } while (u64_stats_fetch_retry(&link->tx_syncp, start)); + } + + static void mhi_mbim_ul_callback(struct mhi_device *mhi_dev, +diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c +index dc404e05970c..14aec417fa06 100644 +--- a/drivers/net/xen-netfront.c ++++ b/drivers/net/xen-netfront.c +@@ -1392,16 +1392,16 @@ static void xennet_get_stats64(struct net_device *dev, + unsigned int start; + + do { +- start = u64_stats_fetch_begin_irq(&tx_stats->syncp); ++ start = u64_stats_fetch_begin(&tx_stats->syncp); + tx_packets = tx_stats->packets; + tx_bytes = tx_stats->bytes; +- } while (u64_stats_fetch_retry_irq(&tx_stats->syncp, start)); ++ } while (u64_stats_fetch_retry(&tx_stats->syncp, start)); + + do { +- start = u64_stats_fetch_begin_irq(&rx_stats->syncp); ++ start = u64_stats_fetch_begin(&rx_stats->syncp); + rx_packets = rx_stats->packets; + rx_bytes = rx_stats->bytes; +- } while (u64_stats_fetch_retry_irq(&rx_stats->syncp, start)); ++ } while (u64_stats_fetch_retry(&rx_stats->syncp, start)); + + tot->rx_packets += rx_packets; + tot->tx_packets += tx_packets; +-- +2.43.0 + diff --git a/debian/patches-rt/0006-net-Remove-the-obsolte-u64_stats_fetch_-_irq-users-n.patch b/debian/patches-rt/0006-net-Remove-the-obsolte-u64_stats_fetch_-_irq-users-n.patch new file mode 100644 index 000000000..ff493ddaf --- /dev/null +++ b/debian/patches-rt/0006-net-Remove-the-obsolte-u64_stats_fetch_-_irq-users-n.patch @@ -0,0 +1,393 @@ +From a0c8ef7e6160582c71c0d8b1786d8e45dcc02132 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner <tglx@linutronix.de> +Date: Thu, 25 Aug 2022 16:17:37 +0200 +Subject: [PATCH 06/62] net: Remove the obsolte u64_stats_fetch_*_irq() users + (net). +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.1/older/patches-6.1.69-rt21.tar.xz + +Now that the 32bit UP oddity is gone and 32bit uses always a sequence +count, there is no need for the fetch_irq() variants anymore. + +Convert to the regular interface. + +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> +--- + net/8021q/vlan_dev.c | 4 ++-- + net/bridge/br_multicast.c | 4 ++-- + net/bridge/br_vlan.c | 4 ++-- + net/core/dev.c | 4 ++-- + net/core/drop_monitor.c | 8 ++++---- + net/core/gen_stats.c | 16 ++++++++-------- + net/devlink/leftover.c | 4 ++-- + net/dsa/slave.c | 4 ++-- + net/ipv4/af_inet.c | 4 ++-- + net/ipv6/seg6_local.c | 4 ++-- + net/mac80211/sta_info.c | 8 ++++---- + net/mpls/af_mpls.c | 4 ++-- + net/netfilter/ipvs/ip_vs_ctl.c | 4 ++-- + net/netfilter/nf_tables_api.c | 4 ++-- + net/openvswitch/datapath.c | 4 ++-- + net/openvswitch/flow_table.c | 9 ++++----- + 16 files changed, 44 insertions(+), 45 deletions(-) + +diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c +index d3e511e1eba8..0fa52bcc296b 100644 +--- a/net/8021q/vlan_dev.c ++++ b/net/8021q/vlan_dev.c +@@ -712,13 +712,13 @@ static void vlan_dev_get_stats64(struct net_device *dev, + + p = per_cpu_ptr(vlan_dev_priv(dev)->vlan_pcpu_stats, i); + do { +- start = u64_stats_fetch_begin_irq(&p->syncp); ++ start = u64_stats_fetch_begin(&p->syncp); + rxpackets = u64_stats_read(&p->rx_packets); + rxbytes = u64_stats_read(&p->rx_bytes); + rxmulticast = u64_stats_read(&p->rx_multicast); + txpackets = u64_stats_read(&p->tx_packets); + txbytes = u64_stats_read(&p->tx_bytes); +- } while (u64_stats_fetch_retry_irq(&p->syncp, start)); ++ } while (u64_stats_fetch_retry(&p->syncp, start)); + + stats->rx_packets += rxpackets; + stats->rx_bytes += rxbytes; +diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c +index db4f2641d1cd..7e2a9fb5786c 100644 +--- a/net/bridge/br_multicast.c ++++ b/net/bridge/br_multicast.c +@@ -4899,9 +4899,9 @@ void br_multicast_get_stats(const struct net_bridge *br, + unsigned int start; + + do { +- start = u64_stats_fetch_begin_irq(&cpu_stats->syncp); ++ start = u64_stats_fetch_begin(&cpu_stats->syncp); + memcpy(&temp, &cpu_stats->mstats, sizeof(temp)); +- } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start)); ++ } while (u64_stats_fetch_retry(&cpu_stats->syncp, start)); + + mcast_stats_add_dir(tdst.igmp_v1queries, temp.igmp_v1queries); + mcast_stats_add_dir(tdst.igmp_v2queries, temp.igmp_v2queries); +diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c +index 9ffd40b8270c..bc75fa1e4666 100644 +--- a/net/bridge/br_vlan.c ++++ b/net/bridge/br_vlan.c +@@ -1389,12 +1389,12 @@ void br_vlan_get_stats(const struct net_bridge_vlan *v, + + cpu_stats = per_cpu_ptr(v->stats, i); + do { +- start = u64_stats_fetch_begin_irq(&cpu_stats->syncp); ++ start = u64_stats_fetch_begin(&cpu_stats->syncp); + rxpackets = u64_stats_read(&cpu_stats->rx_packets); + rxbytes = u64_stats_read(&cpu_stats->rx_bytes); + txbytes = u64_stats_read(&cpu_stats->tx_bytes); + txpackets = u64_stats_read(&cpu_stats->tx_packets); +- } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start)); ++ } while (u64_stats_fetch_retry(&cpu_stats->syncp, start)); + + u64_stats_add(&stats->rx_packets, rxpackets); + u64_stats_add(&stats->rx_bytes, rxbytes); +diff --git a/net/core/dev.c b/net/core/dev.c +index 0d5aa820fd83..070039f9296c 100644 +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -10505,12 +10505,12 @@ void dev_fetch_sw_netstats(struct rtnl_link_stats64 *s, + + stats = per_cpu_ptr(netstats, cpu); + do { +- start = u64_stats_fetch_begin_irq(&stats->syncp); ++ start = u64_stats_fetch_begin(&stats->syncp); + rx_packets = u64_stats_read(&stats->rx_packets); + rx_bytes = u64_stats_read(&stats->rx_bytes); + tx_packets = u64_stats_read(&stats->tx_packets); + tx_bytes = u64_stats_read(&stats->tx_bytes); +- } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); ++ } while (u64_stats_fetch_retry(&stats->syncp, start)); + + s->rx_packets += rx_packets; + s->rx_bytes += rx_bytes; +diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c +index 8e0a90b45df2..4d5e8b317c47 100644 +--- a/net/core/drop_monitor.c ++++ b/net/core/drop_monitor.c +@@ -1432,9 +1432,9 @@ static void net_dm_stats_read(struct net_dm_stats *stats) + u64 dropped; + + do { +- start = u64_stats_fetch_begin_irq(&cpu_stats->syncp); ++ start = u64_stats_fetch_begin(&cpu_stats->syncp); + dropped = u64_stats_read(&cpu_stats->dropped); +- } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start)); ++ } while (u64_stats_fetch_retry(&cpu_stats->syncp, start)); + + u64_stats_add(&stats->dropped, dropped); + } +@@ -1476,9 +1476,9 @@ static void net_dm_hw_stats_read(struct net_dm_stats *stats) + u64 dropped; + + do { +- start = u64_stats_fetch_begin_irq(&cpu_stats->syncp); ++ start = u64_stats_fetch_begin(&cpu_stats->syncp); + dropped = u64_stats_read(&cpu_stats->dropped); +- } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start)); ++ } while (u64_stats_fetch_retry(&cpu_stats->syncp, start)); + + u64_stats_add(&stats->dropped, dropped); + } +diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c +index c8d137ef5980..b71ccaec0991 100644 +--- a/net/core/gen_stats.c ++++ b/net/core/gen_stats.c +@@ -135,10 +135,10 @@ static void gnet_stats_add_basic_cpu(struct gnet_stats_basic_sync *bstats, + u64 bytes, packets; + + do { +- start = u64_stats_fetch_begin_irq(&bcpu->syncp); ++ start = u64_stats_fetch_begin(&bcpu->syncp); + bytes = u64_stats_read(&bcpu->bytes); + packets = u64_stats_read(&bcpu->packets); +- } while (u64_stats_fetch_retry_irq(&bcpu->syncp, start)); ++ } while (u64_stats_fetch_retry(&bcpu->syncp, start)); + + t_bytes += bytes; + t_packets += packets; +@@ -162,10 +162,10 @@ void gnet_stats_add_basic(struct gnet_stats_basic_sync *bstats, + } + do { + if (running) +- start = u64_stats_fetch_begin_irq(&b->syncp); ++ start = u64_stats_fetch_begin(&b->syncp); + bytes = u64_stats_read(&b->bytes); + packets = u64_stats_read(&b->packets); +- } while (running && u64_stats_fetch_retry_irq(&b->syncp, start)); ++ } while (running && u64_stats_fetch_retry(&b->syncp, start)); + + _bstats_update(bstats, bytes, packets); + } +@@ -187,10 +187,10 @@ static void gnet_stats_read_basic(u64 *ret_bytes, u64 *ret_packets, + u64 bytes, packets; + + do { +- start = u64_stats_fetch_begin_irq(&bcpu->syncp); ++ start = u64_stats_fetch_begin(&bcpu->syncp); + bytes = u64_stats_read(&bcpu->bytes); + packets = u64_stats_read(&bcpu->packets); +- } while (u64_stats_fetch_retry_irq(&bcpu->syncp, start)); ++ } while (u64_stats_fetch_retry(&bcpu->syncp, start)); + + t_bytes += bytes; + t_packets += packets; +@@ -201,10 +201,10 @@ static void gnet_stats_read_basic(u64 *ret_bytes, u64 *ret_packets, + } + do { + if (running) +- start = u64_stats_fetch_begin_irq(&b->syncp); ++ start = u64_stats_fetch_begin(&b->syncp); + *ret_bytes = u64_stats_read(&b->bytes); + *ret_packets = u64_stats_read(&b->packets); +- } while (running && u64_stats_fetch_retry_irq(&b->syncp, start)); ++ } while (running && u64_stats_fetch_retry(&b->syncp, start)); + } + + static int +diff --git a/net/devlink/leftover.c b/net/devlink/leftover.c +index 032c7af065cd..94e8cc3de330 100644 +--- a/net/devlink/leftover.c ++++ b/net/devlink/leftover.c +@@ -8307,10 +8307,10 @@ static void devlink_trap_stats_read(struct devlink_stats __percpu *trap_stats, + + cpu_stats = per_cpu_ptr(trap_stats, i); + do { +- start = u64_stats_fetch_begin_irq(&cpu_stats->syncp); ++ start = u64_stats_fetch_begin(&cpu_stats->syncp); + rx_packets = u64_stats_read(&cpu_stats->rx_packets); + rx_bytes = u64_stats_read(&cpu_stats->rx_bytes); +- } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start)); ++ } while (u64_stats_fetch_retry(&cpu_stats->syncp, start)); + + u64_stats_add(&stats->rx_packets, rx_packets); + u64_stats_add(&stats->rx_bytes, rx_bytes); +diff --git a/net/dsa/slave.c b/net/dsa/slave.c +index 5fe075bf479e..28ee63ec1d1d 100644 +--- a/net/dsa/slave.c ++++ b/net/dsa/slave.c +@@ -976,12 +976,12 @@ static void dsa_slave_get_ethtool_stats(struct net_device *dev, + + s = per_cpu_ptr(dev->tstats, i); + do { +- start = u64_stats_fetch_begin_irq(&s->syncp); ++ start = u64_stats_fetch_begin(&s->syncp); + tx_packets = u64_stats_read(&s->tx_packets); + tx_bytes = u64_stats_read(&s->tx_bytes); + rx_packets = u64_stats_read(&s->rx_packets); + rx_bytes = u64_stats_read(&s->rx_bytes); +- } while (u64_stats_fetch_retry_irq(&s->syncp, start)); ++ } while (u64_stats_fetch_retry(&s->syncp, start)); + data[0] += tx_packets; + data[1] += tx_bytes; + data[2] += rx_packets; +diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c +index 5d379df90c82..312c730b725f 100644 +--- a/net/ipv4/af_inet.c ++++ b/net/ipv4/af_inet.c +@@ -1708,9 +1708,9 @@ u64 snmp_get_cpu_field64(void __percpu *mib, int cpu, int offt, + bhptr = per_cpu_ptr(mib, cpu); + syncp = (struct u64_stats_sync *)(bhptr + syncp_offset); + do { +- start = u64_stats_fetch_begin_irq(syncp); ++ start = u64_stats_fetch_begin(syncp); + v = *(((u64 *)bhptr) + offt); +- } while (u64_stats_fetch_retry_irq(syncp, start)); ++ } while (u64_stats_fetch_retry(syncp, start)); + + return v; + } +diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c +index 8370726ae7bf..487f8e98deaa 100644 +--- a/net/ipv6/seg6_local.c ++++ b/net/ipv6/seg6_local.c +@@ -1644,13 +1644,13 @@ static int put_nla_counters(struct sk_buff *skb, struct seg6_local_lwt *slwt) + + pcounters = per_cpu_ptr(slwt->pcpu_counters, i); + do { +- start = u64_stats_fetch_begin_irq(&pcounters->syncp); ++ start = u64_stats_fetch_begin(&pcounters->syncp); + + packets = u64_stats_read(&pcounters->packets); + bytes = u64_stats_read(&pcounters->bytes); + errors = u64_stats_read(&pcounters->errors); + +- } while (u64_stats_fetch_retry_irq(&pcounters->syncp, start)); ++ } while (u64_stats_fetch_retry(&pcounters->syncp, start)); + + counters.packets += packets; + counters.bytes += bytes; +diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c +index 49b71453dec3..c462e20ccc8d 100644 +--- a/net/mac80211/sta_info.c ++++ b/net/mac80211/sta_info.c +@@ -2397,9 +2397,9 @@ static inline u64 sta_get_tidstats_msdu(struct ieee80211_sta_rx_stats *rxstats, + u64 value; + + do { +- start = u64_stats_fetch_begin_irq(&rxstats->syncp); ++ start = u64_stats_fetch_begin(&rxstats->syncp); + value = rxstats->msdu[tid]; +- } while (u64_stats_fetch_retry_irq(&rxstats->syncp, start)); ++ } while (u64_stats_fetch_retry(&rxstats->syncp, start)); + + return value; + } +@@ -2465,9 +2465,9 @@ static inline u64 sta_get_stats_bytes(struct ieee80211_sta_rx_stats *rxstats) + u64 value; + + do { +- start = u64_stats_fetch_begin_irq(&rxstats->syncp); ++ start = u64_stats_fetch_begin(&rxstats->syncp); + value = rxstats->bytes; +- } while (u64_stats_fetch_retry_irq(&rxstats->syncp, start)); ++ } while (u64_stats_fetch_retry(&rxstats->syncp, start)); + + return value; + } +diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c +index f1f43894efb8..dc5165d3eec4 100644 +--- a/net/mpls/af_mpls.c ++++ b/net/mpls/af_mpls.c +@@ -1079,9 +1079,9 @@ static void mpls_get_stats(struct mpls_dev *mdev, + + p = per_cpu_ptr(mdev->stats, i); + do { +- start = u64_stats_fetch_begin_irq(&p->syncp); ++ start = u64_stats_fetch_begin(&p->syncp); + local = p->stats; +- } while (u64_stats_fetch_retry_irq(&p->syncp, start)); ++ } while (u64_stats_fetch_retry(&p->syncp, start)); + + stats->rx_packets += local.rx_packets; + stats->rx_bytes += local.rx_bytes; +diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c +index 17a1b731a76b..2be696513629 100644 +--- a/net/netfilter/ipvs/ip_vs_ctl.c ++++ b/net/netfilter/ipvs/ip_vs_ctl.c +@@ -2299,13 +2299,13 @@ static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v) + u64 conns, inpkts, outpkts, inbytes, outbytes; + + do { +- start = u64_stats_fetch_begin_irq(&u->syncp); ++ start = u64_stats_fetch_begin(&u->syncp); + conns = u64_stats_read(&u->cnt.conns); + inpkts = u64_stats_read(&u->cnt.inpkts); + outpkts = u64_stats_read(&u->cnt.outpkts); + inbytes = u64_stats_read(&u->cnt.inbytes); + outbytes = u64_stats_read(&u->cnt.outbytes); +- } while (u64_stats_fetch_retry_irq(&u->syncp, start)); ++ } while (u64_stats_fetch_retry(&u->syncp, start)); + + seq_printf(seq, "%3X %8LX %8LX %8LX %16LX %16LX\n", + i, (u64)conns, (u64)inpkts, +diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c +index 05fa5141af51..ab1888991ae5 100644 +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -1692,10 +1692,10 @@ static int nft_dump_stats(struct sk_buff *skb, struct nft_stats __percpu *stats) + for_each_possible_cpu(cpu) { + cpu_stats = per_cpu_ptr(stats, cpu); + do { +- seq = u64_stats_fetch_begin_irq(&cpu_stats->syncp); ++ seq = u64_stats_fetch_begin(&cpu_stats->syncp); + pkts = cpu_stats->pkts; + bytes = cpu_stats->bytes; +- } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, seq)); ++ } while (u64_stats_fetch_retry(&cpu_stats->syncp, seq)); + total.pkts += pkts; + total.bytes += bytes; + } +diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c +index 3c7b24535409..0953f531f984 100644 +--- a/net/openvswitch/datapath.c ++++ b/net/openvswitch/datapath.c +@@ -716,9 +716,9 @@ static void get_dp_stats(const struct datapath *dp, struct ovs_dp_stats *stats, + percpu_stats = per_cpu_ptr(dp->stats_percpu, i); + + do { +- start = u64_stats_fetch_begin_irq(&percpu_stats->syncp); ++ start = u64_stats_fetch_begin(&percpu_stats->syncp); + local_stats = *percpu_stats; +- } while (u64_stats_fetch_retry_irq(&percpu_stats->syncp, start)); ++ } while (u64_stats_fetch_retry(&percpu_stats->syncp, start)); + + stats->n_hit += local_stats.n_hit; + stats->n_missed += local_stats.n_missed; +diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c +index d4a2db0b2299..0a0e4c283f02 100644 +--- a/net/openvswitch/flow_table.c ++++ b/net/openvswitch/flow_table.c +@@ -205,9 +205,9 @@ static void tbl_mask_array_reset_counters(struct mask_array *ma) + + stats = per_cpu_ptr(ma->masks_usage_stats, cpu); + do { +- start = u64_stats_fetch_begin_irq(&stats->syncp); ++ start = u64_stats_fetch_begin(&stats->syncp); + counter = stats->usage_cntrs[i]; +- } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); ++ } while (u64_stats_fetch_retry(&stats->syncp, start)); + + ma->masks_usage_zero_cntr[i] += counter; + } +@@ -1136,10 +1136,9 @@ void ovs_flow_masks_rebalance(struct flow_table *table) + + stats = per_cpu_ptr(ma->masks_usage_stats, cpu); + do { +- start = u64_stats_fetch_begin_irq(&stats->syncp); ++ start = u64_stats_fetch_begin(&stats->syncp); + counter = stats->usage_cntrs[i]; +- } while (u64_stats_fetch_retry_irq(&stats->syncp, +- start)); ++ } while (u64_stats_fetch_retry(&stats->syncp, start)); + + masks_and_count[i].counter += counter; + } +-- +2.43.0 + diff --git a/debian/patches-rt/0007-bpf-Remove-the-obsolte-u64_stats_fetch_-_irq-users.patch b/debian/patches-rt/0007-bpf-Remove-the-obsolte-u64_stats_fetch_-_irq-users.patch new file mode 100644 index 000000000..6b725886f --- /dev/null +++ b/debian/patches-rt/0007-bpf-Remove-the-obsolte-u64_stats_fetch_-_irq-users.patch @@ -0,0 +1,51 @@ +From b8cff7d0320e3b39f098d9562373e1c16c54c46c Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner <tglx@linutronix.de> +Date: Thu, 25 Aug 2022 16:17:57 +0200 +Subject: [PATCH 07/62] bpf: Remove the obsolte u64_stats_fetch_*_irq() users. +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.1/older/patches-6.1.69-rt21.tar.xz + +Now that the 32bit UP oddity is gone and 32bit uses always a sequence +count, there is no need for the fetch_irq() variants anymore. + +Convert to the regular interface. + +Cc: Alexei Starovoitov <ast@kernel.org> +Cc: Andrii Nakryiko <andrii@kernel.org> +Cc: Daniel Borkmann <daniel@iogearbox.net> +Cc: Hao Luo <haoluo@google.com> +Cc: Jiri Olsa <jolsa@kernel.org> +Cc: John Fastabend <john.fastabend@gmail.com> +Cc: KP Singh <kpsingh@kernel.org> +Cc: Martin KaFai Lau <martin.lau@linux.dev> +Cc: Song Liu <song@kernel.org> +Cc: Stanislav Fomichev <sdf@google.com> +Cc: Yonghong Song <yhs@fb.com> +Cc: bpf@vger.kernel.org +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> +--- + kernel/bpf/syscall.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c +index 0c8b7733573e..c0915e2424f1 100644 +--- a/kernel/bpf/syscall.c ++++ b/kernel/bpf/syscall.c +@@ -2115,11 +2115,11 @@ static void bpf_prog_get_stats(const struct bpf_prog *prog, + + st = per_cpu_ptr(prog->stats, cpu); + do { +- start = u64_stats_fetch_begin_irq(&st->syncp); ++ start = u64_stats_fetch_begin(&st->syncp); + tnsecs = u64_stats_read(&st->nsecs); + tcnt = u64_stats_read(&st->cnt); + tmisses = u64_stats_read(&st->misses); +- } while (u64_stats_fetch_retry_irq(&st->syncp, start)); ++ } while (u64_stats_fetch_retry(&st->syncp, start)); + nsecs += tnsecs; + cnt += tcnt; + misses += tmisses; +-- +2.43.0 + diff --git a/debian/patches-rt/0008-u64_stat-Remove-the-obsolete-fetch_irq-variants.patch b/debian/patches-rt/0008-u64_stat-Remove-the-obsolete-fetch_irq-variants.patch new file mode 100644 index 000000000..176dbf1e5 --- /dev/null +++ b/debian/patches-rt/0008-u64_stat-Remove-the-obsolete-fetch_irq-variants.patch @@ -0,0 +1,42 @@ +From b908a7b47d95003c498f2f575285f528148602d6 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner <tglx@linutronix.de> +Date: Thu, 25 Aug 2022 16:43:46 +0200 +Subject: [PATCH 08/62] u64_stat: Remove the obsolete fetch_irq() variants. +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.1/older/patches-6.1.69-rt21.tar.xz + +Now that the 32bit UP oddity is gone and 32bit uses always a sequence +count, there is no need for the fetch_irq() variants anymore. + +Delete the obsolete interfaces. + +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> +--- + include/linux/u64_stats_sync.h | 12 ------------ + 1 file changed, 12 deletions(-) + +diff --git a/include/linux/u64_stats_sync.h b/include/linux/u64_stats_sync.h +index 46040d66334a..ffe48e69b3f3 100644 +--- a/include/linux/u64_stats_sync.h ++++ b/include/linux/u64_stats_sync.h +@@ -213,16 +213,4 @@ static inline bool u64_stats_fetch_retry(const struct u64_stats_sync *syncp, + return __u64_stats_fetch_retry(syncp, start); + } + +-/* Obsolete interfaces */ +-static inline unsigned int u64_stats_fetch_begin_irq(const struct u64_stats_sync *syncp) +-{ +- return u64_stats_fetch_begin(syncp); +-} +- +-static inline bool u64_stats_fetch_retry_irq(const struct u64_stats_sync *syncp, +- unsigned int start) +-{ +- return u64_stats_fetch_retry(syncp, start); +-} +- + #endif /* _LINUX_U64_STATS_SYNC_H */ +-- +2.43.0 + diff --git a/debian/patches-rt/0009-net-Avoid-the-IPI-to-free-the.patch b/debian/patches-rt/0009-net-Avoid-the-IPI-to-free-the.patch new file mode 100644 index 000000000..1f6687796 --- /dev/null +++ b/debian/patches-rt/0009-net-Avoid-the-IPI-to-free-the.patch @@ -0,0 +1,125 @@ +From c910f301d71266e18f63407ec6c65d19ae90e779 Mon Sep 17 00:00:00 2001 +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Mon, 15 Aug 2022 17:29:50 +0200 +Subject: [PATCH 09/62] net: Avoid the IPI to free the +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.1/older/patches-6.1.69-rt21.tar.xz + +skb_attempt_defer_free() collects a skbs, which was allocated on a +remote CPU, on a per-CPU list. These skbs are either freed on that +remote CPU once the CPU enters NET_RX or an remote IPI function is +invoked in to raise the NET_RX softirq if a threshold of pending skb has +been exceeded. +This remote IPI can cause the wakeup of ksoftirqd on PREEMPT_RT if the +remote CPU idle was idle. This is undesired because once the ksoftirqd +is running it will acquire all pending softirqs and they will not be +executed as part of the threaded interrupt until ksoftird goes idle +again. + +To void all this, schedule the deferred clean up from a worker. + +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +--- + include/linux/netdevice.h | 4 ++++ + net/core/dev.c | 37 ++++++++++++++++++++++++++++--------- + net/core/skbuff.c | 7 ++++++- + 3 files changed, 38 insertions(+), 10 deletions(-) + +diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h +index 0373e0935990..55d698367883 100644 +--- a/include/linux/netdevice.h ++++ b/include/linux/netdevice.h +@@ -3169,7 +3169,11 @@ struct softnet_data { + int defer_count; + int defer_ipi_scheduled; + struct sk_buff *defer_list; ++#ifndef CONFIG_PREEMPT_RT + call_single_data_t defer_csd; ++#else ++ struct work_struct defer_work; ++#endif + }; + + static inline void input_queue_head_incr(struct softnet_data *sd) +diff --git a/net/core/dev.c b/net/core/dev.c +index 070039f9296c..a3caa23be3cf 100644 +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -4618,15 +4618,6 @@ static void rps_trigger_softirq(void *data) + + #endif /* CONFIG_RPS */ + +-/* Called from hardirq (IPI) context */ +-static void trigger_rx_softirq(void *data) +-{ +- struct softnet_data *sd = data; +- +- __raise_softirq_irqoff(NET_RX_SOFTIRQ); +- smp_store_release(&sd->defer_ipi_scheduled, 0); +-} +- + /* + * Check if this softnet_data structure is another cpu one + * If yes, queue it to our IPI list and return 1 +@@ -6684,6 +6675,30 @@ static void skb_defer_free_flush(struct softnet_data *sd) + } + } + ++#ifndef CONFIG_PREEMPT_RT ++/* Called from hardirq (IPI) context */ ++static void trigger_rx_softirq(void *data) ++{ ++ struct softnet_data *sd = data; ++ ++ __raise_softirq_irqoff(NET_RX_SOFTIRQ); ++ smp_store_release(&sd->defer_ipi_scheduled, 0); ++} ++ ++#else ++ ++static void trigger_rx_softirq(struct work_struct *defer_work) ++{ ++ struct softnet_data *sd; ++ ++ sd = container_of(defer_work, struct softnet_data, defer_work); ++ smp_store_release(&sd->defer_ipi_scheduled, 0); ++ local_bh_disable(); ++ skb_defer_free_flush(sd); ++ local_bh_enable(); ++} ++#endif ++ + static __latent_entropy void net_rx_action(struct softirq_action *h) + { + struct softnet_data *sd = this_cpu_ptr(&softnet_data); +@@ -11435,7 +11450,11 @@ static int __init net_dev_init(void) + INIT_CSD(&sd->csd, rps_trigger_softirq, sd); + sd->cpu = i; + #endif ++#ifndef CONFIG_PREEMPT_RT + INIT_CSD(&sd->defer_csd, trigger_rx_softirq, sd); ++#else ++ INIT_WORK(&sd->defer_work, trigger_rx_softirq); ++#endif + spin_lock_init(&sd->defer_lock); + + init_gro_hash(&sd->backlog); +diff --git a/net/core/skbuff.c b/net/core/skbuff.c +index 73b1e0e53534..a457a3445469 100644 +--- a/net/core/skbuff.c ++++ b/net/core/skbuff.c +@@ -6680,6 +6680,11 @@ nodefer: __kfree_skb(skb); + /* Make sure to trigger NET_RX_SOFTIRQ on the remote CPU + * if we are unlucky enough (this seems very unlikely). + */ +- if (unlikely(kick) && !cmpxchg(&sd->defer_ipi_scheduled, 0, 1)) ++ if (unlikely(kick) && !cmpxchg(&sd->defer_ipi_scheduled, 0, 1)) { ++#ifndef CONFIG_PREEMPT_RT + smp_call_function_single_async(cpu, &sd->defer_csd); ++#else ++ schedule_work_on(cpu, &sd->defer_work); ++#endif ++ } + } +-- +2.43.0 + diff --git a/debian/patches-rt/0010-x86-Allow-to-enable-RT.patch b/debian/patches-rt/0010-x86-Allow-to-enable-RT.patch new file mode 100644 index 000000000..e7681047e --- /dev/null +++ b/debian/patches-rt/0010-x86-Allow-to-enable-RT.patch @@ -0,0 +1,29 @@ +From ce04e41eb149fcd93a71b63a605423d7f18ec8b4 Mon Sep 17 00:00:00 2001 +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Wed, 7 Aug 2019 18:15:38 +0200 +Subject: [PATCH 10/62] x86: Allow to enable RT +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.1/older/patches-6.1.69-rt21.tar.xz + +Allow to select RT. + +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +--- + arch/x86/Kconfig | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig +index 4c9bfc4be58d..f7f81e3012cc 100644 +--- a/arch/x86/Kconfig ++++ b/arch/x86/Kconfig +@@ -27,6 +27,7 @@ config X86_64 + # Options that are inherently 64-bit kernel only: + select ARCH_HAS_GIGANTIC_PAGE + select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 ++ select ARCH_SUPPORTS_RT + select ARCH_USE_CMPXCHG_LOCKREF + select HAVE_ARCH_SOFT_DIRTY + select MODULES_USE_ELF_RELA +-- +2.43.0 + diff --git a/debian/patches-rt/0011-x86-Enable-RT-also-on-32bit.patch b/debian/patches-rt/0011-x86-Enable-RT-also-on-32bit.patch new file mode 100644 index 000000000..de093669b --- /dev/null +++ b/debian/patches-rt/0011-x86-Enable-RT-also-on-32bit.patch @@ -0,0 +1,35 @@ +From 28dbe0fc30a2d3e519fac1ffe18fe7427f1f49b3 Mon Sep 17 00:00:00 2001 +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Thu, 7 Nov 2019 17:49:20 +0100 +Subject: [PATCH 11/62] x86: Enable RT also on 32bit +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.1/older/patches-6.1.69-rt21.tar.xz + +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +--- + arch/x86/Kconfig | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig +index f7f81e3012cc..c9bed9c69423 100644 +--- a/arch/x86/Kconfig ++++ b/arch/x86/Kconfig +@@ -27,7 +27,6 @@ config X86_64 + # Options that are inherently 64-bit kernel only: + select ARCH_HAS_GIGANTIC_PAGE + select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 +- select ARCH_SUPPORTS_RT + select ARCH_USE_CMPXCHG_LOCKREF + select HAVE_ARCH_SOFT_DIRTY + select MODULES_USE_ELF_RELA +@@ -114,6 +113,7 @@ config X86 + select ARCH_USES_CFI_TRAPS if X86_64 && CFI_CLANG + select ARCH_SUPPORTS_LTO_CLANG + select ARCH_SUPPORTS_LTO_CLANG_THIN ++ select ARCH_SUPPORTS_RT + select ARCH_USE_BUILTIN_BSWAP + select ARCH_USE_MEMTEST + select ARCH_USE_QUEUED_RWLOCKS +-- +2.43.0 + diff --git a/debian/patches-rt/0012-softirq-Use-a-dedicated-thread-for-timer-wakeups.patch b/debian/patches-rt/0012-softirq-Use-a-dedicated-thread-for-timer-wakeups.patch new file mode 100644 index 000000000..720116187 --- /dev/null +++ b/debian/patches-rt/0012-softirq-Use-a-dedicated-thread-for-timer-wakeups.patch @@ -0,0 +1,234 @@ +From 454343a4f08e5de772024588aec2bd396177ee89 Mon Sep 17 00:00:00 2001 +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Wed, 1 Dec 2021 17:41:09 +0100 +Subject: [PATCH 12/62] softirq: Use a dedicated thread for timer wakeups. +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.1/older/patches-6.1.69-rt21.tar.xz + +A timer/hrtimer softirq is raised in-IRQ context. With threaded +interrupts enabled or on PREEMPT_RT this leads to waking the ksoftirqd +for the processing of the softirq. +Once the ksoftirqd is marked as pending (or is running) it will collect +all raised softirqs. This in turn means that a softirq which would have +been processed at the end of the threaded interrupt, which runs at an +elevated priority, is now moved to ksoftirqd which runs at SCHED_OTHER +priority and competes with every regular task for CPU resources. +This introduces long delays on heavy loaded systems and is not desired +especially if the system is not overloaded by the softirqs. + +Split the TIMER_SOFTIRQ and HRTIMER_SOFTIRQ processing into a dedicated +timers thread and let it run at the lowest SCHED_FIFO priority. +RT tasks are are woken up from hardirq context so only timer_list timers +and hrtimers for "regular" tasks are processed here. The higher priority +ensures that wakeups are performed before scheduling SCHED_OTHER tasks. + +Using a dedicated variable to store the pending softirq bits values +ensure that the timer are not accidentally picked up by ksoftirqd and +other threaded interrupts. +It shouldn't be picked up by ksoftirqd since it runs at lower priority. +However if the timer bits are ORed while a threaded interrupt is +running, then the timer softirq would be performed at higher priority. +The new timer thread will block on the softirq lock before it starts +softirq work. This "race window" isn't closed because while timer thread +is performing the softirq it can get PI-boosted via the softirq lock by +a random force-threaded thread. +The timer thread can pick up pending softirqs from ksoftirqd but only +if the softirq load is high. It is not be desired that the picked up +softirqs are processed at SCHED_FIFO priority under high softirq load +but this can already happen by a PI-boost by a force-threaded interrupt. + +Reported-by: kernel test robot <lkp@intel.com> [ static timer_threads ] +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +--- + include/linux/interrupt.h | 16 +++++++ + kernel/softirq.c | 92 +++++++++++++++++++++++++++++++++++++-- + kernel/time/hrtimer.c | 4 +- + kernel/time/timer.c | 2 +- + 4 files changed, 108 insertions(+), 6 deletions(-) + +diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h +index 4a1dc88ddbff..0efba74a835c 100644 +--- a/include/linux/interrupt.h ++++ b/include/linux/interrupt.h +@@ -609,6 +609,22 @@ extern void __raise_softirq_irqoff(unsigned int nr); + extern void raise_softirq_irqoff(unsigned int nr); + extern void raise_softirq(unsigned int nr); + ++#ifdef CONFIG_PREEMPT_RT ++extern void raise_timer_softirq(void); ++extern void raise_hrtimer_softirq(void); ++ ++#else ++static inline void raise_timer_softirq(void) ++{ ++ raise_softirq(TIMER_SOFTIRQ); ++} ++ ++static inline void raise_hrtimer_softirq(void) ++{ ++ raise_softirq_irqoff(HRTIMER_SOFTIRQ); ++} ++#endif ++ + DECLARE_PER_CPU(struct task_struct *, ksoftirqd); + + static inline struct task_struct *this_cpu_ksoftirqd(void) +diff --git a/kernel/softirq.c b/kernel/softirq.c +index c8a6913c067d..ed6d7c41aa17 100644 +--- a/kernel/softirq.c ++++ b/kernel/softirq.c +@@ -637,6 +637,29 @@ static inline void tick_irq_exit(void) + #endif + } + ++#ifdef CONFIG_PREEMPT_RT ++static DEFINE_PER_CPU(struct task_struct *, timersd); ++static DEFINE_PER_CPU(unsigned long, pending_timer_softirq); ++ ++static unsigned int local_pending_timers(void) ++{ ++ return __this_cpu_read(pending_timer_softirq); ++} ++ ++static void wake_timersd(void) ++{ ++ struct task_struct *tsk = __this_cpu_read(timersd); ++ ++ if (tsk) ++ wake_up_process(tsk); ++} ++ ++#else ++ ++static inline void wake_timersd(void) { } ++ ++#endif ++ + static inline void __irq_exit_rcu(void) + { + #ifndef __ARCH_IRQ_EXIT_IRQS_DISABLED +@@ -646,8 +669,13 @@ static inline void __irq_exit_rcu(void) + #endif + account_hardirq_exit(current); + preempt_count_sub(HARDIRQ_OFFSET); +- if (!in_interrupt() && local_softirq_pending()) +- invoke_softirq(); ++ if (!in_interrupt()) { ++ if (local_softirq_pending()) ++ invoke_softirq(); ++ ++ if (IS_ENABLED(CONFIG_PREEMPT_RT) && local_pending_timers()) ++ wake_timersd(); ++ } + + tick_irq_exit(); + } +@@ -976,12 +1004,70 @@ static struct smp_hotplug_thread softirq_threads = { + .thread_comm = "ksoftirqd/%u", + }; + ++#ifdef CONFIG_PREEMPT_RT ++static void timersd_setup(unsigned int cpu) ++{ ++ sched_set_fifo_low(current); ++} ++ ++static int timersd_should_run(unsigned int cpu) ++{ ++ return local_pending_timers(); ++} ++ ++static void run_timersd(unsigned int cpu) ++{ ++ unsigned int timer_si; ++ ++ ksoftirqd_run_begin(); ++ ++ timer_si = local_pending_timers(); ++ __this_cpu_write(pending_timer_softirq, 0); ++ or_softirq_pending(timer_si); ++ ++ __do_softirq(); ++ ++ ksoftirqd_run_end(); ++} ++ ++static void raise_ktimers_thread(unsigned int nr) ++{ ++ trace_softirq_raise(nr); ++ __this_cpu_or(pending_timer_softirq, 1 << nr); ++} ++ ++void raise_hrtimer_softirq(void) ++{ ++ raise_ktimers_thread(HRTIMER_SOFTIRQ); ++} ++ ++void raise_timer_softirq(void) ++{ ++ unsigned long flags; ++ ++ local_irq_save(flags); ++ raise_ktimers_thread(TIMER_SOFTIRQ); ++ wake_timersd(); ++ local_irq_restore(flags); ++} ++ ++static struct smp_hotplug_thread timer_threads = { ++ .store = &timersd, ++ .setup = timersd_setup, ++ .thread_should_run = timersd_should_run, ++ .thread_fn = run_timersd, ++ .thread_comm = "ktimers/%u", ++}; ++#endif ++ + static __init int spawn_ksoftirqd(void) + { + cpuhp_setup_state_nocalls(CPUHP_SOFTIRQ_DEAD, "softirq:dead", NULL, + takeover_tasklets); + BUG_ON(smpboot_register_percpu_thread(&softirq_threads)); +- ++#ifdef CONFIG_PREEMPT_RT ++ BUG_ON(smpboot_register_percpu_thread(&timer_threads)); ++#endif + return 0; + } + early_initcall(spawn_ksoftirqd); +diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c +index 5561dabc9b22..c5d480d5da15 100644 +--- a/kernel/time/hrtimer.c ++++ b/kernel/time/hrtimer.c +@@ -1805,7 +1805,7 @@ void hrtimer_interrupt(struct clock_event_device *dev) + if (!ktime_before(now, cpu_base->softirq_expires_next)) { + cpu_base->softirq_expires_next = KTIME_MAX; + cpu_base->softirq_activated = 1; +- raise_softirq_irqoff(HRTIMER_SOFTIRQ); ++ raise_hrtimer_softirq(); + } + + __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD); +@@ -1918,7 +1918,7 @@ void hrtimer_run_queues(void) + if (!ktime_before(now, cpu_base->softirq_expires_next)) { + cpu_base->softirq_expires_next = KTIME_MAX; + cpu_base->softirq_activated = 1; +- raise_softirq_irqoff(HRTIMER_SOFTIRQ); ++ raise_hrtimer_softirq(); + } + + __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD); +diff --git a/kernel/time/timer.c b/kernel/time/timer.c +index 717fcb9fb14a..e6219da89933 100644 +--- a/kernel/time/timer.c ++++ b/kernel/time/timer.c +@@ -1822,7 +1822,7 @@ static void run_local_timers(void) + if (time_before(jiffies, base->next_expiry)) + return; + } +- raise_softirq(TIMER_SOFTIRQ); ++ raise_timer_softirq(); + } + + /* +-- +2.43.0 + diff --git a/debian/patches-rt/0013-rcutorture-Also-force-sched-priority-to-timersd-on-b.patch b/debian/patches-rt/0013-rcutorture-Also-force-sched-priority-to-timersd-on-b.patch new file mode 100644 index 000000000..9e062b97c --- /dev/null +++ b/debian/patches-rt/0013-rcutorture-Also-force-sched-priority-to-timersd-on-b.patch @@ -0,0 +1,81 @@ +From a10aa54ef224bbd46ca1777e4b9fe960360961cc Mon Sep 17 00:00:00 2001 +From: Frederic Weisbecker <frederic@kernel.org> +Date: Tue, 5 Apr 2022 03:07:51 +0200 +Subject: [PATCH 13/62] rcutorture: Also force sched priority to timersd on + boosting test. +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.1/older/patches-6.1.69-rt21.tar.xz + +ksoftirqd is statically boosted to the priority level right above the +one of rcu_torture_boost() so that timers, which torture readers rely on, +get a chance to run while rcu_torture_boost() is polling. + +However timers processing got split from ksoftirqd into their own kthread +(timersd) that isn't boosted. It has the same SCHED_FIFO low prio as +rcu_torture_boost() and therefore timers can't preempt it and may +starve. + +The issue can be triggered in practice on v5.17.1-rt17 using: + + ./kvm.sh --allcpus --configs TREE04 --duration 10m --kconfig "CONFIG_EXPERT=y CONFIG_PREEMPT_RT=y" + +Fix this with statically boosting timersd just like is done with +ksoftirqd in commit + ea6d962e80b61 ("rcutorture: Judge RCU priority boosting on grace periods, not callbacks") + +Suggested-by: Mel Gorman <mgorman@suse.de> +Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Cc: Thomas Gleixner <tglx@linutronix.de> +Signed-off-by: Frederic Weisbecker <frederic@kernel.org> +Link: https://lkml.kernel.org/r/20220405010752.1347437-1-frederic@kernel.org +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +--- + include/linux/interrupt.h | 1 + + kernel/rcu/rcutorture.c | 6 ++++++ + kernel/softirq.c | 2 +- + 3 files changed, 8 insertions(+), 1 deletion(-) + +diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h +index 0efba74a835c..f459b0f27c94 100644 +--- a/include/linux/interrupt.h ++++ b/include/linux/interrupt.h +@@ -610,6 +610,7 @@ extern void raise_softirq_irqoff(unsigned int nr); + extern void raise_softirq(unsigned int nr); + + #ifdef CONFIG_PREEMPT_RT ++DECLARE_PER_CPU(struct task_struct *, timersd); + extern void raise_timer_softirq(void); + extern void raise_hrtimer_softirq(void); + +diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c +index 503c2aa845a4..dcd8c0e44c00 100644 +--- a/kernel/rcu/rcutorture.c ++++ b/kernel/rcu/rcutorture.c +@@ -2363,6 +2363,12 @@ static int rcutorture_booster_init(unsigned int cpu) + WARN_ON_ONCE(!t); + sp.sched_priority = 2; + sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); ++#ifdef CONFIG_PREEMPT_RT ++ t = per_cpu(timersd, cpu); ++ WARN_ON_ONCE(!t); ++ sp.sched_priority = 2; ++ sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); ++#endif + } + + /* Don't allow time recalculation while creating a new task. */ +diff --git a/kernel/softirq.c b/kernel/softirq.c +index ed6d7c41aa17..1892af494cdd 100644 +--- a/kernel/softirq.c ++++ b/kernel/softirq.c +@@ -638,7 +638,7 @@ static inline void tick_irq_exit(void) + } + + #ifdef CONFIG_PREEMPT_RT +-static DEFINE_PER_CPU(struct task_struct *, timersd); ++DEFINE_PER_CPU(struct task_struct *, timersd); + static DEFINE_PER_CPU(unsigned long, pending_timer_softirq); + + static unsigned int local_pending_timers(void) +-- +2.43.0 + diff --git a/debian/patches-rt/0014-tick-Fix-timer-storm-since-introduction-of-timersd.patch b/debian/patches-rt/0014-tick-Fix-timer-storm-since-introduction-of-timersd.patch new file mode 100644 index 000000000..11eeed939 --- /dev/null +++ b/debian/patches-rt/0014-tick-Fix-timer-storm-since-introduction-of-timersd.patch @@ -0,0 +1,116 @@ +From ae3e63c4320c0c2d3865ba8ecff64a6d03948ce7 Mon Sep 17 00:00:00 2001 +From: Frederic Weisbecker <frederic@kernel.org> +Date: Tue, 5 Apr 2022 03:07:52 +0200 +Subject: [PATCH 14/62] tick: Fix timer storm since introduction of timersd +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.1/older/patches-6.1.69-rt21.tar.xz + +If timers are pending while the tick is reprogrammed on nohz_mode, the +next expiry is not armed to fire now, it is delayed one jiffy forward +instead so as not to raise an inextinguishable timer storm with such +scenario: + +1) IRQ triggers and queue a timer +2) ksoftirqd() is woken up +3) IRQ tail: timer is reprogrammed to fire now +4) IRQ exit +5) TIMER interrupt +6) goto 3) + +...all that until we finally reach ksoftirqd. + +Unfortunately we are checking the wrong softirq vector bitmask since +timersd kthread has split from ksoftirqd. Timers now have their own +vector state field that must be checked separately. As a result, the +old timer storm is back. This shows up early on boot with extremely long +initcalls: + + [ 333.004807] initcall dquot_init+0x0/0x111 returned 0 after 323822879 usecs + +and the cause is uncovered with the right trace events showing just +10 microseconds between ticks (~100 000 Hz): + +|swapper/-1 1dn.h111 60818582us : hrtimer_expire_entry: hrtimer=00000000e0ef0f6b function=tick_sched_timer now=60415486608 +|swapper/-1 1dn.h111 60818592us : hrtimer_expire_entry: hrtimer=00000000e0ef0f6b function=tick_sched_timer now=60415496082 +|swapper/-1 1dn.h111 60818601us : hrtimer_expire_entry: hrtimer=00000000e0ef0f6b function=tick_sched_timer now=60415505550 + +Fix this by checking the right timer vector state from the nohz code. + +Signed-off-by: Frederic Weisbecker <frederic@kernel.org> +Cc: Mel Gorman <mgorman@suse.de> +Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Cc: Thomas Gleixner <tglx@linutronix.de> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Link: https://lkml.kernel.org/r/20220405010752.1347437-2-frederic@kernel.org +--- + include/linux/interrupt.h | 12 ++++++++++++ + kernel/softirq.c | 7 +------ + kernel/time/tick-sched.c | 2 +- + 3 files changed, 14 insertions(+), 7 deletions(-) + +diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h +index f459b0f27c94..a5091ac97fc6 100644 +--- a/include/linux/interrupt.h ++++ b/include/linux/interrupt.h +@@ -611,9 +611,16 @@ extern void raise_softirq(unsigned int nr); + + #ifdef CONFIG_PREEMPT_RT + DECLARE_PER_CPU(struct task_struct *, timersd); ++DECLARE_PER_CPU(unsigned long, pending_timer_softirq); ++ + extern void raise_timer_softirq(void); + extern void raise_hrtimer_softirq(void); + ++static inline unsigned int local_pending_timers(void) ++{ ++ return __this_cpu_read(pending_timer_softirq); ++} ++ + #else + static inline void raise_timer_softirq(void) + { +@@ -624,6 +631,11 @@ static inline void raise_hrtimer_softirq(void) + { + raise_softirq_irqoff(HRTIMER_SOFTIRQ); + } ++ ++static inline unsigned int local_pending_timers(void) ++{ ++ return local_softirq_pending(); ++} + #endif + + DECLARE_PER_CPU(struct task_struct *, ksoftirqd); +diff --git a/kernel/softirq.c b/kernel/softirq.c +index 1892af494cdd..ab1fe34326ba 100644 +--- a/kernel/softirq.c ++++ b/kernel/softirq.c +@@ -639,12 +639,7 @@ static inline void tick_irq_exit(void) + + #ifdef CONFIG_PREEMPT_RT + DEFINE_PER_CPU(struct task_struct *, timersd); +-static DEFINE_PER_CPU(unsigned long, pending_timer_softirq); +- +-static unsigned int local_pending_timers(void) +-{ +- return __this_cpu_read(pending_timer_softirq); +-} ++DEFINE_PER_CPU(unsigned long, pending_timer_softirq); + + static void wake_timersd(void) + { +diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c +index 798e1841d286..b52e1861b913 100644 +--- a/kernel/time/tick-sched.c ++++ b/kernel/time/tick-sched.c +@@ -800,7 +800,7 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now) + + static inline bool local_timer_softirq_pending(void) + { +- return local_softirq_pending() & BIT(TIMER_SOFTIRQ); ++ return local_pending_timers() & BIT(TIMER_SOFTIRQ); + } + + static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu) +-- +2.43.0 + diff --git a/debian/patches-rt/0015-softirq-Wake-ktimers-thread-also-in-softirq.patch b/debian/patches-rt/0015-softirq-Wake-ktimers-thread-also-in-softirq.patch new file mode 100644 index 000000000..3b938739e --- /dev/null +++ b/debian/patches-rt/0015-softirq-Wake-ktimers-thread-also-in-softirq.patch @@ -0,0 +1,50 @@ +From ca98adaa69af0a5f3bb28cccb6543ee3e0c4a23f Mon Sep 17 00:00:00 2001 +From: Junxiao Chang <junxiao.chang@intel.com> +Date: Mon, 20 Feb 2023 09:12:20 +0100 +Subject: [PATCH 15/62] softirq: Wake ktimers thread also in softirq. +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.1/older/patches-6.1.69-rt21.tar.xz + +If the hrtimer is raised while a softirq is processed then it does not +wake the corresponding ktimers thread. This is due to the optimisation in the +irq-exit path which is also used to wake the ktimers thread. For the other +softirqs, this is okay because the additional softirq bits will be handled by +the currently running softirq handler. +The timer related softirq bits are added to a different variable and rely on +the ktimers thread. +As a consuequence the wake up of ktimersd is delayed until the next timer tick. + +Always wake the ktimers thread if a timer related softirq is pending. + +Reported-by: Peh, Hock Zhang <hock.zhang.peh@intel.com> +Signed-off-by: Junxiao Chang <junxiao.chang@intel.com> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +--- + kernel/softirq.c | 11 +++++------ + 1 file changed, 5 insertions(+), 6 deletions(-) + +diff --git a/kernel/softirq.c b/kernel/softirq.c +index ab1fe34326ba..82f3e68fbe22 100644 +--- a/kernel/softirq.c ++++ b/kernel/softirq.c +@@ -664,13 +664,12 @@ static inline void __irq_exit_rcu(void) + #endif + account_hardirq_exit(current); + preempt_count_sub(HARDIRQ_OFFSET); +- if (!in_interrupt()) { +- if (local_softirq_pending()) +- invoke_softirq(); ++ if (!in_interrupt() && local_softirq_pending()) ++ invoke_softirq(); + +- if (IS_ENABLED(CONFIG_PREEMPT_RT) && local_pending_timers()) +- wake_timersd(); +- } ++ if (IS_ENABLED(CONFIG_PREEMPT_RT) && local_pending_timers() && ++ !(in_nmi() | in_hardirq())) ++ wake_timersd(); + + tick_irq_exit(); + } +-- +2.43.0 + diff --git a/debian/patches-rt/0016-tpm_tis-fix-stall-after-iowrite-s.patch b/debian/patches-rt/0016-tpm_tis-fix-stall-after-iowrite-s.patch new file mode 100644 index 000000000..4004bbf47 --- /dev/null +++ b/debian/patches-rt/0016-tpm_tis-fix-stall-after-iowrite-s.patch @@ -0,0 +1,82 @@ +From 87e5c70f401b5230b5125dedc88e10f54909a37e Mon Sep 17 00:00:00 2001 +From: Haris Okanovic <haris.okanovic@ni.com> +Date: Tue, 15 Aug 2017 15:13:08 -0500 +Subject: [PATCH 16/62] tpm_tis: fix stall after iowrite*()s +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.1/older/patches-6.1.69-rt21.tar.xz + +ioread8() operations to TPM MMIO addresses can stall the cpu when +immediately following a sequence of iowrite*()'s to the same region. + +For example, cyclitest measures ~400us latency spikes when a non-RT +usermode application communicates with an SPI-based TPM chip (Intel Atom +E3940 system, PREEMPT_RT kernel). The spikes are caused by a +stalling ioread8() operation following a sequence of 30+ iowrite8()s to +the same address. I believe this happens because the write sequence is +buffered (in cpu or somewhere along the bus), and gets flushed on the +first LOAD instruction (ioread*()) that follows. + +The enclosed change appears to fix this issue: read the TPM chip's +access register (status code) after every iowrite*() operation to +amortize the cost of flushing data to chip across multiple instructions. + +Signed-off-by: Haris Okanovic <haris.okanovic@ni.com> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +--- + drivers/char/tpm/tpm_tis.c | 29 +++++++++++++++++++++++++++-- + 1 file changed, 27 insertions(+), 2 deletions(-) + +diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c +index 0d084d6652c4..5d620322bdc2 100644 +--- a/drivers/char/tpm/tpm_tis.c ++++ b/drivers/char/tpm/tpm_tis.c +@@ -50,6 +50,31 @@ static inline struct tpm_tis_tcg_phy *to_tpm_tis_tcg_phy(struct tpm_tis_data *da + return container_of(data, struct tpm_tis_tcg_phy, priv); + } + ++#ifdef CONFIG_PREEMPT_RT ++/* ++ * Flushes previous write operations to chip so that a subsequent ++ * ioread*()s won't stall a cpu. ++ */ ++static inline void tpm_tis_flush(void __iomem *iobase) ++{ ++ ioread8(iobase + TPM_ACCESS(0)); ++} ++#else ++#define tpm_tis_flush(iobase) do { } while (0) ++#endif ++ ++static inline void tpm_tis_iowrite8(u8 b, void __iomem *iobase, u32 addr) ++{ ++ iowrite8(b, iobase + addr); ++ tpm_tis_flush(iobase); ++} ++ ++static inline void tpm_tis_iowrite32(u32 b, void __iomem *iobase, u32 addr) ++{ ++ iowrite32(b, iobase + addr); ++ tpm_tis_flush(iobase); ++} ++ + static int interrupts = -1; + module_param(interrupts, int, 0444); + MODULE_PARM_DESC(interrupts, "Enable interrupts"); +@@ -202,12 +227,12 @@ static int tpm_tcg_write_bytes(struct tpm_tis_data *data, u32 addr, u16 len, + switch (io_mode) { + case TPM_TIS_PHYS_8: + while (len--) +- iowrite8(*value++, phy->iobase + addr); ++ tpm_tis_iowrite8(*value++, phy->iobase, addr); + break; + case TPM_TIS_PHYS_16: + return -EINVAL; + case TPM_TIS_PHYS_32: +- iowrite32(le32_to_cpu(*((__le32 *)value)), phy->iobase + addr); ++ tpm_tis_iowrite32(le32_to_cpu(*((__le32 *)value)), phy->iobase, addr); + break; + } + +-- +2.43.0 + diff --git a/debian/patches-rt/0017-zram-Replace-bit-spinlocks-with-spinlock_t-for-PREEM.patch b/debian/patches-rt/0017-zram-Replace-bit-spinlocks-with-spinlock_t-for-PREEM.patch new file mode 100644 index 000000000..b3f1a71bc --- /dev/null +++ b/debian/patches-rt/0017-zram-Replace-bit-spinlocks-with-spinlock_t-for-PREEM.patch @@ -0,0 +1,100 @@ +From 8397109d43ef57d5e91d738354b9c30f49cb2f95 Mon Sep 17 00:00:00 2001 +From: Mike Galbraith <umgwanakikbuti@gmail.com> +Date: Thu, 31 Mar 2016 04:08:28 +0200 +Subject: [PATCH 17/62] zram: Replace bit spinlocks with spinlock_t for + PREEMPT_RT. +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.1/older/patches-6.1.69-rt21.tar.xz + +The bit spinlock disables preemption on PREEMPT_RT. With disabled preemption it +is not allowed to acquire other sleeping locks which includes invoking +zs_free(). + +Use a spinlock_t on PREEMPT_RT for locking and set/ clear ZRAM_LOCK after the +lock has been acquired/ dropped. + +Signed-off-by: Mike Galbraith <umgwanakikbuti@gmail.com> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Link: https://lkml.kernel.org/r/YqIbMuHCPiQk+Ac2@linutronix.de +--- + drivers/block/zram/zram_drv.c | 36 +++++++++++++++++++++++++++++++++++ + drivers/block/zram/zram_drv.h | 3 +++ + 2 files changed, 39 insertions(+) + +diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c +index 966aab902d19..ee69e4443691 100644 +--- a/drivers/block/zram/zram_drv.c ++++ b/drivers/block/zram/zram_drv.c +@@ -57,6 +57,40 @@ static void zram_free_page(struct zram *zram, size_t index); + static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, + u32 index, int offset, struct bio *bio); + ++#ifdef CONFIG_PREEMPT_RT ++static void zram_meta_init_table_locks(struct zram *zram, size_t num_pages) ++{ ++ size_t index; ++ ++ for (index = 0; index < num_pages; index++) ++ spin_lock_init(&zram->table[index].lock); ++} ++ ++static int zram_slot_trylock(struct zram *zram, u32 index) ++{ ++ int ret; ++ ++ ret = spin_trylock(&zram->table[index].lock); ++ if (ret) ++ __set_bit(ZRAM_LOCK, &zram->table[index].flags); ++ return ret; ++} ++ ++static void zram_slot_lock(struct zram *zram, u32 index) ++{ ++ spin_lock(&zram->table[index].lock); ++ __set_bit(ZRAM_LOCK, &zram->table[index].flags); ++} ++ ++static void zram_slot_unlock(struct zram *zram, u32 index) ++{ ++ __clear_bit(ZRAM_LOCK, &zram->table[index].flags); ++ spin_unlock(&zram->table[index].lock); ++} ++ ++#else ++ ++static void zram_meta_init_table_locks(struct zram *zram, size_t num_pages) { } + + static int zram_slot_trylock(struct zram *zram, u32 index) + { +@@ -72,6 +106,7 @@ static void zram_slot_unlock(struct zram *zram, u32 index) + { + bit_spin_unlock(ZRAM_LOCK, &zram->table[index].flags); + } ++#endif + + static inline bool init_done(struct zram *zram) + { +@@ -1187,6 +1222,7 @@ static bool zram_meta_alloc(struct zram *zram, u64 disksize) + + if (!huge_class_size) + huge_class_size = zs_huge_class_size(zram->mem_pool); ++ zram_meta_init_table_locks(zram, num_pages); + return true; + } + +diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h +index a2bda53020fd..ae7950b26db5 100644 +--- a/drivers/block/zram/zram_drv.h ++++ b/drivers/block/zram/zram_drv.h +@@ -62,6 +62,9 @@ struct zram_table_entry { + unsigned long element; + }; + unsigned long flags; ++#ifdef CONFIG_PREEMPT_RT ++ spinlock_t lock; ++#endif + #ifdef CONFIG_ZRAM_MEMORY_TRACKING + ktime_t ac_time; + #endif +-- +2.43.0 + diff --git a/debian/patches-rt/0018-locking-lockdep-Remove-lockdep_init_map_crosslock.patch b/debian/patches-rt/0018-locking-lockdep-Remove-lockdep_init_map_crosslock.patch new file mode 100644 index 000000000..3dcd8cb9b --- /dev/null +++ b/debian/patches-rt/0018-locking-lockdep-Remove-lockdep_init_map_crosslock.patch @@ -0,0 +1,34 @@ +From dd162e2589601c792a81a3c19ef4a87510ed6ce5 Mon Sep 17 00:00:00 2001 +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Fri, 11 Mar 2022 17:44:57 +0100 +Subject: [PATCH 18/62] locking/lockdep: Remove lockdep_init_map_crosslock. +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.1/older/patches-6.1.69-rt21.tar.xz + +The cross-release bits have been removed, lockdep_init_map_crosslock() is +a leftover. + +Remove lockdep_init_map_crosslock. + +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Reviewed-by: Waiman Long <longman@redhat.com> +Link: https://lore.kernel.org/r/20220311164457.46461-1-bigeasy@linutronix.de +Link: https://lore.kernel.org/r/YqITgY+2aPITu96z@linutronix.de +--- + include/linux/lockdep.h | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h +index 1f1099dac3f0..1023f349af71 100644 +--- a/include/linux/lockdep.h ++++ b/include/linux/lockdep.h +@@ -435,7 +435,6 @@ enum xhlock_context_t { + XHLOCK_CTX_NR, + }; + +-#define lockdep_init_map_crosslock(m, n, k, s) do {} while (0) + /* + * To initialize a lockdep_map statically use this macro. + * Note that _name must not be NULL. +-- +2.43.0 + diff --git a/debian/patches-rt/0019-printk-Bring-back-the-RT-bits.patch b/debian/patches-rt/0019-printk-Bring-back-the-RT-bits.patch new file mode 100644 index 000000000..969f44377 --- /dev/null +++ b/debian/patches-rt/0019-printk-Bring-back-the-RT-bits.patch @@ -0,0 +1,1234 @@ +From dd4e52ae061806da556008ca819481befcff0fd3 Mon Sep 17 00:00:00 2001 +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Tue, 19 Jul 2022 20:08:01 +0200 +Subject: [PATCH 19/62] printk: Bring back the RT bits. +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.1/older/patches-6.1.69-rt21.tar.xz + +This is a revert of the commits: +| 07a22b61946f0 Revert "printk: add functions to prefer direct printing" +| 5831788afb17b Revert "printk: add kthread console printers" +| 2d9ef940f89e0 Revert "printk: extend console_lock for per-console locking" +| 007eeab7e9f03 Revert "printk: remove @console_locked" +| 05c96b3713aa2 Revert "printk: Block console kthreads when direct printing will be required" +| 20fb0c8272bbb Revert "printk: Wait for the global console lock when the system is going down" + +which is needed for the atomic consoles which are used on PREEMPT_RT. + +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +--- + drivers/tty/sysrq.c | 2 + + include/linux/console.h | 17 ++ + include/linux/printk.h | 15 + + kernel/hung_task.c | 11 +- + kernel/panic.c | 4 + + kernel/printk/internal.h | 2 + + kernel/printk/printk.c | 586 ++++++++++++++++++++++++++++++++---- + kernel/printk/printk_safe.c | 32 ++ + kernel/rcu/tree_stall.h | 2 + + kernel/reboot.c | 16 +- + kernel/watchdog.c | 4 + + kernel/watchdog_hld.c | 4 + + 12 files changed, 636 insertions(+), 59 deletions(-) + +diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c +index 248067197287..0db9dad8c99f 100644 +--- a/drivers/tty/sysrq.c ++++ b/drivers/tty/sysrq.c +@@ -582,6 +582,7 @@ void __handle_sysrq(int key, bool check_mask) + + rcu_sysrq_start(); + rcu_read_lock(); ++ printk_prefer_direct_enter(); + /* + * Raise the apparent loglevel to maximum so that the sysrq header + * is shown to provide the user with positive feedback. We do not +@@ -623,6 +624,7 @@ void __handle_sysrq(int key, bool check_mask) + pr_cont("\n"); + console_loglevel = orig_log_level; + } ++ printk_prefer_direct_exit(); + rcu_read_unlock(); + rcu_sysrq_end(); + +diff --git a/include/linux/console.h b/include/linux/console.h +index 8c1686e2c233..143653090c48 100644 +--- a/include/linux/console.h ++++ b/include/linux/console.h +@@ -16,6 +16,7 @@ + + #include <linux/atomic.h> + #include <linux/types.h> ++#include <linux/mutex.h> + + struct vc_data; + struct console_font_op; +@@ -153,6 +154,22 @@ struct console { + uint ospeed; + u64 seq; + unsigned long dropped; ++ struct task_struct *thread; ++ bool blocked; ++ ++ /* ++ * The per-console lock is used by printing kthreads to synchronize ++ * this console with callers of console_lock(). This is necessary in ++ * order to allow printing kthreads to run in parallel to each other, ++ * while each safely accessing the @blocked field and synchronizing ++ * against direct printing via console_lock/console_unlock. ++ * ++ * Note: For synchronizing against direct printing via ++ * console_trylock/console_unlock, see the static global ++ * variable @console_kthreads_active. ++ */ ++ struct mutex lock; ++ + void *data; + struct console *next; + }; +diff --git a/include/linux/printk.h b/include/linux/printk.h +index 8c81806c2e99..f8c4e4fa6d7d 100644 +--- a/include/linux/printk.h ++++ b/include/linux/printk.h +@@ -168,6 +168,9 @@ extern void __printk_safe_exit(void); + */ + #define printk_deferred_enter __printk_safe_enter + #define printk_deferred_exit __printk_safe_exit ++extern void printk_prefer_direct_enter(void); ++extern void printk_prefer_direct_exit(void); ++extern void try_block_console_kthreads(int timeout_ms); + + /* + * Please don't use printk_ratelimit(), because it shares ratelimiting state +@@ -219,6 +222,18 @@ static inline void printk_deferred_exit(void) + { + } + ++static inline void printk_prefer_direct_enter(void) ++{ ++} ++ ++static inline void printk_prefer_direct_exit(void) ++{ ++} ++ ++static inline void try_block_console_kthreads(int timeout_ms) ++{ ++} ++ + static inline int printk_ratelimit(void) + { + return 0; +diff --git a/kernel/hung_task.c b/kernel/hung_task.c +index c71889f3f3fc..e2d2344cb9f4 100644 +--- a/kernel/hung_task.c ++++ b/kernel/hung_task.c +@@ -127,6 +127,8 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout) + * complain: + */ + if (sysctl_hung_task_warnings) { ++ printk_prefer_direct_enter(); ++ + if (sysctl_hung_task_warnings > 0) + sysctl_hung_task_warnings--; + pr_err("INFO: task %s:%d blocked for more than %ld seconds.\n", +@@ -142,6 +144,8 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout) + + if (sysctl_hung_task_all_cpu_backtrace) + hung_task_show_all_bt = true; ++ ++ printk_prefer_direct_exit(); + } + + touch_nmi_watchdog(); +@@ -212,12 +216,17 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout) + } + unlock: + rcu_read_unlock(); +- if (hung_task_show_lock) ++ if (hung_task_show_lock) { ++ printk_prefer_direct_enter(); + debug_show_all_locks(); ++ printk_prefer_direct_exit(); ++ } + + if (hung_task_show_all_bt) { + hung_task_show_all_bt = false; ++ printk_prefer_direct_enter(); + trigger_all_cpu_backtrace(); ++ printk_prefer_direct_exit(); + } + + if (hung_task_call_panic) +diff --git a/kernel/panic.c b/kernel/panic.c +index 63e94f3bd8dc..88cd873c7c30 100644 +--- a/kernel/panic.c ++++ b/kernel/panic.c +@@ -653,6 +653,8 @@ void __warn(const char *file, int line, void *caller, unsigned taint, + { + disable_trace_on_warning(); + ++ printk_prefer_direct_enter(); ++ + if (file) + pr_warn("WARNING: CPU: %d PID: %d at %s:%d %pS\n", + raw_smp_processor_id(), current->pid, file, line, +@@ -681,6 +683,8 @@ void __warn(const char *file, int line, void *caller, unsigned taint, + + /* Just a warning, don't kill lockdep. */ + add_taint(taint, LOCKDEP_STILL_OK); ++ ++ printk_prefer_direct_exit(); + } + + #ifndef __WARN_FLAGS +diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h +index d947ca6c84f9..e7d8578860ad 100644 +--- a/kernel/printk/internal.h ++++ b/kernel/printk/internal.h +@@ -20,6 +20,8 @@ enum printk_info_flags { + LOG_CONT = 8, /* text is a fragment of a continuation line */ + }; + ++extern bool block_console_kthreads; ++ + __printf(4, 0) + int vprintk_store(int facility, int level, + const struct dev_printk_info *dev_info, +diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c +index cc53fb77f77c..e9f9b66608a0 100644 +--- a/kernel/printk/printk.c ++++ b/kernel/printk/printk.c +@@ -220,6 +220,36 @@ int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write, + } + #endif /* CONFIG_PRINTK && CONFIG_SYSCTL */ + ++/* ++ * Used to synchronize printing kthreads against direct printing via ++ * console_trylock/console_unlock. ++ * ++ * Values: ++ * -1 = console kthreads atomically blocked (via global trylock) ++ * 0 = no kthread printing, console not locked (via trylock) ++ * >0 = kthread(s) actively printing ++ * ++ * Note: For synchronizing against direct printing via ++ * console_lock/console_unlock, see the @lock variable in ++ * struct console. ++ */ ++static atomic_t console_kthreads_active = ATOMIC_INIT(0); ++ ++#define console_kthreads_atomic_tryblock() \ ++ (atomic_cmpxchg(&console_kthreads_active, 0, -1) == 0) ++#define console_kthreads_atomic_unblock() \ ++ atomic_cmpxchg(&console_kthreads_active, -1, 0) ++#define console_kthreads_atomically_blocked() \ ++ (atomic_read(&console_kthreads_active) == -1) ++ ++#define console_kthread_printing_tryenter() \ ++ atomic_inc_unless_negative(&console_kthreads_active) ++#define console_kthread_printing_exit() \ ++ atomic_dec(&console_kthreads_active) ++ ++/* Block console kthreads to avoid processing new messages. */ ++bool block_console_kthreads; ++ + /* + * Helper macros to handle lockdep when locking/unlocking console_sem. We use + * macros instead of functions so that _RET_IP_ contains useful information. +@@ -268,14 +298,49 @@ static bool panic_in_progress(void) + } + + /* +- * This is used for debugging the mess that is the VT code by +- * keeping track if we have the console semaphore held. It's +- * definitely not the perfect debug tool (we don't know if _WE_ +- * hold it and are racing, but it helps tracking those weird code +- * paths in the console code where we end up in places I want +- * locked without the console semaphore held). ++ * Tracks whether kthread printers are all blocked. A value of true implies ++ * that the console is locked via console_lock() or the console is suspended. ++ * Writing to this variable requires holding @console_sem. ++ */ ++static bool console_kthreads_blocked; ++ ++/* ++ * Block all kthread printers from a schedulable context. ++ * ++ * Requires holding @console_sem. ++ */ ++static void console_kthreads_block(void) ++{ ++ struct console *con; ++ ++ for_each_console(con) { ++ mutex_lock(&con->lock); ++ con->blocked = true; ++ mutex_unlock(&con->lock); ++ } ++ ++ console_kthreads_blocked = true; ++} ++ ++/* ++ * Unblock all kthread printers from a schedulable context. ++ * ++ * Requires holding @console_sem. + */ +-static int console_locked, console_suspended; ++static void console_kthreads_unblock(void) ++{ ++ struct console *con; ++ ++ for_each_console(con) { ++ mutex_lock(&con->lock); ++ con->blocked = false; ++ mutex_unlock(&con->lock); ++ } ++ ++ console_kthreads_blocked = false; ++} ++ ++static int console_suspended; + + /* + * Array of consoles built from command line options (console=) +@@ -358,7 +423,75 @@ static int console_msg_format = MSG_FORMAT_DEFAULT; + /* syslog_lock protects syslog_* variables and write access to clear_seq. */ + static DEFINE_MUTEX(syslog_lock); + ++/* ++ * A flag to signify if printk_activate_kthreads() has already started the ++ * kthread printers. If true, any later registered consoles must start their ++ * own kthread directly. The flag is write protected by the console_lock. ++ */ ++static bool printk_kthreads_available; ++ + #ifdef CONFIG_PRINTK ++static atomic_t printk_prefer_direct = ATOMIC_INIT(0); ++ ++/** ++ * printk_prefer_direct_enter - cause printk() calls to attempt direct ++ * printing to all enabled consoles ++ * ++ * Since it is not possible to call into the console printing code from any ++ * context, there is no guarantee that direct printing will occur. ++ * ++ * This globally effects all printk() callers. ++ * ++ * Context: Any context. ++ */ ++void printk_prefer_direct_enter(void) ++{ ++ atomic_inc(&printk_prefer_direct); ++} ++ ++/** ++ * printk_prefer_direct_exit - restore printk() behavior ++ * ++ * Context: Any context. ++ */ ++void printk_prefer_direct_exit(void) ++{ ++ WARN_ON(atomic_dec_if_positive(&printk_prefer_direct) < 0); ++} ++ ++/* ++ * Calling printk() always wakes kthread printers so that they can ++ * flush the new message to their respective consoles. Also, if direct ++ * printing is allowed, printk() tries to flush the messages directly. ++ * ++ * Direct printing is allowed in situations when the kthreads ++ * are not available or the system is in a problematic state. ++ * ++ * See the implementation about possible races. ++ */ ++static inline bool allow_direct_printing(void) ++{ ++ /* ++ * Checking kthread availability is a possible race because the ++ * kthread printers can become permanently disabled during runtime. ++ * However, doing that requires holding the console_lock, so any ++ * pending messages will be direct printed by console_unlock(). ++ */ ++ if (!printk_kthreads_available) ++ return true; ++ ++ /* ++ * Prefer direct printing when the system is in a problematic state. ++ * The context that sets this state will always see the updated value. ++ * The other contexts do not care. Anyway, direct printing is just a ++ * best effort. The direct output is only possible when console_lock ++ * is not already taken and no kthread printers are actively printing. ++ */ ++ return (system_state > SYSTEM_RUNNING || ++ oops_in_progress || ++ atomic_read(&printk_prefer_direct)); ++} ++ + DECLARE_WAIT_QUEUE_HEAD(log_wait); + /* All 3 protected by @syslog_lock. */ + /* the next printk record to read by syslog(READ) or /proc/kmsg */ +@@ -2249,10 +2382,10 @@ asmlinkage int vprintk_emit(int facility, int level, + printed_len = vprintk_store(facility, level, dev_info, fmt, args); + + /* If called from the scheduler, we can not call up(). */ +- if (!in_sched) { ++ if (!in_sched && allow_direct_printing()) { + /* + * The caller may be holding system-critical or +- * timing-sensitive locks. Disable preemption during ++ * timing-sensitive locks. Disable preemption during direct + * printing of all remaining records to all consoles so that + * this context can return as soon as possible. Hopefully + * another printk() caller will take over the printing. +@@ -2300,6 +2433,8 @@ EXPORT_SYMBOL(_printk); + static bool pr_flush(int timeout_ms, bool reset_on_progress); + static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress); + ++static void printk_start_kthread(struct console *con); ++ + #else /* CONFIG_PRINTK */ + + #define CONSOLE_LOG_MAX 0 +@@ -2334,6 +2469,8 @@ static void call_console_driver(struct console *con, const char *text, size_t le + static bool suppress_message_printing(int level) { return false; } + static bool pr_flush(int timeout_ms, bool reset_on_progress) { return true; } + static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress) { return true; } ++static void printk_start_kthread(struct console *con) { } ++static bool allow_direct_printing(void) { return true; } + + #endif /* CONFIG_PRINTK */ + +@@ -2552,6 +2689,14 @@ static int console_cpu_notify(unsigned int cpu) + /* If trylock fails, someone else is doing the printing */ + if (console_trylock()) + console_unlock(); ++ else { ++ /* ++ * If a new CPU comes online, the conditions for ++ * printer_should_wake() may have changed for some ++ * kthread printer with !CON_ANYTIME. ++ */ ++ wake_up_klogd(); ++ } + } + return 0; + } +@@ -2594,7 +2739,7 @@ void console_lock(void) + down_console_sem(); + if (console_suspended) + return; +- console_locked = 1; ++ console_kthreads_block(); + console_may_schedule = 1; + } + EXPORT_SYMBOL(console_lock); +@@ -2618,15 +2763,30 @@ int console_trylock(void) + up_console_sem(); + return 0; + } +- console_locked = 1; ++ if (!console_kthreads_atomic_tryblock()) { ++ up_console_sem(); ++ return 0; ++ } + console_may_schedule = 0; + return 1; + } + EXPORT_SYMBOL(console_trylock); + ++/* ++ * This is used to help to make sure that certain paths within the VT code are ++ * running with the console lock held. It is definitely not the perfect debug ++ * tool (it is not known if the VT code is the task holding the console lock), ++ * but it helps tracking those weird code paths in the console code such as ++ * when the console is suspended: where the console is not locked but no ++ * console printing may occur. ++ * ++ * Note: This returns true when the console is suspended but is not locked. ++ * This is intentional because the VT code must consider that situation ++ * the same as if the console was locked. ++ */ + int is_console_locked(void) + { +- return console_locked; ++ return (console_kthreads_blocked || atomic_read(&console_kthreads_active)); + } + EXPORT_SYMBOL(is_console_locked); + +@@ -2636,12 +2796,9 @@ EXPORT_SYMBOL(is_console_locked); + * + * Requires the console_lock. + */ +-static inline bool console_is_usable(struct console *con) ++static inline bool __console_is_usable(short flags) + { +- if (!(con->flags & CON_ENABLED)) +- return false; +- +- if (!con->write) ++ if (!(flags & CON_ENABLED)) + return false; + + /* +@@ -2650,15 +2807,43 @@ static inline bool console_is_usable(struct console *con) + * cope (CON_ANYTIME) don't call them until this CPU is officially up. + */ + if (!cpu_online(raw_smp_processor_id()) && +- !(con->flags & CON_ANYTIME)) ++ !(flags & CON_ANYTIME)) + return false; + + return true; + } + ++/* ++ * Check if the given console is currently capable and allowed to print ++ * records. ++ * ++ * Requires holding the console_lock. ++ */ ++static inline bool console_is_usable(struct console *con) ++{ ++ if (!con->write) ++ return false; ++ ++ return __console_is_usable(con->flags); ++} ++ + static void __console_unlock(void) + { +- console_locked = 0; ++ /* ++ * Depending on whether console_lock() or console_trylock() was used, ++ * appropriately allow the kthread printers to continue. ++ */ ++ if (console_kthreads_blocked) ++ console_kthreads_unblock(); ++ else ++ console_kthreads_atomic_unblock(); ++ ++ /* ++ * New records may have arrived while the console was locked. ++ * Wake the kthread printers to print them. ++ */ ++ wake_up_klogd(); ++ + up_console_sem(); + } + +@@ -2676,17 +2861,19 @@ static void __console_unlock(void) + * + * @handover will be set to true if a printk waiter has taken over the + * console_lock, in which case the caller is no longer holding the +- * console_lock. Otherwise it is set to false. ++ * console_lock. Otherwise it is set to false. A NULL pointer may be provided ++ * to disable allowing the console_lock to be taken over by a printk waiter. + * + * Returns false if the given console has no next record to print, otherwise + * true. + * +- * Requires the console_lock. ++ * Requires the console_lock if @handover is non-NULL. ++ * Requires con->lock otherwise. + */ +-static bool console_emit_next_record(struct console *con, char *text, char *ext_text, +- char *dropped_text, bool *handover) ++static bool __console_emit_next_record(struct console *con, char *text, char *ext_text, ++ char *dropped_text, bool *handover) + { +- static int panic_console_dropped; ++ static atomic_t panic_console_dropped = ATOMIC_INIT(0); + struct printk_info info; + struct printk_record r; + unsigned long flags; +@@ -2695,7 +2882,8 @@ static bool console_emit_next_record(struct console *con, char *text, char *ext_ + + prb_rec_init_rd(&r, &info, text, CONSOLE_LOG_MAX); + +- *handover = false; ++ if (handover) ++ *handover = false; + + if (!prb_read_valid(prb, con->seq, &r)) + return false; +@@ -2703,7 +2891,8 @@ static bool console_emit_next_record(struct console *con, char *text, char *ext_ + if (con->seq != r.info->seq) { + con->dropped += r.info->seq - con->seq; + con->seq = r.info->seq; +- if (panic_in_progress() && panic_console_dropped++ > 10) { ++ if (panic_in_progress() && ++ atomic_fetch_inc_relaxed(&panic_console_dropped) > 10) { + suppress_panic_printk = 1; + pr_warn_once("Too many dropped messages. Suppress messages on non-panic CPUs to prevent livelock.\n"); + } +@@ -2725,31 +2914,61 @@ static bool console_emit_next_record(struct console *con, char *text, char *ext_ + len = record_print_text(&r, console_msg_format & MSG_FORMAT_SYSLOG, printk_time); + } + +- /* +- * While actively printing out messages, if another printk() +- * were to occur on another CPU, it may wait for this one to +- * finish. This task can not be preempted if there is a +- * waiter waiting to take over. +- * +- * Interrupts are disabled because the hand over to a waiter +- * must not be interrupted until the hand over is completed +- * (@console_waiter is cleared). +- */ +- printk_safe_enter_irqsave(flags); +- console_lock_spinning_enable(); ++ if (handover) { ++ /* ++ * While actively printing out messages, if another printk() ++ * were to occur on another CPU, it may wait for this one to ++ * finish. This task can not be preempted if there is a ++ * waiter waiting to take over. ++ * ++ * Interrupts are disabled because the hand over to a waiter ++ * must not be interrupted until the hand over is completed ++ * (@console_waiter is cleared). ++ */ ++ printk_safe_enter_irqsave(flags); ++ console_lock_spinning_enable(); ++ ++ /* don't trace irqsoff print latency */ ++ stop_critical_timings(); ++ } + +- stop_critical_timings(); /* don't trace print latency */ + call_console_driver(con, write_text, len, dropped_text); +- start_critical_timings(); + + con->seq++; + +- *handover = console_lock_spinning_disable_and_check(); +- printk_safe_exit_irqrestore(flags); ++ if (handover) { ++ start_critical_timings(); ++ *handover = console_lock_spinning_disable_and_check(); ++ printk_safe_exit_irqrestore(flags); ++ } + skip: + return true; + } + ++/* ++ * Print a record for a given console, but allow another printk() caller to ++ * take over the console_lock and continue printing. ++ * ++ * Requires the console_lock, but depending on @handover after the call, the ++ * caller may no longer have the console_lock. ++ * ++ * See __console_emit_next_record() for argument and return details. ++ */ ++static bool console_emit_next_record_transferable(struct console *con, char *text, char *ext_text, ++ char *dropped_text, bool *handover) ++{ ++ /* ++ * Handovers are only supported if threaded printers are atomically ++ * blocked. The context taking over the console_lock may be atomic. ++ */ ++ if (!console_kthreads_atomically_blocked()) { ++ *handover = false; ++ handover = NULL; ++ } ++ ++ return __console_emit_next_record(con, text, ext_text, dropped_text, handover); ++} ++ + /* + * Print out all remaining records to all consoles. + * +@@ -2768,8 +2987,8 @@ static bool console_emit_next_record(struct console *con, char *text, char *ext_ + * were flushed to all usable consoles. A returned false informs the caller + * that everything was not flushed (either there were no usable consoles or + * another context has taken over printing or it is a panic situation and this +- * is not the panic CPU). Regardless the reason, the caller should assume it +- * is not useful to immediately try again. ++ * is not the panic CPU or direct printing is not preferred). Regardless the ++ * reason, the caller should assume it is not useful to immediately try again. + * + * Requires the console_lock. + */ +@@ -2786,6 +3005,10 @@ static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handove + *handover = false; + + do { ++ /* Let the kthread printers do the work if they can. */ ++ if (!allow_direct_printing()) ++ return false; ++ + any_progress = false; + + for_each_console(con) { +@@ -2797,13 +3020,11 @@ static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handove + + if (con->flags & CON_EXTENDED) { + /* Extended consoles do not print "dropped messages". */ +- progress = console_emit_next_record(con, &text[0], +- &ext_text[0], NULL, +- handover); ++ progress = console_emit_next_record_transferable(con, &text[0], ++ &ext_text[0], NULL, handover); + } else { +- progress = console_emit_next_record(con, &text[0], +- NULL, &dropped_text[0], +- handover); ++ progress = console_emit_next_record_transferable(con, &text[0], ++ NULL, &dropped_text[0], handover); + } + if (*handover) + return false; +@@ -2918,10 +3139,13 @@ void console_unblank(void) + if (oops_in_progress) { + if (down_trylock_console_sem() != 0) + return; ++ if (!console_kthreads_atomic_tryblock()) { ++ up_console_sem(); ++ return; ++ } + } else + console_lock(); + +- console_locked = 1; + console_may_schedule = 0; + for_each_console(c) + if ((c->flags & CON_ENABLED) && c->unblank) +@@ -3197,6 +3421,10 @@ void register_console(struct console *newcon) + } + + newcon->dropped = 0; ++ newcon->thread = NULL; ++ newcon->blocked = true; ++ mutex_init(&newcon->lock); ++ + if (newcon->flags & CON_PRINTBUFFER) { + /* Get a consistent copy of @syslog_seq. */ + mutex_lock(&syslog_lock); +@@ -3206,6 +3434,10 @@ void register_console(struct console *newcon) + /* Begin with next message. */ + newcon->seq = prb_next_seq(prb); + } ++ ++ if (printk_kthreads_available) ++ printk_start_kthread(newcon); ++ + console_unlock(); + console_sysfs_notify(); + +@@ -3229,6 +3461,7 @@ EXPORT_SYMBOL(register_console); + + int unregister_console(struct console *console) + { ++ struct task_struct *thd; + struct console *con; + int res; + +@@ -3266,7 +3499,20 @@ int unregister_console(struct console *console) + console_drivers->flags |= CON_CONSDEV; + + console->flags &= ~CON_ENABLED; ++ ++ /* ++ * console->thread can only be cleared under the console lock. But ++ * stopping the thread must be done without the console lock. The ++ * task that clears @thread is the task that stops the kthread. ++ */ ++ thd = console->thread; ++ console->thread = NULL; ++ + console_unlock(); ++ ++ if (thd) ++ kthread_stop(thd); ++ + console_sysfs_notify(); + + if (console->exit) +@@ -3362,6 +3608,20 @@ static int __init printk_late_init(void) + } + late_initcall(printk_late_init); + ++static int __init printk_activate_kthreads(void) ++{ ++ struct console *con; ++ ++ console_lock(); ++ printk_kthreads_available = true; ++ for_each_console(con) ++ printk_start_kthread(con); ++ console_unlock(); ++ ++ return 0; ++} ++early_initcall(printk_activate_kthreads); ++ + #if defined CONFIG_PRINTK + /* If @con is specified, only wait for that console. Otherwise wait for all. */ + static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress) +@@ -3444,11 +3704,208 @@ static bool pr_flush(int timeout_ms, bool reset_on_progress) + return __pr_flush(NULL, timeout_ms, reset_on_progress); + } + ++static void __printk_fallback_preferred_direct(void) ++{ ++ printk_prefer_direct_enter(); ++ pr_err("falling back to preferred direct printing\n"); ++ printk_kthreads_available = false; ++} ++ ++/* ++ * Enter preferred direct printing, but never exit. Mark console threads as ++ * unavailable. The system is then forever in preferred direct printing and ++ * any printing threads will exit. ++ * ++ * Must *not* be called under console_lock. Use ++ * __printk_fallback_preferred_direct() if already holding console_lock. ++ */ ++static void printk_fallback_preferred_direct(void) ++{ ++ console_lock(); ++ __printk_fallback_preferred_direct(); ++ console_unlock(); ++} ++ ++/* ++ * Print a record for a given console, not allowing another printk() caller ++ * to take over. This is appropriate for contexts that do not have the ++ * console_lock. ++ * ++ * See __console_emit_next_record() for argument and return details. ++ */ ++static bool console_emit_next_record(struct console *con, char *text, char *ext_text, ++ char *dropped_text) ++{ ++ return __console_emit_next_record(con, text, ext_text, dropped_text, NULL); ++} ++ ++static bool printer_should_wake(struct console *con, u64 seq) ++{ ++ short flags; ++ ++ if (kthread_should_stop() || !printk_kthreads_available) ++ return true; ++ ++ if (con->blocked || ++ console_kthreads_atomically_blocked() || ++ block_console_kthreads || ++ system_state > SYSTEM_RUNNING || ++ oops_in_progress) { ++ return false; ++ } ++ ++ /* ++ * This is an unsafe read from con->flags, but a false positive is ++ * not a problem. Worst case it would allow the printer to wake up ++ * although it is disabled. But the printer will notice that when ++ * attempting to print and instead go back to sleep. ++ */ ++ flags = data_race(READ_ONCE(con->flags)); ++ ++ if (!__console_is_usable(flags)) ++ return false; ++ ++ return prb_read_valid(prb, seq, NULL); ++} ++ ++static int printk_kthread_func(void *data) ++{ ++ struct console *con = data; ++ char *dropped_text = NULL; ++ char *ext_text = NULL; ++ u64 seq = 0; ++ char *text; ++ int error; ++ ++ text = kmalloc(CONSOLE_LOG_MAX, GFP_KERNEL); ++ if (!text) { ++ con_printk(KERN_ERR, con, "failed to allocate text buffer\n"); ++ printk_fallback_preferred_direct(); ++ goto out; ++ } ++ ++ if (con->flags & CON_EXTENDED) { ++ ext_text = kmalloc(CONSOLE_EXT_LOG_MAX, GFP_KERNEL); ++ if (!ext_text) { ++ con_printk(KERN_ERR, con, "failed to allocate ext_text buffer\n"); ++ printk_fallback_preferred_direct(); ++ goto out; ++ } ++ } else { ++ dropped_text = kmalloc(DROPPED_TEXT_MAX, GFP_KERNEL); ++ if (!dropped_text) { ++ con_printk(KERN_ERR, con, "failed to allocate dropped_text buffer\n"); ++ printk_fallback_preferred_direct(); ++ goto out; ++ } ++ } ++ ++ con_printk(KERN_INFO, con, "printing thread started\n"); ++ for (;;) { ++ /* ++ * Guarantee this task is visible on the waitqueue before ++ * checking the wake condition. ++ * ++ * The full memory barrier within set_current_state() of ++ * prepare_to_wait_event() pairs with the full memory barrier ++ * within wq_has_sleeper(). ++ * ++ * This pairs with __wake_up_klogd:A. ++ */ ++ error = wait_event_interruptible(log_wait, ++ printer_should_wake(con, seq)); /* LMM(printk_kthread_func:A) */ ++ ++ if (kthread_should_stop() || !printk_kthreads_available) ++ break; ++ ++ if (error) ++ continue; ++ ++ error = mutex_lock_interruptible(&con->lock); ++ if (error) ++ continue; ++ ++ if (con->blocked || ++ !console_kthread_printing_tryenter()) { ++ /* Another context has locked the console_lock. */ ++ mutex_unlock(&con->lock); ++ continue; ++ } ++ ++ /* ++ * Although this context has not locked the console_lock, it ++ * is known that the console_lock is not locked and it is not ++ * possible for any other context to lock the console_lock. ++ * Therefore it is safe to read con->flags. ++ */ ++ ++ if (!__console_is_usable(con->flags)) { ++ console_kthread_printing_exit(); ++ mutex_unlock(&con->lock); ++ continue; ++ } ++ ++ /* ++ * Even though the printk kthread is always preemptible, it is ++ * still not allowed to call cond_resched() from within ++ * console drivers. The task may become non-preemptible in the ++ * console driver call chain. For example, vt_console_print() ++ * takes a spinlock and then can call into fbcon_redraw(), ++ * which can conditionally invoke cond_resched(). ++ */ ++ console_may_schedule = 0; ++ console_emit_next_record(con, text, ext_text, dropped_text); ++ ++ seq = con->seq; ++ ++ console_kthread_printing_exit(); ++ ++ mutex_unlock(&con->lock); ++ } ++ ++ con_printk(KERN_INFO, con, "printing thread stopped\n"); ++out: ++ kfree(dropped_text); ++ kfree(ext_text); ++ kfree(text); ++ ++ console_lock(); ++ /* ++ * If this kthread is being stopped by another task, con->thread will ++ * already be NULL. That is fine. The important thing is that it is ++ * NULL after the kthread exits. ++ */ ++ con->thread = NULL; ++ console_unlock(); ++ ++ return 0; ++} ++ ++/* Must be called under console_lock. */ ++static void printk_start_kthread(struct console *con) ++{ ++ /* ++ * Do not start a kthread if there is no write() callback. The ++ * kthreads assume the write() callback exists. ++ */ ++ if (!con->write) ++ return; ++ ++ con->thread = kthread_run(printk_kthread_func, con, ++ "pr/%s%d", con->name, con->index); ++ if (IS_ERR(con->thread)) { ++ con->thread = NULL; ++ con_printk(KERN_ERR, con, "unable to start printing thread\n"); ++ __printk_fallback_preferred_direct(); ++ return; ++ } ++} ++ + /* + * Delayed printk version, for scheduler-internal messages: + */ +-#define PRINTK_PENDING_WAKEUP 0x01 +-#define PRINTK_PENDING_OUTPUT 0x02 ++#define PRINTK_PENDING_WAKEUP 0x01 ++#define PRINTK_PENDING_DIRECT_OUTPUT 0x02 + + static DEFINE_PER_CPU(int, printk_pending); + +@@ -3456,10 +3913,14 @@ static void wake_up_klogd_work_func(struct irq_work *irq_work) + { + int pending = this_cpu_xchg(printk_pending, 0); + +- if (pending & PRINTK_PENDING_OUTPUT) { ++ if (pending & PRINTK_PENDING_DIRECT_OUTPUT) { ++ printk_prefer_direct_enter(); ++ + /* If trylock fails, someone else is doing the printing */ + if (console_trylock()) + console_unlock(); ++ ++ printk_prefer_direct_exit(); + } + + if (pending & PRINTK_PENDING_WAKEUP) +@@ -3484,10 +3945,11 @@ static void __wake_up_klogd(int val) + * prepare_to_wait_event(), which is called after ___wait_event() adds + * the waiter but before it has checked the wait condition. + * +- * This pairs with devkmsg_read:A and syslog_print:A. ++ * This pairs with devkmsg_read:A, syslog_print:A, and ++ * printk_kthread_func:A. + */ + if (wq_has_sleeper(&log_wait) || /* LMM(__wake_up_klogd:A) */ +- (val & PRINTK_PENDING_OUTPUT)) { ++ (val & PRINTK_PENDING_DIRECT_OUTPUT)) { + this_cpu_or(printk_pending, val); + irq_work_queue(this_cpu_ptr(&wake_up_klogd_work)); + } +@@ -3527,7 +3989,17 @@ void defer_console_output(void) + * New messages may have been added directly to the ringbuffer + * using vprintk_store(), so wake any waiters as well. + */ +- __wake_up_klogd(PRINTK_PENDING_WAKEUP | PRINTK_PENDING_OUTPUT); ++ int val = PRINTK_PENDING_WAKEUP; ++ ++ /* ++ * Make sure that some context will print the messages when direct ++ * printing is allowed. This happens in situations when the kthreads ++ * may not be as reliable or perhaps unusable. ++ */ ++ if (allow_direct_printing()) ++ val |= PRINTK_PENDING_DIRECT_OUTPUT; ++ ++ __wake_up_klogd(val); + } + + void printk_trigger_flush(void) +diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c +index 6d10927a07d8..8e8fd2fb0a5b 100644 +--- a/kernel/printk/printk_safe.c ++++ b/kernel/printk/printk_safe.c +@@ -8,7 +8,9 @@ + #include <linux/smp.h> + #include <linux/cpumask.h> + #include <linux/printk.h> ++#include <linux/console.h> + #include <linux/kprobes.h> ++#include <linux/delay.h> + + #include "internal.h" + +@@ -45,3 +47,33 @@ asmlinkage int vprintk(const char *fmt, va_list args) + return vprintk_default(fmt, args); + } + EXPORT_SYMBOL(vprintk); ++ ++/** ++ * try_block_console_kthreads() - Try to block console kthreads and ++ * make the global console_lock() avaialble ++ * ++ * @timeout_ms: The maximum time (in ms) to wait. ++ * ++ * Prevent console kthreads from starting processing new messages. Wait ++ * until the global console_lock() become available. ++ * ++ * Context: Can be called in any context. ++ */ ++void try_block_console_kthreads(int timeout_ms) ++{ ++ block_console_kthreads = true; ++ ++ /* Do not wait when the console lock could not be safely taken. */ ++ if (this_cpu_read(printk_context) || in_nmi()) ++ return; ++ ++ while (timeout_ms > 0) { ++ if (console_trylock()) { ++ console_unlock(); ++ return; ++ } ++ ++ udelay(1000); ++ timeout_ms -= 1; ++ } ++} +diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h +index 7d15b5b5a235..7b411d663e8f 100644 +--- a/kernel/rcu/tree_stall.h ++++ b/kernel/rcu/tree_stall.h +@@ -648,6 +648,7 @@ static void print_cpu_stall(unsigned long gps) + * See Documentation/RCU/stallwarn.rst for info on how to debug + * RCU CPU stall warnings. + */ ++ printk_prefer_direct_enter(); + trace_rcu_stall_warning(rcu_state.name, TPS("SelfDetected")); + pr_err("INFO: %s self-detected stall on CPU\n", rcu_state.name); + raw_spin_lock_irqsave_rcu_node(rdp->mynode, flags); +@@ -682,6 +683,7 @@ static void print_cpu_stall(unsigned long gps) + */ + set_tsk_need_resched(current); + set_preempt_need_resched(); ++ printk_prefer_direct_exit(); + } + + static void check_cpu_stall(struct rcu_data *rdp) +diff --git a/kernel/reboot.c b/kernel/reboot.c +index 6ebef11c8876..23a8cfed1a72 100644 +--- a/kernel/reboot.c ++++ b/kernel/reboot.c +@@ -83,6 +83,7 @@ void kernel_restart_prepare(char *cmd) + { + blocking_notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd); + system_state = SYSTEM_RESTART; ++ try_block_console_kthreads(10000); + usermodehelper_disable(); + device_shutdown(); + } +@@ -283,6 +284,7 @@ static void kernel_shutdown_prepare(enum system_states state) + blocking_notifier_call_chain(&reboot_notifier_list, + (state == SYSTEM_HALT) ? SYS_HALT : SYS_POWER_OFF, NULL); + system_state = state; ++ try_block_console_kthreads(10000); + usermodehelper_disable(); + device_shutdown(); + } +@@ -837,9 +839,11 @@ static int __orderly_reboot(void) + ret = run_cmd(reboot_cmd); + + if (ret) { ++ printk_prefer_direct_enter(); + pr_warn("Failed to start orderly reboot: forcing the issue\n"); + emergency_sync(); + kernel_restart(NULL); ++ printk_prefer_direct_exit(); + } + + return ret; +@@ -852,6 +856,7 @@ static int __orderly_poweroff(bool force) + ret = run_cmd(poweroff_cmd); + + if (ret && force) { ++ printk_prefer_direct_enter(); + pr_warn("Failed to start orderly shutdown: forcing the issue\n"); + + /* +@@ -861,6 +866,7 @@ static int __orderly_poweroff(bool force) + */ + emergency_sync(); + kernel_power_off(); ++ printk_prefer_direct_exit(); + } + + return ret; +@@ -918,6 +924,8 @@ EXPORT_SYMBOL_GPL(orderly_reboot); + */ + static void hw_failure_emergency_poweroff_func(struct work_struct *work) + { ++ printk_prefer_direct_enter(); ++ + /* + * We have reached here after the emergency shutdown waiting period has + * expired. This means orderly_poweroff has not been able to shut off +@@ -934,6 +942,8 @@ static void hw_failure_emergency_poweroff_func(struct work_struct *work) + */ + pr_emerg("Hardware protection shutdown failed. Trying emergency restart\n"); + emergency_restart(); ++ ++ printk_prefer_direct_exit(); + } + + static DECLARE_DELAYED_WORK(hw_failure_emergency_poweroff_work, +@@ -972,11 +982,13 @@ void hw_protection_shutdown(const char *reason, int ms_until_forced) + { + static atomic_t allow_proceed = ATOMIC_INIT(1); + ++ printk_prefer_direct_enter(); ++ + pr_emerg("HARDWARE PROTECTION shutdown (%s)\n", reason); + + /* Shutdown should be initiated only once. */ + if (!atomic_dec_and_test(&allow_proceed)) +- return; ++ goto out; + + /* + * Queue a backup emergency shutdown in the event of +@@ -984,6 +996,8 @@ void hw_protection_shutdown(const char *reason, int ms_until_forced) + */ + hw_failure_emergency_poweroff(ms_until_forced); + orderly_poweroff(true); ++out: ++ printk_prefer_direct_exit(); + } + EXPORT_SYMBOL_GPL(hw_protection_shutdown); + +diff --git a/kernel/watchdog.c b/kernel/watchdog.c +index 45693fb3e08d..f366008298ac 100644 +--- a/kernel/watchdog.c ++++ b/kernel/watchdog.c +@@ -431,6 +431,8 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) + /* Start period for the next softlockup warning. */ + update_report_ts(); + ++ printk_prefer_direct_enter(); ++ + pr_emerg("BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n", + smp_processor_id(), duration, + current->comm, task_pid_nr(current)); +@@ -449,6 +451,8 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) + add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK); + if (softlockup_panic) + panic("softlockup: hung tasks"); ++ ++ printk_prefer_direct_exit(); + } + + return HRTIMER_RESTART; +diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c +index 1e8a49dc956e..7c977b945c92 100644 +--- a/kernel/watchdog_hld.c ++++ b/kernel/watchdog_hld.c +@@ -135,6 +135,8 @@ static void watchdog_overflow_callback(struct perf_event *event, + if (__this_cpu_read(hard_watchdog_warn) == true) + return; + ++ printk_prefer_direct_enter(); ++ + pr_emerg("Watchdog detected hard LOCKUP on cpu %d\n", + this_cpu); + print_modules(); +@@ -155,6 +157,8 @@ static void watchdog_overflow_callback(struct perf_event *event, + if (hardlockup_panic) + nmi_panic(regs, "Hard LOCKUP"); + ++ printk_prefer_direct_exit(); ++ + __this_cpu_write(hard_watchdog_warn, true); + return; + } +-- +2.43.0 + diff --git a/debian/patches-rt/0020-printk-add-infrastucture-for-atomic-consoles.patch b/debian/patches-rt/0020-printk-add-infrastucture-for-atomic-consoles.patch new file mode 100644 index 000000000..b86b20007 --- /dev/null +++ b/debian/patches-rt/0020-printk-add-infrastucture-for-atomic-consoles.patch @@ -0,0 +1,608 @@ +From 18343f23a5f1d466a0c74806983066efba932d5d Mon Sep 17 00:00:00 2001 +From: John Ogness <john.ogness@linutronix.de> +Date: Fri, 4 Feb 2022 16:01:17 +0106 +Subject: [PATCH 20/62] printk: add infrastucture for atomic consoles +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.1/older/patches-6.1.69-rt21.tar.xz + +Many times it is not possible to see the console output on +panic because printing threads cannot be scheduled and/or the +console is already taken and forcibly overtaking/busting the +locks does provide the hoped results. + +Introduce a new infrastructure to support "atomic consoles". +A new optional callback in struct console, write_atomic(), is +available for consoles to provide an implemention for writing +console messages. The implementation must be NMI safe if they +can run on an architecture where NMIs exist. + +Console drivers implementing the write_atomic() callback must +also select CONFIG_HAVE_ATOMIC_CONSOLE in order to enable the +atomic console code within the printk subsystem. + +If atomic consoles are available, panic() will flush the kernel +log only to the atomic consoles (before busting spinlocks). +Afterwards, panic() will continue as before, which includes +attempting to flush the other (non-atomic) consoles. + +Signed-off-by: John Ogness <john.ogness@linutronix.de> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +--- + include/linux/console.h | 16 ++- + init/Kconfig | 4 + + kernel/panic.c | 6 +- + kernel/printk/printk.c | 293 ++++++++++++++++++++++++++++++++++++---- + 4 files changed, 290 insertions(+), 29 deletions(-) + +diff --git a/include/linux/console.h b/include/linux/console.h +index 143653090c48..8a813cbaf928 100644 +--- a/include/linux/console.h ++++ b/include/linux/console.h +@@ -138,9 +138,19 @@ static inline int con_debug_leave(void) + #define CON_BRL (32) /* Used for a braille device */ + #define CON_EXTENDED (64) /* Use the extended output format a la /dev/kmsg */ + ++#ifdef CONFIG_HAVE_ATOMIC_CONSOLE ++struct console_atomic_data { ++ u64 seq; ++ char *text; ++ char *ext_text; ++ char *dropped_text; ++}; ++#endif ++ + struct console { + char name[16]; + void (*write)(struct console *, const char *, unsigned); ++ void (*write_atomic)(struct console *, const char *, unsigned); + int (*read)(struct console *, char *, unsigned); + struct tty_driver *(*device)(struct console *, int *); + void (*unblank)(void); +@@ -153,7 +163,10 @@ struct console { + uint ispeed; + uint ospeed; + u64 seq; +- unsigned long dropped; ++ atomic_long_t dropped; ++#ifdef CONFIG_HAVE_ATOMIC_CONSOLE ++ struct console_atomic_data *atomic_data; ++#endif + struct task_struct *thread; + bool blocked; + +@@ -184,6 +197,7 @@ extern int console_set_on_cmdline; + extern struct console *early_console; + + enum con_flush_mode { ++ CONSOLE_ATOMIC_FLUSH_PENDING, + CONSOLE_FLUSH_PENDING, + CONSOLE_REPLAY_ALL, + }; +diff --git a/init/Kconfig b/init/Kconfig +index de255842f5d0..d45312780b3a 100644 +--- a/init/Kconfig ++++ b/init/Kconfig +@@ -1582,6 +1582,10 @@ config PRINTK + very difficult to diagnose system problems, saying N here is + strongly discouraged. + ++config HAVE_ATOMIC_CONSOLE ++ bool ++ default n ++ + config BUG + bool "BUG() support" if EXPERT + default y +diff --git a/kernel/panic.c b/kernel/panic.c +index 88cd873c7c30..97cc495d95f8 100644 +--- a/kernel/panic.c ++++ b/kernel/panic.c +@@ -322,7 +322,6 @@ void panic(const char *fmt, ...) + panic_smp_self_stop(); + + console_verbose(); +- bust_spinlocks(1); + va_start(args, fmt); + len = vscnprintf(buf, sizeof(buf), fmt, args); + va_end(args); +@@ -339,6 +338,11 @@ void panic(const char *fmt, ...) + dump_stack(); + #endif + ++ /* If atomic consoles are available, flush the kernel log. */ ++ console_flush_on_panic(CONSOLE_ATOMIC_FLUSH_PENDING); ++ ++ bust_spinlocks(1); ++ + /* + * If kgdb is enabled, give it a chance to run before we stop all + * the other CPUs or else we won't be able to debug processes left +diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c +index e9f9b66608a0..73b1727087c7 100644 +--- a/kernel/printk/printk.c ++++ b/kernel/printk/printk.c +@@ -44,6 +44,7 @@ + #include <linux/irq_work.h> + #include <linux/ctype.h> + #include <linux/uio.h> ++#include <linux/clocksource.h> + #include <linux/sched/clock.h> + #include <linux/sched/debug.h> + #include <linux/sched/task_stack.h> +@@ -2060,19 +2061,28 @@ static int console_trylock_spinning(void) + * dropped, a dropped message will be written out first. + */ + static void call_console_driver(struct console *con, const char *text, size_t len, +- char *dropped_text) ++ char *dropped_text, bool atomic_printing) + { ++ unsigned long dropped = 0; + size_t dropped_len; + +- if (con->dropped && dropped_text) { ++ if (dropped_text) ++ dropped = atomic_long_xchg_relaxed(&con->dropped, 0); ++ ++ if (dropped) { + dropped_len = snprintf(dropped_text, DROPPED_TEXT_MAX, + "** %lu printk messages dropped **\n", +- con->dropped); +- con->dropped = 0; +- con->write(con, dropped_text, dropped_len); ++ dropped); ++ if (atomic_printing) ++ con->write_atomic(con, dropped_text, dropped_len); ++ else ++ con->write(con, dropped_text, dropped_len); + } + +- con->write(con, text, len); ++ if (atomic_printing) ++ con->write_atomic(con, text, len); ++ else ++ con->write(con, text, len); + } + + /* +@@ -2430,6 +2440,76 @@ asmlinkage __visible int _printk(const char *fmt, ...) + } + EXPORT_SYMBOL(_printk); + ++#ifdef CONFIG_HAVE_ATOMIC_CONSOLE ++static void __free_atomic_data(struct console_atomic_data *d) ++{ ++ kfree(d->text); ++ kfree(d->ext_text); ++ kfree(d->dropped_text); ++} ++ ++static void free_atomic_data(struct console_atomic_data *d) ++{ ++ int count = 1; ++ int i; ++ ++ if (!d) ++ return; ++ ++#ifdef CONFIG_HAVE_NMI ++ count = 2; ++#endif ++ ++ for (i = 0; i < count; i++) ++ __free_atomic_data(&d[i]); ++ kfree(d); ++} ++ ++static int __alloc_atomic_data(struct console_atomic_data *d, short flags) ++{ ++ d->text = kmalloc(CONSOLE_LOG_MAX, GFP_KERNEL); ++ if (!d->text) ++ return -1; ++ ++ if (flags & CON_EXTENDED) { ++ d->ext_text = kmalloc(CONSOLE_EXT_LOG_MAX, GFP_KERNEL); ++ if (!d->ext_text) ++ return -1; ++ } else { ++ d->dropped_text = kmalloc(DROPPED_TEXT_MAX, GFP_KERNEL); ++ if (!d->dropped_text) ++ return -1; ++ } ++ ++ return 0; ++} ++ ++static struct console_atomic_data *alloc_atomic_data(short flags) ++{ ++ struct console_atomic_data *d; ++ int count = 1; ++ int i; ++ ++#ifdef CONFIG_HAVE_NMI ++ count = 2; ++#endif ++ ++ d = kzalloc(sizeof(*d) * count, GFP_KERNEL); ++ if (!d) ++ goto err_out; ++ ++ for (i = 0; i < count; i++) { ++ if (__alloc_atomic_data(&d[i], flags) != 0) ++ goto err_out; ++ } ++ ++ return d; ++err_out: ++ free_atomic_data(d); ++ return NULL; ++} ++#endif /* CONFIG_HAVE_ATOMIC_CONSOLE */ ++ + static bool pr_flush(int timeout_ms, bool reset_on_progress); + static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress); + +@@ -2445,6 +2525,8 @@ static void printk_start_kthread(struct console *con); + #define prb_first_valid_seq(rb) 0 + #define prb_next_seq(rb) 0 + ++#define free_atomic_data(d) ++ + static u64 syslog_seq; + + static size_t record_print_text(const struct printk_record *r, +@@ -2463,7 +2545,7 @@ static ssize_t msg_print_ext_body(char *buf, size_t size, + static void console_lock_spinning_enable(void) { } + static int console_lock_spinning_disable_and_check(void) { return 0; } + static void call_console_driver(struct console *con, const char *text, size_t len, +- char *dropped_text) ++ char *dropped_text, bool atomic_printing) + { + } + static bool suppress_message_printing(int level) { return false; } +@@ -2819,10 +2901,20 @@ static inline bool __console_is_usable(short flags) + * + * Requires holding the console_lock. + */ +-static inline bool console_is_usable(struct console *con) ++static inline bool console_is_usable(struct console *con, bool atomic_printing) + { +- if (!con->write) ++ if (atomic_printing) { ++#ifdef CONFIG_HAVE_ATOMIC_CONSOLE ++ if (!con->write_atomic) ++ return false; ++ if (!con->atomic_data) ++ return false; ++#else ++ return false; ++#endif ++ } else if (!con->write) { + return false; ++ } + + return __console_is_usable(con->flags); + } +@@ -2847,6 +2939,66 @@ static void __console_unlock(void) + up_console_sem(); + } + ++static u64 read_console_seq(struct console *con) ++{ ++#ifdef CONFIG_HAVE_ATOMIC_CONSOLE ++ unsigned long flags; ++ u64 seq2; ++ u64 seq; ++ ++ if (!con->atomic_data) ++ return con->seq; ++ ++ printk_cpu_sync_get_irqsave(flags); ++ ++ seq = con->seq; ++ seq2 = con->atomic_data[0].seq; ++ if (seq2 > seq) ++ seq = seq2; ++#ifdef CONFIG_HAVE_NMI ++ seq2 = con->atomic_data[1].seq; ++ if (seq2 > seq) ++ seq = seq2; ++#endif ++ ++ printk_cpu_sync_put_irqrestore(flags); ++ ++ return seq; ++#else /* CONFIG_HAVE_ATOMIC_CONSOLE */ ++ return con->seq; ++#endif ++} ++ ++static void write_console_seq(struct console *con, u64 val, bool atomic_printing) ++{ ++#ifdef CONFIG_HAVE_ATOMIC_CONSOLE ++ unsigned long flags; ++ u64 *seq; ++ ++ if (!con->atomic_data) { ++ con->seq = val; ++ return; ++ } ++ ++ printk_cpu_sync_get_irqsave(flags); ++ ++ if (atomic_printing) { ++ seq = &con->atomic_data[0].seq; ++#ifdef CONFIG_HAVE_NMI ++ if (in_nmi()) ++ seq = &con->atomic_data[1].seq; ++#endif ++ } else { ++ seq = &con->seq; ++ } ++ *seq = val; ++ ++ printk_cpu_sync_put_irqrestore(flags); ++#else /* CONFIG_HAVE_ATOMIC_CONSOLE */ ++ con->seq = val; ++#endif ++} ++ + /* + * Print one record for the given console. The record printed is whatever + * record is the next available record for the given console. +@@ -2859,6 +3011,8 @@ static void __console_unlock(void) + * If dropped messages should be printed, @dropped_text is a buffer of size + * DROPPED_TEXT_MAX. Otherwise @dropped_text must be NULL. + * ++ * @atomic_printing specifies if atomic printing should be used. ++ * + * @handover will be set to true if a printk waiter has taken over the + * console_lock, in which case the caller is no longer holding the + * console_lock. Otherwise it is set to false. A NULL pointer may be provided +@@ -2871,7 +3025,8 @@ static void __console_unlock(void) + * Requires con->lock otherwise. + */ + static bool __console_emit_next_record(struct console *con, char *text, char *ext_text, +- char *dropped_text, bool *handover) ++ char *dropped_text, bool atomic_printing, ++ bool *handover) + { + static atomic_t panic_console_dropped = ATOMIC_INIT(0); + struct printk_info info; +@@ -2879,18 +3034,22 @@ static bool __console_emit_next_record(struct console *con, char *text, char *ex + unsigned long flags; + char *write_text; + size_t len; ++ u64 seq; + + prb_rec_init_rd(&r, &info, text, CONSOLE_LOG_MAX); + + if (handover) + *handover = false; + +- if (!prb_read_valid(prb, con->seq, &r)) ++ seq = read_console_seq(con); ++ ++ if (!prb_read_valid(prb, seq, &r)) + return false; + +- if (con->seq != r.info->seq) { +- con->dropped += r.info->seq - con->seq; +- con->seq = r.info->seq; ++ if (seq != r.info->seq) { ++ atomic_long_add((unsigned long)(r.info->seq - seq), &con->dropped); ++ write_console_seq(con, r.info->seq, atomic_printing); ++ seq = r.info->seq; + if (panic_in_progress() && + atomic_fetch_inc_relaxed(&panic_console_dropped) > 10) { + suppress_panic_printk = 1; +@@ -2900,7 +3059,7 @@ static bool __console_emit_next_record(struct console *con, char *text, char *ex + + /* Skip record that has level above the console loglevel. */ + if (suppress_message_printing(r.info->level)) { +- con->seq++; ++ write_console_seq(con, seq + 1, atomic_printing); + goto skip; + } + +@@ -2932,9 +3091,9 @@ static bool __console_emit_next_record(struct console *con, char *text, char *ex + stop_critical_timings(); + } + +- call_console_driver(con, write_text, len, dropped_text); ++ call_console_driver(con, write_text, len, dropped_text, atomic_printing); + +- con->seq++; ++ write_console_seq(con, seq + 1, atomic_printing); + + if (handover) { + start_critical_timings(); +@@ -2966,7 +3125,7 @@ static bool console_emit_next_record_transferable(struct console *con, char *tex + handover = NULL; + } + +- return __console_emit_next_record(con, text, ext_text, dropped_text, handover); ++ return __console_emit_next_record(con, text, ext_text, dropped_text, false, handover); + } + + /* +@@ -3014,7 +3173,7 @@ static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handove + for_each_console(con) { + bool progress; + +- if (!console_is_usable(con)) ++ if (!console_is_usable(con, false)) + continue; + any_usable = true; + +@@ -3049,6 +3208,68 @@ static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handove + return any_usable; + } + ++#if defined(CONFIG_HAVE_ATOMIC_CONSOLE) && defined(CONFIG_PRINTK) ++static bool console_emit_next_record(struct console *con, char *text, char *ext_text, ++ char *dropped_text, bool atomic_printing); ++ ++static void atomic_console_flush_all(void) ++{ ++ unsigned long flags; ++ struct console *con; ++ bool any_progress; ++ int index = 0; ++ ++ if (console_suspended) ++ return; ++ ++#ifdef CONFIG_HAVE_NMI ++ if (in_nmi()) ++ index = 1; ++#endif ++ ++ printk_cpu_sync_get_irqsave(flags); ++ ++ do { ++ any_progress = false; ++ ++ for_each_console(con) { ++ bool progress; ++ ++ if (!console_is_usable(con, true)) ++ continue; ++ ++ if (con->flags & CON_EXTENDED) { ++ /* Extended consoles do not print "dropped messages". */ ++ progress = console_emit_next_record(con, ++ &con->atomic_data->text[index], ++ &con->atomic_data->ext_text[index], ++ NULL, ++ true); ++ } else { ++ progress = console_emit_next_record(con, ++ &con->atomic_data->text[index], ++ NULL, ++ &con->atomic_data->dropped_text[index], ++ true); ++ } ++ ++ if (!progress) ++ continue; ++ any_progress = true; ++ ++ touch_softlockup_watchdog_sync(); ++ clocksource_touch_watchdog(); ++ rcu_cpu_stall_reset(); ++ touch_nmi_watchdog(); ++ } ++ } while (any_progress); ++ ++ printk_cpu_sync_put_irqrestore(flags); ++} ++#else /* CONFIG_HAVE_ATOMIC_CONSOLE && CONFIG_PRINTK */ ++#define atomic_console_flush_all() ++#endif ++ + /** + * console_unlock - unlock the console system + * +@@ -3164,6 +3385,11 @@ void console_unblank(void) + */ + void console_flush_on_panic(enum con_flush_mode mode) + { ++ if (mode == CONSOLE_ATOMIC_FLUSH_PENDING) { ++ atomic_console_flush_all(); ++ return; ++ } ++ + /* + * If someone else is holding the console lock, trylock will fail + * and may_schedule may be set. Ignore and proceed to unlock so +@@ -3180,7 +3406,7 @@ void console_flush_on_panic(enum con_flush_mode mode) + + seq = prb_first_valid_seq(prb); + for_each_console(c) +- c->seq = seq; ++ write_console_seq(c, seq, false); + } + console_unlock(); + } +@@ -3420,19 +3646,22 @@ void register_console(struct console *newcon) + console_drivers->next = newcon; + } + +- newcon->dropped = 0; ++ atomic_long_set(&newcon->dropped, 0); + newcon->thread = NULL; + newcon->blocked = true; + mutex_init(&newcon->lock); ++#ifdef CONFIG_HAVE_ATOMIC_CONSOLE ++ newcon->atomic_data = NULL; ++#endif + + if (newcon->flags & CON_PRINTBUFFER) { + /* Get a consistent copy of @syslog_seq. */ + mutex_lock(&syslog_lock); +- newcon->seq = syslog_seq; ++ write_console_seq(newcon, syslog_seq, false); + mutex_unlock(&syslog_lock); + } else { + /* Begin with next message. */ +- newcon->seq = prb_next_seq(prb); ++ write_console_seq(newcon, prb_next_seq(prb), false); + } + + if (printk_kthreads_available) +@@ -3515,6 +3744,10 @@ int unregister_console(struct console *console) + + console_sysfs_notify(); + ++#ifdef CONFIG_HAVE_ATOMIC_CONSOLE ++ free_atomic_data(console->atomic_data); ++#endif ++ + if (console->exit) + res = console->exit(console); + +@@ -3645,7 +3878,7 @@ static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progre + for_each_console(c) { + if (con && con != c) + continue; +- if (!console_is_usable(c)) ++ if (!console_is_usable(c, false)) + continue; + printk_seq = c->seq; + if (printk_seq < seq) +@@ -3734,9 +3967,10 @@ static void printk_fallback_preferred_direct(void) + * See __console_emit_next_record() for argument and return details. + */ + static bool console_emit_next_record(struct console *con, char *text, char *ext_text, +- char *dropped_text) ++ char *dropped_text, bool atomic_printing) + { +- return __console_emit_next_record(con, text, ext_text, dropped_text, NULL); ++ return __console_emit_next_record(con, text, ext_text, dropped_text, ++ atomic_printing, NULL); + } + + static bool printer_should_wake(struct console *con, u64 seq) +@@ -3777,6 +4011,11 @@ static int printk_kthread_func(void *data) + char *text; + int error; + ++#ifdef CONFIG_HAVE_ATOMIC_CONSOLE ++ if (con->write_atomic) ++ con->atomic_data = alloc_atomic_data(con->flags); ++#endif ++ + text = kmalloc(CONSOLE_LOG_MAX, GFP_KERNEL); + if (!text) { + con_printk(KERN_ERR, con, "failed to allocate text buffer\n"); +@@ -3854,7 +4093,7 @@ static int printk_kthread_func(void *data) + * which can conditionally invoke cond_resched(). + */ + console_may_schedule = 0; +- console_emit_next_record(con, text, ext_text, dropped_text); ++ console_emit_next_record(con, text, ext_text, dropped_text, false); + + seq = con->seq; + +-- +2.43.0 + diff --git a/debian/patches-rt/0021-serial-8250-implement-write_atomic.patch b/debian/patches-rt/0021-serial-8250-implement-write_atomic.patch new file mode 100644 index 000000000..6a5f5abe2 --- /dev/null +++ b/debian/patches-rt/0021-serial-8250-implement-write_atomic.patch @@ -0,0 +1,938 @@ +From 08b8c0b589806331dc645a8ead6be51c174d93e0 Mon Sep 17 00:00:00 2001 +From: John Ogness <john.ogness@linutronix.de> +Date: Fri, 4 Feb 2022 16:01:17 +0106 +Subject: [PATCH 21/62] serial: 8250: implement write_atomic +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.1/older/patches-6.1.69-rt21.tar.xz + +Implement a non-sleeping NMI-safe write_atomic() console function in +order to support atomic console printing during a panic. + +Trasmitting data requires disabling interrupts. Since write_atomic() +can be called from any context, it may be called while another CPU +is executing in console code. In order to maintain the correct state +of the IER register, use the global cpu_sync to synchronize all +access to the IER register. This synchronization is only necessary +for serial ports that are being used as consoles. + +The global cpu_sync is also used to synchronize between the write() +and write_atomic() callbacks. write() synchronizes per character, +write_atomic() synchronizes per line. + +Signed-off-by: John Ogness <john.ogness@linutronix.de> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +--- + drivers/tty/serial/8250/8250.h | 41 ++++- + drivers/tty/serial/8250/8250_aspeed_vuart.c | 2 +- + drivers/tty/serial/8250/8250_bcm7271.c | 21 ++- + drivers/tty/serial/8250/8250_core.c | 24 ++- + drivers/tty/serial/8250/8250_exar.c | 4 +- + drivers/tty/serial/8250/8250_fsl.c | 3 +- + drivers/tty/serial/8250/8250_ingenic.c | 3 +- + drivers/tty/serial/8250/8250_mtk.c | 32 +++- + drivers/tty/serial/8250/8250_omap.c | 18 +-- + drivers/tty/serial/8250/8250_port.c | 158 ++++++++++++++++---- + drivers/tty/serial/8250/Kconfig | 1 + + include/linux/serial_8250.h | 5 + + 12 files changed, 261 insertions(+), 51 deletions(-) + +diff --git a/drivers/tty/serial/8250/8250.h b/drivers/tty/serial/8250/8250.h +index eeb7b43ebe53..b17715d340c3 100644 +--- a/drivers/tty/serial/8250/8250.h ++++ b/drivers/tty/serial/8250/8250.h +@@ -176,12 +176,49 @@ static inline void serial_dl_write(struct uart_8250_port *up, int value) + up->dl_write(up, value); + } + ++static inline int serial8250_in_IER(struct uart_8250_port *up) ++{ ++ struct uart_port *port = &up->port; ++ unsigned long flags; ++ bool is_console; ++ int ier; ++ ++ is_console = uart_console(port); ++ ++ if (is_console) ++ printk_cpu_sync_get_irqsave(flags); ++ ++ ier = serial_in(up, UART_IER); ++ ++ if (is_console) ++ printk_cpu_sync_put_irqrestore(flags); ++ ++ return ier; ++} ++ ++static inline void serial8250_set_IER(struct uart_8250_port *up, int ier) ++{ ++ struct uart_port *port = &up->port; ++ unsigned long flags; ++ bool is_console; ++ ++ is_console = uart_console(port); ++ ++ if (is_console) ++ printk_cpu_sync_get_irqsave(flags); ++ ++ serial_out(up, UART_IER, ier); ++ ++ if (is_console) ++ printk_cpu_sync_put_irqrestore(flags); ++} ++ + static inline bool serial8250_set_THRI(struct uart_8250_port *up) + { + if (up->ier & UART_IER_THRI) + return false; + up->ier |= UART_IER_THRI; +- serial_out(up, UART_IER, up->ier); ++ serial8250_set_IER(up, up->ier); + return true; + } + +@@ -190,7 +227,7 @@ static inline bool serial8250_clear_THRI(struct uart_8250_port *up) + if (!(up->ier & UART_IER_THRI)) + return false; + up->ier &= ~UART_IER_THRI; +- serial_out(up, UART_IER, up->ier); ++ serial8250_set_IER(up, up->ier); + return true; + } + +diff --git a/drivers/tty/serial/8250/8250_aspeed_vuart.c b/drivers/tty/serial/8250/8250_aspeed_vuart.c +index 9d2a7856784f..7cc6b527c088 100644 +--- a/drivers/tty/serial/8250/8250_aspeed_vuart.c ++++ b/drivers/tty/serial/8250/8250_aspeed_vuart.c +@@ -278,7 +278,7 @@ static void __aspeed_vuart_set_throttle(struct uart_8250_port *up, + up->ier &= ~irqs; + if (!throttle) + up->ier |= irqs; +- serial_out(up, UART_IER, up->ier); ++ serial8250_set_IER(up, up->ier); + } + static void aspeed_vuart_set_throttle(struct uart_port *port, bool throttle) + { +diff --git a/drivers/tty/serial/8250/8250_bcm7271.c b/drivers/tty/serial/8250/8250_bcm7271.c +index ffc7f67e27e3..8b211e668bc0 100644 +--- a/drivers/tty/serial/8250/8250_bcm7271.c ++++ b/drivers/tty/serial/8250/8250_bcm7271.c +@@ -609,7 +609,7 @@ static int brcmuart_startup(struct uart_port *port) + * will handle this. + */ + up->ier &= ~UART_IER_RDI; +- serial_port_out(port, UART_IER, up->ier); ++ serial8250_set_IER(up, up->ier); + + priv->tx_running = false; + priv->dma.rx_dma = NULL; +@@ -775,10 +775,12 @@ static int brcmuart_handle_irq(struct uart_port *p) + unsigned int iir = serial_port_in(p, UART_IIR); + struct brcmuart_priv *priv = p->private_data; + struct uart_8250_port *up = up_to_u8250p(p); ++ unsigned long cs_flags; + unsigned int status; + unsigned long flags; + unsigned int ier; + unsigned int mcr; ++ bool is_console; + int handled = 0; + + /* +@@ -789,6 +791,10 @@ static int brcmuart_handle_irq(struct uart_port *p) + spin_lock_irqsave(&p->lock, flags); + status = serial_port_in(p, UART_LSR); + if ((status & UART_LSR_DR) == 0) { ++ is_console = uart_console(p); ++ ++ if (is_console) ++ printk_cpu_sync_get_irqsave(cs_flags); + + ier = serial_port_in(p, UART_IER); + /* +@@ -809,6 +815,9 @@ static int brcmuart_handle_irq(struct uart_port *p) + serial_port_in(p, UART_RX); + } + ++ if (is_console) ++ printk_cpu_sync_put_irqrestore(cs_flags); ++ + handled = 1; + } + spin_unlock_irqrestore(&p->lock, flags); +@@ -823,8 +832,10 @@ static enum hrtimer_restart brcmuart_hrtimer_func(struct hrtimer *t) + struct brcmuart_priv *priv = container_of(t, struct brcmuart_priv, hrt); + struct uart_port *p = priv->up; + struct uart_8250_port *up = up_to_u8250p(p); ++ unsigned long cs_flags; + unsigned int status; + unsigned long flags; ++ bool is_console; + + if (priv->shutdown) + return HRTIMER_NORESTART; +@@ -846,12 +857,20 @@ static enum hrtimer_restart brcmuart_hrtimer_func(struct hrtimer *t) + /* re-enable receive unless upper layer has disabled it */ + if ((up->ier & (UART_IER_RLSI | UART_IER_RDI)) == + (UART_IER_RLSI | UART_IER_RDI)) { ++ is_console = uart_console(p); ++ ++ if (is_console) ++ printk_cpu_sync_get_irqsave(cs_flags); ++ + status = serial_port_in(p, UART_IER); + status |= (UART_IER_RLSI | UART_IER_RDI); + serial_port_out(p, UART_IER, status); + status = serial_port_in(p, UART_MCR); + status |= UART_MCR_RTS; + serial_port_out(p, UART_MCR, status); ++ ++ if (is_console) ++ printk_cpu_sync_put_irqrestore(cs_flags); + } + spin_unlock_irqrestore(&p->lock, flags); + return HRTIMER_NORESTART; +diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c +index 81a5dab1a828..536f639ff56c 100644 +--- a/drivers/tty/serial/8250/8250_core.c ++++ b/drivers/tty/serial/8250/8250_core.c +@@ -255,8 +255,11 @@ static void serial8250_timeout(struct timer_list *t) + static void serial8250_backup_timeout(struct timer_list *t) + { + struct uart_8250_port *up = from_timer(up, t, timer); ++ struct uart_port *port = &up->port; + unsigned int iir, ier = 0, lsr; ++ unsigned long cs_flags; + unsigned long flags; ++ bool is_console; + + spin_lock_irqsave(&up->port.lock, flags); + +@@ -265,8 +268,16 @@ static void serial8250_backup_timeout(struct timer_list *t) + * based handler. + */ + if (up->port.irq) { ++ is_console = uart_console(port); ++ ++ if (is_console) ++ printk_cpu_sync_get_irqsave(cs_flags); ++ + ier = serial_in(up, UART_IER); + serial_out(up, UART_IER, 0); ++ ++ if (is_console) ++ printk_cpu_sync_put_irqrestore(cs_flags); + } + + iir = serial_in(up, UART_IIR); +@@ -289,7 +300,7 @@ static void serial8250_backup_timeout(struct timer_list *t) + serial8250_tx_chars(up); + + if (up->port.irq) +- serial_out(up, UART_IER, ier); ++ serial8250_set_IER(up, ier); + + spin_unlock_irqrestore(&up->port.lock, flags); + +@@ -575,6 +586,14 @@ serial8250_register_ports(struct uart_driver *drv, struct device *dev) + + #ifdef CONFIG_SERIAL_8250_CONSOLE + ++static void univ8250_console_write_atomic(struct console *co, const char *s, ++ unsigned int count) ++{ ++ struct uart_8250_port *up = &serial8250_ports[co->index]; ++ ++ serial8250_console_write_atomic(up, s, count); ++} ++ + static void univ8250_console_write(struct console *co, const char *s, + unsigned int count) + { +@@ -668,6 +687,7 @@ static int univ8250_console_match(struct console *co, char *name, int idx, + + static struct console univ8250_console = { + .name = "ttyS", ++ .write_atomic = univ8250_console_write_atomic, + .write = univ8250_console_write, + .device = uart_console_device, + .setup = univ8250_console_setup, +@@ -961,7 +981,7 @@ static void serial_8250_overrun_backoff_work(struct work_struct *work) + spin_lock_irqsave(&port->lock, flags); + up->ier |= UART_IER_RLSI | UART_IER_RDI; + up->port.read_status_mask |= UART_LSR_DR; +- serial_out(up, UART_IER, up->ier); ++ serial8250_set_IER(up, up->ier); + spin_unlock_irqrestore(&port->lock, flags); + } + +diff --git a/drivers/tty/serial/8250/8250_exar.c b/drivers/tty/serial/8250/8250_exar.c +index b406cba10b0e..246c32c75a4c 100644 +--- a/drivers/tty/serial/8250/8250_exar.c ++++ b/drivers/tty/serial/8250/8250_exar.c +@@ -189,6 +189,8 @@ static void xr17v35x_set_divisor(struct uart_port *p, unsigned int baud, + + static int xr17v35x_startup(struct uart_port *port) + { ++ struct uart_8250_port *up = up_to_u8250p(port); ++ + /* + * First enable access to IER [7:5], ISR [5:4], FCR [5:4], + * MCR [7:5] and MSR [7:0] +@@ -199,7 +201,7 @@ static int xr17v35x_startup(struct uart_port *port) + * Make sure all interrups are masked until initialization is + * complete and the FIFOs are cleared + */ +- serial_port_out(port, UART_IER, 0); ++ serial8250_set_IER(up, 0); + + return serial8250_do_startup(port); + } +diff --git a/drivers/tty/serial/8250/8250_fsl.c b/drivers/tty/serial/8250/8250_fsl.c +index 8adfaa183f77..eaf148245a10 100644 +--- a/drivers/tty/serial/8250/8250_fsl.c ++++ b/drivers/tty/serial/8250/8250_fsl.c +@@ -58,7 +58,8 @@ int fsl8250_handle_irq(struct uart_port *port) + if ((orig_lsr & UART_LSR_OE) && (up->overrun_backoff_time_ms > 0)) { + unsigned long delay; + +- up->ier = port->serial_in(port, UART_IER); ++ up->ier = serial8250_in_IER(up); ++ + if (up->ier & (UART_IER_RLSI | UART_IER_RDI)) { + port->ops->stop_rx(port); + } else { +diff --git a/drivers/tty/serial/8250/8250_ingenic.c b/drivers/tty/serial/8250/8250_ingenic.c +index 2b2f5d8d24b9..2b78e6c394fb 100644 +--- a/drivers/tty/serial/8250/8250_ingenic.c ++++ b/drivers/tty/serial/8250/8250_ingenic.c +@@ -146,6 +146,7 @@ OF_EARLYCON_DECLARE(x1000_uart, "ingenic,x1000-uart", + + static void ingenic_uart_serial_out(struct uart_port *p, int offset, int value) + { ++ struct uart_8250_port *up = up_to_u8250p(p); + int ier; + + switch (offset) { +@@ -167,7 +168,7 @@ static void ingenic_uart_serial_out(struct uart_port *p, int offset, int value) + * If we have enabled modem status IRQs we should enable + * modem mode. + */ +- ier = p->serial_in(p, UART_IER); ++ ier = serial8250_in_IER(up); + + if (ier & UART_IER_MSI) + value |= UART_MCR_MDCE | UART_MCR_FCM; +diff --git a/drivers/tty/serial/8250/8250_mtk.c b/drivers/tty/serial/8250/8250_mtk.c +index fb1d5ec0940e..3e7203909d6a 100644 +--- a/drivers/tty/serial/8250/8250_mtk.c ++++ b/drivers/tty/serial/8250/8250_mtk.c +@@ -222,12 +222,40 @@ static void mtk8250_shutdown(struct uart_port *port) + + static void mtk8250_disable_intrs(struct uart_8250_port *up, int mask) + { +- serial_out(up, UART_IER, serial_in(up, UART_IER) & (~mask)); ++ struct uart_port *port = &up->port; ++ unsigned long flags; ++ bool is_console; ++ int ier; ++ ++ is_console = uart_console(port); ++ ++ if (is_console) ++ printk_cpu_sync_get_irqsave(flags); ++ ++ ier = serial_in(up, UART_IER); ++ serial_out(up, UART_IER, ier & (~mask)); ++ ++ if (is_console) ++ printk_cpu_sync_put_irqrestore(flags); + } + + static void mtk8250_enable_intrs(struct uart_8250_port *up, int mask) + { +- serial_out(up, UART_IER, serial_in(up, UART_IER) | mask); ++ struct uart_port *port = &up->port; ++ unsigned long flags; ++ bool is_console; ++ int ier; ++ ++ is_console = uart_console(port); ++ ++ if (is_console) ++ printk_cpu_sync_get_irqsave(flags); ++ ++ ier = serial_in(up, UART_IER); ++ serial_out(up, UART_IER, ier | mask); ++ ++ if (is_console) ++ printk_cpu_sync_put_irqrestore(flags); + } + + static void mtk8250_set_flow_ctrl(struct uart_8250_port *up, int mode) +diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c +index 0b04d810b3e6..2b8ad5176399 100644 +--- a/drivers/tty/serial/8250/8250_omap.c ++++ b/drivers/tty/serial/8250/8250_omap.c +@@ -330,7 +330,7 @@ static void omap8250_restore_regs(struct uart_8250_port *up) + /* drop TCR + TLR access, we setup XON/XOFF later */ + serial8250_out_MCR(up, mcr); + +- serial_out(up, UART_IER, up->ier); ++ serial8250_set_IER(up, up->ier); + + serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); + serial_dl_write(up, priv->quot); +@@ -520,7 +520,7 @@ static void omap_8250_pm(struct uart_port *port, unsigned int state, + serial_out(up, UART_EFR, efr | UART_EFR_ECB); + serial_out(up, UART_LCR, 0); + +- serial_out(up, UART_IER, (state != 0) ? UART_IERX_SLEEP : 0); ++ serial8250_set_IER(up, (state != 0) ? UART_IERX_SLEEP : 0); + serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); + serial_out(up, UART_EFR, efr); + serial_out(up, UART_LCR, 0); +@@ -703,7 +703,7 @@ static int omap_8250_startup(struct uart_port *port) + goto err; + + up->ier = UART_IER_RLSI | UART_IER_RDI; +- serial_out(up, UART_IER, up->ier); ++ serial8250_set_IER(up, up->ier); + + #ifdef CONFIG_PM + up->capabilities |= UART_CAP_RPM; +@@ -744,7 +744,7 @@ static void omap_8250_shutdown(struct uart_port *port) + serial_out(up, UART_OMAP_EFR2, 0x0); + + up->ier = 0; +- serial_out(up, UART_IER, 0); ++ serial8250_set_IER(up, 0); + + if (up->dma) + serial8250_release_dma(up); +@@ -792,7 +792,7 @@ static void omap_8250_unthrottle(struct uart_port *port) + up->dma->rx_dma(up); + up->ier |= UART_IER_RLSI | UART_IER_RDI; + port->read_status_mask |= UART_LSR_DR; +- serial_out(up, UART_IER, up->ier); ++ serial8250_set_IER(up, up->ier); + spin_unlock_irqrestore(&port->lock, flags); + + pm_runtime_mark_last_busy(port->dev); +@@ -883,7 +883,7 @@ static void __dma_rx_complete(void *param) + __dma_rx_do_complete(p); + if (!priv->throttled) { + p->ier |= UART_IER_RLSI | UART_IER_RDI; +- serial_out(p, UART_IER, p->ier); ++ serial8250_set_IER(p, p->ier); + if (!(priv->habit & UART_HAS_EFR2)) + omap_8250_rx_dma(p); + } +@@ -940,7 +940,7 @@ static int omap_8250_rx_dma(struct uart_8250_port *p) + * callback to run. + */ + p->ier &= ~(UART_IER_RLSI | UART_IER_RDI); +- serial_out(p, UART_IER, p->ier); ++ serial8250_set_IER(p, p->ier); + } + goto out; + } +@@ -1153,12 +1153,12 @@ static void am654_8250_handle_rx_dma(struct uart_8250_port *up, u8 iir, + * periodic timeouts, re-enable interrupts. + */ + up->ier &= ~(UART_IER_RLSI | UART_IER_RDI); +- serial_out(up, UART_IER, up->ier); ++ serial8250_set_IER(up, up->ier); + omap_8250_rx_dma_flush(up); + serial_in(up, UART_IIR); + serial_out(up, UART_OMAP_EFR2, 0x0); + up->ier |= UART_IER_RLSI | UART_IER_RDI; +- serial_out(up, UART_IER, up->ier); ++ serial8250_set_IER(up, up->ier); + } + } + +diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c +index 8efe31448df3..975c16267196 100644 +--- a/drivers/tty/serial/8250/8250_port.c ++++ b/drivers/tty/serial/8250/8250_port.c +@@ -744,7 +744,7 @@ static void serial8250_set_sleep(struct uart_8250_port *p, int sleep) + serial_out(p, UART_EFR, UART_EFR_ECB); + serial_out(p, UART_LCR, 0); + } +- serial_out(p, UART_IER, sleep ? UART_IERX_SLEEP : 0); ++ serial8250_set_IER(p, sleep ? UART_IERX_SLEEP : 0); + if (p->capabilities & UART_CAP_EFR) { + serial_out(p, UART_LCR, UART_LCR_CONF_MODE_B); + serial_out(p, UART_EFR, efr); +@@ -755,12 +755,29 @@ static void serial8250_set_sleep(struct uart_8250_port *p, int sleep) + serial8250_rpm_put(p); + } + +-static void serial8250_clear_IER(struct uart_8250_port *up) ++static unsigned int serial8250_clear_IER(struct uart_8250_port *up) + { ++ struct uart_port *port = &up->port; ++ unsigned int clearval = 0; ++ unsigned long flags; ++ bool is_console; ++ unsigned int prior; ++ ++ is_console = uart_console(port); ++ + if (up->capabilities & UART_CAP_UUE) +- serial_out(up, UART_IER, UART_IER_UUE); +- else +- serial_out(up, UART_IER, 0); ++ clearval = UART_IER_UUE; ++ ++ if (is_console) ++ printk_cpu_sync_get_irqsave(flags); ++ ++ prior = serial_in(up, UART_IER); ++ serial_out(up, UART_IER, clearval); ++ ++ if (is_console) ++ printk_cpu_sync_put_irqrestore(flags); ++ ++ return prior; + } + + #ifdef CONFIG_SERIAL_8250_RSA +@@ -1026,8 +1043,11 @@ static int broken_efr(struct uart_8250_port *up) + */ + static void autoconfig_16550a(struct uart_8250_port *up) + { ++ struct uart_port *port = &up->port; + unsigned char status1, status2; + unsigned int iersave; ++ unsigned long flags; ++ bool is_console; + + up->port.type = PORT_16550A; + up->capabilities |= UART_CAP_FIFO; +@@ -1139,6 +1159,11 @@ static void autoconfig_16550a(struct uart_8250_port *up) + return; + } + ++ is_console = uart_console(port); ++ ++ if (is_console) ++ printk_cpu_sync_get_irqsave(flags); ++ + /* + * Try writing and reading the UART_IER_UUE bit (b6). + * If it works, this is probably one of the Xscale platform's +@@ -1174,6 +1199,9 @@ static void autoconfig_16550a(struct uart_8250_port *up) + } + serial_out(up, UART_IER, iersave); + ++ if (is_console) ++ printk_cpu_sync_put_irqrestore(flags); ++ + /* + * We distinguish between 16550A and U6 16550A by counting + * how many bytes are in the FIFO. +@@ -1196,8 +1224,10 @@ static void autoconfig(struct uart_8250_port *up) + unsigned char status1, scratch, scratch2, scratch3; + unsigned char save_lcr, save_mcr; + struct uart_port *port = &up->port; ++ unsigned long cs_flags; + unsigned long flags; + unsigned int old_capabilities; ++ bool is_console; + + if (!port->iobase && !port->mapbase && !port->membase) + return; +@@ -1215,6 +1245,11 @@ static void autoconfig(struct uart_8250_port *up) + up->bugs = 0; + + if (!(port->flags & UPF_BUGGY_UART)) { ++ is_console = uart_console(port); ++ ++ if (is_console) ++ printk_cpu_sync_get_irqsave(cs_flags); ++ + /* + * Do a simple existence test first; if we fail this, + * there's no point trying anything else. +@@ -1244,6 +1279,10 @@ static void autoconfig(struct uart_8250_port *up) + #endif + scratch3 = serial_in(up, UART_IER) & 0x0f; + serial_out(up, UART_IER, scratch); ++ ++ if (is_console) ++ printk_cpu_sync_put_irqrestore(cs_flags); ++ + if (scratch2 != 0 || scratch3 != 0x0F) { + /* + * We failed; there's nothing here +@@ -1367,7 +1406,9 @@ static void autoconfig_irq(struct uart_8250_port *up) + unsigned char save_mcr, save_ier; + unsigned char save_ICP = 0; + unsigned int ICP = 0; ++ unsigned long flags; + unsigned long irqs; ++ bool is_console; + int irq; + + if (port->flags & UPF_FOURPORT) { +@@ -1377,8 +1418,12 @@ static void autoconfig_irq(struct uart_8250_port *up) + inb_p(ICP); + } + +- if (uart_console(port)) ++ is_console = uart_console(port); ++ ++ if (is_console) { + console_lock(); ++ printk_cpu_sync_get_irqsave(flags); ++ } + + /* forget possible initially masked and pending IRQ */ + probe_irq_off(probe_irq_on()); +@@ -1410,8 +1455,10 @@ static void autoconfig_irq(struct uart_8250_port *up) + if (port->flags & UPF_FOURPORT) + outb_p(save_ICP, ICP); + +- if (uart_console(port)) ++ if (is_console) { ++ printk_cpu_sync_put_irqrestore(flags); + console_unlock(); ++ } + + port->irq = (irq > 0) ? irq : 0; + } +@@ -1424,7 +1471,7 @@ static void serial8250_stop_rx(struct uart_port *port) + + up->ier &= ~(UART_IER_RLSI | UART_IER_RDI); + up->port.read_status_mask &= ~UART_LSR_DR; +- serial_port_out(port, UART_IER, up->ier); ++ serial8250_set_IER(up, up->ier); + + serial8250_rpm_put(up); + } +@@ -1454,7 +1501,7 @@ void serial8250_em485_stop_tx(struct uart_8250_port *p) + serial8250_clear_and_reinit_fifos(p); + + p->ier |= UART_IER_RLSI | UART_IER_RDI; +- serial_port_out(&p->port, UART_IER, p->ier); ++ serial8250_set_IER(p, p->ier); + } + } + EXPORT_SYMBOL_GPL(serial8250_em485_stop_tx); +@@ -1703,7 +1750,7 @@ static void serial8250_disable_ms(struct uart_port *port) + mctrl_gpio_disable_ms(up->gpios); + + up->ier &= ~UART_IER_MSI; +- serial_port_out(port, UART_IER, up->ier); ++ serial8250_set_IER(up, up->ier); + } + + static void serial8250_enable_ms(struct uart_port *port) +@@ -1719,7 +1766,7 @@ static void serial8250_enable_ms(struct uart_port *port) + up->ier |= UART_IER_MSI; + + serial8250_rpm_get(up); +- serial_port_out(port, UART_IER, up->ier); ++ serial8250_set_IER(up, up->ier); + serial8250_rpm_put(up); + } + +@@ -2174,8 +2221,7 @@ static void serial8250_put_poll_char(struct uart_port *port, + /* + * First save the IER then disable the interrupts + */ +- ier = serial_port_in(port, UART_IER); +- serial8250_clear_IER(up); ++ ier = serial8250_clear_IER(up); + + wait_for_xmitr(up, UART_LSR_BOTH_EMPTY); + /* +@@ -2188,7 +2234,7 @@ static void serial8250_put_poll_char(struct uart_port *port, + * and restore the IER + */ + wait_for_xmitr(up, UART_LSR_BOTH_EMPTY); +- serial_port_out(port, UART_IER, ier); ++ serial8250_set_IER(up, ier); + serial8250_rpm_put(up); + } + +@@ -2197,8 +2243,10 @@ static void serial8250_put_poll_char(struct uart_port *port, + int serial8250_do_startup(struct uart_port *port) + { + struct uart_8250_port *up = up_to_u8250p(port); ++ unsigned long cs_flags; + unsigned long flags; + unsigned char iir; ++ bool is_console; + int retval; + u16 lsr; + +@@ -2219,7 +2267,7 @@ int serial8250_do_startup(struct uart_port *port) + up->acr = 0; + serial_port_out(port, UART_LCR, UART_LCR_CONF_MODE_B); + serial_port_out(port, UART_EFR, UART_EFR_ECB); +- serial_port_out(port, UART_IER, 0); ++ serial8250_set_IER(up, 0); + serial_port_out(port, UART_LCR, 0); + serial_icr_write(up, UART_CSR, 0); /* Reset the UART */ + serial_port_out(port, UART_LCR, UART_LCR_CONF_MODE_B); +@@ -2229,7 +2277,7 @@ int serial8250_do_startup(struct uart_port *port) + + if (port->type == PORT_DA830) { + /* Reset the port */ +- serial_port_out(port, UART_IER, 0); ++ serial8250_set_IER(up, 0); + serial_port_out(port, UART_DA830_PWREMU_MGMT, 0); + mdelay(10); + +@@ -2328,6 +2376,8 @@ int serial8250_do_startup(struct uart_port *port) + if (retval) + goto out; + ++ is_console = uart_console(port); ++ + if (port->irq && !(up->port.flags & UPF_NO_THRE_TEST)) { + unsigned char iir1; + +@@ -2344,6 +2394,9 @@ int serial8250_do_startup(struct uart_port *port) + */ + spin_lock_irqsave(&port->lock, flags); + ++ if (is_console) ++ printk_cpu_sync_get_irqsave(cs_flags); ++ + wait_for_xmitr(up, UART_LSR_THRE); + serial_port_out_sync(port, UART_IER, UART_IER_THRI); + udelay(1); /* allow THRE to set */ +@@ -2354,6 +2407,9 @@ int serial8250_do_startup(struct uart_port *port) + iir = serial_port_in(port, UART_IIR); + serial_port_out(port, UART_IER, 0); + ++ if (is_console) ++ printk_cpu_sync_put_irqrestore(cs_flags); ++ + spin_unlock_irqrestore(&port->lock, flags); + + if (port->irqflags & IRQF_SHARED) +@@ -2408,10 +2464,14 @@ int serial8250_do_startup(struct uart_port *port) + * Do a quick test to see if we receive an interrupt when we enable + * the TX irq. + */ ++ if (is_console) ++ printk_cpu_sync_get_irqsave(cs_flags); + serial_port_out(port, UART_IER, UART_IER_THRI); + lsr = serial_port_in(port, UART_LSR); + iir = serial_port_in(port, UART_IIR); + serial_port_out(port, UART_IER, 0); ++ if (is_console) ++ printk_cpu_sync_put_irqrestore(cs_flags); + + if (lsr & UART_LSR_TEMT && iir & UART_IIR_NO_INT) { + if (!(up->bugs & UART_BUG_TXEN)) { +@@ -2443,7 +2503,7 @@ int serial8250_do_startup(struct uart_port *port) + if (up->dma) { + const char *msg = NULL; + +- if (uart_console(port)) ++ if (is_console) + msg = "forbid DMA for kernel console"; + else if (serial8250_request_dma(up)) + msg = "failed to request DMA"; +@@ -2494,7 +2554,7 @@ void serial8250_do_shutdown(struct uart_port *port) + */ + spin_lock_irqsave(&port->lock, flags); + up->ier = 0; +- serial_port_out(port, UART_IER, 0); ++ serial8250_set_IER(up, 0); + spin_unlock_irqrestore(&port->lock, flags); + + synchronize_irq(port->irq); +@@ -2856,7 +2916,7 @@ serial8250_do_set_termios(struct uart_port *port, struct ktermios *termios, + if (up->capabilities & UART_CAP_RTOIE) + up->ier |= UART_IER_RTOIE; + +- serial_port_out(port, UART_IER, up->ier); ++ serial8250_set_IER(up, up->ier); + + if (up->capabilities & UART_CAP_EFR) { + unsigned char efr = 0; +@@ -3321,7 +3381,7 @@ EXPORT_SYMBOL_GPL(serial8250_set_defaults); + + #ifdef CONFIG_SERIAL_8250_CONSOLE + +-static void serial8250_console_putchar(struct uart_port *port, unsigned char ch) ++static void serial8250_console_putchar_locked(struct uart_port *port, unsigned char ch) + { + struct uart_8250_port *up = up_to_u8250p(port); + +@@ -3329,6 +3389,18 @@ static void serial8250_console_putchar(struct uart_port *port, unsigned char ch) + serial_port_out(port, UART_TX, ch); + } + ++static void serial8250_console_putchar(struct uart_port *port, unsigned char ch) ++{ ++ struct uart_8250_port *up = up_to_u8250p(port); ++ unsigned long flags; ++ ++ wait_for_xmitr(up, UART_LSR_THRE); ++ ++ printk_cpu_sync_get_irqsave(flags); ++ serial8250_console_putchar_locked(port, ch); ++ printk_cpu_sync_put_irqrestore(flags); ++} ++ + /* + * Restore serial console when h/w power-off detected + */ +@@ -3355,6 +3427,32 @@ static void serial8250_console_restore(struct uart_8250_port *up) + serial8250_out_MCR(up, up->mcr | UART_MCR_DTR | UART_MCR_RTS); + } + ++void serial8250_console_write_atomic(struct uart_8250_port *up, ++ const char *s, unsigned int count) ++{ ++ struct uart_port *port = &up->port; ++ unsigned long flags; ++ unsigned int ier; ++ ++ printk_cpu_sync_get_irqsave(flags); ++ ++ touch_nmi_watchdog(); ++ ++ ier = serial8250_clear_IER(up); ++ ++ if (atomic_fetch_inc(&up->console_printing)) { ++ uart_console_write(port, "\n", 1, ++ serial8250_console_putchar_locked); ++ } ++ uart_console_write(port, s, count, serial8250_console_putchar_locked); ++ atomic_dec(&up->console_printing); ++ ++ wait_for_xmitr(up, UART_LSR_BOTH_EMPTY); ++ serial8250_set_IER(up, ier); ++ ++ printk_cpu_sync_put_irqrestore(flags); ++} ++ + /* + * Print a string to the serial port using the device FIFO + * +@@ -3400,20 +3498,15 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s, + struct uart_port *port = &up->port; + unsigned long flags; + unsigned int ier, use_fifo; +- int locked = 1; + + touch_nmi_watchdog(); + +- if (oops_in_progress) +- locked = spin_trylock_irqsave(&port->lock, flags); +- else +- spin_lock_irqsave(&port->lock, flags); ++ spin_lock_irqsave(&port->lock, flags); + + /* + * First save the IER then disable the interrupts + */ +- ier = serial_port_in(port, UART_IER); +- serial8250_clear_IER(up); ++ ier = serial8250_clear_IER(up); + + /* check scratch reg to see if port powered off during system sleep */ + if (up->canary && (up->canary != serial_port_in(port, UART_SCR))) { +@@ -3447,10 +3540,12 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s, + */ + !(up->port.flags & UPF_CONS_FLOW); + ++ atomic_inc(&up->console_printing); + if (likely(use_fifo)) + serial8250_console_fifo_write(up, s, count); + else + uart_console_write(port, s, count, serial8250_console_putchar); ++ atomic_dec(&up->console_printing); + + /* + * Finally, wait for transmitter to become empty +@@ -3463,8 +3558,7 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s, + if (em485->tx_stopped) + up->rs485_stop_tx(up); + } +- +- serial_port_out(port, UART_IER, ier); ++ serial8250_set_IER(up, ier); + + /* + * The receive handling will happen properly because the +@@ -3476,8 +3570,7 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s, + if (up->msr_saved_flags) + serial8250_modem_status(up); + +- if (locked) +- spin_unlock_irqrestore(&port->lock, flags); ++ spin_unlock_irqrestore(&port->lock, flags); + } + + static unsigned int probe_baud(struct uart_port *port) +@@ -3497,6 +3590,7 @@ static unsigned int probe_baud(struct uart_port *port) + + int serial8250_console_setup(struct uart_port *port, char *options, bool probe) + { ++ struct uart_8250_port *up = up_to_u8250p(port); + int baud = 9600; + int bits = 8; + int parity = 'n'; +@@ -3506,6 +3600,8 @@ int serial8250_console_setup(struct uart_port *port, char *options, bool probe) + if (!port->iobase && !port->membase) + return -ENODEV; + ++ atomic_set(&up->console_printing, 0); ++ + if (options) + uart_parse_options(options, &baud, &parity, &bits, &flow); + else if (probe) +diff --git a/drivers/tty/serial/8250/Kconfig b/drivers/tty/serial/8250/Kconfig +index 583a340f9934..1f31320820a6 100644 +--- a/drivers/tty/serial/8250/Kconfig ++++ b/drivers/tty/serial/8250/Kconfig +@@ -9,6 +9,7 @@ config SERIAL_8250 + depends on !S390 + select SERIAL_CORE + select SERIAL_MCTRL_GPIO if GPIOLIB ++ select HAVE_ATOMIC_CONSOLE + help + This selects whether you want to include the driver for the standard + serial ports. The standard answer is Y. People who might say N +diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h +index 79b328861c5f..35f44352e641 100644 +--- a/include/linux/serial_8250.h ++++ b/include/linux/serial_8250.h +@@ -7,6 +7,7 @@ + #ifndef _LINUX_SERIAL_8250_H + #define _LINUX_SERIAL_8250_H + ++#include <linux/atomic.h> + #include <linux/serial_core.h> + #include <linux/serial_reg.h> + #include <linux/platform_device.h> +@@ -124,6 +125,8 @@ struct uart_8250_port { + #define MSR_SAVE_FLAGS UART_MSR_ANY_DELTA + unsigned char msr_saved_flags; + ++ atomic_t console_printing; ++ + struct uart_8250_dma *dma; + const struct uart_8250_ops *ops; + +@@ -179,6 +182,8 @@ void serial8250_init_port(struct uart_8250_port *up); + void serial8250_set_defaults(struct uart_8250_port *up); + void serial8250_console_write(struct uart_8250_port *up, const char *s, + unsigned int count); ++void serial8250_console_write_atomic(struct uart_8250_port *up, const char *s, ++ unsigned int count); + int serial8250_console_setup(struct uart_port *port, char *options, bool probe); + int serial8250_console_exit(struct uart_port *port); + +-- +2.43.0 + diff --git a/debian/patches-rt/0022-printk-avoid-preempt_disable-for-PREEMPT_RT.patch b/debian/patches-rt/0022-printk-avoid-preempt_disable-for-PREEMPT_RT.patch new file mode 100644 index 000000000..ff28f79d9 --- /dev/null +++ b/debian/patches-rt/0022-printk-avoid-preempt_disable-for-PREEMPT_RT.patch @@ -0,0 +1,96 @@ +From a549c0dff2a1a3b11b40abc969358d8cb367871d Mon Sep 17 00:00:00 2001 +From: John Ogness <john.ogness@linutronix.de> +Date: Fri, 4 Feb 2022 16:01:17 +0106 +Subject: [PATCH 22/62] printk: avoid preempt_disable() for PREEMPT_RT +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.1/older/patches-6.1.69-rt21.tar.xz + +During non-normal operation, printk() calls will attempt to +write the messages directly to the consoles. This involves +using console_trylock() to acquire @console_sem. + +Preemption is disabled while directly printing to the consoles +in order to ensure that the printing task is not scheduled away +while holding @console_sem, thus blocking all other printers +and causing delays in printing. + +Commit fd5f7cde1b85 ("printk: Never set console_may_schedule in +console_trylock()") specifically reverted a previous attempt at +allowing preemption while printing. + +However, on PREEMPT_RT systems, disabling preemption while +printing is not allowed because console drivers typically +acquire a spin lock (which under PREEMPT_RT is an rtmutex). +Since direct printing is only used during early boot and +non-panic dumps, the risks of delayed print output for these +scenarios will be accepted under PREEMPT_RT. + +Signed-off-by: John Ogness <john.ogness@linutronix.de> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +--- + kernel/printk/printk.c | 21 ++++++++++++++++++++- + 1 file changed, 20 insertions(+), 1 deletion(-) + +diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c +index 73b1727087c7..3d0ff49cca29 100644 +--- a/kernel/printk/printk.c ++++ b/kernel/printk/printk.c +@@ -1981,6 +1981,7 @@ static int console_lock_spinning_disable_and_check(void) + return 1; + } + ++#if !IS_ENABLED(CONFIG_PREEMPT_RT) + /** + * console_trylock_spinning - try to get console_lock by busy waiting + * +@@ -2054,6 +2055,7 @@ static int console_trylock_spinning(void) + + return 1; + } ++#endif /* CONFIG_PREEMPT_RT */ + + /* + * Call the specified console driver, asking it to write out the specified +@@ -2393,6 +2395,18 @@ asmlinkage int vprintk_emit(int facility, int level, + + /* If called from the scheduler, we can not call up(). */ + if (!in_sched && allow_direct_printing()) { ++#if IS_ENABLED(CONFIG_PREEMPT_RT) ++ /* ++ * Use the non-spinning trylock since PREEMPT_RT does not ++ * support console lock handovers. ++ * ++ * Direct printing will most likely involve taking spinlocks. ++ * For PREEMPT_RT, this is only allowed if in a preemptible ++ * context. ++ */ ++ if (preemptible() && console_trylock()) ++ console_unlock(); ++#else + /* + * The caller may be holding system-critical or + * timing-sensitive locks. Disable preemption during direct +@@ -2410,6 +2424,7 @@ asmlinkage int vprintk_emit(int facility, int level, + if (console_trylock_spinning()) + console_unlock(); + preempt_enable(); ++#endif + } + + if (in_sched) +@@ -3119,8 +3134,12 @@ static bool console_emit_next_record_transferable(struct console *con, char *tex + /* + * Handovers are only supported if threaded printers are atomically + * blocked. The context taking over the console_lock may be atomic. ++ * ++ * PREEMPT_RT also does not support handovers because the spinning ++ * waiter can cause large latencies. + */ +- if (!console_kthreads_atomically_blocked()) { ++ if (!console_kthreads_atomically_blocked() || ++ IS_ENABLED(CONFIG_PREEMPT_RT)) { + *handover = false; + handover = NULL; + } +-- +2.43.0 + diff --git a/debian/patches-rt/0023-drm-i915-Use-preempt_disable-enable_rt-where-recomme.patch b/debian/patches-rt/0023-drm-i915-Use-preempt_disable-enable_rt-where-recomme.patch new file mode 100644 index 000000000..dcee3a6dc --- /dev/null +++ b/debian/patches-rt/0023-drm-i915-Use-preempt_disable-enable_rt-where-recomme.patch @@ -0,0 +1,62 @@ +From f99a0874dccc1ba9f9ead4473ba09603d10946b4 Mon Sep 17 00:00:00 2001 +From: Mike Galbraith <umgwanakikbuti@gmail.com> +Date: Sat, 27 Feb 2016 08:09:11 +0100 +Subject: [PATCH 23/62] drm/i915: Use preempt_disable/enable_rt() where + recommended +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.1/older/patches-6.1.69-rt21.tar.xz + +Mario Kleiner suggest in commit + ad3543ede630f ("drm/intel: Push get_scanout_position() timestamping into kms driver.") + +a spots where preemption should be disabled on PREEMPT_RT. The +difference is that on PREEMPT_RT the intel_uncore::lock disables neither +preemption nor interrupts and so region remains preemptible. + +The area covers only register reads and writes. The part that worries me +is: +- __intel_get_crtc_scanline() the worst case is 100us if no match is + found. + +- intel_crtc_scanlines_since_frame_timestamp() not sure how long this + may take in the worst case. + +It was in the RT queue for a while and nobody complained. +Disable preemption on PREEPMPT_RT during timestamping. + +[bigeasy: patch description.] + +Cc: Mario Kleiner <mario.kleiner.de@gmail.com> +Signed-off-by: Mike Galbraith <umgwanakikbuti@gmail.com> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +--- + drivers/gpu/drm/i915/i915_irq.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c +index f93ffa6626a5..6e9d033cf808 100644 +--- a/drivers/gpu/drm/i915/i915_irq.c ++++ b/drivers/gpu/drm/i915/i915_irq.c +@@ -917,7 +917,8 @@ static bool i915_get_crtc_scanoutpos(struct drm_crtc *_crtc, + */ + spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); + +- /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */ ++ if (IS_ENABLED(CONFIG_PREEMPT_RT)) ++ preempt_disable(); + + /* Get optional system timestamp before query. */ + if (stime) +@@ -981,7 +982,8 @@ static bool i915_get_crtc_scanoutpos(struct drm_crtc *_crtc, + if (etime) + *etime = ktime_get(); + +- /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */ ++ if (IS_ENABLED(CONFIG_PREEMPT_RT)) ++ preempt_enable(); + + spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); + +-- +2.43.0 + diff --git a/debian/patches-rt/0024-drm-i915-Don-t-disable-interrupts-on-PREEMPT_RT-duri.patch b/debian/patches-rt/0024-drm-i915-Don-t-disable-interrupts-on-PREEMPT_RT-duri.patch new file mode 100644 index 000000000..c05066850 --- /dev/null +++ b/debian/patches-rt/0024-drm-i915-Don-t-disable-interrupts-on-PREEMPT_RT-duri.patch @@ -0,0 +1,91 @@ +From 7540de1a42a1a7409b4367b67c76480f7e96d25b Mon Sep 17 00:00:00 2001 +From: Mike Galbraith <umgwanakikbuti@gmail.com> +Date: Sat, 27 Feb 2016 09:01:42 +0100 +Subject: [PATCH 24/62] drm/i915: Don't disable interrupts on PREEMPT_RT during + atomic updates +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.1/older/patches-6.1.69-rt21.tar.xz + +Commit + 8d7849db3eab7 ("drm/i915: Make sprite updates atomic") + +started disabling interrupts across atomic updates. This breaks on PREEMPT_RT +because within this section the code attempt to acquire spinlock_t locks which +are sleeping locks on PREEMPT_RT. + +According to the comment the interrupts are disabled to avoid random delays and +not required for protection or synchronisation. +If this needs to happen with disabled interrupts on PREEMPT_RT, and the +whole section is restricted to register access then all sleeping locks +need to be acquired before interrupts are disabled and some function +maybe moved after enabling interrupts again. +This includes: +- prepare_to_wait() + finish_wait() due its wake queue. +- drm_crtc_vblank_put() -> vblank_disable_fn() drm_device::vbl_lock. +- skl_pfit_enable(), intel_update_plane(), vlv_atomic_update_fifo() and + maybe others due to intel_uncore::lock +- drm_crtc_arm_vblank_event() due to drm_device::event_lock and + drm_device::vblank_time_lock. + +Don't disable interrupts on PREEMPT_RT during atomic updates. + +[bigeasy: drop local locks, commit message] + +Signed-off-by: Mike Galbraith <umgwanakikbuti@gmail.com> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +--- + drivers/gpu/drm/i915/display/intel_crtc.c | 15 ++++++++++----- + 1 file changed, 10 insertions(+), 5 deletions(-) + +diff --git a/drivers/gpu/drm/i915/display/intel_crtc.c b/drivers/gpu/drm/i915/display/intel_crtc.c +index 6792a9056f46..43cedfef104f 100644 +--- a/drivers/gpu/drm/i915/display/intel_crtc.c ++++ b/drivers/gpu/drm/i915/display/intel_crtc.c +@@ -521,7 +521,8 @@ void intel_pipe_update_start(struct intel_crtc_state *new_crtc_state) + */ + intel_psr_wait_for_idle_locked(new_crtc_state); + +- local_irq_disable(); ++ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) ++ local_irq_disable(); + + crtc->debug.min_vbl = min; + crtc->debug.max_vbl = max; +@@ -546,11 +547,13 @@ void intel_pipe_update_start(struct intel_crtc_state *new_crtc_state) + break; + } + +- local_irq_enable(); ++ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) ++ local_irq_enable(); + + timeout = schedule_timeout(timeout); + +- local_irq_disable(); ++ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) ++ local_irq_disable(); + } + + finish_wait(wq, &wait); +@@ -583,7 +586,8 @@ void intel_pipe_update_start(struct intel_crtc_state *new_crtc_state) + return; + + irq_disable: +- local_irq_disable(); ++ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) ++ local_irq_disable(); + } + + #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_VBLANK_EVADE) +@@ -684,7 +688,8 @@ void intel_pipe_update_end(struct intel_crtc_state *new_crtc_state) + */ + intel_vrr_send_push(new_crtc_state); + +- local_irq_enable(); ++ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) ++ local_irq_enable(); + + if (intel_vgpu_active(dev_priv)) + return; +-- +2.43.0 + diff --git a/debian/patches-rt/0025-drm-i915-Don-t-check-for-atomic-context-on-PREEMPT_R.patch b/debian/patches-rt/0025-drm-i915-Don-t-check-for-atomic-context-on-PREEMPT_R.patch new file mode 100644 index 000000000..2bbcdfed9 --- /dev/null +++ b/debian/patches-rt/0025-drm-i915-Don-t-check-for-atomic-context-on-PREEMPT_R.patch @@ -0,0 +1,36 @@ +From d9970290d810087f44a37de7ae6f3638ecddd3a0 Mon Sep 17 00:00:00 2001 +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Mon, 25 Oct 2021 15:05:18 +0200 +Subject: [PATCH 25/62] drm/i915: Don't check for atomic context on PREEMPT_RT +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.1/older/patches-6.1.69-rt21.tar.xz + +The !in_atomic() check in _wait_for_atomic() triggers on PREEMPT_RT +because the uncore::lock is a spinlock_t and does not disable +preemption or interrupts. + +Changing the uncore:lock to a raw_spinlock_t doubles the worst case +latency on an otherwise idle testbox during testing. Therefore I'm +currently unsure about changing this. + +Link: https://lore.kernel.org/all/20211006164628.s2mtsdd2jdbfyf7g@linutronix.de/ +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +--- + drivers/gpu/drm/i915/i915_utils.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h +index 6c14d13364bf..de58855e6926 100644 +--- a/drivers/gpu/drm/i915/i915_utils.h ++++ b/drivers/gpu/drm/i915/i915_utils.h +@@ -294,7 +294,7 @@ wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms) + #define wait_for(COND, MS) _wait_for((COND), (MS) * 1000, 10, 1000) + + /* If CONFIG_PREEMPT_COUNT is disabled, in_atomic() always reports false. */ +-#if defined(CONFIG_DRM_I915_DEBUG) && defined(CONFIG_PREEMPT_COUNT) ++#if defined(CONFIG_DRM_I915_DEBUG) && defined(CONFIG_PREEMPT_COUNT) && !defined(CONFIG_PREEMPT_RT) + # define _WAIT_FOR_ATOMIC_CHECK(ATOMIC) WARN_ON_ONCE((ATOMIC) && !in_atomic()) + #else + # define _WAIT_FOR_ATOMIC_CHECK(ATOMIC) do { } while (0) +-- +2.43.0 + diff --git a/debian/patches-rt/0026-drm-i915-Disable-tracing-points-on-PREEMPT_RT.patch b/debian/patches-rt/0026-drm-i915-Disable-tracing-points-on-PREEMPT_RT.patch new file mode 100644 index 000000000..bcd36cace --- /dev/null +++ b/debian/patches-rt/0026-drm-i915-Disable-tracing-points-on-PREEMPT_RT.patch @@ -0,0 +1,51 @@ +From 05b1ad300df55e576476a0dee533c90068480372 Mon Sep 17 00:00:00 2001 +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Thu, 6 Dec 2018 09:52:20 +0100 +Subject: [PATCH 26/62] drm/i915: Disable tracing points on PREEMPT_RT +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.1/older/patches-6.1.69-rt21.tar.xz + +Luca Abeni reported this: +| BUG: scheduling while atomic: kworker/u8:2/15203/0x00000003 +| CPU: 1 PID: 15203 Comm: kworker/u8:2 Not tainted 4.19.1-rt3 #10 +| Call Trace: +| rt_spin_lock+0x3f/0x50 +| gen6_read32+0x45/0x1d0 [i915] +| g4x_get_vblank_counter+0x36/0x40 [i915] +| trace_event_raw_event_i915_pipe_update_start+0x7d/0xf0 [i915] + +The tracing events use trace_i915_pipe_update_start() among other events +use functions acquire spinlock_t locks which are transformed into +sleeping locks on PREEMPT_RT. A few trace points use +intel_get_crtc_scanline(), others use ->get_vblank_counter() wich also +might acquire a sleeping locks on PREEMPT_RT. +At the time the arguments are evaluated within trace point, preemption +is disabled and so the locks must not be acquired on PREEMPT_RT. + +Based on this I don't see any other way than disable trace points on +PREMPT_RT. + +Reported-by: Luca Abeni <lucabe72@gmail.com> +Cc: Steven Rostedt <rostedt@goodmis.org> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +--- + drivers/gpu/drm/i915/i915_trace.h | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h +index 37b5c9e9d260..1434485cb536 100644 +--- a/drivers/gpu/drm/i915/i915_trace.h ++++ b/drivers/gpu/drm/i915/i915_trace.h +@@ -6,6 +6,10 @@ + #if !defined(_I915_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ) + #define _I915_TRACE_H_ + ++#ifdef CONFIG_PREEMPT_RT ++#define NOTRACE ++#endif ++ + #include <linux/stringify.h> + #include <linux/types.h> + #include <linux/tracepoint.h> +-- +2.43.0 + diff --git a/debian/patches-rt/0027-drm-i915-skip-DRM_I915_LOW_LEVEL_TRACEPOINTS-with-NO.patch b/debian/patches-rt/0027-drm-i915-skip-DRM_I915_LOW_LEVEL_TRACEPOINTS-with-NO.patch new file mode 100644 index 000000000..77fa1b420 --- /dev/null +++ b/debian/patches-rt/0027-drm-i915-skip-DRM_I915_LOW_LEVEL_TRACEPOINTS-with-NO.patch @@ -0,0 +1,35 @@ +From 1fcbddeff5b9a56382a6ba0aba49578f8cdf9aa4 Mon Sep 17 00:00:00 2001 +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Wed, 19 Dec 2018 10:47:02 +0100 +Subject: [PATCH 27/62] drm/i915: skip DRM_I915_LOW_LEVEL_TRACEPOINTS with + NOTRACE +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.1/older/patches-6.1.69-rt21.tar.xz + +The order of the header files is important. If this header file is +included after tracepoint.h was included then the NOTRACE here becomes a +nop. Currently this happens for two .c files which use the tracepoitns +behind DRM_I915_LOW_LEVEL_TRACEPOINTS. + +Cc: Steven Rostedt <rostedt@goodmis.org> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +--- + drivers/gpu/drm/i915/i915_trace.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h +index 1434485cb536..73f29d8008f0 100644 +--- a/drivers/gpu/drm/i915/i915_trace.h ++++ b/drivers/gpu/drm/i915/i915_trace.h +@@ -327,7 +327,7 @@ DEFINE_EVENT(i915_request, i915_request_add, + TP_ARGS(rq) + ); + +-#if defined(CONFIG_DRM_I915_LOW_LEVEL_TRACEPOINTS) ++#if defined(CONFIG_DRM_I915_LOW_LEVEL_TRACEPOINTS) && !defined(NOTRACE) + DEFINE_EVENT(i915_request, i915_request_guc_submit, + TP_PROTO(struct i915_request *rq), + TP_ARGS(rq) +-- +2.43.0 + diff --git a/debian/patches-rt/0028-drm-i915-gt-Queue-and-wait-for-the-irq_work-item.patch b/debian/patches-rt/0028-drm-i915-gt-Queue-and-wait-for-the-irq_work-item.patch new file mode 100644 index 000000000..61bd48e8b --- /dev/null +++ b/debian/patches-rt/0028-drm-i915-gt-Queue-and-wait-for-the-irq_work-item.patch @@ -0,0 +1,48 @@ +From 6dfc680fe2808eaf10a9feed7e3116df60b6032f Mon Sep 17 00:00:00 2001 +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Wed, 8 Sep 2021 17:18:00 +0200 +Subject: [PATCH 28/62] drm/i915/gt: Queue and wait for the irq_work item. +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.1/older/patches-6.1.69-rt21.tar.xz + +Disabling interrupts and invoking the irq_work function directly breaks +on PREEMPT_RT. +PREEMPT_RT does not invoke all irq_work from hardirq context because +some of the user have spinlock_t locking in the callback function. +These locks are then turned into a sleeping locks which can not be +acquired with disabled interrupts. + +Using irq_work_queue() has the benefit that the irqwork will be invoked +in the regular context. In general there is "no" delay between enqueuing +the callback and its invocation because the interrupt is raised right +away on architectures which support it (which includes x86). + +Use irq_work_queue() + irq_work_sync() instead invoking the callback +directly. + +Reported-by: Clark Williams <williams@redhat.com> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Reviewed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> +--- + drivers/gpu/drm/i915/gt/intel_breadcrumbs.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c +index ecc990ec1b95..8d04b10681f0 100644 +--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c ++++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c +@@ -312,10 +312,9 @@ void __intel_breadcrumbs_park(struct intel_breadcrumbs *b) + /* Kick the work once more to drain the signalers, and disarm the irq */ + irq_work_sync(&b->irq_work); + while (READ_ONCE(b->irq_armed) && !atomic_read(&b->active)) { +- local_irq_disable(); +- signal_irq_work(&b->irq_work); +- local_irq_enable(); ++ irq_work_queue(&b->irq_work); + cond_resched(); ++ irq_work_sync(&b->irq_work); + } + } + +-- +2.43.0 + diff --git a/debian/patches-rt/0029-drm-i915-gt-Use-spin_lock_irq-instead-of-local_irq_d.patch b/debian/patches-rt/0029-drm-i915-gt-Use-spin_lock_irq-instead-of-local_irq_d.patch new file mode 100644 index 000000000..d7696f54a --- /dev/null +++ b/debian/patches-rt/0029-drm-i915-gt-Use-spin_lock_irq-instead-of-local_irq_d.patch @@ -0,0 +1,95 @@ +From 6ca6d59038e0a61a7bb4904310525b1df57a2867 Mon Sep 17 00:00:00 2001 +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Wed, 8 Sep 2021 19:03:41 +0200 +Subject: [PATCH 29/62] drm/i915/gt: Use spin_lock_irq() instead of + local_irq_disable() + spin_lock() +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.1/older/patches-6.1.69-rt21.tar.xz + +execlists_dequeue() is invoked from a function which uses +local_irq_disable() to disable interrupts so the spin_lock() behaves +like spin_lock_irq(). +This breaks PREEMPT_RT because local_irq_disable() + spin_lock() is not +the same as spin_lock_irq(). + +execlists_dequeue_irq() and execlists_dequeue() has each one caller +only. If intel_engine_cs::active::lock is acquired and released with the +_irq suffix then it behaves almost as if execlists_dequeue() would be +invoked with disabled interrupts. The difference is the last part of the +function which is then invoked with enabled interrupts. +I can't tell if this makes a difference. From looking at it, it might +work to move the last unlock at the end of the function as I didn't find +anything that would acquire the lock again. + +Reported-by: Clark Williams <williams@redhat.com> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Reviewed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> +--- + .../drm/i915/gt/intel_execlists_submission.c | 17 +++++------------ + 1 file changed, 5 insertions(+), 12 deletions(-) + +diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +index f903ee1ce06e..f54059b63ea9 100644 +--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c ++++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +@@ -1302,7 +1302,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) + * and context switches) submission. + */ + +- spin_lock(&sched_engine->lock); ++ spin_lock_irq(&sched_engine->lock); + + /* + * If the queue is higher priority than the last +@@ -1402,7 +1402,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) + * Even if ELSP[1] is occupied and not worthy + * of timeslices, our queue might be. + */ +- spin_unlock(&sched_engine->lock); ++ spin_unlock_irq(&sched_engine->lock); + return; + } + } +@@ -1428,7 +1428,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) + + if (last && !can_merge_rq(last, rq)) { + spin_unlock(&ve->base.sched_engine->lock); +- spin_unlock(&engine->sched_engine->lock); ++ spin_unlock_irq(&engine->sched_engine->lock); + return; /* leave this for another sibling */ + } + +@@ -1590,7 +1590,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) + */ + sched_engine->queue_priority_hint = queue_prio(sched_engine); + i915_sched_engine_reset_on_empty(sched_engine); +- spin_unlock(&sched_engine->lock); ++ spin_unlock_irq(&sched_engine->lock); + + /* + * We can skip poking the HW if we ended up with exactly the same set +@@ -1616,13 +1616,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine) + } + } + +-static void execlists_dequeue_irq(struct intel_engine_cs *engine) +-{ +- local_irq_disable(); /* Suspend interrupts across request submission */ +- execlists_dequeue(engine); +- local_irq_enable(); /* flush irq_work (e.g. breadcrumb enabling) */ +-} +- + static void clear_ports(struct i915_request **ports, int count) + { + memset_p((void **)ports, NULL, count); +@@ -2476,7 +2469,7 @@ static void execlists_submission_tasklet(struct tasklet_struct *t) + } + + if (!engine->execlists.pending[0]) { +- execlists_dequeue_irq(engine); ++ execlists_dequeue(engine); + start_timeslice(engine); + } + +-- +2.43.0 + diff --git a/debian/patches-rt/0030-drm-i915-Drop-the-irqs_disabled-check.patch b/debian/patches-rt/0030-drm-i915-Drop-the-irqs_disabled-check.patch new file mode 100644 index 000000000..4d49e26f7 --- /dev/null +++ b/debian/patches-rt/0030-drm-i915-Drop-the-irqs_disabled-check.patch @@ -0,0 +1,45 @@ +From 6779399c1acfc91f6e9b0bd4dc4abb4f3cb30c78 Mon Sep 17 00:00:00 2001 +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Fri, 1 Oct 2021 20:01:03 +0200 +Subject: [PATCH 30/62] drm/i915: Drop the irqs_disabled() check +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.1/older/patches-6.1.69-rt21.tar.xz + +The !irqs_disabled() check triggers on PREEMPT_RT even with +i915_sched_engine::lock acquired. The reason is the lock is transformed +into a sleeping lock on PREEMPT_RT and does not disable interrupts. + +There is no need to check for disabled interrupts. The lockdep +annotation below already check if the lock has been acquired by the +caller and will yell if the interrupts are not disabled. + +Remove the !irqs_disabled() check. + +Reported-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +--- + drivers/gpu/drm/i915/i915_request.c | 2 -- + 1 file changed, 2 deletions(-) + +diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c +index 7ce126a01cbf..64a032dfaa90 100644 +--- a/drivers/gpu/drm/i915/i915_request.c ++++ b/drivers/gpu/drm/i915/i915_request.c +@@ -609,7 +609,6 @@ bool __i915_request_submit(struct i915_request *request) + + RQ_TRACE(request, "\n"); + +- GEM_BUG_ON(!irqs_disabled()); + lockdep_assert_held(&engine->sched_engine->lock); + + /* +@@ -718,7 +717,6 @@ void __i915_request_unsubmit(struct i915_request *request) + */ + RQ_TRACE(request, "\n"); + +- GEM_BUG_ON(!irqs_disabled()); + lockdep_assert_held(&engine->sched_engine->lock); + + /* +-- +2.43.0 + diff --git a/debian/patches-rt/0031-Revert-drm-i915-Depend-on-PREEMPT_RT.patch b/debian/patches-rt/0031-Revert-drm-i915-Depend-on-PREEMPT_RT.patch new file mode 100644 index 000000000..9ef52335e --- /dev/null +++ b/debian/patches-rt/0031-Revert-drm-i915-Depend-on-PREEMPT_RT.patch @@ -0,0 +1,29 @@ +From fe8c8e1f1ec5c61814ee0c4c90a2cef9d35ecad6 Mon Sep 17 00:00:00 2001 +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Mon, 21 Feb 2022 17:59:14 +0100 +Subject: [PATCH 31/62] Revert "drm/i915: Depend on !PREEMPT_RT." +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.1/older/patches-6.1.69-rt21.tar.xz + +Once the known issues are addressed, it should be safe to enable the +driver. + +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +--- + drivers/gpu/drm/i915/Kconfig | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig +index 6b10868ec72f..1fbdb7b4e6e1 100644 +--- a/drivers/gpu/drm/i915/Kconfig ++++ b/drivers/gpu/drm/i915/Kconfig +@@ -3,7 +3,6 @@ config DRM_I915 + tristate "Intel 8xx/9xx/G3x/G4x/HD Graphics" + depends on DRM + depends on X86 && PCI +- depends on !PREEMPT_RT + select INTEL_GTT if X86 + select INTERVAL_TREE + # we need shmfs for the swappable backing store, and in particular +-- +2.43.0 + diff --git a/debian/patches-rt/0032-sched-Add-support-for-lazy-preemption.patch b/debian/patches-rt/0032-sched-Add-support-for-lazy-preemption.patch new file mode 100644 index 000000000..54c21bbb9 --- /dev/null +++ b/debian/patches-rt/0032-sched-Add-support-for-lazy-preemption.patch @@ -0,0 +1,714 @@ +From 87194c420f8ef3b1a8b9b63ae640180e2414e8c4 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner <tglx@linutronix.de> +Date: Fri, 26 Oct 2012 18:50:54 +0100 +Subject: [PATCH 32/62] sched: Add support for lazy preemption +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.1/older/patches-6.1.69-rt21.tar.xz + +It has become an obsession to mitigate the determinism vs. throughput +loss of RT. Looking at the mainline semantics of preemption points +gives a hint why RT sucks throughput wise for ordinary SCHED_OTHER +tasks. One major issue is the wakeup of tasks which are right away +preempting the waking task while the waking task holds a lock on which +the woken task will block right after having preempted the wakee. In +mainline this is prevented due to the implicit preemption disable of +spin/rw_lock held regions. On RT this is not possible due to the fully +preemptible nature of sleeping spinlocks. + +Though for a SCHED_OTHER task preempting another SCHED_OTHER task this +is really not a correctness issue. RT folks are concerned about +SCHED_FIFO/RR tasks preemption and not about the purely fairness +driven SCHED_OTHER preemption latencies. + +So I introduced a lazy preemption mechanism which only applies to +SCHED_OTHER tasks preempting another SCHED_OTHER task. Aside of the +existing preempt_count each tasks sports now a preempt_lazy_count +which is manipulated on lock acquiry and release. This is slightly +incorrect as for lazyness reasons I coupled this on +migrate_disable/enable so some other mechanisms get the same treatment +(e.g. get_cpu_light). + +Now on the scheduler side instead of setting NEED_RESCHED this sets +NEED_RESCHED_LAZY in case of a SCHED_OTHER/SCHED_OTHER preemption and +therefor allows to exit the waking task the lock held region before +the woken task preempts. That also works better for cross CPU wakeups +as the other side can stay in the adaptive spinning loop. + +For RT class preemption there is no change. This simply sets +NEED_RESCHED and forgoes the lazy preemption counter. + + Initial test do not expose any observable latency increasement, but +history shows that I've been proven wrong before :) + +The lazy preemption mode is per default on, but with +CONFIG_SCHED_DEBUG enabled it can be disabled via: + + # echo NO_PREEMPT_LAZY >/sys/kernel/debug/sched_features + +and reenabled via + + # echo PREEMPT_LAZY >/sys/kernel/debug/sched_features + +The test results so far are very machine and workload dependent, but +there is a clear trend that it enhances the non RT workload +performance. + +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +--- + include/linux/preempt.h | 54 ++++++++++++++++++++++-- + include/linux/sched.h | 37 +++++++++++++++++ + include/linux/thread_info.h | 12 +++++- + include/linux/trace_events.h | 10 ++++- + kernel/Kconfig.preempt | 6 +++ + kernel/sched/core.c | 79 +++++++++++++++++++++++++++++++++++- + kernel/sched/fair.c | 16 ++++---- + kernel/sched/features.h | 3 ++ + kernel/sched/sched.h | 9 ++++ + kernel/trace/trace.c | 50 ++++++++++++++--------- + kernel/trace/trace_events.c | 1 + + kernel/trace/trace_output.c | 18 +++++++- + 12 files changed, 260 insertions(+), 35 deletions(-) + +diff --git a/include/linux/preempt.h b/include/linux/preempt.h +index 8cfcc5d45451..9fc4c4bb320f 100644 +--- a/include/linux/preempt.h ++++ b/include/linux/preempt.h +@@ -207,6 +207,20 @@ extern void preempt_count_sub(int val); + #define preempt_count_inc() preempt_count_add(1) + #define preempt_count_dec() preempt_count_sub(1) + ++#ifdef CONFIG_PREEMPT_LAZY ++#define add_preempt_lazy_count(val) do { preempt_lazy_count() += (val); } while (0) ++#define sub_preempt_lazy_count(val) do { preempt_lazy_count() -= (val); } while (0) ++#define inc_preempt_lazy_count() add_preempt_lazy_count(1) ++#define dec_preempt_lazy_count() sub_preempt_lazy_count(1) ++#define preempt_lazy_count() (current_thread_info()->preempt_lazy_count) ++#else ++#define add_preempt_lazy_count(val) do { } while (0) ++#define sub_preempt_lazy_count(val) do { } while (0) ++#define inc_preempt_lazy_count() do { } while (0) ++#define dec_preempt_lazy_count() do { } while (0) ++#define preempt_lazy_count() (0) ++#endif ++ + #ifdef CONFIG_PREEMPT_COUNT + + #define preempt_disable() \ +@@ -215,6 +229,12 @@ do { \ + barrier(); \ + } while (0) + ++#define preempt_lazy_disable() \ ++do { \ ++ inc_preempt_lazy_count(); \ ++ barrier(); \ ++} while (0) ++ + #define sched_preempt_enable_no_resched() \ + do { \ + barrier(); \ +@@ -246,6 +266,18 @@ do { \ + __preempt_schedule(); \ + } while (0) + ++/* ++ * open code preempt_check_resched() because it is not exported to modules and ++ * used by local_unlock() or bpf_enable_instrumentation(). ++ */ ++#define preempt_lazy_enable() \ ++do { \ ++ dec_preempt_lazy_count(); \ ++ barrier(); \ ++ if (should_resched(0)) \ ++ __preempt_schedule(); \ ++} while (0) ++ + #else /* !CONFIG_PREEMPTION */ + #define preempt_enable() \ + do { \ +@@ -253,6 +285,12 @@ do { \ + preempt_count_dec(); \ + } while (0) + ++#define preempt_lazy_enable() \ ++do { \ ++ dec_preempt_lazy_count(); \ ++ barrier(); \ ++} while (0) ++ + #define preempt_enable_notrace() \ + do { \ + barrier(); \ +@@ -293,6 +331,9 @@ do { \ + #define preempt_enable_notrace() barrier() + #define preemptible() 0 + ++#define preempt_lazy_disable() barrier() ++#define preempt_lazy_enable() barrier() ++ + #endif /* CONFIG_PREEMPT_COUNT */ + + #ifdef MODULE +@@ -311,7 +352,7 @@ do { \ + } while (0) + #define preempt_fold_need_resched() \ + do { \ +- if (tif_need_resched()) \ ++ if (tif_need_resched_now()) \ + set_preempt_need_resched(); \ + } while (0) + +@@ -427,8 +468,15 @@ extern void migrate_enable(void); + + #else + +-static inline void migrate_disable(void) { } +-static inline void migrate_enable(void) { } ++static inline void migrate_disable(void) ++{ ++ preempt_lazy_disable(); ++} ++ ++static inline void migrate_enable(void) ++{ ++ preempt_lazy_enable(); ++} + + #endif /* CONFIG_SMP */ + +diff --git a/include/linux/sched.h b/include/linux/sched.h +index 0cac69902ec5..67ec36dbfacf 100644 +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -2061,6 +2061,43 @@ static inline int test_tsk_need_resched(struct task_struct *tsk) + return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED)); + } + ++#ifdef CONFIG_PREEMPT_LAZY ++static inline void set_tsk_need_resched_lazy(struct task_struct *tsk) ++{ ++ set_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY); ++} ++ ++static inline void clear_tsk_need_resched_lazy(struct task_struct *tsk) ++{ ++ clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY); ++} ++ ++static inline int test_tsk_need_resched_lazy(struct task_struct *tsk) ++{ ++ return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY)); ++} ++ ++static inline int need_resched_lazy(void) ++{ ++ return test_thread_flag(TIF_NEED_RESCHED_LAZY); ++} ++ ++static inline int need_resched_now(void) ++{ ++ return test_thread_flag(TIF_NEED_RESCHED); ++} ++ ++#else ++static inline void clear_tsk_need_resched_lazy(struct task_struct *tsk) { } ++static inline int need_resched_lazy(void) { return 0; } ++ ++static inline int need_resched_now(void) ++{ ++ return test_thread_flag(TIF_NEED_RESCHED); ++} ++ ++#endif ++ + /* + * cond_resched() and cond_resched_lock(): latency reduction via + * explicit rescheduling in places that are safe. The return +diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h +index 9f392ec76f2b..779e0e96b9cb 100644 +--- a/include/linux/thread_info.h ++++ b/include/linux/thread_info.h +@@ -177,7 +177,17 @@ static __always_inline unsigned long read_ti_thread_flags(struct thread_info *ti + clear_ti_thread_flag(task_thread_info(t), TIF_##fl) + #endif /* !CONFIG_GENERIC_ENTRY */ + +-#define tif_need_resched() test_thread_flag(TIF_NEED_RESCHED) ++#ifdef CONFIG_PREEMPT_LAZY ++#define tif_need_resched() (test_thread_flag(TIF_NEED_RESCHED) || \ ++ test_thread_flag(TIF_NEED_RESCHED_LAZY)) ++#define tif_need_resched_now() (test_thread_flag(TIF_NEED_RESCHED)) ++#define tif_need_resched_lazy() test_thread_flag(TIF_NEED_RESCHED_LAZY) ++ ++#else ++#define tif_need_resched() test_thread_flag(TIF_NEED_RESCHED) ++#define tif_need_resched_now() test_thread_flag(TIF_NEED_RESCHED) ++#define tif_need_resched_lazy() 0 ++#endif + + #ifndef CONFIG_HAVE_ARCH_WITHIN_STACK_FRAMES + static inline int arch_within_stack_frames(const void * const stack, +diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h +index c8b5e9781d01..743b1183d184 100644 +--- a/include/linux/trace_events.h ++++ b/include/linux/trace_events.h +@@ -70,6 +70,7 @@ struct trace_entry { + unsigned char flags; + unsigned char preempt_count; + int pid; ++ unsigned char preempt_lazy_count; + }; + + #define TRACE_EVENT_TYPE_MAX \ +@@ -159,9 +160,10 @@ static inline void tracing_generic_entry_update(struct trace_entry *entry, + unsigned int trace_ctx) + { + entry->preempt_count = trace_ctx & 0xff; ++ entry->preempt_lazy_count = (trace_ctx >> 16) & 0xff; + entry->pid = current->pid; + entry->type = type; +- entry->flags = trace_ctx >> 16; ++ entry->flags = trace_ctx >> 24; + } + + unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status); +@@ -172,7 +174,13 @@ enum trace_flag_type { + TRACE_FLAG_NEED_RESCHED = 0x04, + TRACE_FLAG_HARDIRQ = 0x08, + TRACE_FLAG_SOFTIRQ = 0x10, ++#ifdef CONFIG_PREEMPT_LAZY ++ TRACE_FLAG_PREEMPT_RESCHED = 0x00, ++ TRACE_FLAG_NEED_RESCHED_LAZY = 0x20, ++#else ++ TRACE_FLAG_NEED_RESCHED_LAZY = 0x00, + TRACE_FLAG_PREEMPT_RESCHED = 0x20, ++#endif + TRACE_FLAG_NMI = 0x40, + TRACE_FLAG_BH_OFF = 0x80, + }; +diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt +index c2f1fd95a821..260c08efeb48 100644 +--- a/kernel/Kconfig.preempt ++++ b/kernel/Kconfig.preempt +@@ -1,5 +1,11 @@ + # SPDX-License-Identifier: GPL-2.0-only + ++config HAVE_PREEMPT_LAZY ++ bool ++ ++config PREEMPT_LAZY ++ def_bool y if HAVE_PREEMPT_LAZY && PREEMPT_RT ++ + config PREEMPT_NONE_BUILD + bool + +diff --git a/kernel/sched/core.c b/kernel/sched/core.c +index 6bd06122850a..b72fc7d336e4 100644 +--- a/kernel/sched/core.c ++++ b/kernel/sched/core.c +@@ -1040,6 +1040,46 @@ void resched_curr(struct rq *rq) + trace_sched_wake_idle_without_ipi(cpu); + } + ++#ifdef CONFIG_PREEMPT_LAZY ++ ++static int tsk_is_polling(struct task_struct *p) ++{ ++#ifdef TIF_POLLING_NRFLAG ++ return test_tsk_thread_flag(p, TIF_POLLING_NRFLAG); ++#else ++ return 0; ++#endif ++} ++ ++void resched_curr_lazy(struct rq *rq) ++{ ++ struct task_struct *curr = rq->curr; ++ int cpu; ++ ++ if (!sched_feat(PREEMPT_LAZY)) { ++ resched_curr(rq); ++ return; ++ } ++ ++ if (test_tsk_need_resched(curr)) ++ return; ++ ++ if (test_tsk_need_resched_lazy(curr)) ++ return; ++ ++ set_tsk_need_resched_lazy(curr); ++ ++ cpu = cpu_of(rq); ++ if (cpu == smp_processor_id()) ++ return; ++ ++ /* NEED_RESCHED_LAZY must be visible before we test polling */ ++ smp_mb(); ++ if (!tsk_is_polling(curr)) ++ smp_send_reschedule(cpu); ++} ++#endif ++ + void resched_cpu(int cpu) + { + struct rq *rq = cpu_rq(cpu); +@@ -2224,6 +2264,7 @@ void migrate_disable(void) + preempt_disable(); + this_rq()->nr_pinned++; + p->migration_disabled = 1; ++ preempt_lazy_disable(); + preempt_enable(); + } + EXPORT_SYMBOL_GPL(migrate_disable); +@@ -2255,6 +2296,7 @@ void migrate_enable(void) + barrier(); + p->migration_disabled = 0; + this_rq()->nr_pinned--; ++ preempt_lazy_enable(); + preempt_enable(); + } + EXPORT_SYMBOL_GPL(migrate_enable); +@@ -4722,6 +4764,9 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) + p->on_cpu = 0; + #endif + init_task_preempt_count(p); ++#ifdef CONFIG_HAVE_PREEMPT_LAZY ++ task_thread_info(p)->preempt_lazy_count = 0; ++#endif + #ifdef CONFIG_SMP + plist_node_init(&p->pushable_tasks, MAX_PRIO); + RB_CLEAR_NODE(&p->pushable_dl_tasks); +@@ -6592,6 +6637,7 @@ static void __sched notrace __schedule(unsigned int sched_mode) + + next = pick_next_task(rq, prev, &rf); + clear_tsk_need_resched(prev); ++ clear_tsk_need_resched_lazy(prev); + clear_preempt_need_resched(); + #ifdef CONFIG_SCHED_DEBUG + rq->last_seen_need_resched_ns = 0; +@@ -6806,6 +6852,30 @@ static void __sched notrace preempt_schedule_common(void) + } while (need_resched()); + } + ++#ifdef CONFIG_PREEMPT_LAZY ++/* ++ * If TIF_NEED_RESCHED is then we allow to be scheduled away since this is ++ * set by a RT task. Oterwise we try to avoid beeing scheduled out as long as ++ * preempt_lazy_count counter >0. ++ */ ++static __always_inline int preemptible_lazy(void) ++{ ++ if (test_thread_flag(TIF_NEED_RESCHED)) ++ return 1; ++ if (current_thread_info()->preempt_lazy_count) ++ return 0; ++ return 1; ++} ++ ++#else ++ ++static inline int preemptible_lazy(void) ++{ ++ return 1; ++} ++ ++#endif ++ + #ifdef CONFIG_PREEMPTION + /* + * This is the entry point to schedule() from in-kernel preemption +@@ -6819,6 +6889,8 @@ asmlinkage __visible void __sched notrace preempt_schedule(void) + */ + if (likely(!preemptible())) + return; ++ if (!preemptible_lazy()) ++ return; + preempt_schedule_common(); + } + NOKPROBE_SYMBOL(preempt_schedule); +@@ -6866,6 +6938,9 @@ asmlinkage __visible void __sched notrace preempt_schedule_notrace(void) + if (likely(!preemptible())) + return; + ++ if (!preemptible_lazy()) ++ return; ++ + do { + /* + * Because the function tracer can trace preempt_count_sub() +@@ -9131,7 +9206,9 @@ void __init init_idle(struct task_struct *idle, int cpu) + + /* Set the preempt count _outside_ the spinlocks! */ + init_idle_preempt_count(idle, cpu); +- ++#ifdef CONFIG_HAVE_PREEMPT_LAZY ++ task_thread_info(idle)->preempt_lazy_count = 0; ++#endif + /* + * The idle tasks have their own, simple scheduling class: + */ +diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c +index 2558ab9033be..2dc35af7b5a6 100644 +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -4914,7 +4914,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) + ideal_runtime = sched_slice(cfs_rq, curr); + delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime; + if (delta_exec > ideal_runtime) { +- resched_curr(rq_of(cfs_rq)); ++ resched_curr_lazy(rq_of(cfs_rq)); + /* + * The current task ran long enough, ensure it doesn't get + * re-elected due to buddy favours. +@@ -4938,7 +4938,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) + return; + + if (delta > ideal_runtime) +- resched_curr(rq_of(cfs_rq)); ++ resched_curr_lazy(rq_of(cfs_rq)); + } + + static void +@@ -5084,7 +5084,7 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued) + * validating it and just reschedule. + */ + if (queued) { +- resched_curr(rq_of(cfs_rq)); ++ resched_curr_lazy(rq_of(cfs_rq)); + return; + } + /* +@@ -5233,7 +5233,7 @@ static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec) + * hierarchy can be throttled + */ + if (!assign_cfs_rq_runtime(cfs_rq) && likely(cfs_rq->curr)) +- resched_curr(rq_of(cfs_rq)); ++ resched_curr_lazy(rq_of(cfs_rq)); + } + + static __always_inline +@@ -5984,7 +5984,7 @@ static void hrtick_start_fair(struct rq *rq, struct task_struct *p) + + if (delta < 0) { + if (task_current(rq, p)) +- resched_curr(rq); ++ resched_curr_lazy(rq); + return; + } + hrtick_start(rq, delta); +@@ -7712,7 +7712,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ + return; + + preempt: +- resched_curr(rq); ++ resched_curr_lazy(rq); + /* + * Only set the backward buddy when the current task is still + * on the rq. This can happen when a wakeup gets interleaved +@@ -11877,7 +11877,7 @@ static void task_fork_fair(struct task_struct *p) + * 'current' within the tree based on its new key value. + */ + swap(curr->vruntime, se->vruntime); +- resched_curr(rq); ++ resched_curr_lazy(rq); + } + + se->vruntime -= cfs_rq->min_vruntime; +@@ -11904,7 +11904,7 @@ prio_changed_fair(struct rq *rq, struct task_struct *p, int oldprio) + */ + if (task_current(rq, p)) { + if (p->prio > oldprio) +- resched_curr(rq); ++ resched_curr_lazy(rq); + } else + check_preempt_curr(rq, p, 0); + } +diff --git a/kernel/sched/features.h b/kernel/sched/features.h +index ee7f23c76bd3..e13090e33f3c 100644 +--- a/kernel/sched/features.h ++++ b/kernel/sched/features.h +@@ -48,6 +48,9 @@ SCHED_FEAT(NONTASK_CAPACITY, true) + + #ifdef CONFIG_PREEMPT_RT + SCHED_FEAT(TTWU_QUEUE, false) ++# ifdef CONFIG_PREEMPT_LAZY ++SCHED_FEAT(PREEMPT_LAZY, true) ++# endif + #else + + /* +diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h +index b62d53d7c264..f2577f511a41 100644 +--- a/kernel/sched/sched.h ++++ b/kernel/sched/sched.h +@@ -2350,6 +2350,15 @@ extern void reweight_task(struct task_struct *p, int prio); + extern void resched_curr(struct rq *rq); + extern void resched_cpu(int cpu); + ++#ifdef CONFIG_PREEMPT_LAZY ++extern void resched_curr_lazy(struct rq *rq); ++#else ++static inline void resched_curr_lazy(struct rq *rq) ++{ ++ resched_curr(rq); ++} ++#endif ++ + extern struct rt_bandwidth def_rt_bandwidth; + extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime); + extern bool sched_rt_bandwidth_account(struct rt_rq *rt_rq); +diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c +index 87eca95b57fb..462564d652be 100644 +--- a/kernel/trace/trace.c ++++ b/kernel/trace/trace.c +@@ -2616,11 +2616,19 @@ unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status) + if (softirq_count() >> (SOFTIRQ_SHIFT + 1)) + trace_flags |= TRACE_FLAG_BH_OFF; + +- if (tif_need_resched()) ++ if (tif_need_resched_now()) + trace_flags |= TRACE_FLAG_NEED_RESCHED; ++#ifdef CONFIG_PREEMPT_LAZY ++ /* Run out of bits. Share the LAZY and PREEMPT_RESCHED */ ++ if (need_resched_lazy()) ++ trace_flags |= TRACE_FLAG_NEED_RESCHED_LAZY; ++#else + if (test_preempt_need_resched()) + trace_flags |= TRACE_FLAG_PREEMPT_RESCHED; +- return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) | ++#endif ++ ++ return (trace_flags << 24) | (min_t(unsigned int, pc & 0xff, 0xf)) | ++ (preempt_lazy_count() & 0xff) << 16 | + (min_t(unsigned int, migration_disable_value(), 0xf)) << 4; + } + +@@ -4212,15 +4220,17 @@ unsigned long trace_total_entries(struct trace_array *tr) + + static void print_lat_help_header(struct seq_file *m) + { +- seq_puts(m, "# _------=> CPU# \n" +- "# / _-----=> irqs-off/BH-disabled\n" +- "# | / _----=> need-resched \n" +- "# || / _---=> hardirq/softirq \n" +- "# ||| / _--=> preempt-depth \n" +- "# |||| / _-=> migrate-disable \n" +- "# ||||| / delay \n" +- "# cmd pid |||||| time | caller \n" +- "# \\ / |||||| \\ | / \n"); ++ seq_puts(m, "# _--------=> CPU# \n" ++ "# / _-------=> irqs-off/BH-disabled\n" ++ "# | / _------=> need-resched \n" ++ "# || / _-----=> need-resched-lazy\n" ++ "# ||| / _----=> hardirq/softirq \n" ++ "# |||| / _---=> preempt-depth \n" ++ "# ||||| / _--=> preempt-lazy-depth\n" ++ "# |||||| / _-=> migrate-disable \n" ++ "# ||||||| / delay \n" ++ "# cmd pid |||||||| time | caller \n" ++ "# \\ / |||||||| \\ | / \n"); + } + + static void print_event_info(struct array_buffer *buf, struct seq_file *m) +@@ -4254,14 +4264,16 @@ static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file + + print_event_info(buf, m); + +- seq_printf(m, "# %.*s _-----=> irqs-off/BH-disabled\n", prec, space); +- seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space); +- seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space); +- seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space); +- seq_printf(m, "# %.*s||| / _-=> migrate-disable\n", prec, space); +- seq_printf(m, "# %.*s|||| / delay\n", prec, space); +- seq_printf(m, "# TASK-PID %.*s CPU# ||||| TIMESTAMP FUNCTION\n", prec, " TGID "); +- seq_printf(m, "# | | %.*s | ||||| | |\n", prec, " | "); ++ seq_printf(m, "# %.*s _-------=> irqs-off/BH-disabled\n", prec, space); ++ seq_printf(m, "# %.*s / _------=> need-resched\n", prec, space); ++ seq_printf(m, "# %.*s| / _-----=> need-resched-lazy\n", prec, space); ++ seq_printf(m, "# %.*s|| / _----=> hardirq/softirq\n", prec, space); ++ seq_printf(m, "# %.*s||| / _---=> preempt-depth\n", prec, space); ++ seq_printf(m, "# %.*s|||| / _--=> preempt-lazy-depth\n", prec, space); ++ seq_printf(m, "# %.*s||||| / _-=> migrate-disable\n", prec, space); ++ seq_printf(m, "# %.*s|||||| / delay\n", prec, space); ++ seq_printf(m, "# TASK-PID %.*s CPU# ||||||| TIMESTAMP FUNCTION\n", prec, " TGID "); ++ seq_printf(m, "# | | %.*s | ||||||| | |\n", prec, " | "); + } + + void +diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c +index a6d2f99f847d..493c3f9cf01a 100644 +--- a/kernel/trace/trace_events.c ++++ b/kernel/trace/trace_events.c +@@ -208,6 +208,7 @@ static int trace_define_common_fields(void) + /* Holds both preempt_count and migrate_disable */ + __common_field(unsigned char, preempt_count); + __common_field(int, pid); ++ __common_field(unsigned char, preempt_lazy_count); + + return ret; + } +diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c +index 5cd4fb656306..3c227e2843ae 100644 +--- a/kernel/trace/trace_output.c ++++ b/kernel/trace/trace_output.c +@@ -442,6 +442,7 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry) + { + char hardsoft_irq; + char need_resched; ++ char need_resched_lazy; + char irqs_off; + int hardirq; + int softirq; +@@ -462,20 +463,27 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry) + + switch (entry->flags & (TRACE_FLAG_NEED_RESCHED | + TRACE_FLAG_PREEMPT_RESCHED)) { ++#ifndef CONFIG_PREEMPT_LAZY + case TRACE_FLAG_NEED_RESCHED | TRACE_FLAG_PREEMPT_RESCHED: + need_resched = 'N'; + break; ++#endif + case TRACE_FLAG_NEED_RESCHED: + need_resched = 'n'; + break; ++#ifndef CONFIG_PREEMPT_LAZY + case TRACE_FLAG_PREEMPT_RESCHED: + need_resched = 'p'; + break; ++#endif + default: + need_resched = '.'; + break; + } + ++ need_resched_lazy = ++ (entry->flags & TRACE_FLAG_NEED_RESCHED_LAZY) ? 'L' : '.'; ++ + hardsoft_irq = + (nmi && hardirq) ? 'Z' : + nmi ? 'z' : +@@ -484,14 +492,20 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry) + softirq ? 's' : + '.' ; + +- trace_seq_printf(s, "%c%c%c", +- irqs_off, need_resched, hardsoft_irq); ++ trace_seq_printf(s, "%c%c%c%c", ++ irqs_off, need_resched, need_resched_lazy, ++ hardsoft_irq); + + if (entry->preempt_count & 0xf) + trace_seq_printf(s, "%x", entry->preempt_count & 0xf); + else + trace_seq_putc(s, '.'); + ++ if (entry->preempt_lazy_count) ++ trace_seq_printf(s, "%x", entry->preempt_lazy_count); ++ else ++ trace_seq_putc(s, '.'); ++ + if (entry->preempt_count & 0xf0) + trace_seq_printf(s, "%x", entry->preempt_count >> 4); + else +-- +2.43.0 + diff --git a/debian/patches-rt/0033-x86-entry-Use-should_resched-in-idtentry_exit_cond_r.patch b/debian/patches-rt/0033-x86-entry-Use-should_resched-in-idtentry_exit_cond_r.patch new file mode 100644 index 000000000..69ddd41b3 --- /dev/null +++ b/debian/patches-rt/0033-x86-entry-Use-should_resched-in-idtentry_exit_cond_r.patch @@ -0,0 +1,36 @@ +From 37719a07083b68dad1449227e9fe66e8e6c9f2b6 Mon Sep 17 00:00:00 2001 +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Tue, 30 Jun 2020 11:45:14 +0200 +Subject: [PATCH 33/62] x86/entry: Use should_resched() in + idtentry_exit_cond_resched() +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.1/older/patches-6.1.69-rt21.tar.xz + +The TIF_NEED_RESCHED bit is inlined on x86 into the preemption counter. +By using should_resched(0) instead of need_resched() the same check can +be performed which uses the same variable as 'preempt_count()` which was +issued before. + +Use should_resched(0) instead need_resched(). + +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +--- + kernel/entry/common.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/entry/common.c b/kernel/entry/common.c +index be61332c66b5..97ff5faad4fb 100644 +--- a/kernel/entry/common.c ++++ b/kernel/entry/common.c +@@ -386,7 +386,7 @@ void raw_irqentry_exit_cond_resched(void) + rcu_irq_exit_check_preempt(); + if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) + WARN_ON_ONCE(!on_thread_stack()); +- if (need_resched()) ++ if (should_resched(0)) + preempt_schedule_irq(); + } + } +-- +2.43.0 + diff --git a/debian/patches-rt/0034-x86-Support-for-lazy-preemption.patch b/debian/patches-rt/0034-x86-Support-for-lazy-preemption.patch new file mode 100644 index 000000000..511fe0a01 --- /dev/null +++ b/debian/patches-rt/0034-x86-Support-for-lazy-preemption.patch @@ -0,0 +1,158 @@ +From 65ce5ba8ccb26e6de364e76228e645b2c02b921d Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner <tglx@linutronix.de> +Date: Thu, 1 Nov 2012 11:03:47 +0100 +Subject: [PATCH 34/62] x86: Support for lazy preemption +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.1/older/patches-6.1.69-rt21.tar.xz + +Implement the x86 pieces for lazy preempt. + +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +--- + arch/x86/Kconfig | 1 + + arch/x86/include/asm/preempt.h | 33 +++++++++++++++++++++++++++++- + arch/x86/include/asm/thread_info.h | 7 +++++++ + include/linux/entry-common.h | 2 +- + kernel/entry/common.c | 2 +- + 5 files changed, 42 insertions(+), 3 deletions(-) + +diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig +index c9bed9c69423..f38bd8a5061e 100644 +--- a/arch/x86/Kconfig ++++ b/arch/x86/Kconfig +@@ -251,6 +251,7 @@ config X86 + select HAVE_PCI + select HAVE_PERF_REGS + select HAVE_PERF_USER_STACK_DUMP ++ select HAVE_PREEMPT_LAZY + select MMU_GATHER_RCU_TABLE_FREE if PARAVIRT + select MMU_GATHER_MERGE_VMAS + select HAVE_POSIX_CPU_TIMERS_TASK_WORK +diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h +index 5f6daea1ee24..cd20b4a5719a 100644 +--- a/arch/x86/include/asm/preempt.h ++++ b/arch/x86/include/asm/preempt.h +@@ -90,17 +90,48 @@ static __always_inline void __preempt_count_sub(int val) + * a decrement which hits zero means we have no preempt_count and should + * reschedule. + */ +-static __always_inline bool __preempt_count_dec_and_test(void) ++static __always_inline bool ____preempt_count_dec_and_test(void) + { + return GEN_UNARY_RMWcc("decl", __preempt_count, e, __percpu_arg([var])); + } + ++static __always_inline bool __preempt_count_dec_and_test(void) ++{ ++ if (____preempt_count_dec_and_test()) ++ return true; ++#ifdef CONFIG_PREEMPT_LAZY ++ if (preempt_count()) ++ return false; ++ if (current_thread_info()->preempt_lazy_count) ++ return false; ++ return test_thread_flag(TIF_NEED_RESCHED_LAZY); ++#else ++ return false; ++#endif ++} ++ + /* + * Returns true when we need to resched and can (barring IRQ state). + */ + static __always_inline bool should_resched(int preempt_offset) + { ++#ifdef CONFIG_PREEMPT_LAZY ++ u32 tmp; ++ tmp = raw_cpu_read_4(__preempt_count); ++ if (tmp == preempt_offset) ++ return true; ++ ++ /* preempt count == 0 ? */ ++ tmp &= ~PREEMPT_NEED_RESCHED; ++ if (tmp != preempt_offset) ++ return false; ++ /* XXX PREEMPT_LOCK_OFFSET */ ++ if (current_thread_info()->preempt_lazy_count) ++ return false; ++ return test_thread_flag(TIF_NEED_RESCHED_LAZY); ++#else + return unlikely(raw_cpu_read_4(__preempt_count) == preempt_offset); ++#endif + } + + #ifdef CONFIG_PREEMPTION +diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h +index f0cb881c1d69..0da06a9b5f72 100644 +--- a/arch/x86/include/asm/thread_info.h ++++ b/arch/x86/include/asm/thread_info.h +@@ -57,6 +57,8 @@ struct thread_info { + unsigned long flags; /* low level flags */ + unsigned long syscall_work; /* SYSCALL_WORK_ flags */ + u32 status; /* thread synchronous flags */ ++ int preempt_lazy_count; /* 0 => lazy preemptable ++ <0 => BUG */ + #ifdef CONFIG_SMP + u32 cpu; /* current CPU */ + #endif +@@ -65,6 +67,7 @@ struct thread_info { + #define INIT_THREAD_INFO(tsk) \ + { \ + .flags = 0, \ ++ .preempt_lazy_count = 0, \ + } + + #else /* !__ASSEMBLY__ */ +@@ -92,6 +95,7 @@ struct thread_info { + #define TIF_NOCPUID 15 /* CPUID is not accessible in userland */ + #define TIF_NOTSC 16 /* TSC is not accessible in userland */ + #define TIF_NOTIFY_SIGNAL 17 /* signal notifications exist */ ++#define TIF_NEED_RESCHED_LAZY 19 /* lazy rescheduling necessary */ + #define TIF_MEMDIE 20 /* is terminating due to OOM killer */ + #define TIF_POLLING_NRFLAG 21 /* idle is polling for TIF_NEED_RESCHED */ + #define TIF_IO_BITMAP 22 /* uses I/O bitmap */ +@@ -115,6 +119,7 @@ struct thread_info { + #define _TIF_NOCPUID (1 << TIF_NOCPUID) + #define _TIF_NOTSC (1 << TIF_NOTSC) + #define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) ++#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY) + #define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) + #define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) + #define _TIF_SPEC_FORCE_UPDATE (1 << TIF_SPEC_FORCE_UPDATE) +@@ -146,6 +151,8 @@ struct thread_info { + + #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) + ++#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY) ++ + #define STACK_WARN (THREAD_SIZE/8) + + /* +diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h +index d95ab85f96ba..93cc1ae12125 100644 +--- a/include/linux/entry-common.h ++++ b/include/linux/entry-common.h +@@ -59,7 +59,7 @@ + + #define EXIT_TO_USER_MODE_WORK \ + (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ +- _TIF_NEED_RESCHED | _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL | \ ++ _TIF_NEED_RESCHED_MASK | _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL | \ + ARCH_EXIT_TO_USER_MODE_WORK) + + /** +diff --git a/kernel/entry/common.c b/kernel/entry/common.c +index 97ff5faad4fb..c6301e520d47 100644 +--- a/kernel/entry/common.c ++++ b/kernel/entry/common.c +@@ -155,7 +155,7 @@ static unsigned long exit_to_user_mode_loop(struct pt_regs *regs, + + local_irq_enable_exit_to_user(ti_work); + +- if (ti_work & _TIF_NEED_RESCHED) ++ if (ti_work & _TIF_NEED_RESCHED_MASK) + schedule(); + + if (ti_work & _TIF_UPROBE) +-- +2.43.0 + diff --git a/debian/patches-rt/0035-entry-Fix-the-preempt-lazy-fallout.patch b/debian/patches-rt/0035-entry-Fix-the-preempt-lazy-fallout.patch new file mode 100644 index 000000000..27139e9bb --- /dev/null +++ b/debian/patches-rt/0035-entry-Fix-the-preempt-lazy-fallout.patch @@ -0,0 +1,49 @@ +From d37c604152cbded61a8e107918d3b9950725d897 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner <tglx@linutronix.de> +Date: Tue, 13 Jul 2021 07:52:52 +0200 +Subject: [PATCH 35/62] entry: Fix the preempt lazy fallout +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.1/older/patches-6.1.69-rt21.tar.xz + +Common code needs common defines.... + +Fixes: f2f9e496208c ("x86: Support for lazy preemption") +Reported-by: kernel test robot <lkp@intel.com> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +--- + arch/x86/include/asm/thread_info.h | 2 -- + include/linux/entry-common.h | 6 ++++++ + 2 files changed, 6 insertions(+), 2 deletions(-) + +diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h +index 0da06a9b5f72..fd8fb76f324f 100644 +--- a/arch/x86/include/asm/thread_info.h ++++ b/arch/x86/include/asm/thread_info.h +@@ -151,8 +151,6 @@ struct thread_info { + + #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) + +-#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY) +- + #define STACK_WARN (THREAD_SIZE/8) + + /* +diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h +index 93cc1ae12125..3dc3704a3cdb 100644 +--- a/include/linux/entry-common.h ++++ b/include/linux/entry-common.h +@@ -57,6 +57,12 @@ + # define ARCH_EXIT_TO_USER_MODE_WORK (0) + #endif + ++#ifdef CONFIG_PREEMPT_LAZY ++# define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY) ++#else ++# define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED) ++#endif ++ + #define EXIT_TO_USER_MODE_WORK \ + (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ + _TIF_NEED_RESCHED_MASK | _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL | \ +-- +2.43.0 + diff --git a/debian/patches-rt/0036-arm-Add-support-for-lazy-preemption.patch b/debian/patches-rt/0036-arm-Add-support-for-lazy-preemption.patch new file mode 100644 index 000000000..8f76d13b0 --- /dev/null +++ b/debian/patches-rt/0036-arm-Add-support-for-lazy-preemption.patch @@ -0,0 +1,137 @@ +From f242fbbb3f85d6b9c8c8fc06ddbc83b9ca5a0511 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner <tglx@linutronix.de> +Date: Wed, 31 Oct 2012 12:04:11 +0100 +Subject: [PATCH 36/62] arm: Add support for lazy preemption +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.1/older/patches-6.1.69-rt21.tar.xz + +Implement the arm pieces for lazy preempt. + +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +--- + arch/arm/Kconfig | 1 + + arch/arm/include/asm/thread_info.h | 6 +++++- + arch/arm/kernel/asm-offsets.c | 1 + + arch/arm/kernel/entry-armv.S | 19 ++++++++++++++++--- + arch/arm/kernel/signal.c | 3 ++- + 5 files changed, 25 insertions(+), 5 deletions(-) + +diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig +index 6d5afe2e6ba3..717e596dc13b 100644 +--- a/arch/arm/Kconfig ++++ b/arch/arm/Kconfig +@@ -115,6 +115,7 @@ config ARM + select HAVE_PERF_EVENTS + select HAVE_PERF_REGS + select HAVE_PERF_USER_STACK_DUMP ++ select HAVE_PREEMPT_LAZY + select MMU_GATHER_RCU_TABLE_FREE if SMP && ARM_LPAE + select HAVE_REGS_AND_STACK_ACCESS_API + select HAVE_RSEQ +diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h +index 7f092cb55a41..ffcbf8ebed4b 100644 +--- a/arch/arm/include/asm/thread_info.h ++++ b/arch/arm/include/asm/thread_info.h +@@ -62,6 +62,7 @@ struct cpu_context_save { + struct thread_info { + unsigned long flags; /* low level flags */ + int preempt_count; /* 0 => preemptable, <0 => bug */ ++ int preempt_lazy_count; /* 0 => preemptable, <0 => bug */ + __u32 cpu; /* cpu */ + __u32 cpu_domain; /* cpu domain */ + struct cpu_context_save cpu_context; /* cpu context */ +@@ -129,6 +130,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp *, + #define TIF_NOTIFY_RESUME 2 /* callback before returning to user */ + #define TIF_UPROBE 3 /* breakpointed or singlestepping */ + #define TIF_NOTIFY_SIGNAL 4 /* signal notifications exist */ ++#define TIF_NEED_RESCHED_LAZY 5 + + #define TIF_USING_IWMMXT 17 + #define TIF_MEMDIE 18 /* is terminating due to OOM killer */ +@@ -148,6 +150,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp *, + #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) + #define _TIF_SECCOMP (1 << TIF_SECCOMP) + #define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) ++#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY) + #define _TIF_USING_IWMMXT (1 << TIF_USING_IWMMXT) + + /* Checks for any syscall work in entry-common.S */ +@@ -157,7 +160,8 @@ extern int vfp_restore_user_hwstate(struct user_vfp *, + /* + * Change these and you break ASM code in entry-common.S + */ +-#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ ++#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | \ ++ _TIF_SIGPENDING | \ + _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ + _TIF_NOTIFY_SIGNAL) + +diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c +index 2c8d76fd7c66..c3bdec7d2df9 100644 +--- a/arch/arm/kernel/asm-offsets.c ++++ b/arch/arm/kernel/asm-offsets.c +@@ -43,6 +43,7 @@ int main(void) + BLANK(); + DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); + DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count)); ++ DEFINE(TI_PREEMPT_LAZY, offsetof(struct thread_info, preempt_lazy_count)); + DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); + DEFINE(TI_CPU_DOMAIN, offsetof(struct thread_info, cpu_domain)); + DEFINE(TI_CPU_SAVE, offsetof(struct thread_info, cpu_context)); +diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S +index c39303e5c234..cfb4660e9fea 100644 +--- a/arch/arm/kernel/entry-armv.S ++++ b/arch/arm/kernel/entry-armv.S +@@ -222,11 +222,18 @@ ENDPROC(__dabt_svc) + + #ifdef CONFIG_PREEMPTION + ldr r8, [tsk, #TI_PREEMPT] @ get preempt count +- ldr r0, [tsk, #TI_FLAGS] @ get flags + teq r8, #0 @ if preempt count != 0 ++ bne 1f @ return from exeption ++ ldr r0, [tsk, #TI_FLAGS] @ get flags ++ tst r0, #_TIF_NEED_RESCHED @ if NEED_RESCHED is set ++ blne svc_preempt @ preempt! ++ ++ ldr r8, [tsk, #TI_PREEMPT_LAZY] @ get preempt lazy count ++ teq r8, #0 @ if preempt lazy count != 0 + movne r0, #0 @ force flags to 0 +- tst r0, #_TIF_NEED_RESCHED ++ tst r0, #_TIF_NEED_RESCHED_LAZY + blne svc_preempt ++1: + #endif + + svc_exit r5, irq = 1 @ return from exception +@@ -241,8 +248,14 @@ ENDPROC(__irq_svc) + 1: bl preempt_schedule_irq @ irq en/disable is done inside + ldr r0, [tsk, #TI_FLAGS] @ get new tasks TI_FLAGS + tst r0, #_TIF_NEED_RESCHED ++ bne 1b ++ tst r0, #_TIF_NEED_RESCHED_LAZY + reteq r8 @ go again +- b 1b ++ ldr r0, [tsk, #TI_PREEMPT_LAZY] @ get preempt lazy count ++ teq r0, #0 @ if preempt lazy count != 0 ++ beq 1b ++ ret r8 @ go again ++ + #endif + + __und_fault: +diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c +index e07f359254c3..b50a3248e79f 100644 +--- a/arch/arm/kernel/signal.c ++++ b/arch/arm/kernel/signal.c +@@ -607,7 +607,8 @@ do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall) + */ + trace_hardirqs_off(); + do { +- if (likely(thread_flags & _TIF_NEED_RESCHED)) { ++ if (likely(thread_flags & (_TIF_NEED_RESCHED | ++ _TIF_NEED_RESCHED_LAZY))) { + schedule(); + } else { + if (unlikely(!user_mode(regs))) +-- +2.43.0 + diff --git a/debian/patches-rt/0037-powerpc-Add-support-for-lazy-preemption.patch b/debian/patches-rt/0037-powerpc-Add-support-for-lazy-preemption.patch new file mode 100644 index 000000000..bf8022c6c --- /dev/null +++ b/debian/patches-rt/0037-powerpc-Add-support-for-lazy-preemption.patch @@ -0,0 +1,118 @@ +From 65e31d7b980c1413f19fcb84234387f97b09588f Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner <tglx@linutronix.de> +Date: Thu, 1 Nov 2012 10:14:11 +0100 +Subject: [PATCH 37/62] powerpc: Add support for lazy preemption +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.1/older/patches-6.1.69-rt21.tar.xz + +Implement the powerpc pieces for lazy preempt. + +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +--- + arch/powerpc/Kconfig | 1 + + arch/powerpc/include/asm/thread_info.h | 8 ++++++++ + arch/powerpc/kernel/interrupt.c | 8 ++++++-- + 3 files changed, 15 insertions(+), 2 deletions(-) + +diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig +index 6050e6e10d32..0eff864d6ec3 100644 +--- a/arch/powerpc/Kconfig ++++ b/arch/powerpc/Kconfig +@@ -242,6 +242,7 @@ config PPC + select HAVE_PERF_EVENTS_NMI if PPC64 + select HAVE_PERF_REGS + select HAVE_PERF_USER_STACK_DUMP ++ select HAVE_PREEMPT_LAZY + select HAVE_REGS_AND_STACK_ACCESS_API + select HAVE_RELIABLE_STACKTRACE + select HAVE_RSEQ +diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h +index af58f1ed3952..520864de8bb2 100644 +--- a/arch/powerpc/include/asm/thread_info.h ++++ b/arch/powerpc/include/asm/thread_info.h +@@ -53,6 +53,8 @@ + struct thread_info { + int preempt_count; /* 0 => preemptable, + <0 => BUG */ ++ int preempt_lazy_count; /* 0 => preemptable, ++ <0 => BUG */ + #ifdef CONFIG_SMP + unsigned int cpu; + #endif +@@ -77,6 +79,7 @@ struct thread_info { + #define INIT_THREAD_INFO(tsk) \ + { \ + .preempt_count = INIT_PREEMPT_COUNT, \ ++ .preempt_lazy_count = 0, \ + .flags = 0, \ + } + +@@ -102,6 +105,7 @@ void arch_setup_new_exec(void); + #define TIF_PATCH_PENDING 6 /* pending live patching update */ + #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ + #define TIF_SINGLESTEP 8 /* singlestepping active */ ++#define TIF_NEED_RESCHED_LAZY 9 /* lazy rescheduling necessary */ + #define TIF_SECCOMP 10 /* secure computing */ + #define TIF_RESTOREALL 11 /* Restore all regs (implies NOERROR) */ + #define TIF_NOERROR 12 /* Force successful syscall return */ +@@ -117,6 +121,7 @@ void arch_setup_new_exec(void); + #define TIF_POLLING_NRFLAG 19 /* true if poll_idle() is polling TIF_NEED_RESCHED */ + #define TIF_32BIT 20 /* 32 bit binary */ + ++ + /* as above, but as bit values */ + #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE) + #define _TIF_SIGPENDING (1<<TIF_SIGPENDING) +@@ -128,6 +133,7 @@ void arch_setup_new_exec(void); + #define _TIF_PATCH_PENDING (1<<TIF_PATCH_PENDING) + #define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT) + #define _TIF_SINGLESTEP (1<<TIF_SINGLESTEP) ++#define _TIF_NEED_RESCHED_LAZY (1<<TIF_NEED_RESCHED_LAZY) + #define _TIF_SECCOMP (1<<TIF_SECCOMP) + #define _TIF_RESTOREALL (1<<TIF_RESTOREALL) + #define _TIF_NOERROR (1<<TIF_NOERROR) +@@ -141,10 +147,12 @@ void arch_setup_new_exec(void); + _TIF_SYSCALL_EMU) + + #define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \ ++ _TIF_NEED_RESCHED_LAZY | \ + _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ + _TIF_RESTORE_TM | _TIF_PATCH_PENDING | \ + _TIF_NOTIFY_SIGNAL) + #define _TIF_PERSYSCALL_MASK (_TIF_RESTOREALL|_TIF_NOERROR) ++#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY) + + /* Bits in local_flags */ + /* Don't move TLF_NAPPING without adjusting the code in entry_32.S */ +diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c +index cf770d86c03c..2c454731c250 100644 +--- a/arch/powerpc/kernel/interrupt.c ++++ b/arch/powerpc/kernel/interrupt.c +@@ -186,7 +186,7 @@ interrupt_exit_user_prepare_main(unsigned long ret, struct pt_regs *regs) + ti_flags = read_thread_flags(); + while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) { + local_irq_enable(); +- if (ti_flags & _TIF_NEED_RESCHED) { ++ if (ti_flags & _TIF_NEED_RESCHED_MASK) { + schedule(); + } else { + /* +@@ -397,11 +397,15 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs) + /* Returning to a kernel context with local irqs enabled. */ + WARN_ON_ONCE(!(regs->msr & MSR_EE)); + again: +- if (IS_ENABLED(CONFIG_PREEMPT)) { ++ if (IS_ENABLED(CONFIG_PREEMPTION)) { + /* Return to preemptible kernel context */ + if (unlikely(read_thread_flags() & _TIF_NEED_RESCHED)) { + if (preempt_count() == 0) + preempt_schedule_irq(); ++ } else if (unlikely(current_thread_info()->flags & _TIF_NEED_RESCHED_LAZY)) { ++ if ((preempt_count() == 0) && ++ (current_thread_info()->preempt_lazy_count == 0)) ++ preempt_schedule_irq(); + } + } + +-- +2.43.0 + diff --git a/debian/patches-rt/0038-arch-arm64-Add-lazy-preempt-support.patch b/debian/patches-rt/0038-arch-arm64-Add-lazy-preempt-support.patch new file mode 100644 index 000000000..4df0c8a40 --- /dev/null +++ b/debian/patches-rt/0038-arch-arm64-Add-lazy-preempt-support.patch @@ -0,0 +1,146 @@ +From 12a36ad989490f5ae3ed6a50d764385e8e27e024 Mon Sep 17 00:00:00 2001 +From: Anders Roxell <anders.roxell@linaro.org> +Date: Thu, 14 May 2015 17:52:17 +0200 +Subject: [PATCH 38/62] arch/arm64: Add lazy preempt support +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.1/older/patches-6.1.69-rt21.tar.xz + +arm64 is missing support for PREEMPT_RT. The main feature which is +lacking is support for lazy preemption. The arch-specific entry code, +thread information structure definitions, and associated data tables +have to be extended to provide this support. Then the Kconfig file has +to be extended to indicate the support is available, and also to +indicate that support for full RT preemption is now available. + +Signed-off-by: Anders Roxell <anders.roxell@linaro.org> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +--- + arch/arm64/Kconfig | 1 + + arch/arm64/include/asm/preempt.h | 25 ++++++++++++++++++++++++- + arch/arm64/include/asm/thread_info.h | 8 +++++++- + arch/arm64/kernel/asm-offsets.c | 1 + + arch/arm64/kernel/signal.c | 2 +- + 5 files changed, 34 insertions(+), 3 deletions(-) + +diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig +index ea70eb960565..6e16670a7f43 100644 +--- a/arch/arm64/Kconfig ++++ b/arch/arm64/Kconfig +@@ -199,6 +199,7 @@ config ARM64 + select HAVE_PERF_USER_STACK_DUMP + select HAVE_PREEMPT_DYNAMIC_KEY + select HAVE_REGS_AND_STACK_ACCESS_API ++ select HAVE_PREEMPT_LAZY + select HAVE_POSIX_CPU_TIMERS_TASK_WORK + select HAVE_FUNCTION_ARG_ACCESS_API + select MMU_GATHER_RCU_TABLE_FREE +diff --git a/arch/arm64/include/asm/preempt.h b/arch/arm64/include/asm/preempt.h +index 0159b625cc7f..a5486918e5ee 100644 +--- a/arch/arm64/include/asm/preempt.h ++++ b/arch/arm64/include/asm/preempt.h +@@ -71,13 +71,36 @@ static inline bool __preempt_count_dec_and_test(void) + * interrupt occurring between the non-atomic READ_ONCE/WRITE_ONCE + * pair. + */ +- return !pc || !READ_ONCE(ti->preempt_count); ++ if (!pc || !READ_ONCE(ti->preempt_count)) ++ return true; ++#ifdef CONFIG_PREEMPT_LAZY ++ if ((pc & ~PREEMPT_NEED_RESCHED)) ++ return false; ++ if (current_thread_info()->preempt_lazy_count) ++ return false; ++ return test_thread_flag(TIF_NEED_RESCHED_LAZY); ++#else ++ return false; ++#endif + } + + static inline bool should_resched(int preempt_offset) + { ++#ifdef CONFIG_PREEMPT_LAZY ++ u64 pc = READ_ONCE(current_thread_info()->preempt_count); ++ if (pc == preempt_offset) ++ return true; ++ ++ if ((pc & ~PREEMPT_NEED_RESCHED) != preempt_offset) ++ return false; ++ ++ if (current_thread_info()->preempt_lazy_count) ++ return false; ++ return test_thread_flag(TIF_NEED_RESCHED_LAZY); ++#else + u64 pc = READ_ONCE(current_thread_info()->preempt_count); + return pc == preempt_offset; ++#endif + } + + #ifdef CONFIG_PREEMPTION +diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h +index 848739c15de8..4b7148fd5551 100644 +--- a/arch/arm64/include/asm/thread_info.h ++++ b/arch/arm64/include/asm/thread_info.h +@@ -26,6 +26,7 @@ struct thread_info { + #ifdef CONFIG_ARM64_SW_TTBR0_PAN + u64 ttbr0; /* saved TTBR0_EL1 */ + #endif ++ int preempt_lazy_count; /* 0 => preemptable, <0 => bug */ + union { + u64 preempt_count; /* 0 => preemptible, <0 => bug */ + struct { +@@ -68,6 +69,7 @@ int arch_dup_task_struct(struct task_struct *dst, + #define TIF_UPROBE 4 /* uprobe breakpoint or singlestep */ + #define TIF_MTE_ASYNC_FAULT 5 /* MTE Asynchronous Tag Check Fault */ + #define TIF_NOTIFY_SIGNAL 6 /* signal notifications exist */ ++#define TIF_NEED_RESCHED_LAZY 7 + #define TIF_SYSCALL_TRACE 8 /* syscall trace active */ + #define TIF_SYSCALL_AUDIT 9 /* syscall auditing */ + #define TIF_SYSCALL_TRACEPOINT 10 /* syscall tracepoint for ftrace */ +@@ -100,8 +102,10 @@ int arch_dup_task_struct(struct task_struct *dst, + #define _TIF_SVE (1 << TIF_SVE) + #define _TIF_MTE_ASYNC_FAULT (1 << TIF_MTE_ASYNC_FAULT) + #define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) ++#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY) + +-#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ ++#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | \ ++ _TIF_SIGPENDING | \ + _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \ + _TIF_UPROBE | _TIF_MTE_ASYNC_FAULT | \ + _TIF_NOTIFY_SIGNAL) +@@ -110,6 +114,8 @@ int arch_dup_task_struct(struct task_struct *dst, + _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \ + _TIF_SYSCALL_EMU) + ++#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY) ++ + #ifdef CONFIG_SHADOW_CALL_STACK + #define INIT_SCS \ + .scs_base = init_shadow_call_stack, \ +diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c +index 1197e7679882..e74c0415f67e 100644 +--- a/arch/arm64/kernel/asm-offsets.c ++++ b/arch/arm64/kernel/asm-offsets.c +@@ -32,6 +32,7 @@ int main(void) + DEFINE(TSK_TI_CPU, offsetof(struct task_struct, thread_info.cpu)); + DEFINE(TSK_TI_FLAGS, offsetof(struct task_struct, thread_info.flags)); + DEFINE(TSK_TI_PREEMPT, offsetof(struct task_struct, thread_info.preempt_count)); ++ DEFINE(TSK_TI_PREEMPT_LAZY, offsetof(struct task_struct, thread_info.preempt_lazy_count)); + #ifdef CONFIG_ARM64_SW_TTBR0_PAN + DEFINE(TSK_TI_TTBR0, offsetof(struct task_struct, thread_info.ttbr0)); + #endif +diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c +index 82f4572c8ddf..2a606c7bf025 100644 +--- a/arch/arm64/kernel/signal.c ++++ b/arch/arm64/kernel/signal.c +@@ -1108,7 +1108,7 @@ static void do_signal(struct pt_regs *regs) + void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags) + { + do { +- if (thread_flags & _TIF_NEED_RESCHED) { ++ if (thread_flags & _TIF_NEED_RESCHED_MASK) { + /* Unmask Debug and SError for the next task */ + local_daif_restore(DAIF_PROCCTX_NOIRQ); + +-- +2.43.0 + diff --git a/debian/patches-rt/0039-arm-Disable-jump-label-on-PREEMPT_RT.patch b/debian/patches-rt/0039-arm-Disable-jump-label-on-PREEMPT_RT.patch new file mode 100644 index 000000000..ac47159c4 --- /dev/null +++ b/debian/patches-rt/0039-arm-Disable-jump-label-on-PREEMPT_RT.patch @@ -0,0 +1,42 @@ +From f1c0950a69e46ee1d45192bfae026622da60bdc8 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner <tglx@linutronix.de> +Date: Wed, 8 Jul 2015 17:14:48 +0200 +Subject: [PATCH 39/62] arm: Disable jump-label on PREEMPT_RT. +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.1/older/patches-6.1.69-rt21.tar.xz + +jump-labels are used to efficiently switch between two possible code +paths. To achieve this, stop_machine() is used to keep the CPU in a +known state while the opcode is modified. The usage of stop_machine() +here leads to large latency spikes which can be observed on PREEMPT_RT. + +Jump labels may change the target during runtime and are not restricted +to debug or "configuration/ setup" part of a PREEMPT_RT system where +high latencies could be defined as acceptable. + +Disable jump-label support on a PREEMPT_RT system. + +[bigeasy: Patch description.] + +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Link: https://lkml.kernel.org/r/20220613182447.112191-2-bigeasy@linutronix.de +--- + arch/arm/Kconfig | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig +index 717e596dc13b..f170f29e98ac 100644 +--- a/arch/arm/Kconfig ++++ b/arch/arm/Kconfig +@@ -71,7 +71,7 @@ config ARM + select HARDIRQS_SW_RESEND + select HAVE_ARCH_AUDITSYSCALL if AEABI && !OABI_COMPAT + select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6 +- select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU ++ select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU && !PREEMPT_RT + select HAVE_ARCH_KFENCE if MMU && !XIP_KERNEL + select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 && MMU + select HAVE_ARCH_KASAN if MMU && !XIP_KERNEL +-- +2.43.0 + diff --git a/debian/patches-rt/0040-ARM-enable-irq-in-translation-section-permission-fau.patch b/debian/patches-rt/0040-ARM-enable-irq-in-translation-section-permission-fau.patch new file mode 100644 index 000000000..ee53f7bd6 --- /dev/null +++ b/debian/patches-rt/0040-ARM-enable-irq-in-translation-section-permission-fau.patch @@ -0,0 +1,97 @@ +From 802d6978bf4ebdec28d2ba4e715c2e414d9c7d06 Mon Sep 17 00:00:00 2001 +From: "Yadi.hu" <yadi.hu@windriver.com> +Date: Wed, 10 Dec 2014 10:32:09 +0800 +Subject: [PATCH 40/62] ARM: enable irq in translation/section permission fault + handlers +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.1/older/patches-6.1.69-rt21.tar.xz + +Probably happens on all ARM, with +CONFIG_PREEMPT_RT +CONFIG_DEBUG_ATOMIC_SLEEP + +This simple program.... + +int main() { + *((char*)0xc0001000) = 0; +}; + +[ 512.742724] BUG: sleeping function called from invalid context at kernel/rtmutex.c:658 +[ 512.743000] in_atomic(): 0, irqs_disabled(): 128, pid: 994, name: a +[ 512.743217] INFO: lockdep is turned off. +[ 512.743360] irq event stamp: 0 +[ 512.743482] hardirqs last enabled at (0): [< (null)>] (null) +[ 512.743714] hardirqs last disabled at (0): [<c0426370>] copy_process+0x3b0/0x11c0 +[ 512.744013] softirqs last enabled at (0): [<c0426370>] copy_process+0x3b0/0x11c0 +[ 512.744303] softirqs last disabled at (0): [< (null)>] (null) +[ 512.744631] [<c041872c>] (unwind_backtrace+0x0/0x104) +[ 512.745001] [<c09af0c4>] (dump_stack+0x20/0x24) +[ 512.745355] [<c0462490>] (__might_sleep+0x1dc/0x1e0) +[ 512.745717] [<c09b6770>] (rt_spin_lock+0x34/0x6c) +[ 512.746073] [<c0441bf0>] (do_force_sig_info+0x34/0xf0) +[ 512.746457] [<c0442668>] (force_sig_info+0x18/0x1c) +[ 512.746829] [<c041d880>] (__do_user_fault+0x9c/0xd8) +[ 512.747185] [<c041d938>] (do_bad_area+0x7c/0x94) +[ 512.747536] [<c041d990>] (do_sect_fault+0x40/0x48) +[ 512.747898] [<c040841c>] (do_DataAbort+0x40/0xa0) +[ 512.748181] Exception stack(0xecaa1fb0 to 0xecaa1ff8) + +Oxc0000000 belongs to kernel address space, user task can not be +allowed to access it. For above condition, correct result is that +test case should receive a “segment fault” and exits but not stacks. + +the root cause is commit 02fe2845d6a8 ("avoid enabling interrupts in +prefetch/data abort handlers"),it deletes irq enable block in Data +abort assemble code and move them into page/breakpiont/alignment fault +handlers instead. But author does not enable irq in translation/section +permission fault handlers. ARM disables irq when it enters exception/ +interrupt mode, if kernel doesn't enable irq, it would be still disabled +during translation/section permission fault. + +We see the above splat because do_force_sig_info is still called with +IRQs off, and that code eventually does a: + + spin_lock_irqsave(&t->sighand->siglock, flags); + +As this is architecture independent code, and we've not seen any other +need for other arch to have the siglock converted to raw lock, we can +conclude that we should enable irq for ARM translation/section +permission exception. + + +Signed-off-by: Yadi.hu <yadi.hu@windriver.com> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +--- + arch/arm/mm/fault.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c +index b0db85310331..77877dcb54ed 100644 +--- a/arch/arm/mm/fault.c ++++ b/arch/arm/mm/fault.c +@@ -400,6 +400,9 @@ do_translation_fault(unsigned long addr, unsigned int fsr, + if (addr < TASK_SIZE) + return do_page_fault(addr, fsr, regs); + ++ if (interrupts_enabled(regs)) ++ local_irq_enable(); ++ + if (user_mode(regs)) + goto bad_area; + +@@ -470,6 +473,9 @@ do_translation_fault(unsigned long addr, unsigned int fsr, + static int + do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) + { ++ if (interrupts_enabled(regs)) ++ local_irq_enable(); ++ + do_bad_area(addr, fsr, regs); + return 0; + } +-- +2.43.0 + diff --git a/debian/patches-rt/0041-tty-serial-omap-Make-the-locking-RT-aware.patch b/debian/patches-rt/0041-tty-serial-omap-Make-the-locking-RT-aware.patch new file mode 100644 index 000000000..b719f7f3b --- /dev/null +++ b/debian/patches-rt/0041-tty-serial-omap-Make-the-locking-RT-aware.patch @@ -0,0 +1,49 @@ +From 2a89ee21ea5c408b560839cc06bd0c13580fb3a4 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner <tglx@linutronix.de> +Date: Thu, 28 Jul 2011 13:32:57 +0200 +Subject: [PATCH 41/62] tty/serial/omap: Make the locking RT aware +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.1/older/patches-6.1.69-rt21.tar.xz + +The lock is a sleeping lock and local_irq_save() is not the +optimsation we are looking for. Redo it to make it work on -RT and +non-RT. + +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +--- + drivers/tty/serial/omap-serial.c | 12 ++++-------- + 1 file changed, 4 insertions(+), 8 deletions(-) + +diff --git a/drivers/tty/serial/omap-serial.c b/drivers/tty/serial/omap-serial.c +index 7d0d2718ef59..aa216fdbcb1d 100644 +--- a/drivers/tty/serial/omap-serial.c ++++ b/drivers/tty/serial/omap-serial.c +@@ -1241,13 +1241,10 @@ serial_omap_console_write(struct console *co, const char *s, + unsigned int ier; + int locked = 1; + +- local_irq_save(flags); +- if (up->port.sysrq) +- locked = 0; +- else if (oops_in_progress) +- locked = spin_trylock(&up->port.lock); ++ if (up->port.sysrq || oops_in_progress) ++ locked = spin_trylock_irqsave(&up->port.lock, flags); + else +- spin_lock(&up->port.lock); ++ spin_lock_irqsave(&up->port.lock, flags); + + /* + * First save the IER then disable the interrupts +@@ -1274,8 +1271,7 @@ serial_omap_console_write(struct console *co, const char *s, + check_modem_status(up); + + if (locked) +- spin_unlock(&up->port.lock); +- local_irq_restore(flags); ++ spin_unlock_irqrestore(&up->port.lock, flags); + } + + static int __init +-- +2.43.0 + diff --git a/debian/patches-rt/0042-tty-serial-pl011-Make-the-locking-work-on-RT.patch b/debian/patches-rt/0042-tty-serial-pl011-Make-the-locking-work-on-RT.patch new file mode 100644 index 000000000..c80dc32de --- /dev/null +++ b/debian/patches-rt/0042-tty-serial-pl011-Make-the-locking-work-on-RT.patch @@ -0,0 +1,60 @@ +From 1db3dc8a6af68447b52efbbf8dbb4d210d23d57b Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner <tglx@linutronix.de> +Date: Tue, 8 Jan 2013 21:36:51 +0100 +Subject: [PATCH 42/62] tty/serial/pl011: Make the locking work on RT +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.1/older/patches-6.1.69-rt21.tar.xz + +The lock is a sleeping lock and local_irq_save() is not the optimsation +we are looking for. Redo it to make it work on -RT and non-RT. + +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +--- + drivers/tty/serial/amba-pl011.c | 17 +++++++++++------ + 1 file changed, 11 insertions(+), 6 deletions(-) + +diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c +index c74eaf2552c3..38eb30b8491b 100644 +--- a/drivers/tty/serial/amba-pl011.c ++++ b/drivers/tty/serial/amba-pl011.c +@@ -2316,18 +2316,24 @@ pl011_console_write(struct console *co, const char *s, unsigned int count) + { + struct uart_amba_port *uap = amba_ports[co->index]; + unsigned int old_cr = 0, new_cr; +- unsigned long flags; ++ unsigned long flags = 0; + int locked = 1; + + clk_enable(uap->clk); + +- local_irq_save(flags); ++ /* ++ * local_irq_save(flags); ++ * ++ * This local_irq_save() is nonsense. If we come in via sysrq ++ * handling then interrupts are already disabled. Aside of ++ * that the port.sysrq check is racy on SMP regardless. ++ */ + if (uap->port.sysrq) + locked = 0; + else if (oops_in_progress) +- locked = spin_trylock(&uap->port.lock); ++ locked = spin_trylock_irqsave(&uap->port.lock, flags); + else +- spin_lock(&uap->port.lock); ++ spin_lock_irqsave(&uap->port.lock, flags); + + /* + * First save the CR then disable the interrupts +@@ -2353,8 +2359,7 @@ pl011_console_write(struct console *co, const char *s, unsigned int count) + pl011_write(old_cr, uap, REG_CR); + + if (locked) +- spin_unlock(&uap->port.lock); +- local_irq_restore(flags); ++ spin_unlock_irqrestore(&uap->port.lock, flags); + + clk_disable(uap->clk); + } +-- +2.43.0 + diff --git a/debian/patches-rt/0043-ARM-Allow-to-enable-RT.patch b/debian/patches-rt/0043-ARM-Allow-to-enable-RT.patch new file mode 100644 index 000000000..dd5a320a2 --- /dev/null +++ b/debian/patches-rt/0043-ARM-Allow-to-enable-RT.patch @@ -0,0 +1,37 @@ +From 47c9956482a592a16b58831ded27e3c0f62ec11d Mon Sep 17 00:00:00 2001 +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Fri, 11 Oct 2019 13:14:29 +0200 +Subject: [PATCH 43/62] ARM: Allow to enable RT +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.1/older/patches-6.1.69-rt21.tar.xz + +Allow to select RT. + +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +--- + arch/arm/Kconfig | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig +index f170f29e98ac..d1f2e062ce0b 100644 +--- a/arch/arm/Kconfig ++++ b/arch/arm/Kconfig +@@ -33,6 +33,7 @@ config ARM + select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT if CPU_V7 + select ARCH_SUPPORTS_ATOMIC_RMW + select ARCH_SUPPORTS_HUGETLBFS if ARM_LPAE ++ select ARCH_SUPPORTS_RT if HAVE_POSIX_CPU_TIMERS_TASK_WORK + select ARCH_USE_BUILTIN_BSWAP + select ARCH_USE_CMPXCHG_LOCKREF + select ARCH_USE_MEMTEST +@@ -115,6 +116,7 @@ config ARM + select HAVE_PERF_EVENTS + select HAVE_PERF_REGS + select HAVE_PERF_USER_STACK_DUMP ++ select HAVE_POSIX_CPU_TIMERS_TASK_WORK if !KVM + select HAVE_PREEMPT_LAZY + select MMU_GATHER_RCU_TABLE_FREE if SMP && ARM_LPAE + select HAVE_REGS_AND_STACK_ACCESS_API +-- +2.43.0 + diff --git a/debian/patches-rt/0044-ARM64-Allow-to-enable-RT.patch b/debian/patches-rt/0044-ARM64-Allow-to-enable-RT.patch new file mode 100644 index 000000000..478d2e7a8 --- /dev/null +++ b/debian/patches-rt/0044-ARM64-Allow-to-enable-RT.patch @@ -0,0 +1,29 @@ +From c71e1e0561e008bb2fe230ad7022c3e2483cd6c0 Mon Sep 17 00:00:00 2001 +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Fri, 11 Oct 2019 13:14:35 +0200 +Subject: [PATCH 44/62] ARM64: Allow to enable RT +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.1/older/patches-6.1.69-rt21.tar.xz + +Allow to select RT. + +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +--- + arch/arm64/Kconfig | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig +index 6e16670a7f43..0c617e48177c 100644 +--- a/arch/arm64/Kconfig ++++ b/arch/arm64/Kconfig +@@ -93,6 +93,7 @@ config ARM64 + select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 + select ARCH_SUPPORTS_NUMA_BALANCING + select ARCH_SUPPORTS_PAGE_TABLE_CHECK ++ select ARCH_SUPPORTS_RT + select ARCH_WANT_COMPAT_IPC_PARSE_VERSION if COMPAT + select ARCH_WANT_DEFAULT_BPF_JIT + select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT +-- +2.43.0 + diff --git a/debian/patches-rt/0045-powerpc-traps-Use-PREEMPT_RT.patch b/debian/patches-rt/0045-powerpc-traps-Use-PREEMPT_RT.patch new file mode 100644 index 000000000..76c18de22 --- /dev/null +++ b/debian/patches-rt/0045-powerpc-traps-Use-PREEMPT_RT.patch @@ -0,0 +1,40 @@ +From 4d7273bd07600b933e6d25807cd96df04e435cbe Mon Sep 17 00:00:00 2001 +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Fri, 26 Jul 2019 11:30:49 +0200 +Subject: [PATCH 45/62] powerpc: traps: Use PREEMPT_RT +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.1/older/patches-6.1.69-rt21.tar.xz + +Add PREEMPT_RT to the backtrace if enabled. + +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +--- + arch/powerpc/kernel/traps.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c +index 3956f32682c6..8e15205e51ef 100644 +--- a/arch/powerpc/kernel/traps.c ++++ b/arch/powerpc/kernel/traps.c +@@ -261,12 +261,17 @@ static char *get_mmu_str(void) + + static int __die(const char *str, struct pt_regs *regs, long err) + { ++ const char *pr = ""; ++ + printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter); + ++ if (IS_ENABLED(CONFIG_PREEMPTION)) ++ pr = IS_ENABLED(CONFIG_PREEMPT_RT) ? " PREEMPT_RT" : " PREEMPT"; ++ + printk("%s PAGE_SIZE=%luK%s%s%s%s%s%s %s\n", + IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN) ? "LE" : "BE", + PAGE_SIZE / 1024, get_mmu_str(), +- IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "", ++ pr, + IS_ENABLED(CONFIG_SMP) ? " SMP" : "", + IS_ENABLED(CONFIG_SMP) ? (" NR_CPUS=" __stringify(NR_CPUS)) : "", + debug_pagealloc_enabled() ? " DEBUG_PAGEALLOC" : "", +-- +2.43.0 + diff --git a/debian/patches-rt/0046-powerpc-pseries-iommu-Use-a-locallock-instead-local_.patch b/debian/patches-rt/0046-powerpc-pseries-iommu-Use-a-locallock-instead-local_.patch new file mode 100644 index 000000000..9d6e8ab00 --- /dev/null +++ b/debian/patches-rt/0046-powerpc-pseries-iommu-Use-a-locallock-instead-local_.patch @@ -0,0 +1,118 @@ +From 1d20f49e62250211b43cbe18a087fdf19c313081 Mon Sep 17 00:00:00 2001 +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Tue, 26 Mar 2019 18:31:54 +0100 +Subject: [PATCH 46/62] powerpc/pseries/iommu: Use a locallock instead + local_irq_save() +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.1/older/patches-6.1.69-rt21.tar.xz + +The locallock protects the per-CPU variable tce_page. The function +attempts to allocate memory while tce_page is protected (by disabling +interrupts). + +Use local_irq_save() instead of local_irq_disable(). + +Cc: stable-rt@vger.kernel.org +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +--- + arch/powerpc/platforms/pseries/iommu.c | 31 +++++++++++++++++--------- + 1 file changed, 20 insertions(+), 11 deletions(-) + +diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c +index 97b026130c71..01b3d19be382 100644 +--- a/arch/powerpc/platforms/pseries/iommu.c ++++ b/arch/powerpc/platforms/pseries/iommu.c +@@ -24,6 +24,7 @@ + #include <linux/of.h> + #include <linux/iommu.h> + #include <linux/rculist.h> ++#include <linux/local_lock.h> + #include <asm/io.h> + #include <asm/prom.h> + #include <asm/rtas.h> +@@ -200,7 +201,13 @@ static int tce_build_pSeriesLP(unsigned long liobn, long tcenum, long tceshift, + return ret; + } + +-static DEFINE_PER_CPU(__be64 *, tce_page); ++struct tce_page { ++ __be64 * page; ++ local_lock_t lock; ++}; ++static DEFINE_PER_CPU(struct tce_page, tce_page) = { ++ .lock = INIT_LOCAL_LOCK(lock), ++}; + + static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, + long npages, unsigned long uaddr, +@@ -223,9 +230,10 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, + direction, attrs); + } + +- local_irq_save(flags); /* to protect tcep and the page behind it */ ++ /* to protect tcep and the page behind it */ ++ local_lock_irqsave(&tce_page.lock, flags); + +- tcep = __this_cpu_read(tce_page); ++ tcep = __this_cpu_read(tce_page.page); + + /* This is safe to do since interrupts are off when we're called + * from iommu_alloc{,_sg}() +@@ -234,12 +242,12 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, + tcep = (__be64 *)__get_free_page(GFP_ATOMIC); + /* If allocation fails, fall back to the loop implementation */ + if (!tcep) { +- local_irq_restore(flags); ++ local_unlock_irqrestore(&tce_page.lock, flags); + return tce_build_pSeriesLP(tbl->it_index, tcenum, + tceshift, + npages, uaddr, direction, attrs); + } +- __this_cpu_write(tce_page, tcep); ++ __this_cpu_write(tce_page.page, tcep); + } + + rpn = __pa(uaddr) >> tceshift; +@@ -269,7 +277,7 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, + tcenum += limit; + } while (npages > 0 && !rc); + +- local_irq_restore(flags); ++ local_unlock_irqrestore(&tce_page.lock, flags); + + if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) { + ret = (int)rc; +@@ -454,16 +462,17 @@ static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn, + DMA_BIDIRECTIONAL, 0); + } + +- local_irq_disable(); /* to protect tcep and the page behind it */ +- tcep = __this_cpu_read(tce_page); ++ /* to protect tcep and the page behind it */ ++ local_lock_irq(&tce_page.lock); ++ tcep = __this_cpu_read(tce_page.page); + + if (!tcep) { + tcep = (__be64 *)__get_free_page(GFP_ATOMIC); + if (!tcep) { +- local_irq_enable(); ++ local_unlock_irq(&tce_page.lock); + return -ENOMEM; + } +- __this_cpu_write(tce_page, tcep); ++ __this_cpu_write(tce_page.page, tcep); + } + + proto_tce = TCE_PCI_READ | TCE_PCI_WRITE; +@@ -506,7 +515,7 @@ static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn, + + /* error cleanup: caller will clear whole range */ + +- local_irq_enable(); ++ local_unlock_irq(&tce_page.lock); + return rc; + } + +-- +2.43.0 + diff --git a/debian/patches-rt/0047-powerpc-kvm-Disable-in-kernel-MPIC-emulation-for-PRE.patch b/debian/patches-rt/0047-powerpc-kvm-Disable-in-kernel-MPIC-emulation-for-PRE.patch new file mode 100644 index 000000000..937a75d99 --- /dev/null +++ b/debian/patches-rt/0047-powerpc-kvm-Disable-in-kernel-MPIC-emulation-for-PRE.patch @@ -0,0 +1,46 @@ +From 34f52991170848510810b486dd6fe9a19cbe4c46 Mon Sep 17 00:00:00 2001 +From: Bogdan Purcareata <bogdan.purcareata@freescale.com> +Date: Fri, 24 Apr 2015 15:53:13 +0000 +Subject: [PATCH 47/62] powerpc/kvm: Disable in-kernel MPIC emulation for + PREEMPT_RT +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.1/older/patches-6.1.69-rt21.tar.xz + +While converting the openpic emulation code to use a raw_spinlock_t enables +guests to run on RT, there's still a performance issue. For interrupts sent in +directed delivery mode with a multiple CPU mask, the emulated openpic will loop +through all of the VCPUs, and for each VCPUs, it call IRQ_check, which will loop +through all the pending interrupts for that VCPU. This is done while holding the +raw_lock, meaning that in all this time the interrupts and preemption are +disabled on the host Linux. A malicious user app can max both these number and +cause a DoS. + +This temporary fix is sent for two reasons. First is so that users who want to +use the in-kernel MPIC emulation are aware of the potential latencies, thus +making sure that the hardware MPIC and their usage scenario does not involve +interrupts sent in directed delivery mode, and the number of possible pending +interrupts is kept small. Secondly, this should incentivize the development of a +proper openpic emulation that would be better suited for RT. + +Acked-by: Scott Wood <scottwood@freescale.com> +Signed-off-by: Bogdan Purcareata <bogdan.purcareata@freescale.com> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +--- + arch/powerpc/kvm/Kconfig | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig +index a9f57dad6d91..a0b528d4bb7c 100644 +--- a/arch/powerpc/kvm/Kconfig ++++ b/arch/powerpc/kvm/Kconfig +@@ -225,6 +225,7 @@ config KVM_E500MC + config KVM_MPIC + bool "KVM in-kernel MPIC emulation" + depends on KVM && PPC_E500 ++ depends on !PREEMPT_RT + select HAVE_KVM_IRQCHIP + select HAVE_KVM_IRQFD + select HAVE_KVM_IRQ_ROUTING +-- +2.43.0 + diff --git a/debian/patches-rt/0048-powerpc-stackprotector-work-around-stack-guard-init-.patch b/debian/patches-rt/0048-powerpc-stackprotector-work-around-stack-guard-init-.patch new file mode 100644 index 000000000..df6dbe080 --- /dev/null +++ b/debian/patches-rt/0048-powerpc-stackprotector-work-around-stack-guard-init-.patch @@ -0,0 +1,37 @@ +From d2e9a96e5570459dd886f87bf81c7714fb0a2108 Mon Sep 17 00:00:00 2001 +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Tue, 26 Mar 2019 18:31:29 +0100 +Subject: [PATCH 48/62] powerpc/stackprotector: work around stack-guard init + from atomic +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.1/older/patches-6.1.69-rt21.tar.xz + +This is invoked from the secondary CPU in atomic context. On x86 we use +tsc instead. On Power we XOR it against mftb() so lets use stack address +as the initial value. + +Cc: stable-rt@vger.kernel.org +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +--- + arch/powerpc/include/asm/stackprotector.h | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/arch/powerpc/include/asm/stackprotector.h b/arch/powerpc/include/asm/stackprotector.h +index 1c8460e23583..b1653c160bab 100644 +--- a/arch/powerpc/include/asm/stackprotector.h ++++ b/arch/powerpc/include/asm/stackprotector.h +@@ -24,7 +24,11 @@ static __always_inline void boot_init_stack_canary(void) + unsigned long canary; + + /* Try to get a semi random initial value. */ ++#ifdef CONFIG_PREEMPT_RT ++ canary = (unsigned long)&canary; ++#else + canary = get_random_canary(); ++#endif + canary ^= mftb(); + canary ^= LINUX_VERSION_CODE; + canary &= CANARY_MASK; +-- +2.43.0 + diff --git a/debian/patches-rt/0049-POWERPC-Allow-to-enable-RT.patch b/debian/patches-rt/0049-POWERPC-Allow-to-enable-RT.patch new file mode 100644 index 000000000..72ea26d09 --- /dev/null +++ b/debian/patches-rt/0049-POWERPC-Allow-to-enable-RT.patch @@ -0,0 +1,37 @@ +From 93941796079cb4515170adf454f9218adc89856d Mon Sep 17 00:00:00 2001 +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Fri, 11 Oct 2019 13:14:41 +0200 +Subject: [PATCH 49/62] POWERPC: Allow to enable RT +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.1/older/patches-6.1.69-rt21.tar.xz + +Allow to select RT. + +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +--- + arch/powerpc/Kconfig | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig +index 0eff864d6ec3..df697d3f68cd 100644 +--- a/arch/powerpc/Kconfig ++++ b/arch/powerpc/Kconfig +@@ -151,6 +151,7 @@ config PPC + select ARCH_STACKWALK + select ARCH_SUPPORTS_ATOMIC_RMW + select ARCH_SUPPORTS_DEBUG_PAGEALLOC if PPC_BOOK3S || PPC_8xx || 40x ++ select ARCH_SUPPORTS_RT if HAVE_POSIX_CPU_TIMERS_TASK_WORK + select ARCH_USE_BUILTIN_BSWAP + select ARCH_USE_CMPXCHG_LOCKREF if PPC64 + select ARCH_USE_MEMTEST +@@ -245,6 +246,7 @@ config PPC + select HAVE_PREEMPT_LAZY + select HAVE_REGS_AND_STACK_ACCESS_API + select HAVE_RELIABLE_STACKTRACE ++ select HAVE_POSIX_CPU_TIMERS_TASK_WORK if !KVM + select HAVE_RSEQ + select HAVE_SETUP_PER_CPU_AREA if PPC64 + select HAVE_SOFTIRQ_ON_OWN_STACK +-- +2.43.0 + diff --git a/debian/patches-rt/0050-sysfs-Add-sys-kernel-realtime-entry.patch b/debian/patches-rt/0050-sysfs-Add-sys-kernel-realtime-entry.patch new file mode 100644 index 000000000..bebe5bd1b --- /dev/null +++ b/debian/patches-rt/0050-sysfs-Add-sys-kernel-realtime-entry.patch @@ -0,0 +1,55 @@ +From b2103f830327ab5d1e6f49134c22d3c5adfb52cc Mon Sep 17 00:00:00 2001 +From: Clark Williams <williams@redhat.com> +Date: Sat, 30 Jul 2011 21:55:53 -0500 +Subject: [PATCH 50/62] sysfs: Add /sys/kernel/realtime entry +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.1/older/patches-6.1.69-rt21.tar.xz + +Add a /sys/kernel entry to indicate that the kernel is a +realtime kernel. + +Clark says that he needs this for udev rules, udev needs to evaluate +if its a PREEMPT_RT kernel a few thousand times and parsing uname +output is too slow or so. + +Are there better solutions? Should it exist and return 0 on !-rt? + +Signed-off-by: Clark Williams <williams@redhat.com> +Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +--- + kernel/ksysfs.c | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c +index 65dba9076f31..ab18048e2186 100644 +--- a/kernel/ksysfs.c ++++ b/kernel/ksysfs.c +@@ -142,6 +142,15 @@ KERNEL_ATTR_RO(vmcoreinfo); + + #endif /* CONFIG_CRASH_CORE */ + ++#if defined(CONFIG_PREEMPT_RT) ++static ssize_t realtime_show(struct kobject *kobj, ++ struct kobj_attribute *attr, char *buf) ++{ ++ return sprintf(buf, "%d\n", 1); ++} ++KERNEL_ATTR_RO(realtime); ++#endif ++ + /* whether file capabilities are enabled */ + static ssize_t fscaps_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +@@ -232,6 +241,9 @@ static struct attribute * kernel_attrs[] = { + #ifndef CONFIG_TINY_RCU + &rcu_expedited_attr.attr, + &rcu_normal_attr.attr, ++#endif ++#ifdef CONFIG_PREEMPT_RT ++ &realtime_attr.attr, + #endif + NULL + }; +-- +2.43.0 + diff --git a/debian/patches-rt/0051-Add-localversion-for-RT-release.patch b/debian/patches-rt/0051-Add-localversion-for-RT-release.patch new file mode 100644 index 000000000..f2ecbea44 --- /dev/null +++ b/debian/patches-rt/0051-Add-localversion-for-RT-release.patch @@ -0,0 +1,22 @@ +From 9c7d6e723acbbc184d1dd04811863378699134fb Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner <tglx@linutronix.de> +Date: Fri, 8 Jul 2011 20:25:16 +0200 +Subject: [PATCH 51/62] Add localversion for -RT release +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.1/older/patches-6.1.69-rt21.tar.xz + +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +--- + localversion-rt | 1 + + 1 file changed, 1 insertion(+) + create mode 100644 localversion-rt + +diff --git a/localversion-rt b/localversion-rt +new file mode 100644 +index 000000000000..045478966e9f +--- /dev/null ++++ b/localversion-rt +@@ -0,0 +1 @@ ++-rt7 +-- +2.43.0 + diff --git a/debian/patches-rt/0052-Linux-6.1.46-rt13-REBASE.patch b/debian/patches-rt/0052-Linux-6.1.46-rt13-REBASE.patch new file mode 100644 index 000000000..3096368f3 --- /dev/null +++ b/debian/patches-rt/0052-Linux-6.1.46-rt13-REBASE.patch @@ -0,0 +1,21 @@ +From 3f783498b292a814f8f364bbfd0efbfc1be6d30f Mon Sep 17 00:00:00 2001 +From: Clark Williams <clrkwllms@kernel.org> +Date: Fri, 18 Aug 2023 10:45:35 -0500 +Subject: [PATCH 52/62] 'Linux 6.1.46-rt13 REBASE' +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.1/older/patches-6.1.69-rt21.tar.xz + +Signed-off-by: Clark Williams <clrkwllms@kernel.org> +--- + localversion-rt | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/localversion-rt b/localversion-rt +index 045478966e9f..9f7d0bdbffb1 100644 +--- a/localversion-rt ++++ b/localversion-rt +@@ -1 +1 @@ +--rt7 ++-rt13 +-- +2.43.0 + diff --git a/debian/patches-rt/0053-io-mapping-don-t-disable-preempt-on-RT-in-io_mapping.patch b/debian/patches-rt/0053-io-mapping-don-t-disable-preempt-on-RT-in-io_mapping.patch new file mode 100644 index 000000000..b4b695541 --- /dev/null +++ b/debian/patches-rt/0053-io-mapping-don-t-disable-preempt-on-RT-in-io_mapping.patch @@ -0,0 +1,93 @@ +From 53c6a09e670e985d37ca05785a0155ab51b49cf4 Mon Sep 17 00:00:00 2001 +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Fri, 10 Mar 2023 17:29:05 +0100 +Subject: [PATCH 53/62] io-mapping: don't disable preempt on RT in + io_mapping_map_atomic_wc(). +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.1/older/patches-6.1.69-rt21.tar.xz + +io_mapping_map_atomic_wc() disables preemption and pagefaults for +historical reasons. The conversion to io_mapping_map_local_wc(), which +only disables migration, cannot be done wholesale because quite some call +sites need to be updated to accommodate with the changed semantics. + +On PREEMPT_RT enabled kernels the io_mapping_map_atomic_wc() semantics are +problematic due to the implicit disabling of preemption which makes it +impossible to acquire 'sleeping' spinlocks within the mapped atomic +sections. + +PREEMPT_RT replaces the preempt_disable() with a migrate_disable() for +more than a decade. It could be argued that this is a justification to do +this unconditionally, but PREEMPT_RT covers only a limited number of +architectures and it disables some functionality which limits the coverage +further. + +Limit the replacement to PREEMPT_RT for now. This is also done +kmap_atomic(). + +Link: https://lkml.kernel.org/r/20230310162905.O57Pj7hh@linutronix.de +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Reported-by: Richard Weinberger <richard.weinberger@gmail.com> + Link: https://lore.kernel.org/CAFLxGvw0WMxaMqYqJ5WgvVSbKHq2D2xcXTOgMCpgq9nDC-MWTQ@mail.gmail.com +Cc: Thomas Gleixner <tglx@linutronix.de> +Signed-off-by: Andrew Morton <akpm@linux-foundation.org> +(cherry picked from commit 7eb16f23b9a415f062db22739e59bb144e0b24ab) +Signed-off-by: Clark Williams <clark.williams@gmail.com> +--- + include/linux/io-mapping.h | 20 ++++++++++++++++---- + 1 file changed, 16 insertions(+), 4 deletions(-) + +diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h +index 66a774d2710e..b08532b8fba7 100644 +--- a/include/linux/io-mapping.h ++++ b/include/linux/io-mapping.h +@@ -69,7 +69,10 @@ io_mapping_map_atomic_wc(struct io_mapping *mapping, + + BUG_ON(offset >= mapping->size); + phys_addr = mapping->base + offset; +- preempt_disable(); ++ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) ++ preempt_disable(); ++ else ++ migrate_disable(); + pagefault_disable(); + return __iomap_local_pfn_prot(PHYS_PFN(phys_addr), mapping->prot); + } +@@ -79,7 +82,10 @@ io_mapping_unmap_atomic(void __iomem *vaddr) + { + kunmap_local_indexed((void __force *)vaddr); + pagefault_enable(); +- preempt_enable(); ++ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) ++ preempt_enable(); ++ else ++ migrate_enable(); + } + + static inline void __iomem * +@@ -162,7 +168,10 @@ static inline void __iomem * + io_mapping_map_atomic_wc(struct io_mapping *mapping, + unsigned long offset) + { +- preempt_disable(); ++ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) ++ preempt_disable(); ++ else ++ migrate_disable(); + pagefault_disable(); + return io_mapping_map_wc(mapping, offset, PAGE_SIZE); + } +@@ -172,7 +181,10 @@ io_mapping_unmap_atomic(void __iomem *vaddr) + { + io_mapping_unmap(vaddr); + pagefault_enable(); +- preempt_enable(); ++ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) ++ preempt_enable(); ++ else ++ migrate_enable(); + } + + static inline void __iomem * +-- +2.43.0 + diff --git a/debian/patches-rt/0054-locking-rwbase-Mitigate-indefinite-writer-starvation.patch b/debian/patches-rt/0054-locking-rwbase-Mitigate-indefinite-writer-starvation.patch new file mode 100644 index 000000000..1940c66d7 --- /dev/null +++ b/debian/patches-rt/0054-locking-rwbase-Mitigate-indefinite-writer-starvation.patch @@ -0,0 +1,63 @@ +From 8785dde5198dc91cbb518044e1c6d301ef9a9857 Mon Sep 17 00:00:00 2001 +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Tue, 21 Mar 2023 17:11:40 +0100 +Subject: [PATCH 54/62] locking/rwbase: Mitigate indefinite writer starvation +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.1/older/patches-6.1.69-rt21.tar.xz + +On PREEMPT_RT, rw_semaphore and rwlock_t locks are unfair to writers. +Readers can indefinitely acquire the lock unless the writer fully acquired +the lock, which might never happen if there is always a reader in the +critical section owning the lock. + +Mel Gorman reported that since LTP-20220121 the dio_truncate test case +went from having 1 reader to having 16 readers and that number of readers +is sufficient to prevent the down_write ever succeeding while readers +exist. Eventually the test is killed after 30 minutes as a failure. + +Mel proposed a timeout to limit how long a writer can be blocked until +the reader is forced into the slowpath. + +Thomas argued that there is no added value by providing this timeout. From +a PREEMPT_RT point of view, there are no critical rw_semaphore or rwlock_t +locks left where the reader must be preferred. + +Mitigate indefinite writer starvation by forcing the READER into the +slowpath once the WRITER attempts to acquire the lock. + +Reported-by: Mel Gorman <mgorman@techsingularity.net> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Signed-off-by: Ingo Molnar <mingo@kernel.org> +Acked-by: Mel Gorman <mgorman@techsingularity.net> +Link: https://lore.kernel.org/877cwbq4cq.ffs@tglx +Link: https://lore.kernel.org/r/20230321161140.HMcQEhHb@linutronix.de +Cc: Linus Torvalds <torvalds@linux-foundation.org> +(cherry picked from commit 286deb7ec03d941664ac3ffaff58814b454adf65) +Signed-off-by: Clark Williams <clark.williams@gmail.com> +--- + kernel/locking/rwbase_rt.c | 9 --------- + 1 file changed, 9 deletions(-) + +diff --git a/kernel/locking/rwbase_rt.c b/kernel/locking/rwbase_rt.c +index c201aadb9301..25ec0239477c 100644 +--- a/kernel/locking/rwbase_rt.c ++++ b/kernel/locking/rwbase_rt.c +@@ -72,15 +72,6 @@ static int __sched __rwbase_read_lock(struct rwbase_rt *rwb, + int ret; + + raw_spin_lock_irq(&rtm->wait_lock); +- /* +- * Allow readers, as long as the writer has not completely +- * acquired the semaphore for write. +- */ +- if (atomic_read(&rwb->readers) != WRITER_BIAS) { +- atomic_inc(&rwb->readers); +- raw_spin_unlock_irq(&rtm->wait_lock); +- return 0; +- } + + /* + * Call into the slow lock path with the rtmutex->wait_lock +-- +2.43.0 + diff --git a/debian/patches-rt/0055-revert-softirq-Let-ksoftirqd-do-its-job.patch b/debian/patches-rt/0055-revert-softirq-Let-ksoftirqd-do-its-job.patch new file mode 100644 index 000000000..c9ed7f6f6 --- /dev/null +++ b/debian/patches-rt/0055-revert-softirq-Let-ksoftirqd-do-its-job.patch @@ -0,0 +1,108 @@ +From fcdb9b29c6d58895a386ac23229564fad2c316b5 Mon Sep 17 00:00:00 2001 +From: Paolo Abeni <pabeni@redhat.com> +Date: Mon, 8 May 2023 08:17:44 +0200 +Subject: [PATCH 55/62] revert: "softirq: Let ksoftirqd do its job" +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.1/older/patches-6.1.69-rt21.tar.xz + +Due to the mentioned commit, when the ksoftirqd processes take charge +of softirq processing, the system can experience high latencies. + +In the past a few workarounds have been implemented for specific +side-effects of the above: + +commit 1ff688209e2e ("watchdog: core: make sure the watchdog_worker is not deferred") +commit 8d5755b3f77b ("watchdog: softdog: fire watchdog even if softirqs do not get to run") +commit 217f69743681 ("net: busy-poll: allow preemption in sk_busy_loop()") +commit 3c53776e29f8 ("Mark HI and TASKLET softirq synchronous") + +but the latency problem still exists in real-life workloads, see the +link below. + +The reverted commit intended to solve a live-lock scenario that can now +be addressed with the NAPI threaded mode, introduced with commit +29863d41bb6e ("net: implement threaded-able napi poll loop support"), +and nowadays in a pretty stable status. + +While a complete solution to put softirq processing under nice resource +control would be preferable, that has proven to be a very hard task. In +the short term, remove the main pain point, and also simplify a bit the +current softirq implementation. + +Note that this change also reverts commit 3c53776e29f8 ("Mark HI and +TASKLET softirq synchronous") and commit 1342d8080f61 ("softirq: Don't +skip softirq execution when softirq thread is parking"), which are +direct follow-ups of the feature commit. A single change is preferred to +avoid known bad intermediate states introduced by a patch series +reverting them individually. + +Link: https://lore.kernel.org/netdev/305d7742212cbe98621b16be782b0562f1012cb6.camel@redhat.com/ +Signed-off-by: Paolo Abeni <pabeni@redhat.com> +Tested-by: Jason Xing <kerneljasonxing@gmail.com> +Reviewed-by: Jakub Kicinski <kuba@kernel.org> +Reviewed-by: Eric Dumazet <edumazet@google.com> +Reviewed-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Link: https://lore.kernel.org/r/57e66b364f1b6f09c9bc0316742c3b14f4ce83bd.1683526542.git.pabeni@redhat.com +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +(cherry picked from commit b8a04a538ed4755dc97c403ee3b8dd882955c98c) +Signed-off-by: Clark Williams <clark.williams@gmail.com> +--- + kernel/softirq.c | 22 ++-------------------- + 1 file changed, 2 insertions(+), 20 deletions(-) + +diff --git a/kernel/softirq.c b/kernel/softirq.c +index 82f3e68fbe22..af9e879bbbf7 100644 +--- a/kernel/softirq.c ++++ b/kernel/softirq.c +@@ -80,21 +80,6 @@ static void wakeup_softirqd(void) + wake_up_process(tsk); + } + +-/* +- * If ksoftirqd is scheduled, we do not want to process pending softirqs +- * right now. Let ksoftirqd handle this at its own rate, to get fairness, +- * unless we're doing some of the synchronous softirqs. +- */ +-#define SOFTIRQ_NOW_MASK ((1 << HI_SOFTIRQ) | (1 << TASKLET_SOFTIRQ)) +-static bool ksoftirqd_running(unsigned long pending) +-{ +- struct task_struct *tsk = __this_cpu_read(ksoftirqd); +- +- if (pending & SOFTIRQ_NOW_MASK) +- return false; +- return tsk && task_is_running(tsk) && !__kthread_should_park(tsk); +-} +- + #ifdef CONFIG_TRACE_IRQFLAGS + DEFINE_PER_CPU(int, hardirqs_enabled); + DEFINE_PER_CPU(int, hardirq_context); +@@ -236,7 +221,7 @@ void __local_bh_enable_ip(unsigned long ip, unsigned int cnt) + goto out; + + pending = local_softirq_pending(); +- if (!pending || ksoftirqd_running(pending)) ++ if (!pending) + goto out; + + /* +@@ -432,9 +417,6 @@ static inline bool should_wake_ksoftirqd(void) + + static inline void invoke_softirq(void) + { +- if (ksoftirqd_running(local_softirq_pending())) +- return; +- + if (!force_irqthreads() || !__this_cpu_read(ksoftirqd)) { + #ifdef CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK + /* +@@ -468,7 +450,7 @@ asmlinkage __visible void do_softirq(void) + + pending = local_softirq_pending(); + +- if (pending && !ksoftirqd_running(pending)) ++ if (pending) + do_softirq_own_stack(); + + local_irq_restore(flags); +-- +2.43.0 + diff --git a/debian/patches-rt/0056-debugobjects-locking-Annotate-debug_object_fill_pool.patch b/debian/patches-rt/0056-debugobjects-locking-Annotate-debug_object_fill_pool.patch new file mode 100644 index 000000000..9528f771b --- /dev/null +++ b/debian/patches-rt/0056-debugobjects-locking-Annotate-debug_object_fill_pool.patch @@ -0,0 +1,177 @@ +From c082e5d28e56252dca01b53c553bba5cd152fec1 Mon Sep 17 00:00:00 2001 +From: Peter Zijlstra <peterz@infradead.org> +Date: Tue, 25 Apr 2023 17:03:13 +0200 +Subject: [PATCH 56/62] debugobjects,locking: Annotate debug_object_fill_pool() + wait type violation +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.1/older/patches-6.1.69-rt21.tar.xz + +There is an explicit wait-type violation in debug_object_fill_pool() +for PREEMPT_RT=n kernels which allows them to more easily fill the +object pool and reduce the chance of allocation failures. + +Lockdep's wait-type checks are designed to check the PREEMPT_RT +locking rules even for PREEMPT_RT=n kernels and object to this, so +create a lockdep annotation to allow this to stand. + +Specifically, create a 'lock' type that overrides the inner wait-type +while it is held -- allowing one to temporarily raise it, such that +the violation is hidden. + +Reported-by: Vlastimil Babka <vbabka@suse.cz> +Reported-by: Qi Zheng <zhengqi.arch@bytedance.com> +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Tested-by: Qi Zheng <zhengqi.arch@bytedance.com> +Link: https://lkml.kernel.org/r/20230429100614.GA1489784@hirez.programming.kicks-ass.net +(cherry picked from commit 0cce06ba859a515bd06224085d3addb870608b6d) +Signed-off-by: Clark Williams <clark.williams@gmail.com> +--- + include/linux/lockdep.h | 14 ++++++++++++++ + include/linux/lockdep_types.h | 1 + + kernel/locking/lockdep.c | 28 +++++++++++++++++++++------- + lib/debugobjects.c | 15 +++++++++++++-- + 4 files changed, 49 insertions(+), 9 deletions(-) + +diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h +index 1023f349af71..a3329fb49b33 100644 +--- a/include/linux/lockdep.h ++++ b/include/linux/lockdep.h +@@ -339,6 +339,16 @@ extern void lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie); + #define lockdep_repin_lock(l,c) lock_repin_lock(&(l)->dep_map, (c)) + #define lockdep_unpin_lock(l,c) lock_unpin_lock(&(l)->dep_map, (c)) + ++/* ++ * Must use lock_map_aquire_try() with override maps to avoid ++ * lockdep thinking they participate in the block chain. ++ */ ++#define DEFINE_WAIT_OVERRIDE_MAP(_name, _wait_type) \ ++ struct lockdep_map _name = { \ ++ .name = #_name "-wait-type-override", \ ++ .wait_type_inner = _wait_type, \ ++ .lock_type = LD_LOCK_WAIT_OVERRIDE, } ++ + #else /* !CONFIG_LOCKDEP */ + + static inline void lockdep_init_task(struct task_struct *task) +@@ -427,6 +437,9 @@ extern int lockdep_is_held(const void *); + #define lockdep_repin_lock(l, c) do { (void)(l); (void)(c); } while (0) + #define lockdep_unpin_lock(l, c) do { (void)(l); (void)(c); } while (0) + ++#define DEFINE_WAIT_OVERRIDE_MAP(_name, _wait_type) \ ++ struct lockdep_map __maybe_unused _name = {} ++ + #endif /* !LOCKDEP */ + + enum xhlock_context_t { +@@ -551,6 +564,7 @@ do { \ + #define rwsem_release(l, i) lock_release(l, i) + + #define lock_map_acquire(l) lock_acquire_exclusive(l, 0, 0, NULL, _THIS_IP_) ++#define lock_map_acquire_try(l) lock_acquire_exclusive(l, 0, 1, NULL, _THIS_IP_) + #define lock_map_acquire_read(l) lock_acquire_shared_recursive(l, 0, 0, NULL, _THIS_IP_) + #define lock_map_acquire_tryread(l) lock_acquire_shared_recursive(l, 0, 1, NULL, _THIS_IP_) + #define lock_map_release(l) lock_release(l, _THIS_IP_) +diff --git a/include/linux/lockdep_types.h b/include/linux/lockdep_types.h +index d22430840b53..59f4fb1626ea 100644 +--- a/include/linux/lockdep_types.h ++++ b/include/linux/lockdep_types.h +@@ -33,6 +33,7 @@ enum lockdep_wait_type { + enum lockdep_lock_type { + LD_LOCK_NORMAL = 0, /* normal, catch all */ + LD_LOCK_PERCPU, /* percpu */ ++ LD_LOCK_WAIT_OVERRIDE, /* annotation */ + LD_LOCK_MAX, + }; + +diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c +index 3b38303ed27b..a046e03c7ead 100644 +--- a/kernel/locking/lockdep.c ++++ b/kernel/locking/lockdep.c +@@ -2245,6 +2245,9 @@ static inline bool usage_match(struct lock_list *entry, void *mask) + + static inline bool usage_skip(struct lock_list *entry, void *mask) + { ++ if (entry->class->lock_type == LD_LOCK_NORMAL) ++ return false; ++ + /* + * Skip local_lock() for irq inversion detection. + * +@@ -2271,14 +2274,16 @@ static inline bool usage_skip(struct lock_list *entry, void *mask) + * As a result, we will skip local_lock(), when we search for irq + * inversion bugs. + */ +- if (entry->class->lock_type == LD_LOCK_PERCPU) { +- if (DEBUG_LOCKS_WARN_ON(entry->class->wait_type_inner < LD_WAIT_CONFIG)) +- return false; ++ if (entry->class->lock_type == LD_LOCK_PERCPU && ++ DEBUG_LOCKS_WARN_ON(entry->class->wait_type_inner < LD_WAIT_CONFIG)) ++ return false; + +- return true; +- } ++ /* ++ * Skip WAIT_OVERRIDE for irq inversion detection -- it's not actually ++ * a lock and only used to override the wait_type. ++ */ + +- return false; ++ return true; + } + + /* +@@ -4745,7 +4750,8 @@ static int check_wait_context(struct task_struct *curr, struct held_lock *next) + + for (; depth < curr->lockdep_depth; depth++) { + struct held_lock *prev = curr->held_locks + depth; +- u8 prev_inner = hlock_class(prev)->wait_type_inner; ++ struct lock_class *class = hlock_class(prev); ++ u8 prev_inner = class->wait_type_inner; + + if (prev_inner) { + /* +@@ -4755,6 +4761,14 @@ static int check_wait_context(struct task_struct *curr, struct held_lock *next) + * Also due to trylocks. + */ + curr_inner = min(curr_inner, prev_inner); ++ ++ /* ++ * Allow override for annotations -- this is typically ++ * only valid/needed for code that only exists when ++ * CONFIG_PREEMPT_RT=n. ++ */ ++ if (unlikely(class->lock_type == LD_LOCK_WAIT_OVERRIDE)) ++ curr_inner = prev_inner; + } + } + +diff --git a/lib/debugobjects.c b/lib/debugobjects.c +index dacb80c22c4f..3c9e00e207dc 100644 +--- a/lib/debugobjects.c ++++ b/lib/debugobjects.c +@@ -600,10 +600,21 @@ static void debug_objects_fill_pool(void) + { + /* + * On RT enabled kernels the pool refill must happen in preemptible +- * context: ++ * context -- for !RT kernels we rely on the fact that spinlock_t and ++ * raw_spinlock_t are basically the same type and this lock-type ++ * inversion works just fine. + */ +- if (!IS_ENABLED(CONFIG_PREEMPT_RT) || preemptible()) ++ if (!IS_ENABLED(CONFIG_PREEMPT_RT) || preemptible()) { ++ /* ++ * Annotate away the spinlock_t inside raw_spinlock_t warning ++ * by temporarily raising the wait-type to WAIT_SLEEP, matching ++ * the preemptible() condition above. ++ */ ++ static DEFINE_WAIT_OVERRIDE_MAP(fill_pool_map, LD_WAIT_SLEEP); ++ lock_map_acquire_try(&fill_pool_map); + fill_pool(); ++ lock_map_release(&fill_pool_map); ++ } + } + + static void +-- +2.43.0 + diff --git a/debian/patches-rt/0057-sched-avoid-false-lockdep-splat-in-put_task_struct.patch b/debian/patches-rt/0057-sched-avoid-false-lockdep-splat-in-put_task_struct.patch new file mode 100644 index 000000000..9da36bc08 --- /dev/null +++ b/debian/patches-rt/0057-sched-avoid-false-lockdep-splat-in-put_task_struct.patch @@ -0,0 +1,52 @@ +From 5c27e6fdf46d68180a46fdf7944aa7e4668680c3 Mon Sep 17 00:00:00 2001 +From: Wander Lairson Costa <wander@redhat.com> +Date: Wed, 14 Jun 2023 09:23:22 -0300 +Subject: [PATCH 57/62] sched: avoid false lockdep splat in put_task_struct() +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.1/older/patches-6.1.69-rt21.tar.xz + +In put_task_struct(), a spin_lock is indirectly acquired under the kernel +stock. When running the kernel in real-time (RT) configuration, the +operation is dispatched to a preemptible context call to ensure +guaranteed preemption. However, if PROVE_RAW_LOCK_NESTING is enabled +and __put_task_struct() is called while holding a raw_spinlock, lockdep +incorrectly reports an "Invalid lock context" in the stock kernel. + +This false splat occurs because lockdep is unaware of the different +route taken under RT. To address this issue, override the inner wait +type to prevent the false lockdep splat. + +Signed-off-by: Wander Lairson Costa <wander@redhat.com> +Suggested-by: Oleg Nesterov <oleg@redhat.com> +Suggested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Suggested-by: Peter Zijlstra <peterz@infradead.org> +Cc: Steven Rostedt <rostedt@goodmis.org> +Cc: Luis Goncalves <lgoncalv@redhat.com> +Link: https://lore.kernel.org/r/20230614122323.37957-3-wander@redhat.com +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +(cherry picked from commit a5e446e728e89d5f5c5e427cc919bc7813c64c28) +Signed-off-by: Clark Williams <clark.williams@gmail.com> +--- + include/linux/sched/task.h | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h +index 7291fb6399d2..de7ebd2bf3ba 100644 +--- a/include/linux/sched/task.h ++++ b/include/linux/sched/task.h +@@ -141,8 +141,12 @@ static inline void put_task_struct(struct task_struct *t) + */ + if (IS_ENABLED(CONFIG_PREEMPT_RT) && !preemptible()) + call_rcu(&t->rcu, __put_task_struct_rcu_cb); +- else ++ else { ++ static DEFINE_WAIT_OVERRIDE_MAP(put_task_map, LD_WAIT_SLEEP); ++ lock_map_acquire_try(&put_task_map); + __put_task_struct(t); ++ lock_map_release(&put_task_map); ++ } + } + + static inline void put_task_struct_many(struct task_struct *t, int nr) +-- +2.43.0 + diff --git a/debian/patches-rt/0058-mm-page_alloc-Use-write_seqlock_irqsave-instead-writ.patch b/debian/patches-rt/0058-mm-page_alloc-Use-write_seqlock_irqsave-instead-writ.patch new file mode 100644 index 000000000..68d9f5008 --- /dev/null +++ b/debian/patches-rt/0058-mm-page_alloc-Use-write_seqlock_irqsave-instead-writ.patch @@ -0,0 +1,97 @@ +From 9512a9467dec62e03f2df4f15af9a38332b8de58 Mon Sep 17 00:00:00 2001 +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Fri, 23 Jun 2023 22:15:17 +0200 +Subject: [PATCH 58/62] mm/page_alloc: Use write_seqlock_irqsave() instead + write_seqlock() + local_irq_save(). +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.1/older/patches-6.1.69-rt21.tar.xz + +__build_all_zonelists() acquires zonelist_update_seq by first disabling +interrupts via local_irq_save() and then acquiring the seqlock with +write_seqlock(). This is troublesome and leads to problems on +PREEMPT_RT. The problem is that the inner spinlock_t becomes a sleeping +lock on PREEMPT_RT and must not be acquired with disabled interrupts. + +The API provides write_seqlock_irqsave() which does the right thing in +one step. +printk_deferred_enter() has to be invoked in non-migrate-able context to +ensure that deferred printing is enabled and disabled on the same CPU. +This is the case after zonelist_update_seq has been acquired. + +There was discussion on the first submission that the order should be: + local_irq_disable(); + printk_deferred_enter(); + write_seqlock(); + +to avoid pitfalls like having an unaccounted printk() coming from +write_seqlock_irqsave() before printk_deferred_enter() is invoked. The +only origin of such a printk() can be a lockdep splat because the +lockdep annotation happens after the sequence count is incremented. +This is exceptional and subject to change. + +It was also pointed that PREEMPT_RT can be affected by the printk +problem since its write_seqlock_irqsave() does not really disable +interrupts. This isn't the case because PREEMPT_RT's printk +implementation differs from the mainline implementation in two important +aspects: +- Printing happens in a dedicated threads and not at during the + invocation of printk(). +- In emergency cases where synchronous printing is used, a different + driver is used which does not use tty_port::lock. + +Acquire zonelist_update_seq with write_seqlock_irqsave() and then defer +printk output. + +Fixes: 1007843a91909 ("mm/page_alloc: fix potential deadlock on zonelist_update_seq seqlock") +Acked-by: Michal Hocko <mhocko@suse.com> +Reviewed-by: David Hildenbrand <david@redhat.com> +Link: https://lore.kernel.org/r/20230623201517.yw286Knb@linutronix.de +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +(cherry picked from commit 4d1139baae8bc4fff3728d1d204bdb04c13dbe10) +Signed-off-by: Clark Williams <clark.williams@gmail.com> +--- + mm/page_alloc.c | 15 ++++++--------- + 1 file changed, 6 insertions(+), 9 deletions(-) + +diff --git a/mm/page_alloc.c b/mm/page_alloc.c +index 4583f8a42d91..835b69a64f4f 100644 +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -6588,19 +6588,17 @@ static void __build_all_zonelists(void *data) + unsigned long flags; + + /* +- * Explicitly disable this CPU's interrupts before taking seqlock +- * to prevent any IRQ handler from calling into the page allocator +- * (e.g. GFP_ATOMIC) that could hit zonelist_iter_begin and livelock. ++ * The zonelist_update_seq must be acquired with irqsave because the ++ * reader can be invoked from IRQ with GFP_ATOMIC. + */ +- local_irq_save(flags); ++ write_seqlock_irqsave(&zonelist_update_seq, flags); + /* +- * Explicitly disable this CPU's synchronous printk() before taking +- * seqlock to prevent any printk() from trying to hold port->lock, for ++ * Also disable synchronous printk() to prevent any printk() from ++ * trying to hold port->lock, for + * tty_insert_flip_string_and_push_buffer() on other CPU might be + * calling kmalloc(GFP_ATOMIC | __GFP_NOWARN) with port->lock held. + */ + printk_deferred_enter(); +- write_seqlock(&zonelist_update_seq); + + #ifdef CONFIG_NUMA + memset(node_load, 0, sizeof(node_load)); +@@ -6637,9 +6635,8 @@ static void __build_all_zonelists(void *data) + #endif + } + +- write_sequnlock(&zonelist_update_seq); + printk_deferred_exit(); +- local_irq_restore(flags); ++ write_sequnlock_irqrestore(&zonelist_update_seq, flags); + } + + static noinline void __init +-- +2.43.0 + diff --git a/debian/patches-rt/0059-bpf-Remove-in_atomic-from-bpf_link_put.patch b/debian/patches-rt/0059-bpf-Remove-in_atomic-from-bpf_link_put.patch new file mode 100644 index 000000000..8abf764af --- /dev/null +++ b/debian/patches-rt/0059-bpf-Remove-in_atomic-from-bpf_link_put.patch @@ -0,0 +1,120 @@ +From 05999b640eb04be872e5491a040701fcddc73349 Mon Sep 17 00:00:00 2001 +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Wed, 14 Jun 2023 10:34:30 +0200 +Subject: [PATCH 59/62] bpf: Remove in_atomic() from bpf_link_put(). +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.1/older/patches-6.1.69-rt21.tar.xz + +bpf_free_inode() is invoked as a RCU callback. Usually RCU callbacks are +invoked within softirq context. By setting rcutree.use_softirq=0 boot +option the RCU callbacks will be invoked in a per-CPU kthread with +bottom halves disabled which implies a RCU read section. + +On PREEMPT_RT the context remains fully preemptible. The RCU read +section however does not allow schedule() invocation. The latter happens +in mutex_lock() performed by bpf_trampoline_unlink_prog() originated +from bpf_link_put(). + +It was pointed out that the bpf_link_put() invocation should not be +delayed if originated from close(). It was also pointed out that other +invocations from within a syscall should also avoid the workqueue. +Everyone else should use workqueue by default to remain safe in the +future (while auditing the code, every caller was preemptible except for +the RCU case). + +Let bpf_link_put() use the worker unconditionally. Add +bpf_link_put_direct() which will directly free the resources and is used +by close() and from within __sys_bpf(). + +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Andrii Nakryiko <andrii@kernel.org> +Link: https://lore.kernel.org/bpf/20230614083430.oENawF8f@linutronix.de +(cherry picked from commit ab5d47bd41b1db82c295b0e751e2b822b43a4b5a) +Signed-off-by: Clark Williams <clark.williams@gmail.com> +--- + kernel/bpf/syscall.c | 29 ++++++++++++++++------------- + 1 file changed, 16 insertions(+), 13 deletions(-) + +diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c +index c0915e2424f1..f8ba6e0a5c08 100644 +--- a/kernel/bpf/syscall.c ++++ b/kernel/bpf/syscall.c +@@ -2732,28 +2732,31 @@ static void bpf_link_put_deferred(struct work_struct *work) + bpf_link_free(link); + } + +-/* bpf_link_put can be called from atomic context, but ensures that resources +- * are freed from process context ++/* bpf_link_put might be called from atomic context. It needs to be called ++ * from sleepable context in order to acquire sleeping locks during the process. + */ + void bpf_link_put(struct bpf_link *link) + { + if (!atomic64_dec_and_test(&link->refcnt)) + return; + +- if (in_atomic()) { +- INIT_WORK(&link->work, bpf_link_put_deferred); +- schedule_work(&link->work); +- } else { +- bpf_link_free(link); +- } ++ INIT_WORK(&link->work, bpf_link_put_deferred); ++ schedule_work(&link->work); + } + EXPORT_SYMBOL(bpf_link_put); + ++static void bpf_link_put_direct(struct bpf_link *link) ++{ ++ if (!atomic64_dec_and_test(&link->refcnt)) ++ return; ++ bpf_link_free(link); ++} ++ + static int bpf_link_release(struct inode *inode, struct file *filp) + { + struct bpf_link *link = filp->private_data; + +- bpf_link_put(link); ++ bpf_link_put_direct(link); + return 0; + } + +@@ -4674,7 +4677,7 @@ static int link_update(union bpf_attr *attr) + if (ret) + bpf_prog_put(new_prog); + out_put_link: +- bpf_link_put(link); ++ bpf_link_put_direct(link); + return ret; + } + +@@ -4697,7 +4700,7 @@ static int link_detach(union bpf_attr *attr) + else + ret = -EOPNOTSUPP; + +- bpf_link_put(link); ++ bpf_link_put_direct(link); + return ret; + } + +@@ -4767,7 +4770,7 @@ static int bpf_link_get_fd_by_id(const union bpf_attr *attr) + + fd = bpf_link_new_fd(link); + if (fd < 0) +- bpf_link_put(link); ++ bpf_link_put_direct(link); + + return fd; + } +@@ -4844,7 +4847,7 @@ static int bpf_iter_create(union bpf_attr *attr) + return PTR_ERR(link); + + err = bpf_iter_new_fd(link); +- bpf_link_put(link); ++ bpf_link_put_direct(link); + + return err; + } +-- +2.43.0 + diff --git a/debian/patches-rt/0060-posix-timers-Ensure-timer-ID-search-loop-limit-is-va.patch b/debian/patches-rt/0060-posix-timers-Ensure-timer-ID-search-loop-limit-is-va.patch new file mode 100644 index 000000000..8b22f207c --- /dev/null +++ b/debian/patches-rt/0060-posix-timers-Ensure-timer-ID-search-loop-limit-is-va.patch @@ -0,0 +1,115 @@ +From a0d2c56749857956cb8ef1ccf2d982e2c1770f08 Mon Sep 17 00:00:00 2001 +From: Thomas Gleixner <tglx@linutronix.de> +Date: Thu, 1 Jun 2023 20:58:47 +0200 +Subject: [PATCH 60/62] posix-timers: Ensure timer ID search-loop limit is + valid +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.1/older/patches-6.1.69-rt21.tar.xz + +posix_timer_add() tries to allocate a posix timer ID by starting from the +cached ID which was stored by the last successful allocation. + +This is done in a loop searching the ID space for a free slot one by +one. The loop has to terminate when the search wrapped around to the +starting point. + +But that's racy vs. establishing the starting point. That is read out +lockless, which leads to the following problem: + +CPU0 CPU1 +posix_timer_add() + start = sig->posix_timer_id; + lock(hash_lock); + ... posix_timer_add() + if (++sig->posix_timer_id < 0) + start = sig->posix_timer_id; + sig->posix_timer_id = 0; + +So CPU1 can observe a negative start value, i.e. -1, and the loop break +never happens because the condition can never be true: + + if (sig->posix_timer_id == start) + break; + +While this is unlikely to ever turn into an endless loop as the ID space is +huge (INT_MAX), the racy read of the start value caught the attention of +KCSAN and Dmitry unearthed that incorrectness. + +Rewrite it so that all id operations are under the hash lock. + +Reported-by: syzbot+5c54bd3eb218bb595aa9@syzkaller.appspotmail.com +Reported-by: Dmitry Vyukov <dvyukov@google.com> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Reviewed-by: Frederic Weisbecker <frederic@kernel.org> +Link: https://lore.kernel.org/r/87bkhzdn6g.ffs@tglx + +(cherry picked from commit 8ce8849dd1e78dadcee0ec9acbd259d239b7069f) +Signed-off-by: Clark Williams <clark.williams@gmail.com> +--- + include/linux/sched/signal.h | 2 +- + kernel/time/posix-timers.c | 31 ++++++++++++++++++------------- + 2 files changed, 19 insertions(+), 14 deletions(-) + +diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h +index 20099268fa25..669e8cff40c7 100644 +--- a/include/linux/sched/signal.h ++++ b/include/linux/sched/signal.h +@@ -135,7 +135,7 @@ struct signal_struct { + #ifdef CONFIG_POSIX_TIMERS + + /* POSIX.1b Interval Timers */ +- int posix_timer_id; ++ unsigned int next_posix_timer_id; + struct list_head posix_timers; + + /* ITIMER_REAL timer for the process */ +diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c +index ed3c4a954398..2d6cf93ca370 100644 +--- a/kernel/time/posix-timers.c ++++ b/kernel/time/posix-timers.c +@@ -140,25 +140,30 @@ static struct k_itimer *posix_timer_by_id(timer_t id) + static int posix_timer_add(struct k_itimer *timer) + { + struct signal_struct *sig = current->signal; +- int first_free_id = sig->posix_timer_id; + struct hlist_head *head; +- int ret = -ENOENT; ++ unsigned int cnt, id; + +- do { ++ /* ++ * FIXME: Replace this by a per signal struct xarray once there is ++ * a plan to handle the resulting CRIU regression gracefully. ++ */ ++ for (cnt = 0; cnt <= INT_MAX; cnt++) { + spin_lock(&hash_lock); +- head = &posix_timers_hashtable[hash(sig, sig->posix_timer_id)]; +- if (!__posix_timers_find(head, sig, sig->posix_timer_id)) { ++ id = sig->next_posix_timer_id; ++ ++ /* Write the next ID back. Clamp it to the positive space */ ++ sig->next_posix_timer_id = (id + 1) & INT_MAX; ++ ++ head = &posix_timers_hashtable[hash(sig, id)]; ++ if (!__posix_timers_find(head, sig, id)) { + hlist_add_head_rcu(&timer->t_hash, head); +- ret = sig->posix_timer_id; ++ spin_unlock(&hash_lock); ++ return id; + } +- if (++sig->posix_timer_id < 0) +- sig->posix_timer_id = 0; +- if ((sig->posix_timer_id == first_free_id) && (ret == -ENOENT)) +- /* Loop over all possible ids completed */ +- ret = -EAGAIN; + spin_unlock(&hash_lock); +- } while (ret == -ENOENT); +- return ret; ++ } ++ /* POSIX return code when no timer ID could be allocated */ ++ return -EAGAIN; + } + + static inline void unlock_timer(struct k_itimer *timr, unsigned long flags) +-- +2.43.0 + diff --git a/debian/patches-rt/0061-drm-i915-Do-not-disable-preemption-for-resets.patch b/debian/patches-rt/0061-drm-i915-Do-not-disable-preemption-for-resets.patch new file mode 100644 index 000000000..db8400741 --- /dev/null +++ b/debian/patches-rt/0061-drm-i915-Do-not-disable-preemption-for-resets.patch @@ -0,0 +1,110 @@ +From 1d651fe6c67cb3b355cc228f75289657496520ff Mon Sep 17 00:00:00 2001 +From: Tvrtko Ursulin <tvrtko.ursulin@intel.com> +Date: Fri, 18 Aug 2023 22:45:25 -0400 +Subject: [PATCH 61/62] drm/i915: Do not disable preemption for resets +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.1/older/patches-6.1.69-rt21.tar.xz + +[commit 40cd2835ced288789a685aa4aa7bc04b492dcd45 in linux-rt-devel] + +Commit ade8a0f59844 ("drm/i915: Make all GPU resets atomic") added a +preempt disable section over the hardware reset callback to prepare the +driver for being able to reset from atomic contexts. + +In retrospect I can see that the work item at a time was about removing +the struct mutex from the reset path. Code base also briefly entertained +the idea of doing the reset under stop_machine in order to serialize +userspace mmap and temporary glitch in the fence registers (see +eb8d0f5af4ec ("drm/i915: Remove GPU reset dependence on struct_mutex"), +but that never materialized and was soon removed in 2caffbf11762 +("drm/i915: Revoke mmaps and prevent access to fence registers across +reset") and replaced with a SRCU based solution. + +As such, as far as I can see, today we still have a requirement that +resets must not sleep (invoked from submission tasklets), but no need to +support invoking them from a truly atomic context. + +Given that the preemption section is problematic on RT kernels, since the +uncore lock becomes a sleeping lock and so is invalid in such section, +lets try and remove it. Potential downside is that our short waits on GPU +to complete the reset may get extended if CPU scheduling interferes, but +in practice that probably isn't a deal breaker. + +In terms of mechanics, since the preemption disabled block is being +removed we just need to replace a few of the wait_for_atomic macros into +busy looping versions which will work (and not complain) when called from +non-atomic sections. + +Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> +Cc: Chris Wilson <chris.p.wilson@intel.com> +Cc: Paul Gortmaker <paul.gortmaker@windriver.com> +Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Acked-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Link: https://lore.kernel.org/r/20230705093025.3689748-1-tvrtko.ursulin@linux.intel.com +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +[PG: backport from v6.4-rt ; minor context fixup caused by b7d70b8b06ed] +Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com> +Signed-off-by: Clark Williams <williams@redhat.com> +--- + drivers/gpu/drm/i915/gt/intel_reset.c | 12 +++++------- + 1 file changed, 5 insertions(+), 7 deletions(-) + +diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c +index 10b930eaa8cb..6108a449cd19 100644 +--- a/drivers/gpu/drm/i915/gt/intel_reset.c ++++ b/drivers/gpu/drm/i915/gt/intel_reset.c +@@ -174,13 +174,13 @@ static int i915_do_reset(struct intel_gt *gt, + /* Assert reset for at least 20 usec, and wait for acknowledgement. */ + pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE); + udelay(50); +- err = wait_for_atomic(i915_in_reset(pdev), 50); ++ err = _wait_for_atomic(i915_in_reset(pdev), 50, 0); + + /* Clear the reset request. */ + pci_write_config_byte(pdev, I915_GDRST, 0); + udelay(50); + if (!err) +- err = wait_for_atomic(!i915_in_reset(pdev), 50); ++ err = _wait_for_atomic(!i915_in_reset(pdev), 50, 0); + + return err; + } +@@ -200,7 +200,7 @@ static int g33_do_reset(struct intel_gt *gt, + struct pci_dev *pdev = to_pci_dev(gt->i915->drm.dev); + + pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE); +- return wait_for_atomic(g4x_reset_complete(pdev), 50); ++ return _wait_for_atomic(g4x_reset_complete(pdev), 50, 0); + } + + static int g4x_do_reset(struct intel_gt *gt, +@@ -217,7 +217,7 @@ static int g4x_do_reset(struct intel_gt *gt, + + pci_write_config_byte(pdev, I915_GDRST, + GRDOM_MEDIA | GRDOM_RESET_ENABLE); +- ret = wait_for_atomic(g4x_reset_complete(pdev), 50); ++ ret = _wait_for_atomic(g4x_reset_complete(pdev), 50, 0); + if (ret) { + GT_TRACE(gt, "Wait for media reset failed\n"); + goto out; +@@ -225,7 +225,7 @@ static int g4x_do_reset(struct intel_gt *gt, + + pci_write_config_byte(pdev, I915_GDRST, + GRDOM_RENDER | GRDOM_RESET_ENABLE); +- ret = wait_for_atomic(g4x_reset_complete(pdev), 50); ++ ret = _wait_for_atomic(g4x_reset_complete(pdev), 50, 0); + if (ret) { + GT_TRACE(gt, "Wait for render reset failed\n"); + goto out; +@@ -718,9 +718,7 @@ int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask) + intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL); + for (retry = 0; ret == -ETIMEDOUT && retry < retries; retry++) { + GT_TRACE(gt, "engine_mask=%x\n", engine_mask); +- preempt_disable(); + ret = reset(gt, engine_mask, retry); +- preempt_enable(); + } + intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL); + +-- +2.43.0 + diff --git a/debian/patches-rt/0062-Linux-6.1.69-rt21-REBASE.patch b/debian/patches-rt/0062-Linux-6.1.69-rt21-REBASE.patch new file mode 100644 index 000000000..d5e736785 --- /dev/null +++ b/debian/patches-rt/0062-Linux-6.1.69-rt21-REBASE.patch @@ -0,0 +1,21 @@ +From 8aa6a280fc011cccf7cfcc0f5942e3ec6bdd73b4 Mon Sep 17 00:00:00 2001 +From: Clark Williams <clark.williams@gmail.com> +Date: Thu, 28 Dec 2023 23:45:11 -0600 +Subject: [PATCH 62/62] Linux 6.1.69-rt21 REBASE +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.1/older/patches-6.1.69-rt21.tar.xz + +Signed-off-by: Clark Williams <clark.williams@gmail.com> +--- + localversion-rt | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/localversion-rt b/localversion-rt +index 9f7d0bdbffb1..6c6cde1c29e3 100644 +--- a/localversion-rt ++++ b/localversion-rt +@@ -1 +1 @@ +--rt13 ++-rt21 +-- +2.43.0 + diff --git a/debian/patches-rt/series b/debian/patches-rt/series new file mode 100644 index 000000000..a07ceae89 --- /dev/null +++ b/debian/patches-rt/series @@ -0,0 +1,62 @@ +0001-vduse-Remove-include-of-rwlock.h.patch +0002-signal-Don-t-disable-preemption-in-ptrace_stop-on-PR.patch +0003-sched-Consider-task_struct-saved_state-in-wait_task_.patch +0004-spi-Remove-the-obsolte-u64_stats_fetch_-_irq-users.patch +0005-net-Remove-the-obsolte-u64_stats_fetch_-_irq-users-d.patch +0006-net-Remove-the-obsolte-u64_stats_fetch_-_irq-users-n.patch +0007-bpf-Remove-the-obsolte-u64_stats_fetch_-_irq-users.patch +0008-u64_stat-Remove-the-obsolete-fetch_irq-variants.patch +0009-net-Avoid-the-IPI-to-free-the.patch +0010-x86-Allow-to-enable-RT.patch +0011-x86-Enable-RT-also-on-32bit.patch +0012-softirq-Use-a-dedicated-thread-for-timer-wakeups.patch +0013-rcutorture-Also-force-sched-priority-to-timersd-on-b.patch +0014-tick-Fix-timer-storm-since-introduction-of-timersd.patch +0015-softirq-Wake-ktimers-thread-also-in-softirq.patch +0016-tpm_tis-fix-stall-after-iowrite-s.patch +0017-zram-Replace-bit-spinlocks-with-spinlock_t-for-PREEM.patch +0018-locking-lockdep-Remove-lockdep_init_map_crosslock.patch +0019-printk-Bring-back-the-RT-bits.patch +0020-printk-add-infrastucture-for-atomic-consoles.patch +0021-serial-8250-implement-write_atomic.patch +0022-printk-avoid-preempt_disable-for-PREEMPT_RT.patch +0023-drm-i915-Use-preempt_disable-enable_rt-where-recomme.patch +0024-drm-i915-Don-t-disable-interrupts-on-PREEMPT_RT-duri.patch +0025-drm-i915-Don-t-check-for-atomic-context-on-PREEMPT_R.patch +0026-drm-i915-Disable-tracing-points-on-PREEMPT_RT.patch +0027-drm-i915-skip-DRM_I915_LOW_LEVEL_TRACEPOINTS-with-NO.patch +0028-drm-i915-gt-Queue-and-wait-for-the-irq_work-item.patch +0029-drm-i915-gt-Use-spin_lock_irq-instead-of-local_irq_d.patch +0030-drm-i915-Drop-the-irqs_disabled-check.patch +0031-Revert-drm-i915-Depend-on-PREEMPT_RT.patch +0032-sched-Add-support-for-lazy-preemption.patch +0033-x86-entry-Use-should_resched-in-idtentry_exit_cond_r.patch +0034-x86-Support-for-lazy-preemption.patch +0035-entry-Fix-the-preempt-lazy-fallout.patch +0036-arm-Add-support-for-lazy-preemption.patch +0037-powerpc-Add-support-for-lazy-preemption.patch +0038-arch-arm64-Add-lazy-preempt-support.patch +0039-arm-Disable-jump-label-on-PREEMPT_RT.patch +0040-ARM-enable-irq-in-translation-section-permission-fau.patch +0041-tty-serial-omap-Make-the-locking-RT-aware.patch +0042-tty-serial-pl011-Make-the-locking-work-on-RT.patch +0043-ARM-Allow-to-enable-RT.patch +0044-ARM64-Allow-to-enable-RT.patch +0045-powerpc-traps-Use-PREEMPT_RT.patch +0046-powerpc-pseries-iommu-Use-a-locallock-instead-local_.patch +0047-powerpc-kvm-Disable-in-kernel-MPIC-emulation-for-PRE.patch +0048-powerpc-stackprotector-work-around-stack-guard-init-.patch +0049-POWERPC-Allow-to-enable-RT.patch +0050-sysfs-Add-sys-kernel-realtime-entry.patch +0051-Add-localversion-for-RT-release.patch +0052-Linux-6.1.46-rt13-REBASE.patch +0053-io-mapping-don-t-disable-preempt-on-RT-in-io_mapping.patch +0054-locking-rwbase-Mitigate-indefinite-writer-starvation.patch +0055-revert-softirq-Let-ksoftirqd-do-its-job.patch +0056-debugobjects-locking-Annotate-debug_object_fill_pool.patch +0057-sched-avoid-false-lockdep-splat-in-put_task_struct.patch +0058-mm-page_alloc-Use-write_seqlock_irqsave-instead-writ.patch +0059-bpf-Remove-in_atomic-from-bpf_link_put.patch +0060-posix-timers-Ensure-timer-ID-search-loop-limit-is-va.patch +0061-drm-i915-Do-not-disable-preemption-for-resets.patch +0062-Linux-6.1.69-rt21-REBASE.patch |