Diffstat (limited to 'debian/patches-rt/0002-net-Allow-to-use-SMP-threads-for-backlog-NAPI.patch')
-rw-r--r--  debian/patches-rt/0002-net-Allow-to-use-SMP-threads-for-backlog-NAPI.patch  333
1 file changed, 0 insertions, 333 deletions
diff --git a/debian/patches-rt/0002-net-Allow-to-use-SMP-threads-for-backlog-NAPI.patch b/debian/patches-rt/0002-net-Allow-to-use-SMP-threads-for-backlog-NAPI.patch
deleted file mode 100644
index 3e4b5b7bec..0000000000
--- a/debian/patches-rt/0002-net-Allow-to-use-SMP-threads-for-backlog-NAPI.patch
+++ /dev/null
@@ -1,333 +0,0 @@
-From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
-Date: Mon, 25 Mar 2024 08:40:29 +0100
-Subject: [PATCH 2/4] net: Allow to use SMP threads for backlog NAPI.
-Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.9/older/patches-6.9-rt5.tar.xz
-
-Backlog NAPI is a per-CPU-only NAPI struct (with no device behind it)
-used by drivers which don't do NAPI themselves, by RPS, and by parts
-of the stack which need to avoid recursive deadlocks while processing
-a packet.
-
-Non-NAPI drivers use the CPU-local backlog NAPI. If RPS is enabled, a
-flow is computed for the skb and, based on that flow, the skb can be
-enqueued on a remote CPU. Scheduling/raising the softirq (for the
-backlog NAPI) on the remote CPU isn't trivial because a softirq can
-only be scheduled on the local CPU and is performed after the hardirq
-is done. In order to schedule a softirq on the remote CPU, an IPI is
-sent to the remote CPU, which then schedules the backlog NAPI on its
-(now local) CPU.
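-
-As a rough sketch (simplified from enqueue_to_backlog() in
-net/core/dev.c; locking, queue-limit and drop handling are omitted),
-the enqueue path looks like this:
-
-	/* @cpu was picked via RPS and may differ from the local CPU. */
-	struct softnet_data *sd = &per_cpu(softnet_data, cpu);
-
-	__skb_queue_tail(&sd->input_pkt_queue, skb);
-	if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state))
-		napi_schedule_rps(sd); /* remote sd: queue an IPI */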
-
-On PREEMPT_RT, interrupts are force-threaded. Soft interrupts are
-raised within the interrupt thread and processed after the interrupt
-handler has completed, still within the context of the interrupt
-thread. The softirq is thus handled in the context where it
-originated.
-
-With force-threaded interrupts enabled, ksoftirqd is woken up if a
-softirq is raised from hardirq context. This is the case if it is
-raised from an IPI. Additionally, there is a warning on PREEMPT_RT if
-the softirq is raised from the idle thread.
-This was done for two reasons:
-- With threaded interrupts the processing should happen in thread
-  context (where it originated) and ksoftirqd is the only thread for
-  this context if raised from hardirq. Using the currently running
-  task instead would "punish" a random task.
-- Once ksoftirqd was active, it consumed all further softirqs until
-  it stopped running. This behaviour changed recently and is no
-  longer the case.
-
-Instead of keeping the backlog NAPI in ksoftirqd (in force-threaded/
-PREEMPT_RT setups), I am proposing NAPI threads for the backlog.
-The "proper" setup with threaded NAPI is not doable because those
-threads are not pinned to an individual CPU and can be modified by
-the user. Additionally, a dummy network device would have to be
-assigned, and CPU hotplug has to be considered if additional CPUs
-show up. All this could probably be done/solved, but the smpboot
-threads already provide this infrastructure.
-
-Sending UDP packets over loopback expects the packet to be processed
-within the call. Delaying it by handing it over to a thread hurts
-performance. It makes no difference to the outcome whether the
-context switch happens immediately after enqueue or a while later, to
-process a few packets in a batch. There is no need to always use the
-thread if the backlog NAPI is requested on the local CPU; handling it
-inline restores the loopback throughput. After enabling RPS on
-loopback, performance drops to roughly the same value for both the
-IPI and the thread approach.
-
-Create NAPI threads for the backlog if requested during boot. The
-thread runs the inner loop from napi_threaded_poll(); the wait part
-is different: it checks for NAPI_STATE_SCHED (the backlog NAPI cannot
-be disabled).
-
-The NAPI threads for the backlog are optional; they have to be
-enabled via the boot argument "thread_backlog_napi". This is
-mandatory on PREEMPT_RT to avoid the wakeup of ksoftirqd from the
-IPI.
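-
-For example, on a non-PREEMPT_RT kernel the threads are enabled by
-appending the parameter to the kernel command line (exact bootloader
-syntax varies):
-
-	thread_backlog_napi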
-
-Acked-by: Jakub Kicinski <kuba@kernel.org>
-Link: https://lore.kernel.org/r/20240325074943.289909-3-bigeasy@linutronix.de
-Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
----
- net/core/dev.c | 152 +++++++++++++++++++++++++++++++++++++++++++--------------
- 1 file changed, 115 insertions(+), 37 deletions(-)
-
---- a/net/core/dev.c
-+++ b/net/core/dev.c
-@@ -78,6 +78,7 @@
- #include <linux/slab.h>
- #include <linux/sched.h>
- #include <linux/sched/mm.h>
-+#include <linux/smpboot.h>
- #include <linux/mutex.h>
- #include <linux/rwsem.h>
- #include <linux/string.h>
-@@ -197,6 +198,31 @@ static inline struct hlist_head *dev_ind
- return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)];
- }
-
-+#ifndef CONFIG_PREEMPT_RT
-+
-+static DEFINE_STATIC_KEY_FALSE(use_backlog_threads_key);
-+
-+static int __init setup_backlog_napi_threads(char *arg)
-+{
-+ static_branch_enable(&use_backlog_threads_key);
-+ return 0;
-+}
-+early_param("thread_backlog_napi", setup_backlog_napi_threads);
-+
-+static bool use_backlog_threads(void)
-+{
-+ return static_branch_unlikely(&use_backlog_threads_key);
-+}
-+
-+#else
-+
-+static bool use_backlog_threads(void)
-+{
-+ return true;
-+}
-+
-+#endif
-+
- static inline void rps_lock_irqsave(struct softnet_data *sd,
- unsigned long *flags)
- {
-@@ -4410,6 +4436,7 @@ EXPORT_SYMBOL(__dev_direct_xmit);
- /*************************************************************************
- * Receiver routines
- *************************************************************************/
-+static DEFINE_PER_CPU(struct task_struct *, backlog_napi);
-
- unsigned int sysctl_skb_defer_max __read_mostly = 64;
- int weight_p __read_mostly = 64; /* old backlog weight */
-@@ -4433,12 +4460,16 @@ static inline void ____napi_schedule(str
- */
- thread = READ_ONCE(napi->thread);
- if (thread) {
-+ if (use_backlog_threads() && thread == raw_cpu_read(backlog_napi))
-+ goto use_local_napi;
-+
- set_bit(NAPI_STATE_SCHED_THREADED, &napi->state);
- wake_up_process(thread);
- return;
- }
- }
-
-+use_local_napi:
- list_add_tail(&napi->poll_list, &sd->poll_list);
- WRITE_ONCE(napi->list_owner, smp_processor_id());
- /* If not called from net_rx_action()
-@@ -4678,6 +4709,11 @@ static void napi_schedule_rps(struct sof
-
- #ifdef CONFIG_RPS
- if (sd != mysd) {
-+ if (use_backlog_threads()) {
-+ __napi_schedule_irqoff(&sd->backlog);
-+ return;
-+ }
-+
- sd->rps_ipi_next = mysd->rps_ipi_list;
- mysd->rps_ipi_list = sd;
-
-@@ -5937,7 +5973,7 @@ static void net_rps_action_and_irq_enabl
- #ifdef CONFIG_RPS
- struct softnet_data *remsd = sd->rps_ipi_list;
-
-- if (remsd) {
-+ if (!use_backlog_threads() && remsd) {
- sd->rps_ipi_list = NULL;
-
- local_irq_enable();
-@@ -5952,7 +5988,7 @@ static void net_rps_action_and_irq_enabl
- static bool sd_has_rps_ipi_waiting(struct softnet_data *sd)
- {
- #ifdef CONFIG_RPS
-- return sd->rps_ipi_list != NULL;
-+ return !use_backlog_threads() && sd->rps_ipi_list;
- #else
- return false;
- #endif
-@@ -5996,7 +6032,7 @@ static int process_backlog(struct napi_s
- * We can use a plain write instead of clear_bit(),
- * and we dont need an smp_mb() memory barrier.
- */
-- napi->state = 0;
-+ napi->state &= NAPIF_STATE_THREADED;
- again = false;
- } else {
- skb_queue_splice_tail_init(&sd->input_pkt_queue,
-@@ -6732,43 +6768,48 @@ static int napi_thread_wait(struct napi_
- return -1;
- }
-
--static int napi_threaded_poll(void *data)
-+static void napi_threaded_poll_loop(struct napi_struct *napi)
- {
-- struct napi_struct *napi = data;
- struct softnet_data *sd;
-- void *have;
-+ unsigned long last_qs = jiffies;
-
-- while (!napi_thread_wait(napi)) {
-- unsigned long last_qs = jiffies;
-+ for (;;) {
-+ bool repoll = false;
-+ void *have;
-
-- for (;;) {
-- bool repoll = false;
-+ local_bh_disable();
-+ sd = this_cpu_ptr(&softnet_data);
-+ sd->in_napi_threaded_poll = true;
-
-- local_bh_disable();
-- sd = this_cpu_ptr(&softnet_data);
-- sd->in_napi_threaded_poll = true;
--
-- have = netpoll_poll_lock(napi);
-- __napi_poll(napi, &repoll);
-- netpoll_poll_unlock(have);
--
-- sd->in_napi_threaded_poll = false;
-- barrier();
--
-- if (sd_has_rps_ipi_waiting(sd)) {
-- local_irq_disable();
-- net_rps_action_and_irq_enable(sd);
-- }
-- skb_defer_free_flush(sd);
-- local_bh_enable();
-+ have = netpoll_poll_lock(napi);
-+ __napi_poll(napi, &repoll);
-+ netpoll_poll_unlock(have);
-+
-+ sd->in_napi_threaded_poll = false;
-+ barrier();
-+
-+ if (sd_has_rps_ipi_waiting(sd)) {
-+ local_irq_disable();
-+ net_rps_action_and_irq_enable(sd);
-+ }
-+ skb_defer_free_flush(sd);
-+ local_bh_enable();
-
-- if (!repoll)
-- break;
-+ if (!repoll)
-+ break;
-
-- rcu_softirq_qs_periodic(last_qs);
-- cond_resched();
-- }
-+ rcu_softirq_qs_periodic(last_qs);
-+ cond_resched();
- }
-+}
-+
-+static int napi_threaded_poll(void *data)
-+{
-+ struct napi_struct *napi = data;
-+
-+ while (!napi_thread_wait(napi))
-+ napi_threaded_poll_loop(napi);
-+
- return 0;
- }
-
-@@ -11369,7 +11410,7 @@ static int dev_cpu_dead(unsigned int old
-
- list_del_init(&napi->poll_list);
- if (napi->poll == process_backlog)
-- napi->state = 0;
-+ napi->state &= NAPIF_STATE_THREADED;
- else
- ____napi_schedule(sd, napi);
- }
-@@ -11377,12 +11418,14 @@ static int dev_cpu_dead(unsigned int old
- raise_softirq_irqoff(NET_TX_SOFTIRQ);
- local_irq_enable();
-
-+ if (!use_backlog_threads()) {
- #ifdef CONFIG_RPS
-- remsd = oldsd->rps_ipi_list;
-- oldsd->rps_ipi_list = NULL;
-+ remsd = oldsd->rps_ipi_list;
-+ oldsd->rps_ipi_list = NULL;
- #endif
-- /* send out pending IPI's on offline CPU */
-- net_rps_send_ipi(remsd);
-+ /* send out pending IPI's on offline CPU */
-+ net_rps_send_ipi(remsd);
-+ }
-
- /* Process offline CPU's input_pkt_queue */
- while ((skb = __skb_dequeue(&oldsd->process_queue))) {
-@@ -11721,6 +11764,38 @@ static int net_page_pool_create(int cpui
- return 0;
- }
-
-+static int backlog_napi_should_run(unsigned int cpu)
-+{
-+ struct softnet_data *sd = per_cpu_ptr(&softnet_data, cpu);
-+ struct napi_struct *napi = &sd->backlog;
-+
-+ return test_bit(NAPI_STATE_SCHED_THREADED, &napi->state);
-+}
-+
-+static void run_backlog_napi(unsigned int cpu)
-+{
-+ struct softnet_data *sd = per_cpu_ptr(&softnet_data, cpu);
-+
-+ napi_threaded_poll_loop(&sd->backlog);
-+}
-+
-+static void backlog_napi_setup(unsigned int cpu)
-+{
-+ struct softnet_data *sd = per_cpu_ptr(&softnet_data, cpu);
-+ struct napi_struct *napi = &sd->backlog;
-+
-+ napi->thread = this_cpu_read(backlog_napi);
-+ set_bit(NAPI_STATE_THREADED, &napi->state);
-+}
-+
-+static struct smp_hotplug_thread backlog_threads = {
-+ .store = &backlog_napi,
-+ .thread_should_run = backlog_napi_should_run,
-+ .thread_fn = run_backlog_napi,
-+ .thread_comm = "backlog_napi/%u",
-+ .setup = backlog_napi_setup,
-+};
-+
- /*
- * This is called single threaded during boot, so no need
- * to take the rtnl semaphore.
-@@ -11772,10 +11847,13 @@ static int __init net_dev_init(void)
- init_gro_hash(&sd->backlog);
- sd->backlog.poll = process_backlog;
- sd->backlog.weight = weight_p;
-+ INIT_LIST_HEAD(&sd->backlog.poll_list);
-
- if (net_page_pool_create(i))
- goto out;
- }
-+ if (use_backlog_threads())
-+ smpboot_register_percpu_thread(&backlog_threads);
-
- dev_boot_phase = 0;
-
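
With "thread_backlog_napi" on the command line, one smpboot thread per
CPU should be visible after boot. The names follow the
"backlog_napi/%u" comm format registered above; the output below is
illustrative:

	$ ps -e -o comm= | grep backlog_napi
	backlog_napi/0
	backlog_napi/1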