Diffstat (limited to 'debian/patches-rt/0002-net-Allow-to-use-SMP-threads-for-backlog-NAPI.patch')
-rw-r--r-- | debian/patches-rt/0002-net-Allow-to-use-SMP-threads-for-backlog-NAPI.patch | 333
1 file changed, 0 insertions, 333 deletions
diff --git a/debian/patches-rt/0002-net-Allow-to-use-SMP-threads-for-backlog-NAPI.patch b/debian/patches-rt/0002-net-Allow-to-use-SMP-threads-for-backlog-NAPI.patch
deleted file mode 100644
index 3e4b5b7bec..0000000000
--- a/debian/patches-rt/0002-net-Allow-to-use-SMP-threads-for-backlog-NAPI.patch
+++ /dev/null
@@ -1,333 +0,0 @@
-From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
-Date: Mon, 25 Mar 2024 08:40:29 +0100
-Subject: [PATCH 2/4] net: Allow to use SMP threads for backlog NAPI.
-Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.9/older/patches-6.9-rt5.tar.xz
-
-Backlog NAPI is a per-CPU NAPI struct only (with no device behind it),
-used by drivers which don't do NAPI themselves, by RPS, and by parts of
-the stack which need to avoid recursive deadlocks while processing a
-packet.
-
-Non-NAPI drivers use the CPU-local backlog NAPI. If RPS is enabled, a
-flow is computed for the skb, and based on that flow the skb can be
-enqueued on a remote CPU. Scheduling/raising the softirq (for the
-backlog's NAPI) on the remote CPU isn't trivial because a softirq is
-only scheduled on the local CPU and performed after the hardirq is
-done. In order to schedule a softirq on a remote CPU, an IPI is sent to
-the remote CPU, which then schedules the backlog NAPI on what is now
-the local CPU.
-
-On PREEMPT_RT interrupts are force-threaded. The soft interrupts are
-raised within the interrupt thread and processed after the interrupt
-handler has completed, still within the context of the interrupt
-thread. The softirq is thus handled in the context where it originated.
-
-With force-threaded interrupts enabled, ksoftirqd is woken up if a
-softirq is raised from hardirq context, which is the case if it is
-raised from an IPI. Additionally, there is a warning on PREEMPT_RT if
-the softirq is raised from the idle thread.
-This was done for two reasons:
-- With threaded interrupts the processing should happen in thread
-  context (where it originated), and ksoftirqd is the only thread for
-  this context if the softirq is raised from hardirq. Using the
-  currently running task instead would "punish" a random task.
-- Once ksoftirqd is active it consumes all further softirqs until it
-  stops running. This changed recently and is no longer the case.
-
-Instead of keeping the backlog NAPI in ksoftirqd (in force-threaded/
-PREEMPT_RT setups), I am proposing NAPI threads for backlog.
-The "proper" setup with threaded NAPI is not doable because the threads
-are not pinned to an individual CPU and can be modified by the user.
-Additionally, a dummy network device would have to be assigned, and
-CPU hotplug would have to be considered if additional CPUs show up.
-All of this can probably be done/solved, but the smpboot threads
-already provide this infrastructure.
-
-Sending UDP packets over loopback expects that the packet is processed
-within the call. Delaying it by handing it over to the thread hurts
-performance. It makes no difference to the outcome whether the context
-switch happens immediately after enqueue or a little later, in order to
-process a few packets in a batch.
-There is no need to always use the thread if the backlog NAPI is
-requested on the local CPU. This restores the loopback throughput. With
-RPS enabled on loopback, performance drops to roughly the same value
-for the IPI and the thread variant.
-
-Create NAPI threads for backlog if requested during boot. The thread
-runs the inner loop from napi_threaded_poll(); the wait part is
-different: it checks for NAPI_STATE_SCHED (the backlog NAPI cannot be
-disabled).
-
-The NAPI threads for backlog are optional; they have to be enabled via
-the boot argument "thread_backlog_napi". They are mandatory for
-PREEMPT_RT to avoid the wakeup of ksoftirqd from the IPI.
-
-Acked-by: Jakub Kicinski <kuba@kernel.org>
-Link: https://lore.kernel.org/r/20240325074943.289909-3-bigeasy@linutronix.de
-Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
----
- net/core/dev.c | 152 +++++++++++++++++++++++++++++++++++++++++++--------------
- 1 file changed, 115 insertions(+), 37 deletions(-)
-
---- a/net/core/dev.c
-+++ b/net/core/dev.c
-@@ -78,6 +78,7 @@
- #include <linux/slab.h>
- #include <linux/sched.h>
- #include <linux/sched/mm.h>
-+#include <linux/smpboot.h>
- #include <linux/mutex.h>
- #include <linux/rwsem.h>
- #include <linux/string.h>
-@@ -197,6 +198,31 @@ static inline struct hlist_head *dev_ind
- 	return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)];
- }
- 
-+#ifndef CONFIG_PREEMPT_RT
-+
-+static DEFINE_STATIC_KEY_FALSE(use_backlog_threads_key);
-+
-+static int __init setup_backlog_napi_threads(char *arg)
-+{
-+	static_branch_enable(&use_backlog_threads_key);
-+	return 0;
-+}
-+early_param("thread_backlog_napi", setup_backlog_napi_threads);
-+
-+static bool use_backlog_threads(void)
-+{
-+	return static_branch_unlikely(&use_backlog_threads_key);
-+}
-+
-+#else
-+
-+static bool use_backlog_threads(void)
-+{
-+	return true;
-+}
-+
-+#endif
-+
- static inline void rps_lock_irqsave(struct softnet_data *sd,
- 				    unsigned long *flags)
- {
-@@ -4410,6 +4436,7 @@ EXPORT_SYMBOL(__dev_direct_xmit);
- /*************************************************************************
-  *			Receiver routines
-  *************************************************************************/
-+static DEFINE_PER_CPU(struct task_struct *, backlog_napi);
- 
- unsigned int sysctl_skb_defer_max __read_mostly = 64;
- int weight_p __read_mostly = 64;	/* old backlog weight */
-@@ -4433,12 +4460,16 @@ static inline void ____napi_schedule(str
- 		 */
- 		thread = READ_ONCE(napi->thread);
- 		if (thread) {
-+			if (use_backlog_threads() && thread == raw_cpu_read(backlog_napi))
-+				goto use_local_napi;
-+
- 			set_bit(NAPI_STATE_SCHED_THREADED, &napi->state);
- 			wake_up_process(thread);
- 			return;
- 		}
- 	}
- 
-+use_local_napi:
- 	list_add_tail(&napi->poll_list, &sd->poll_list);
- 	WRITE_ONCE(napi->list_owner, smp_processor_id());
- 	/* If not called from net_rx_action()
-@@ -4678,6 +4709,11 @@ static void napi_schedule_rps(struct sof
- 
- #ifdef CONFIG_RPS
- 	if (sd != mysd) {
-+		if (use_backlog_threads()) {
-+			__napi_schedule_irqoff(&sd->backlog);
-+			return;
-+		}
-+
- 		sd->rps_ipi_next = mysd->rps_ipi_list;
- 		mysd->rps_ipi_list = sd;
- 
-@@ -5937,7 +5973,7 @@ static void net_rps_action_and_irq_enabl
- #ifdef CONFIG_RPS
- 	struct softnet_data *remsd = sd->rps_ipi_list;
- 
--	if (remsd) {
-+	if (!use_backlog_threads() && remsd) {
- 		sd->rps_ipi_list = NULL;
- 
- 		local_irq_enable();
-@@ -5952,7 +5988,7 @@ static void net_rps_action_and_irq_enabl
- static bool sd_has_rps_ipi_waiting(struct softnet_data *sd)
- {
- #ifdef CONFIG_RPS
--	return sd->rps_ipi_list != NULL;
-+	return !use_backlog_threads() && sd->rps_ipi_list;
- #else
- 	return false;
- #endif
-@@ -5996,7 +6032,7 @@ static int process_backlog(struct napi_s
- 			 * We can use a plain write instead of clear_bit(),
- 			 * and we dont need an smp_mb() memory barrier.
- 			 */
--			napi->state = 0;
-+			napi->state &= NAPIF_STATE_THREADED;
- 			again = false;
- 		} else {
- 			skb_queue_splice_tail_init(&sd->input_pkt_queue,
-@@ -6732,43 +6768,48 @@ static int napi_thread_wait(struct napi_
- 	return -1;
- }
- 
---static int napi_threaded_poll(void *data)
-+static void napi_threaded_poll_loop(struct napi_struct *napi)
- {
--	struct napi_struct *napi = data;
- 	struct softnet_data *sd;
--	void *have;
-+	unsigned long last_qs = jiffies;
- 
--	while (!napi_thread_wait(napi)) {
--		unsigned long last_qs = jiffies;
-+	for (;;) {
-+		bool repoll = false;
-+		void *have;
- 
--		for (;;) {
--			bool repoll = false;
-+		local_bh_disable();
-+		sd = this_cpu_ptr(&softnet_data);
-+		sd->in_napi_threaded_poll = true;
- 
--			local_bh_disable();
--			sd = this_cpu_ptr(&softnet_data);
--			sd->in_napi_threaded_poll = true;
--
--			have = netpoll_poll_lock(napi);
--			__napi_poll(napi, &repoll);
--			netpoll_poll_unlock(have);
--
--			sd->in_napi_threaded_poll = false;
--			barrier();
--
--			if (sd_has_rps_ipi_waiting(sd)) {
--				local_irq_disable();
--				net_rps_action_and_irq_enable(sd);
--			}
--			skb_defer_free_flush(sd);
--			local_bh_enable();
-+		have = netpoll_poll_lock(napi);
-+		__napi_poll(napi, &repoll);
-+		netpoll_poll_unlock(have);
-+
-+		sd->in_napi_threaded_poll = false;
-+		barrier();
-+
-+		if (sd_has_rps_ipi_waiting(sd)) {
-+			local_irq_disable();
-+			net_rps_action_and_irq_enable(sd);
-+		}
-+		skb_defer_free_flush(sd);
-+		local_bh_enable();
- 
--			if (!repoll)
--				break;
-+		if (!repoll)
-+			break;
- 
--			rcu_softirq_qs_periodic(last_qs);
--			cond_resched();
--		}
-+		rcu_softirq_qs_periodic(last_qs);
-+		cond_resched();
- 	}
-+}
-+
-+static int napi_threaded_poll(void *data)
-+{
-+	struct napi_struct *napi = data;
-+
-+	while (!napi_thread_wait(napi))
-+		napi_threaded_poll_loop(napi);
-+
- 	return 0;
- }
- 
-@@ -11369,7 +11410,7 @@ static int dev_cpu_dead(unsigned int old
- 
- 		list_del_init(&napi->poll_list);
- 		if (napi->poll == process_backlog)
--			napi->state = 0;
-+			napi->state &= NAPIF_STATE_THREADED;
- 		else
- 			____napi_schedule(sd, napi);
- 	}
-@@ -11377,12 +11418,14 @@ static int dev_cpu_dead(unsigned int old
- 	raise_softirq_irqoff(NET_TX_SOFTIRQ);
- 	local_irq_enable();
- 
-+	if (!use_backlog_threads()) {
- #ifdef CONFIG_RPS
--	remsd = oldsd->rps_ipi_list;
--	oldsd->rps_ipi_list = NULL;
-+		remsd = oldsd->rps_ipi_list;
-+		oldsd->rps_ipi_list = NULL;
- #endif
--	/* send out pending IPI's on offline CPU */
--	net_rps_send_ipi(remsd);
-+		/* send out pending IPI's on offline CPU */
-+		net_rps_send_ipi(remsd);
-+	}
- 
- 	/* Process offline CPU's input_pkt_queue */
- 	while ((skb = __skb_dequeue(&oldsd->process_queue))) {
-@@ -11721,6 +11764,38 @@ static int net_page_pool_create(int cpui
- 	return 0;
- }
- 
-+static int backlog_napi_should_run(unsigned int cpu)
-+{
-+	struct softnet_data *sd = per_cpu_ptr(&softnet_data, cpu);
-+	struct napi_struct *napi = &sd->backlog;
-+
-+	return test_bit(NAPI_STATE_SCHED_THREADED, &napi->state);
-+}
-+
-+static void run_backlog_napi(unsigned int cpu)
-+{
-+	struct softnet_data *sd = per_cpu_ptr(&softnet_data, cpu);
-+
-+	napi_threaded_poll_loop(&sd->backlog);
-+}
-+
-+static void backlog_napi_setup(unsigned int cpu)
-+{
-+	struct softnet_data *sd = per_cpu_ptr(&softnet_data, cpu);
-+	struct napi_struct *napi = &sd->backlog;
-+
-+	napi->thread = this_cpu_read(backlog_napi);
-+	set_bit(NAPI_STATE_THREADED, &napi->state);
-+}
-+
-+static struct smp_hotplug_thread backlog_threads = {
-+	.store			= &backlog_napi,
-+	.thread_should_run	= backlog_napi_should_run,
-+	.thread_fn		= run_backlog_napi,
-+	.thread_comm		= "backlog_napi/%u",
-+	.setup			= backlog_napi_setup,
-+};
-+
- /*
-  * This is called single threaded during boot, so no need
-  * to take the rtnl semaphore.
-@@ -11772,10 +11847,13 @@ static int __init net_dev_init(void)
- 		init_gro_hash(&sd->backlog);
- 		sd->backlog.poll = process_backlog;
- 		sd->backlog.weight = weight_p;
-+		INIT_LIST_HEAD(&sd->backlog.poll_list);
- 
- 		if (net_page_pool_create(i))
- 			goto out;
- 	}
-+	if (use_backlog_threads())
-+		smpboot_register_percpu_thread(&backlog_threads);
- 
- 	dev_boot_phase = 0;
- 
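
To make the IPI mechanism described in the commit message concrete, here is a minimal sketch of how a softirq gets raised on a remote CPU via the generic SMP-call machinery. The demo_* names are hypothetical illustrations, not the patch's code; call_single_data_t, INIT_CSD(), smp_call_function_single_async() and raise_softirq_irqoff() are real kernel API, and the networking code's actual csd function is rps_trigger_softirq() in net/core/dev.c.

#include <linux/smp.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>

static DEFINE_PER_CPU(call_single_data_t, demo_csd);

/* Runs in hardirq context on the remote CPU; raising a softirq from
 * here is exactly what wakes ksoftirqd on PREEMPT_RT. */
static void demo_ipi(void *info)
{
	raise_softirq_irqoff(NET_RX_SOFTIRQ);
}

static int __init demo_csd_init(void)
{
	int cpu;

	/* One csd per possible CPU, initialized once. */
	for_each_possible_cpu(cpu)
		INIT_CSD(per_cpu_ptr(&demo_csd, cpu), demo_ipi, NULL);
	return 0;
}

/* Ask @cpu to raise NET_RX_SOFTIRQ; returns immediately on this CPU. */
static void demo_kick_remote(int cpu)
{
	smp_call_function_single_async(cpu, per_cpu_ptr(&demo_csd, cpu));
}

Because demo_ipi() runs in hardirq context on the target CPU, the softirq raised there is handed to ksoftirqd under PREEMPT_RT, which is precisely the wakeup the patch avoids by scheduling the remote CPU's backlog NAPI thread instead.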
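The smpboot infrastructure the patch reuses can be summarized with a second sketch, assuming a hypothetical per-CPU worker. The demo_* names are made up for illustration; struct smp_hotplug_thread, smpboot_register_percpu_thread() and the per-CPU helpers are the real API, mirroring how backlog_threads is registered in net_dev_init().

#include <linux/smpboot.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/atomic.h>

static DEFINE_PER_CPU(struct task_struct *, demo_thread);
static DEFINE_PER_CPU(atomic_t, demo_work_pending);

/* Like backlog_napi_should_run(): smpboot calls this to decide whether
 * to run the work function or go back to sleep. */
static int demo_should_run(unsigned int cpu)
{
	return atomic_read(per_cpu_ptr(&demo_work_pending, cpu));
}

/* Like run_backlog_napi(): drain this CPU's work, then return so the
 * smpboot loop can sleep until the next wakeup. */
static void demo_fn(unsigned int cpu)
{
	atomic_set(per_cpu_ptr(&demo_work_pending, cpu), 0);
	/* ... process this CPU's queued work here ... */
}

static struct smp_hotplug_thread demo_threads = {
	.store			= &demo_thread,
	.thread_should_run	= demo_should_run,
	.thread_fn		= demo_fn,
	.thread_comm		= "demo/%u",
};

/* Mark work pending on @cpu and wake its thread, mirroring how
 * ____napi_schedule() wakes the CPU's backlog_napi thread. */
static void demo_kick(unsigned int cpu)
{
	atomic_set(per_cpu_ptr(&demo_work_pending, cpu), 1);
	wake_up_process(per_cpu(demo_thread, cpu));
}

static int __init demo_init(void)
{
	/* Creates one pinned, hotplug-aware "demo/N" thread per CPU. */
	return smpboot_register_percpu_thread(&demo_threads);
}

With the patch applied and "thread_backlog_napi" on the kernel command line, the same registration call creates one pinned backlog_napi/N thread per CPU, and ____napi_schedule() wakes the target CPU's thread instead of raising NET_RX_SOFTIRQ through an IPI.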