From: Clark Williams
Date: Fri, 17 Dec 2021 14:31:31 -0600
Subject: [PATCH 198/351] net: move xmit_recursion to per-task variable on -RT
Origin: https://git.kernel.org/cgit/linux/kernel/git/rt/linux-stable-rt.git/commit?id=0cd54619f2ddfd939c73b97d5318d50ffc142ee0

A softirq on -RT can be preempted. That means one task is in
__dev_queue_xmit(), gets preempted and another task may enter
__dev_queue_xmit() as well. netperf together with a bridge device
will then trigger the `recursion alert` because each task increments
the xmit_recursion variable which is per-CPU.
A virtual device like br0 is required to trigger this warning.

This patch moves the lock owner and counter to be per task instead of
per-CPU so it counts the recursion properly on -RT. The owner is also
a task now and not a CPU number.

Cc: stable-rt@vger.kernel.org
Signed-off-by: Sebastian Andrzej Siewior
Signed-off-by: Clark Williams
---
 include/linux/netdevice.h | 71 +++++++++++++++++++++++++++++++++++++--
 include/linux/sched.h     |  3 ++
 net/core/dev.c            |  6 +++-
 3 files changed, 77 insertions(+), 3 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index d893dc112afc..dfec34087039 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -588,7 +588,11 @@ struct netdev_queue {
 	 * write-mostly part
 	 */
 	spinlock_t		_xmit_lock ____cacheline_aligned_in_smp;
+#ifdef CONFIG_PREEMPT_RT_FULL
+	struct task_struct	*xmit_lock_owner;
+#else
 	int			xmit_lock_owner;
+#endif
 	/*
 	 * Time (in jiffies) of last Tx
 	 */
@@ -3009,14 +3013,38 @@ static inline void input_queue_tail_incr_save(struct softnet_data *sd,
 #endif
 }
 
+#define XMIT_RECURSION_LIMIT	8
 DECLARE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
 
+#ifdef CONFIG_PREEMPT_RT_FULL
+static inline int dev_recursion_level(void)
+{
+	return current->xmit_recursion;
+}
+
+static inline bool dev_xmit_recursion(void)
+{
+	return unlikely(current->xmit_recursion >
+			XMIT_RECURSION_LIMIT);
+}
+
+static inline void dev_xmit_recursion_inc(void)
+{
+	current->xmit_recursion++;
+}
+
+static inline void dev_xmit_recursion_dec(void)
+{
+	current->xmit_recursion--;
+}
+
+#else
+
 static inline int dev_recursion_level(void)
 {
 	return this_cpu_read(softnet_data.xmit.recursion);
 }
 
-#define XMIT_RECURSION_LIMIT	8
 static inline bool dev_xmit_recursion(void)
 {
 	return unlikely(__this_cpu_read(softnet_data.xmit.recursion) >
@@ -3032,6 +3060,7 @@ static inline void dev_xmit_recursion_dec(void)
 {
 	__this_cpu_dec(softnet_data.xmit.recursion);
 }
+#endif
 
 void __netif_schedule(struct Qdisc *q);
 void netif_schedule_queue(struct netdev_queue *txq);
@@ -3841,6 +3870,44 @@ static inline u32 netif_msg_init(int debug_value, int default_msg_enable_bits)
 	return (1U << debug_value) - 1;
 }
 
+#ifdef CONFIG_PREEMPT_RT_FULL
+static inline void netdev_queue_set_owner(struct netdev_queue *txq, int cpu)
+{
+	txq->xmit_lock_owner = current;
+}
+
+static inline void netdev_queue_clear_owner(struct netdev_queue *txq)
+{
+	txq->xmit_lock_owner = NULL;
+}
+
+static inline bool netdev_queue_has_owner(struct netdev_queue *txq)
+{
+	if (txq->xmit_lock_owner != NULL)
+		return true;
+	return false;
+}
+
+#else
+
+static inline void netdev_queue_set_owner(struct netdev_queue *txq, int cpu)
+{
+	txq->xmit_lock_owner = cpu;
+}
+
+static inline void netdev_queue_clear_owner(struct netdev_queue *txq)
+{
+	txq->xmit_lock_owner = -1;
+}
+
+static inline bool netdev_queue_has_owner(struct netdev_queue *txq)
+{
+	if (txq->xmit_lock_owner != -1)
+		return true;
+	return false;
+}
+#endif
+
 static inline void __netif_tx_lock(struct netdev_queue *txq, int cpu)
 {
 	spin_lock(&txq->_xmit_lock);
@@ -3893,7 +3960,7 @@ static inline void __netif_tx_unlock_bh(struct netdev_queue *txq)
 
 static inline void txq_trans_update(struct netdev_queue *txq)
 {
-	if (txq->xmit_lock_owner != -1)
+	if (netdev_queue_has_owner(txq))
 		txq->trans_start = jiffies;
 }
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 3bc6b3206e14..7aa299de5ebf 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1222,6 +1222,9 @@ struct task_struct {
 #endif
 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
 	unsigned long			task_state_change;
+#endif
+#ifdef CONFIG_PREEMPT_RT_FULL
+	int				xmit_recursion;
 #endif
 	int				pagefault_disabled;
 #ifdef CONFIG_MMU
diff --git a/net/core/dev.c b/net/core/dev.c
index 8ea1c7347987..78fd4ac06a1b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3833,10 +3833,14 @@ static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev)
 	if (dev->flags & IFF_UP) {
 		int cpu = smp_processor_id(); /* ok because BHs are off */
 
+#ifdef CONFIG_PREEMPT_RT_FULL
+		if (READ_ONCE(txq->xmit_lock_owner) != current) {
+#else
 		/* Other cpus might concurrently change txq->xmit_lock_owner
 		 * to -1 or to their cpu id, but not to our id.
 		 */
 		if (READ_ONCE(txq->xmit_lock_owner) != cpu) {
+#endif
 			if (dev_xmit_recursion())
 				goto recursion_alert;
 
@@ -8594,7 +8598,7 @@ static void netdev_init_one_queue(struct net_device *dev,
 	/* Initialize queue lock */
 	spin_lock_init(&queue->_xmit_lock);
 	netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type);
-	queue->xmit_lock_owner = -1;
+	netdev_queue_clear_owner(queue);
 	netdev_queue_numa_node_write(queue, NUMA_NO_NODE);
 	queue->dev = dev;
 #ifdef CONFIG_BQL