diff options
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx5/core/en/xsk')
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c | 230 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.h | 27 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c | 311 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h | 23 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c | 191 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h | 21 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c | 123 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h | 15 |
8 files changed, 941 insertions, 0 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c new file mode 100644 index 000000000..ebada0c5a --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c @@ -0,0 +1,230 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019-2020, Mellanox Technologies inc. All rights reserved. */ + +#include <net/xdp_sock_drv.h> +#include "pool.h" +#include "setup.h" +#include "en/params.h" + +static int mlx5e_xsk_map_pool(struct mlx5e_priv *priv, + struct xsk_buff_pool *pool) +{ + struct device *dev = mlx5_core_dma_dev(priv->mdev); + + return xsk_pool_dma_map(pool, dev, DMA_ATTR_SKIP_CPU_SYNC); +} + +static void mlx5e_xsk_unmap_pool(struct mlx5e_priv *priv, + struct xsk_buff_pool *pool) +{ + return xsk_pool_dma_unmap(pool, DMA_ATTR_SKIP_CPU_SYNC); +} + +static int mlx5e_xsk_get_pools(struct mlx5e_xsk *xsk) +{ + if (!xsk->pools) { + xsk->pools = kcalloc(MLX5E_MAX_NUM_CHANNELS, + sizeof(*xsk->pools), GFP_KERNEL); + if (unlikely(!xsk->pools)) + return -ENOMEM; + } + + xsk->refcnt++; + xsk->ever_used = true; + + return 0; +} + +static void mlx5e_xsk_put_pools(struct mlx5e_xsk *xsk) +{ + if (!--xsk->refcnt) { + kfree(xsk->pools); + xsk->pools = NULL; + } +} + +static int mlx5e_xsk_add_pool(struct mlx5e_xsk *xsk, struct xsk_buff_pool *pool, u16 ix) +{ + int err; + + err = mlx5e_xsk_get_pools(xsk); + if (unlikely(err)) + return err; + + xsk->pools[ix] = pool; + return 0; +} + +static void mlx5e_xsk_remove_pool(struct mlx5e_xsk *xsk, u16 ix) +{ + xsk->pools[ix] = NULL; + + mlx5e_xsk_put_pools(xsk); +} + +static bool mlx5e_xsk_is_pool_sane(struct xsk_buff_pool *pool) +{ + return xsk_pool_get_headroom(pool) <= 0xffff && + xsk_pool_get_chunk_size(pool) <= 0xffff; +} + +void mlx5e_build_xsk_param(struct xsk_buff_pool *pool, struct mlx5e_xsk_param *xsk) +{ + xsk->headroom = xsk_pool_get_headroom(pool); + xsk->chunk_size = xsk_pool_get_chunk_size(pool); + xsk->unaligned = pool->unaligned; +} + +static int mlx5e_xsk_enable_locked(struct mlx5e_priv *priv, + struct xsk_buff_pool *pool, u16 ix) +{ + struct mlx5e_params *params = &priv->channels.params; + struct mlx5e_xsk_param xsk; + struct mlx5e_channel *c; + int err; + + if (unlikely(mlx5e_xsk_get_pool(&priv->channels.params, &priv->xsk, ix))) + return -EBUSY; + + if (unlikely(!mlx5e_xsk_is_pool_sane(pool))) + return -EINVAL; + + err = mlx5e_xsk_map_pool(priv, pool); + if (unlikely(err)) + return err; + + err = mlx5e_xsk_add_pool(&priv->xsk, pool, ix); + if (unlikely(err)) + goto err_unmap_pool; + + mlx5e_build_xsk_param(pool, &xsk); + + if (priv->channels.params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ && + mlx5e_mpwrq_umr_mode(priv->mdev, &xsk) == MLX5E_MPWRQ_UMR_MODE_OVERSIZED) { + const char *recommendation = is_power_of_2(xsk.chunk_size) ? + "Upgrade firmware" : "Disable striding RQ"; + + mlx5_core_warn(priv->mdev, "Expected slowdown with XSK frame size %u. %s for better performance.\n", + xsk.chunk_size, recommendation); + } + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { + /* XSK objects will be created on open. */ + goto validate_closed; + } + + if (!params->xdp_prog) { + /* XSK objects will be created when an XDP program is set, + * and the channels are reopened. + */ + goto validate_closed; + } + + c = priv->channels.c[ix]; + + err = mlx5e_open_xsk(priv, params, &xsk, pool, c); + if (unlikely(err)) + goto err_remove_pool; + + mlx5e_activate_xsk(c); + mlx5e_trigger_napi_icosq(c); + + /* Don't wait for WQEs, because the newer xdpsock sample doesn't provide + * any Fill Ring entries at the setup stage. + */ + + mlx5e_rx_res_xsk_update(priv->rx_res, &priv->channels, ix, true); + + mlx5e_deactivate_rq(&c->rq); + mlx5e_flush_rq(&c->rq, MLX5_RQC_STATE_RDY); + + return 0; + +err_remove_pool: + mlx5e_xsk_remove_pool(&priv->xsk, ix); + +err_unmap_pool: + mlx5e_xsk_unmap_pool(priv, pool); + + return err; + +validate_closed: + /* Check the configuration in advance, rather than fail at a later stage + * (in mlx5e_xdp_set or on open) and end up with no channels. + */ + if (!mlx5e_validate_xsk_param(params, &xsk, priv->mdev)) { + err = -EINVAL; + goto err_remove_pool; + } + + return 0; +} + +static int mlx5e_xsk_disable_locked(struct mlx5e_priv *priv, u16 ix) +{ + struct xsk_buff_pool *pool = mlx5e_xsk_get_pool(&priv->channels.params, + &priv->xsk, ix); + struct mlx5e_channel *c; + + if (unlikely(!pool)) + return -EINVAL; + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + goto remove_pool; + + /* XSK RQ and SQ are only created if XDP program is set. */ + if (!priv->channels.params.xdp_prog) + goto remove_pool; + + c = priv->channels.c[ix]; + + mlx5e_activate_rq(&c->rq); + mlx5e_trigger_napi_icosq(c); + mlx5e_wait_for_min_rx_wqes(&c->rq, MLX5E_RQ_WQES_TIMEOUT); + + mlx5e_rx_res_xsk_update(priv->rx_res, &priv->channels, ix, false); + + mlx5e_deactivate_xsk(c); + mlx5e_close_xsk(c); + +remove_pool: + mlx5e_xsk_remove_pool(&priv->xsk, ix); + mlx5e_xsk_unmap_pool(priv, pool); + + return 0; +} + +static int mlx5e_xsk_enable_pool(struct mlx5e_priv *priv, struct xsk_buff_pool *pool, + u16 ix) +{ + int err; + + mutex_lock(&priv->state_lock); + err = mlx5e_xsk_enable_locked(priv, pool, ix); + mutex_unlock(&priv->state_lock); + + return err; +} + +static int mlx5e_xsk_disable_pool(struct mlx5e_priv *priv, u16 ix) +{ + int err; + + mutex_lock(&priv->state_lock); + err = mlx5e_xsk_disable_locked(priv, ix); + mutex_unlock(&priv->state_lock); + + return err; +} + +int mlx5e_xsk_setup_pool(struct net_device *dev, struct xsk_buff_pool *pool, u16 qid) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_params *params = &priv->channels.params; + + if (unlikely(qid >= params->num_channels)) + return -EINVAL; + + return pool ? mlx5e_xsk_enable_pool(priv, pool, qid) : + mlx5e_xsk_disable_pool(priv, qid); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.h new file mode 100644 index 000000000..dca0010a0 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019-2020, Mellanox Technologies inc. All rights reserved. */ + +#ifndef __MLX5_EN_XSK_POOL_H__ +#define __MLX5_EN_XSK_POOL_H__ + +#include "en.h" + +static inline struct xsk_buff_pool *mlx5e_xsk_get_pool(struct mlx5e_params *params, + struct mlx5e_xsk *xsk, u16 ix) +{ + if (!xsk || !xsk->pools) + return NULL; + + if (unlikely(ix >= params->num_channels)) + return NULL; + + return xsk->pools[ix]; +} + +struct mlx5e_xsk_param; +void mlx5e_build_xsk_param(struct xsk_buff_pool *pool, struct mlx5e_xsk_param *xsk); + +/* .ndo_bpf callback. */ +int mlx5e_xsk_setup_pool(struct net_device *dev, struct xsk_buff_pool *pool, u16 qid); + +#endif /* __MLX5_EN_XSK_POOL_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c new file mode 100644 index 000000000..c91b54d9f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c @@ -0,0 +1,311 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include "rx.h" +#include "en/xdp.h" +#include <net/xdp_sock_drv.h> +#include <linux/filter.h> + +/* RX data path */ + +int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) +{ + struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, ix); + struct mlx5e_icosq *icosq = rq->icosq; + struct mlx5_wq_cyc *wq = &icosq->wq; + struct mlx5e_umr_wqe *umr_wqe; + int batch, i; + u32 offset; /* 17-bit value with MTT. */ + u16 pi; + + if (unlikely(!xsk_buff_can_alloc(rq->xsk_pool, rq->mpwqe.pages_per_wqe))) + goto err; + + BUILD_BUG_ON(sizeof(wi->alloc_units[0]) != sizeof(wi->alloc_units[0].xsk)); + batch = xsk_buff_alloc_batch(rq->xsk_pool, (struct xdp_buff **)wi->alloc_units, + rq->mpwqe.pages_per_wqe); + + /* If batch < pages_per_wqe, either: + * 1. Some (or all) descriptors were invalid. + * 2. dma_need_sync is true, and it fell back to allocating one frame. + * In either case, try to continue allocating frames one by one, until + * the first error, which will mean there are no more valid descriptors. + */ + for (; batch < rq->mpwqe.pages_per_wqe; batch++) { + wi->alloc_units[batch].xsk = xsk_buff_alloc(rq->xsk_pool); + if (unlikely(!wi->alloc_units[batch].xsk)) + goto err_reuse_batch; + } + + pi = mlx5e_icosq_get_next_pi(icosq, rq->mpwqe.umr_wqebbs); + umr_wqe = mlx5_wq_cyc_get_wqe(wq, pi); + memcpy(umr_wqe, &rq->mpwqe.umr_wqe, sizeof(struct mlx5e_umr_wqe)); + + if (likely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_ALIGNED)) { + for (i = 0; i < batch; i++) { + dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk); + + umr_wqe->inline_mtts[i] = (struct mlx5_mtt) { + .ptag = cpu_to_be64(addr | MLX5_EN_WR), + }; + } + } else if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_UNALIGNED)) { + for (i = 0; i < batch; i++) { + dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk); + + umr_wqe->inline_ksms[i] = (struct mlx5_ksm) { + .key = rq->mkey_be, + .va = cpu_to_be64(addr), + }; + } + } else if (likely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_TRIPLE)) { + u32 mapping_size = 1 << (rq->mpwqe.page_shift - 2); + + for (i = 0; i < batch; i++) { + dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk); + + umr_wqe->inline_ksms[i << 2] = (struct mlx5_ksm) { + .key = rq->mkey_be, + .va = cpu_to_be64(addr), + }; + umr_wqe->inline_ksms[(i << 2) + 1] = (struct mlx5_ksm) { + .key = rq->mkey_be, + .va = cpu_to_be64(addr + mapping_size), + }; + umr_wqe->inline_ksms[(i << 2) + 2] = (struct mlx5_ksm) { + .key = rq->mkey_be, + .va = cpu_to_be64(addr + mapping_size * 2), + }; + umr_wqe->inline_ksms[(i << 2) + 3] = (struct mlx5_ksm) { + .key = rq->mkey_be, + .va = cpu_to_be64(rq->wqe_overflow.addr), + }; + } + } else { + __be32 pad_size = cpu_to_be32((1 << rq->mpwqe.page_shift) - + rq->xsk_pool->chunk_size); + __be32 frame_size = cpu_to_be32(rq->xsk_pool->chunk_size); + + for (i = 0; i < batch; i++) { + dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk); + + umr_wqe->inline_klms[i << 1] = (struct mlx5_klm) { + .key = rq->mkey_be, + .va = cpu_to_be64(addr), + .bcount = frame_size, + }; + umr_wqe->inline_klms[(i << 1) + 1] = (struct mlx5_klm) { + .key = rq->mkey_be, + .va = cpu_to_be64(rq->wqe_overflow.addr), + .bcount = pad_size, + }; + } + } + + bitmap_zero(wi->xdp_xmit_bitmap, rq->mpwqe.pages_per_wqe); + wi->consumed_strides = 0; + + umr_wqe->ctrl.opmod_idx_opcode = + cpu_to_be32((icosq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) | MLX5_OPCODE_UMR); + + /* Optimized for speed: keep in sync with mlx5e_mpwrq_umr_entry_size. */ + offset = ix * rq->mpwqe.mtts_per_wqe; + if (likely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_ALIGNED)) + offset = offset * sizeof(struct mlx5_mtt) / MLX5_OCTWORD; + else if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_OVERSIZED)) + offset = offset * sizeof(struct mlx5_klm) * 2 / MLX5_OCTWORD; + else if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_TRIPLE)) + offset = offset * sizeof(struct mlx5_ksm) * 4 / MLX5_OCTWORD; + umr_wqe->uctrl.xlt_offset = cpu_to_be16(offset); + + icosq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) { + .wqe_type = MLX5E_ICOSQ_WQE_UMR_RX, + .num_wqebbs = rq->mpwqe.umr_wqebbs, + .umr.rq = rq, + }; + + icosq->pc += rq->mpwqe.umr_wqebbs; + + icosq->doorbell_cseg = &umr_wqe->ctrl; + + return 0; + +err_reuse_batch: + while (--batch >= 0) + xsk_buff_free(wi->alloc_units[batch].xsk); + +err: + rq->stats->buff_alloc_err++; + return -ENOMEM; +} + +int mlx5e_xsk_alloc_rx_wqes_batched(struct mlx5e_rq *rq, u16 ix, int wqe_bulk) +{ + struct mlx5_wq_cyc *wq = &rq->wqe.wq; + struct xdp_buff **buffs; + u32 contig, alloc; + int i; + + /* mlx5e_init_frags_partition creates a 1:1 mapping between + * rq->wqe.frags and rq->wqe.alloc_units, which allows us to + * allocate XDP buffers straight into alloc_units. + */ + BUILD_BUG_ON(sizeof(rq->wqe.alloc_units[0]) != + sizeof(rq->wqe.alloc_units[0].xsk)); + buffs = (struct xdp_buff **)rq->wqe.alloc_units; + contig = mlx5_wq_cyc_get_size(wq) - ix; + if (wqe_bulk <= contig) { + alloc = xsk_buff_alloc_batch(rq->xsk_pool, buffs + ix, wqe_bulk); + } else { + alloc = xsk_buff_alloc_batch(rq->xsk_pool, buffs + ix, contig); + if (likely(alloc == contig)) + alloc += xsk_buff_alloc_batch(rq->xsk_pool, buffs, wqe_bulk - contig); + } + + for (i = 0; i < alloc; i++) { + int j = mlx5_wq_cyc_ctr2ix(wq, ix + i); + struct mlx5e_wqe_frag_info *frag; + struct mlx5e_rx_wqe_cyc *wqe; + dma_addr_t addr; + + wqe = mlx5_wq_cyc_get_wqe(wq, j); + /* Assumes log_num_frags == 0. */ + frag = &rq->wqe.frags[j]; + + addr = xsk_buff_xdp_get_frame_dma(frag->au->xsk); + wqe->data[0].addr = cpu_to_be64(addr + rq->buff.headroom); + } + + return alloc; +} + +int mlx5e_xsk_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk) +{ + struct mlx5_wq_cyc *wq = &rq->wqe.wq; + int i; + + for (i = 0; i < wqe_bulk; i++) { + int j = mlx5_wq_cyc_ctr2ix(wq, ix + i); + struct mlx5e_wqe_frag_info *frag; + struct mlx5e_rx_wqe_cyc *wqe; + dma_addr_t addr; + + wqe = mlx5_wq_cyc_get_wqe(wq, j); + /* Assumes log_num_frags == 0. */ + frag = &rq->wqe.frags[j]; + + frag->au->xsk = xsk_buff_alloc(rq->xsk_pool); + if (unlikely(!frag->au->xsk)) + return i; + + addr = xsk_buff_xdp_get_frame_dma(frag->au->xsk); + wqe->data[0].addr = cpu_to_be64(addr + rq->buff.headroom); + } + + return wqe_bulk; +} + +static struct sk_buff *mlx5e_xsk_construct_skb(struct mlx5e_rq *rq, struct xdp_buff *xdp) +{ + u32 totallen = xdp->data_end - xdp->data_meta; + u32 metalen = xdp->data - xdp->data_meta; + struct sk_buff *skb; + + skb = napi_alloc_skb(rq->cq.napi, totallen); + if (unlikely(!skb)) { + rq->stats->buff_alloc_err++; + return NULL; + } + + skb_put_data(skb, xdp->data_meta, totallen); + + if (metalen) { + skb_metadata_set(skb, metalen); + __skb_pull(skb, metalen); + } + + return skb; +} + +struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, + struct mlx5e_mpw_info *wi, + u16 cqe_bcnt, + u32 head_offset, + u32 page_idx) +{ + struct xdp_buff *xdp = wi->alloc_units[page_idx].xsk; + struct bpf_prog *prog; + + /* Check packet size. Note LRO doesn't use linear SKB */ + if (unlikely(cqe_bcnt > rq->hw_mtu)) { + rq->stats->oversize_pkts_sw_drop++; + return NULL; + } + + /* head_offset is not used in this function, because xdp->data and the + * DMA address point directly to the necessary place. Furthermore, in + * the current implementation, UMR pages are mapped to XSK frames, so + * head_offset should always be 0. + */ + WARN_ON_ONCE(head_offset); + + xsk_buff_set_size(xdp, cqe_bcnt); + xsk_buff_dma_sync_for_cpu(xdp, rq->xsk_pool); + net_prefetch(xdp->data); + + /* Possible flows: + * - XDP_REDIRECT to XSKMAP: + * The page is owned by the userspace from now. + * - XDP_TX and other XDP_REDIRECTs: + * The page was returned by ZCA and recycled. + * - XDP_DROP: + * Recycle the page. + * - XDP_PASS: + * Allocate an SKB, copy the data and recycle the page. + * + * Pages to be recycled go to the Reuse Ring on MPWQE deallocation. Its + * size is the same as the Driver RX Ring's size, and pages for WQEs are + * allocated first from the Reuse Ring, so it has enough space. + */ + + prog = rcu_dereference(rq->xdp_prog); + if (likely(prog && mlx5e_xdp_handle(rq, NULL, prog, xdp))) { + if (likely(__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags))) + __set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */ + return NULL; /* page/packet was consumed by XDP */ + } + + /* XDP_PASS: copy the data from the UMEM to a new SKB and reuse the + * frame. On SKB allocation failure, NULL is returned. + */ + return mlx5e_xsk_construct_skb(rq, xdp); +} + +struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq, + struct mlx5e_wqe_frag_info *wi, + u32 cqe_bcnt) +{ + struct xdp_buff *xdp = wi->au->xsk; + struct bpf_prog *prog; + + /* wi->offset is not used in this function, because xdp->data and the + * DMA address point directly to the necessary place. Furthermore, the + * XSK allocator allocates frames per packet, instead of pages, so + * wi->offset should always be 0. + */ + WARN_ON_ONCE(wi->offset); + + xsk_buff_set_size(xdp, cqe_bcnt); + xsk_buff_dma_sync_for_cpu(xdp, rq->xsk_pool); + net_prefetch(xdp->data); + + prog = rcu_dereference(rq->xdp_prog); + if (likely(prog && mlx5e_xdp_handle(rq, NULL, prog, xdp))) + return NULL; /* page/packet was consumed by XDP */ + + /* XDP_PASS: copy the data from the UMEM to a new SKB. The frame reuse + * will be handled by mlx5e_free_rx_wqe. + * On SKB allocation failure, NULL is returned. + */ + return mlx5e_xsk_construct_skb(rq, xdp); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h new file mode 100644 index 000000000..087c943bd --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#ifndef __MLX5_EN_XSK_RX_H__ +#define __MLX5_EN_XSK_RX_H__ + +#include "en.h" + +/* RX data path */ + +int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix); +int mlx5e_xsk_alloc_rx_wqes_batched(struct mlx5e_rq *rq, u16 ix, int wqe_bulk); +int mlx5e_xsk_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk); +struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, + struct mlx5e_mpw_info *wi, + u16 cqe_bcnt, + u32 head_offset, + u32 page_idx); +struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq, + struct mlx5e_wqe_frag_info *wi, + u32 cqe_bcnt); + +#endif /* __MLX5_EN_XSK_RX_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c new file mode 100644 index 000000000..ff03c4383 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c @@ -0,0 +1,191 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include "setup.h" +#include "en/params.h" +#include "en/txrx.h" +#include "en/health.h" +#include <net/xdp_sock_drv.h> + +/* The limitation of 2048 can be altered, but shouldn't go beyond the minimal + * stride size of striding RQ. + */ +#define MLX5E_MIN_XSK_CHUNK_SIZE max(2048, XDP_UMEM_MIN_CHUNK_SIZE) + +bool mlx5e_validate_xsk_param(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + struct mlx5_core_dev *mdev) +{ + /* AF_XDP doesn't support frames larger than PAGE_SIZE. */ + if (xsk->chunk_size > PAGE_SIZE || xsk->chunk_size < MLX5E_MIN_XSK_CHUNK_SIZE) + return false; + + /* frag_sz is different for regular and XSK RQs, so ensure that linear + * SKB mode is possible. + */ + switch (params->rq_wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + return !mlx5e_mpwrq_validate_xsk(mdev, params, xsk); + default: /* MLX5_WQ_TYPE_CYCLIC */ + return mlx5e_rx_is_linear_skb(mdev, params, xsk); + } +} + +static void mlx5e_build_xsk_cparam(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + u16 q_counter, + struct mlx5e_channel_param *cparam) +{ + mlx5e_build_rq_param(mdev, params, xsk, q_counter, &cparam->rq); + mlx5e_build_xdpsq_param(mdev, params, xsk, &cparam->xdp_sq); +} + +static int mlx5e_init_xsk_rq(struct mlx5e_channel *c, + struct mlx5e_params *params, + struct xsk_buff_pool *pool, + struct mlx5e_xsk_param *xsk, + struct mlx5e_rq *rq) +{ + struct mlx5_core_dev *mdev = c->mdev; + int rq_xdp_ix; + int err; + + rq->wq_type = params->rq_wq_type; + rq->pdev = c->pdev; + rq->netdev = c->netdev; + rq->priv = c->priv; + rq->tstamp = c->tstamp; + rq->clock = &mdev->clock; + rq->icosq = &c->icosq; + rq->ix = c->ix; + rq->channel = c; + rq->mdev = mdev; + rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); + rq->xdpsq = &c->rq_xdpsq; + rq->xsk_pool = pool; + rq->stats = &c->priv->channel_stats[c->ix]->xskrq; + rq->ptp_cyc2time = mlx5_rq_ts_translator(mdev); + rq_xdp_ix = c->ix; + err = mlx5e_rq_set_handlers(rq, params, xsk); + if (err) + return err; + + return xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq_xdp_ix, 0); +} + +static int mlx5e_open_xsk_rq(struct mlx5e_channel *c, struct mlx5e_params *params, + struct mlx5e_rq_param *rq_params, struct xsk_buff_pool *pool, + struct mlx5e_xsk_param *xsk) +{ + int err; + + err = mlx5e_init_xsk_rq(c, params, pool, xsk, &c->xskrq); + if (err) + return err; + + return mlx5e_open_rq(params, rq_params, xsk, cpu_to_node(c->cpu), &c->xskrq); +} + +int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, struct xsk_buff_pool *pool, + struct mlx5e_channel *c) +{ + struct mlx5e_channel_param *cparam; + struct mlx5e_create_cq_param ccp; + int err; + + mlx5e_build_create_cq_param(&ccp, c); + + if (!mlx5e_validate_xsk_param(params, xsk, priv->mdev)) + return -EINVAL; + + cparam = kvzalloc(sizeof(*cparam), GFP_KERNEL); + if (!cparam) + return -ENOMEM; + + mlx5e_build_xsk_cparam(priv->mdev, params, xsk, priv->q_counter, cparam); + + err = mlx5e_open_cq(c->priv, params->rx_cq_moderation, &cparam->rq.cqp, &ccp, + &c->xskrq.cq); + if (unlikely(err)) + goto err_free_cparam; + + err = mlx5e_open_xsk_rq(c, params, &cparam->rq, pool, xsk); + if (unlikely(err)) + goto err_close_rx_cq; + + err = mlx5e_open_cq(c->priv, params->tx_cq_moderation, &cparam->xdp_sq.cqp, &ccp, + &c->xsksq.cq); + if (unlikely(err)) + goto err_close_rq; + + /* Create a separate SQ, so that when the buff pool is disabled, we could + * close this SQ safely and stop receiving CQEs. In other case, e.g., if + * the XDPSQ was used instead, we might run into trouble when the buff pool + * is disabled and then re-enabled, but the SQ continues receiving CQEs + * from the old buff pool. + */ + err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, pool, &c->xsksq, true); + if (unlikely(err)) + goto err_close_tx_cq; + + kvfree(cparam); + + set_bit(MLX5E_CHANNEL_STATE_XSK, c->state); + + return 0; + +err_close_tx_cq: + mlx5e_close_cq(&c->xsksq.cq); + +err_close_rq: + mlx5e_close_rq(&c->xskrq); + +err_close_rx_cq: + mlx5e_close_cq(&c->xskrq.cq); + +err_free_cparam: + kvfree(cparam); + + return err; +} + +void mlx5e_close_xsk(struct mlx5e_channel *c) +{ + clear_bit(MLX5E_CHANNEL_STATE_XSK, c->state); + synchronize_net(); /* Sync with NAPI. */ + + mlx5e_close_rq(&c->xskrq); + mlx5e_close_cq(&c->xskrq.cq); + mlx5e_close_xdpsq(&c->xsksq); + mlx5e_close_cq(&c->xsksq.cq); + + memset(&c->xskrq, 0, sizeof(c->xskrq)); + memset(&c->xsksq, 0, sizeof(c->xsksq)); +} + +void mlx5e_activate_xsk(struct mlx5e_channel *c) +{ + /* ICOSQ recovery deactivates RQs. Suspend the recovery to avoid + * activating XSKRQ in the middle of recovery. + */ + mlx5e_reporter_icosq_suspend_recovery(c); + set_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state); + mlx5e_reporter_icosq_resume_recovery(c); + + /* TX queue is created active. */ +} + +void mlx5e_deactivate_xsk(struct mlx5e_channel *c) +{ + /* ICOSQ recovery may reactivate XSKRQ if clear_bit is called in the + * middle of recovery. Suspend the recovery to avoid it. + */ + mlx5e_reporter_icosq_suspend_recovery(c); + clear_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state); + mlx5e_reporter_icosq_resume_recovery(c); + synchronize_net(); /* Sync with NAPI to prevent mlx5e_post_rx_wqes. */ + + /* TX queue is disabled on close. */ +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h new file mode 100644 index 000000000..50e111b85 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#ifndef __MLX5_EN_XSK_SETUP_H__ +#define __MLX5_EN_XSK_SETUP_H__ + +#include "en.h" + +struct mlx5e_xsk_param; + +bool mlx5e_validate_xsk_param(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + struct mlx5_core_dev *mdev); +int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, struct xsk_buff_pool *pool, + struct mlx5e_channel *c); +void mlx5e_close_xsk(struct mlx5e_channel *c); +void mlx5e_activate_xsk(struct mlx5e_channel *c); +void mlx5e_deactivate_xsk(struct mlx5e_channel *c); + +#endif /* __MLX5_EN_XSK_SETUP_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c new file mode 100644 index 000000000..367a9505c --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c @@ -0,0 +1,123 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include "tx.h" +#include "pool.h" +#include "en/xdp.h" +#include "en/params.h" +#include <net/xdp_sock_drv.h> + +int mlx5e_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_params *params = &priv->channels.params; + struct mlx5e_channel *c; + + if (unlikely(!mlx5e_xdp_is_active(priv))) + return -ENETDOWN; + + if (unlikely(qid >= params->num_channels)) + return -EINVAL; + + c = priv->channels.c[qid]; + + if (!napi_if_scheduled_mark_missed(&c->napi)) { + /* To avoid WQE overrun, don't post a NOP if async_icosq is not + * active and not polled by NAPI. Return 0, because the upcoming + * activate will trigger the IRQ for us. + */ + if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &c->async_icosq.state))) + return 0; + + if (test_and_set_bit(MLX5E_SQ_STATE_PENDING_XSK_TX, &c->async_icosq.state)) + return 0; + + mlx5e_trigger_napi_icosq(c); + } + + return 0; +} + +/* When TX fails (because of the size of the packet), we need to get completions + * in order, so post a NOP to get a CQE. Since AF_XDP doesn't distinguish + * between successful TX and errors, handling in mlx5e_poll_xdpsq_cq is the + * same. + */ +static void mlx5e_xsk_tx_post_err(struct mlx5e_xdpsq *sq, + struct mlx5e_xdp_info *xdpi) +{ + u16 pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc); + struct mlx5e_xdp_wqe_info *wi = &sq->db.wqe_info[pi]; + struct mlx5e_tx_wqe *nopwqe; + + wi->num_wqebbs = 1; + wi->num_pkts = 1; + + nopwqe = mlx5e_post_nop(&sq->wq, sq->sqn, &sq->pc); + mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi); + sq->doorbell_cseg = &nopwqe->ctrl; +} + +bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget) +{ + struct xsk_buff_pool *pool = sq->xsk_pool; + struct mlx5e_xmit_data xdptxd; + struct mlx5e_xdp_info xdpi; + bool work_done = true; + bool flush = false; + + xdpi.mode = MLX5E_XDP_XMIT_MODE_XSK; + + for (; budget; budget--) { + int check_result = INDIRECT_CALL_2(sq->xmit_xdp_frame_check, + mlx5e_xmit_xdp_frame_check_mpwqe, + mlx5e_xmit_xdp_frame_check, + sq); + struct xdp_desc desc; + bool ret; + + if (unlikely(check_result < 0)) { + work_done = false; + break; + } + + if (!xsk_tx_peek_desc(pool, &desc)) { + /* TX will get stuck until something wakes it up by + * triggering NAPI. Currently it's expected that the + * application calls sendto() if there are consumed, but + * not completed frames. + */ + break; + } + + xdptxd.dma_addr = xsk_buff_raw_get_dma(pool, desc.addr); + xdptxd.data = xsk_buff_raw_get_data(pool, desc.addr); + xdptxd.len = desc.len; + + xsk_buff_raw_dma_sync_for_device(pool, xdptxd.dma_addr, xdptxd.len); + + ret = INDIRECT_CALL_2(sq->xmit_xdp_frame, mlx5e_xmit_xdp_frame_mpwqe, + mlx5e_xmit_xdp_frame, sq, &xdptxd, NULL, + check_result); + if (unlikely(!ret)) { + if (sq->mpwqe.wqe) + mlx5e_xdp_mpwqe_complete(sq); + + mlx5e_xsk_tx_post_err(sq, &xdpi); + } else { + mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, &xdpi); + } + + flush = true; + } + + if (flush) { + if (sq->mpwqe.wqe) + mlx5e_xdp_mpwqe_complete(sq); + mlx5e_xmit_xdp_doorbell(sq); + + xsk_tx_release(pool); + } + + return !(budget && work_done); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h new file mode 100644 index 000000000..9c505158b --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#ifndef __MLX5_EN_XSK_TX_H__ +#define __MLX5_EN_XSK_TX_H__ + +#include "en.h" + +/* TX data path */ + +int mlx5e_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags); + +bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget); + +#endif /* __MLX5_EN_XSK_TX_H__ */ |