diff options
Diffstat (limited to 'drivers/net/ethernet/fungible')
22 files changed, 8874 insertions, 0 deletions
diff --git a/drivers/net/ethernet/fungible/Kconfig b/drivers/net/ethernet/fungible/Kconfig new file mode 100644 index 000000000..1ecedecc0 --- /dev/null +++ b/drivers/net/ethernet/fungible/Kconfig @@ -0,0 +1,28 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Fungible network driver configuration +# + +config NET_VENDOR_FUNGIBLE + bool "Fungible devices" + default y + help + If you have a Fungible network device, say Y. + + Note that the answer to this question doesn't directly affect the + kernel: saying N will just cause the configurator to skip all + the questions about Fungible cards. If you say Y, you will be asked + for your specific card in the following questions. + +if NET_VENDOR_FUNGIBLE + +config FUN_CORE + tristate + select SBITMAP + help + A service module offering basic common services to Fungible + device drivers. + +source "drivers/net/ethernet/fungible/funeth/Kconfig" + +endif # NET_VENDOR_FUNGIBLE diff --git a/drivers/net/ethernet/fungible/Makefile b/drivers/net/ethernet/fungible/Makefile new file mode 100644 index 000000000..df759f158 --- /dev/null +++ b/drivers/net/ethernet/fungible/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) +# +# Makefile for the Fungible network device drivers. +# + +obj-$(CONFIG_FUN_CORE) += funcore/ +obj-$(CONFIG_FUN_ETH) += funeth/ diff --git a/drivers/net/ethernet/fungible/funcore/Makefile b/drivers/net/ethernet/fungible/funcore/Makefile new file mode 100644 index 000000000..bc16b264b --- /dev/null +++ b/drivers/net/ethernet/fungible/funcore/Makefile @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) + +obj-$(CONFIG_FUN_CORE) += funcore.o + +funcore-y := fun_dev.o fun_queue.o diff --git a/drivers/net/ethernet/fungible/funcore/fun_dev.c b/drivers/net/ethernet/fungible/funcore/fun_dev.c new file mode 100644 index 000000000..fb5120d90 --- /dev/null +++ b/drivers/net/ethernet/fungible/funcore/fun_dev.c @@ -0,0 +1,843 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) + +#include <linux/aer.h> +#include <linux/bitmap.h> +#include <linux/delay.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/io-64-nonatomic-lo-hi.h> +#include <linux/mm.h> +#include <linux/module.h> +#include <linux/nvme.h> +#include <linux/pci.h> +#include <linux/wait.h> +#include <linux/sched/signal.h> + +#include "fun_queue.h" +#include "fun_dev.h" + +#define FUN_ADMIN_CMD_TO_MS 3000 + +enum { + AQA_ASQS_SHIFT = 0, + AQA_ACQS_SHIFT = 16, + AQA_MIN_QUEUE_SIZE = 2, + AQA_MAX_QUEUE_SIZE = 4096 +}; + +/* context for admin commands */ +struct fun_cmd_ctx { + fun_admin_callback_t cb; /* callback to invoke on completion */ + void *cb_data; /* user data provided to callback */ + int cpu; /* CPU where the cmd's tag was allocated */ +}; + +/* Context for synchronous admin commands. */ +struct fun_sync_cmd_ctx { + struct completion compl; + u8 *rsp_buf; /* caller provided response buffer */ + unsigned int rsp_len; /* response buffer size */ + u8 rsp_status; /* command response status */ +}; + +/* Wait for the CSTS.RDY bit to match @enabled. */ +static int fun_wait_ready(struct fun_dev *fdev, bool enabled) +{ + unsigned int cap_to = NVME_CAP_TIMEOUT(fdev->cap_reg); + u32 bit = enabled ? NVME_CSTS_RDY : 0; + unsigned long deadline; + + deadline = ((cap_to + 1) * HZ / 2) + jiffies; /* CAP.TO is in 500ms */ + + for (;;) { + u32 csts = readl(fdev->bar + NVME_REG_CSTS); + + if (csts == ~0) { + dev_err(fdev->dev, "CSTS register read %#x\n", csts); + return -EIO; + } + + if ((csts & NVME_CSTS_RDY) == bit) + return 0; + + if (time_is_before_jiffies(deadline)) + break; + + msleep(100); + } + + dev_err(fdev->dev, + "Timed out waiting for device to indicate RDY %u; aborting %s\n", + enabled, enabled ? "initialization" : "reset"); + return -ETIMEDOUT; +} + +/* Check CSTS and return an error if it is unreadable or has unexpected + * RDY value. + */ +static int fun_check_csts_rdy(struct fun_dev *fdev, unsigned int expected_rdy) +{ + u32 csts = readl(fdev->bar + NVME_REG_CSTS); + u32 actual_rdy = csts & NVME_CSTS_RDY; + + if (csts == ~0) { + dev_err(fdev->dev, "CSTS register read %#x\n", csts); + return -EIO; + } + if (actual_rdy != expected_rdy) { + dev_err(fdev->dev, "Unexpected CSTS RDY %u\n", actual_rdy); + return -EINVAL; + } + return 0; +} + +/* Check that CSTS RDY has the expected value. Then write a new value to the CC + * register and wait for CSTS RDY to match the new CC ENABLE state. + */ +static int fun_update_cc_enable(struct fun_dev *fdev, unsigned int initial_rdy) +{ + int rc = fun_check_csts_rdy(fdev, initial_rdy); + + if (rc) + return rc; + writel(fdev->cc_reg, fdev->bar + NVME_REG_CC); + return fun_wait_ready(fdev, !!(fdev->cc_reg & NVME_CC_ENABLE)); +} + +static int fun_disable_ctrl(struct fun_dev *fdev) +{ + fdev->cc_reg &= ~(NVME_CC_SHN_MASK | NVME_CC_ENABLE); + return fun_update_cc_enable(fdev, 1); +} + +static int fun_enable_ctrl(struct fun_dev *fdev, u32 admin_cqesz_log2, + u32 admin_sqesz_log2) +{ + fdev->cc_reg = (admin_cqesz_log2 << NVME_CC_IOCQES_SHIFT) | + (admin_sqesz_log2 << NVME_CC_IOSQES_SHIFT) | + ((PAGE_SHIFT - 12) << NVME_CC_MPS_SHIFT) | + NVME_CC_ENABLE; + + return fun_update_cc_enable(fdev, 0); +} + +static int fun_map_bars(struct fun_dev *fdev, const char *name) +{ + struct pci_dev *pdev = to_pci_dev(fdev->dev); + int err; + + err = pci_request_mem_regions(pdev, name); + if (err) { + dev_err(&pdev->dev, + "Couldn't get PCI memory resources, err %d\n", err); + return err; + } + + fdev->bar = pci_ioremap_bar(pdev, 0); + if (!fdev->bar) { + dev_err(&pdev->dev, "Couldn't map BAR 0\n"); + pci_release_mem_regions(pdev); + return -ENOMEM; + } + + return 0; +} + +static void fun_unmap_bars(struct fun_dev *fdev) +{ + struct pci_dev *pdev = to_pci_dev(fdev->dev); + + if (fdev->bar) { + iounmap(fdev->bar); + fdev->bar = NULL; + pci_release_mem_regions(pdev); + } +} + +static int fun_set_dma_masks(struct device *dev) +{ + int err; + + err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64)); + if (err) + dev_err(dev, "DMA mask configuration failed, err %d\n", err); + return err; +} + +static irqreturn_t fun_admin_irq(int irq, void *data) +{ + struct fun_queue *funq = data; + + return fun_process_cq(funq, 0) ? IRQ_HANDLED : IRQ_NONE; +} + +static void fun_complete_admin_cmd(struct fun_queue *funq, void *data, + void *entry, const struct fun_cqe_info *info) +{ + const struct fun_admin_rsp_common *rsp_common = entry; + struct fun_dev *fdev = funq->fdev; + struct fun_cmd_ctx *cmd_ctx; + int cpu; + u16 cid; + + if (info->sqhd == cpu_to_be16(0xffff)) { + dev_dbg(fdev->dev, "adminq event"); + if (fdev->adminq_cb) + fdev->adminq_cb(fdev, entry); + return; + } + + cid = be16_to_cpu(rsp_common->cid); + dev_dbg(fdev->dev, "admin CQE cid %u, op %u, ret %u\n", cid, + rsp_common->op, rsp_common->ret); + + cmd_ctx = &fdev->cmd_ctx[cid]; + if (cmd_ctx->cpu < 0) { + dev_err(fdev->dev, + "admin CQE with CID=%u, op=%u does not match a pending command\n", + cid, rsp_common->op); + return; + } + + if (cmd_ctx->cb) + cmd_ctx->cb(fdev, entry, xchg(&cmd_ctx->cb_data, NULL)); + + cpu = cmd_ctx->cpu; + cmd_ctx->cpu = -1; + sbitmap_queue_clear(&fdev->admin_sbq, cid, cpu); +} + +static int fun_init_cmd_ctx(struct fun_dev *fdev, unsigned int ntags) +{ + unsigned int i; + + fdev->cmd_ctx = kvcalloc(ntags, sizeof(*fdev->cmd_ctx), GFP_KERNEL); + if (!fdev->cmd_ctx) + return -ENOMEM; + + for (i = 0; i < ntags; i++) + fdev->cmd_ctx[i].cpu = -1; + + return 0; +} + +/* Allocate and enable an admin queue and assign it the first IRQ vector. */ +static int fun_enable_admin_queue(struct fun_dev *fdev, + const struct fun_dev_params *areq) +{ + struct fun_queue_alloc_req qreq = { + .cqe_size_log2 = areq->cqe_size_log2, + .sqe_size_log2 = areq->sqe_size_log2, + .cq_depth = areq->cq_depth, + .sq_depth = areq->sq_depth, + .rq_depth = areq->rq_depth, + }; + unsigned int ntags = areq->sq_depth - 1; + struct fun_queue *funq; + int rc; + + if (fdev->admin_q) + return -EEXIST; + + if (areq->sq_depth < AQA_MIN_QUEUE_SIZE || + areq->sq_depth > AQA_MAX_QUEUE_SIZE || + areq->cq_depth < AQA_MIN_QUEUE_SIZE || + areq->cq_depth > AQA_MAX_QUEUE_SIZE) + return -EINVAL; + + fdev->admin_q = fun_alloc_queue(fdev, 0, &qreq); + if (!fdev->admin_q) + return -ENOMEM; + + rc = fun_init_cmd_ctx(fdev, ntags); + if (rc) + goto free_q; + + rc = sbitmap_queue_init_node(&fdev->admin_sbq, ntags, -1, false, + GFP_KERNEL, dev_to_node(fdev->dev)); + if (rc) + goto free_cmd_ctx; + + funq = fdev->admin_q; + funq->cq_vector = 0; + rc = fun_request_irq(funq, dev_name(fdev->dev), fun_admin_irq, funq); + if (rc) + goto free_sbq; + + fun_set_cq_callback(funq, fun_complete_admin_cmd, NULL); + fdev->adminq_cb = areq->event_cb; + + writel((funq->sq_depth - 1) << AQA_ASQS_SHIFT | + (funq->cq_depth - 1) << AQA_ACQS_SHIFT, + fdev->bar + NVME_REG_AQA); + + writeq(funq->sq_dma_addr, fdev->bar + NVME_REG_ASQ); + writeq(funq->cq_dma_addr, fdev->bar + NVME_REG_ACQ); + + rc = fun_enable_ctrl(fdev, areq->cqe_size_log2, areq->sqe_size_log2); + if (rc) + goto free_irq; + + if (areq->rq_depth) { + rc = fun_create_rq(funq); + if (rc) + goto disable_ctrl; + + funq_rq_post(funq); + } + + return 0; + +disable_ctrl: + fun_disable_ctrl(fdev); +free_irq: + fun_free_irq(funq); +free_sbq: + sbitmap_queue_free(&fdev->admin_sbq); +free_cmd_ctx: + kvfree(fdev->cmd_ctx); + fdev->cmd_ctx = NULL; +free_q: + fun_free_queue(fdev->admin_q); + fdev->admin_q = NULL; + return rc; +} + +static void fun_disable_admin_queue(struct fun_dev *fdev) +{ + struct fun_queue *admq = fdev->admin_q; + + if (!admq) + return; + + fun_disable_ctrl(fdev); + + fun_free_irq(admq); + __fun_process_cq(admq, 0); + + sbitmap_queue_free(&fdev->admin_sbq); + + kvfree(fdev->cmd_ctx); + fdev->cmd_ctx = NULL; + + fun_free_queue(admq); + fdev->admin_q = NULL; +} + +/* Return %true if the admin queue has stopped servicing commands as can be + * detected through registers. This isn't exhaustive and may provide false + * negatives. + */ +static bool fun_adminq_stopped(struct fun_dev *fdev) +{ + u32 csts = readl(fdev->bar + NVME_REG_CSTS); + + return (csts & (NVME_CSTS_CFS | NVME_CSTS_RDY)) != NVME_CSTS_RDY; +} + +static int fun_wait_for_tag(struct fun_dev *fdev, int *cpup) +{ + struct sbitmap_queue *sbq = &fdev->admin_sbq; + struct sbq_wait_state *ws = &sbq->ws[0]; + DEFINE_SBQ_WAIT(wait); + int tag; + + for (;;) { + sbitmap_prepare_to_wait(sbq, ws, &wait, TASK_UNINTERRUPTIBLE); + if (fdev->suppress_cmds) { + tag = -ESHUTDOWN; + break; + } + tag = sbitmap_queue_get(sbq, cpup); + if (tag >= 0) + break; + schedule(); + } + + sbitmap_finish_wait(sbq, ws, &wait); + return tag; +} + +/* Submit an asynchronous admin command. Caller is responsible for implementing + * any waiting or timeout. Upon command completion the callback @cb is called. + */ +int fun_submit_admin_cmd(struct fun_dev *fdev, struct fun_admin_req_common *cmd, + fun_admin_callback_t cb, void *cb_data, bool wait_ok) +{ + struct fun_queue *funq = fdev->admin_q; + unsigned int cmdsize = cmd->len8 * 8; + struct fun_cmd_ctx *cmd_ctx; + int tag, cpu, rc = 0; + + if (WARN_ON(cmdsize > (1 << funq->sqe_size_log2))) + return -EMSGSIZE; + + tag = sbitmap_queue_get(&fdev->admin_sbq, &cpu); + if (tag < 0) { + if (!wait_ok) + return -EAGAIN; + tag = fun_wait_for_tag(fdev, &cpu); + if (tag < 0) + return tag; + } + + cmd->cid = cpu_to_be16(tag); + + cmd_ctx = &fdev->cmd_ctx[tag]; + cmd_ctx->cb = cb; + cmd_ctx->cb_data = cb_data; + + spin_lock(&funq->sq_lock); + + if (unlikely(fdev->suppress_cmds)) { + rc = -ESHUTDOWN; + sbitmap_queue_clear(&fdev->admin_sbq, tag, cpu); + } else { + cmd_ctx->cpu = cpu; + memcpy(fun_sqe_at(funq, funq->sq_tail), cmd, cmdsize); + + dev_dbg(fdev->dev, "admin cmd @ %u: %8ph\n", funq->sq_tail, + cmd); + + if (++funq->sq_tail == funq->sq_depth) + funq->sq_tail = 0; + writel(funq->sq_tail, funq->sq_db); + } + spin_unlock(&funq->sq_lock); + return rc; +} + +/* Abandon a pending admin command by clearing the issuer's callback data. + * Failure indicates that the command either has already completed or its + * completion is racing with this call. + */ +static bool fun_abandon_admin_cmd(struct fun_dev *fd, + const struct fun_admin_req_common *cmd, + void *cb_data) +{ + u16 cid = be16_to_cpu(cmd->cid); + struct fun_cmd_ctx *cmd_ctx = &fd->cmd_ctx[cid]; + + return cmpxchg(&cmd_ctx->cb_data, cb_data, NULL) == cb_data; +} + +/* Stop submission of new admin commands and wake up any processes waiting for + * tags. Already submitted commands are left to complete or time out. + */ +static void fun_admin_stop(struct fun_dev *fdev) +{ + spin_lock(&fdev->admin_q->sq_lock); + fdev->suppress_cmds = true; + spin_unlock(&fdev->admin_q->sq_lock); + sbitmap_queue_wake_all(&fdev->admin_sbq); +} + +/* The callback for synchronous execution of admin commands. It copies the + * command response to the caller's buffer and signals completion. + */ +static void fun_admin_cmd_sync_cb(struct fun_dev *fd, void *rsp, void *cb_data) +{ + const struct fun_admin_rsp_common *rsp_common = rsp; + struct fun_sync_cmd_ctx *ctx = cb_data; + + if (!ctx) + return; /* command issuer timed out and left */ + if (ctx->rsp_buf) { + unsigned int rsp_len = rsp_common->len8 * 8; + + if (unlikely(rsp_len > ctx->rsp_len)) { + dev_err(fd->dev, + "response for op %u is %uB > response buffer %uB\n", + rsp_common->op, rsp_len, ctx->rsp_len); + rsp_len = ctx->rsp_len; + } + memcpy(ctx->rsp_buf, rsp, rsp_len); + } + ctx->rsp_status = rsp_common->ret; + complete(&ctx->compl); +} + +/* Submit a synchronous admin command. */ +int fun_submit_admin_sync_cmd(struct fun_dev *fdev, + struct fun_admin_req_common *cmd, void *rsp, + size_t rspsize, unsigned int timeout) +{ + struct fun_sync_cmd_ctx ctx = { + .compl = COMPLETION_INITIALIZER_ONSTACK(ctx.compl), + .rsp_buf = rsp, + .rsp_len = rspsize, + }; + unsigned int cmdlen = cmd->len8 * 8; + unsigned long jiffies_left; + int ret; + + ret = fun_submit_admin_cmd(fdev, cmd, fun_admin_cmd_sync_cb, &ctx, + true); + if (ret) + return ret; + + if (!timeout) + timeout = FUN_ADMIN_CMD_TO_MS; + + jiffies_left = wait_for_completion_timeout(&ctx.compl, + msecs_to_jiffies(timeout)); + if (!jiffies_left) { + /* The command timed out. Attempt to cancel it so we can return. + * But if the command is in the process of completing we'll + * wait for it. + */ + if (fun_abandon_admin_cmd(fdev, cmd, &ctx)) { + dev_err(fdev->dev, "admin command timed out: %*ph\n", + cmdlen, cmd); + fun_admin_stop(fdev); + /* see if the timeout was due to a queue failure */ + if (fun_adminq_stopped(fdev)) + dev_err(fdev->dev, + "device does not accept admin commands\n"); + + return -ETIMEDOUT; + } + wait_for_completion(&ctx.compl); + } + + if (ctx.rsp_status) { + dev_err(fdev->dev, "admin command failed, err %d: %*ph\n", + ctx.rsp_status, cmdlen, cmd); + } + + return -ctx.rsp_status; +} +EXPORT_SYMBOL_GPL(fun_submit_admin_sync_cmd); + +/* Return the number of device resources of the requested type. */ +int fun_get_res_count(struct fun_dev *fdev, enum fun_admin_op res) +{ + union { + struct fun_admin_res_count_req req; + struct fun_admin_res_count_rsp rsp; + } cmd; + int rc; + + cmd.req.common = FUN_ADMIN_REQ_COMMON_INIT2(res, sizeof(cmd.req)); + cmd.req.count = FUN_ADMIN_SIMPLE_SUBOP_INIT(FUN_ADMIN_SUBOP_RES_COUNT, + 0, 0); + + rc = fun_submit_admin_sync_cmd(fdev, &cmd.req.common, &cmd.rsp, + sizeof(cmd), 0); + return rc ? rc : be32_to_cpu(cmd.rsp.count.data); +} +EXPORT_SYMBOL_GPL(fun_get_res_count); + +/* Request that the instance of resource @res with the given id be deleted. */ +int fun_res_destroy(struct fun_dev *fdev, enum fun_admin_op res, + unsigned int flags, u32 id) +{ + struct fun_admin_generic_destroy_req req = { + .common = FUN_ADMIN_REQ_COMMON_INIT2(res, sizeof(req)), + .destroy = FUN_ADMIN_SIMPLE_SUBOP_INIT(FUN_ADMIN_SUBOP_DESTROY, + flags, id) + }; + + return fun_submit_admin_sync_cmd(fdev, &req.common, NULL, 0, 0); +} +EXPORT_SYMBOL_GPL(fun_res_destroy); + +/* Bind two entities of the given types and IDs. */ +int fun_bind(struct fun_dev *fdev, enum fun_admin_bind_type type0, + unsigned int id0, enum fun_admin_bind_type type1, + unsigned int id1) +{ + struct { + struct fun_admin_bind_req req; + struct fun_admin_bind_entry entry[2]; + } cmd = { + .req.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_BIND, + sizeof(cmd)), + .entry[0] = FUN_ADMIN_BIND_ENTRY_INIT(type0, id0), + .entry[1] = FUN_ADMIN_BIND_ENTRY_INIT(type1, id1), + }; + + return fun_submit_admin_sync_cmd(fdev, &cmd.req.common, NULL, 0, 0); +} +EXPORT_SYMBOL_GPL(fun_bind); + +static int fun_get_dev_limits(struct fun_dev *fdev) +{ + struct pci_dev *pdev = to_pci_dev(fdev->dev); + unsigned int cq_count, sq_count, num_dbs; + int rc; + + rc = fun_get_res_count(fdev, FUN_ADMIN_OP_EPCQ); + if (rc < 0) + return rc; + cq_count = rc; + + rc = fun_get_res_count(fdev, FUN_ADMIN_OP_EPSQ); + if (rc < 0) + return rc; + sq_count = rc; + + /* The admin queue consumes 1 CQ and at least 1 SQ. To be usable the + * device must provide additional queues. + */ + if (cq_count < 2 || sq_count < 2 + !!fdev->admin_q->rq_depth) + return -EINVAL; + + /* Calculate the max QID based on SQ/CQ/doorbell counts. + * SQ/CQ doorbells alternate. + */ + num_dbs = (pci_resource_len(pdev, 0) - NVME_REG_DBS) >> + (2 + NVME_CAP_STRIDE(fdev->cap_reg)); + fdev->max_qid = min3(cq_count, sq_count, num_dbs / 2) - 1; + fdev->kern_end_qid = fdev->max_qid + 1; + return 0; +} + +/* Allocate all MSI-X vectors available on a function and at least @min_vecs. */ +static int fun_alloc_irqs(struct pci_dev *pdev, unsigned int min_vecs) +{ + int vecs, num_msix = pci_msix_vec_count(pdev); + + if (num_msix < 0) + return num_msix; + if (min_vecs > num_msix) + return -ERANGE; + + vecs = pci_alloc_irq_vectors(pdev, min_vecs, num_msix, PCI_IRQ_MSIX); + if (vecs > 0) { + dev_info(&pdev->dev, + "Allocated %d IRQ vectors of %d requested\n", + vecs, num_msix); + } else { + dev_err(&pdev->dev, + "Unable to allocate at least %u IRQ vectors\n", + min_vecs); + } + return vecs; +} + +/* Allocate and initialize the IRQ manager state. */ +static int fun_alloc_irq_mgr(struct fun_dev *fdev) +{ + fdev->irq_map = bitmap_zalloc(fdev->num_irqs, GFP_KERNEL); + if (!fdev->irq_map) + return -ENOMEM; + + spin_lock_init(&fdev->irqmgr_lock); + /* mark IRQ 0 allocated, it is used by the admin queue */ + __set_bit(0, fdev->irq_map); + fdev->irqs_avail = fdev->num_irqs - 1; + return 0; +} + +/* Reserve @nirqs of the currently available IRQs and return their indices. */ +int fun_reserve_irqs(struct fun_dev *fdev, unsigned int nirqs, u16 *irq_indices) +{ + unsigned int b, n = 0; + int err = -ENOSPC; + + if (!nirqs) + return 0; + + spin_lock(&fdev->irqmgr_lock); + if (nirqs > fdev->irqs_avail) + goto unlock; + + for_each_clear_bit(b, fdev->irq_map, fdev->num_irqs) { + __set_bit(b, fdev->irq_map); + irq_indices[n++] = b; + if (n >= nirqs) + break; + } + + WARN_ON(n < nirqs); + fdev->irqs_avail -= n; + err = n; +unlock: + spin_unlock(&fdev->irqmgr_lock); + return err; +} +EXPORT_SYMBOL(fun_reserve_irqs); + +/* Release @nirqs previously allocated IRQS with the supplied indices. */ +void fun_release_irqs(struct fun_dev *fdev, unsigned int nirqs, + u16 *irq_indices) +{ + unsigned int i; + + spin_lock(&fdev->irqmgr_lock); + for (i = 0; i < nirqs; i++) + __clear_bit(irq_indices[i], fdev->irq_map); + fdev->irqs_avail += nirqs; + spin_unlock(&fdev->irqmgr_lock); +} +EXPORT_SYMBOL(fun_release_irqs); + +static void fun_serv_handler(struct work_struct *work) +{ + struct fun_dev *fd = container_of(work, struct fun_dev, service_task); + + if (test_bit(FUN_SERV_DISABLED, &fd->service_flags)) + return; + if (fd->serv_cb) + fd->serv_cb(fd); +} + +void fun_serv_stop(struct fun_dev *fd) +{ + set_bit(FUN_SERV_DISABLED, &fd->service_flags); + cancel_work_sync(&fd->service_task); +} +EXPORT_SYMBOL_GPL(fun_serv_stop); + +void fun_serv_restart(struct fun_dev *fd) +{ + clear_bit(FUN_SERV_DISABLED, &fd->service_flags); + if (fd->service_flags) + schedule_work(&fd->service_task); +} +EXPORT_SYMBOL_GPL(fun_serv_restart); + +void fun_serv_sched(struct fun_dev *fd) +{ + if (!test_bit(FUN_SERV_DISABLED, &fd->service_flags)) + schedule_work(&fd->service_task); +} +EXPORT_SYMBOL_GPL(fun_serv_sched); + +/* Check and try to get the device into a proper state for initialization, + * i.e., CSTS.RDY = CC.EN = 0. + */ +static int sanitize_dev(struct fun_dev *fdev) +{ + int rc; + + fdev->cap_reg = readq(fdev->bar + NVME_REG_CAP); + fdev->cc_reg = readl(fdev->bar + NVME_REG_CC); + + /* First get RDY to agree with the current EN. Give RDY the opportunity + * to complete a potential recent EN change. + */ + rc = fun_wait_ready(fdev, fdev->cc_reg & NVME_CC_ENABLE); + if (rc) + return rc; + + /* Next, reset the device if EN is currently 1. */ + if (fdev->cc_reg & NVME_CC_ENABLE) + rc = fun_disable_ctrl(fdev); + + return rc; +} + +/* Undo the device initialization of fun_dev_enable(). */ +void fun_dev_disable(struct fun_dev *fdev) +{ + struct pci_dev *pdev = to_pci_dev(fdev->dev); + + pci_set_drvdata(pdev, NULL); + + if (fdev->fw_handle != FUN_HCI_ID_INVALID) { + fun_res_destroy(fdev, FUN_ADMIN_OP_SWUPGRADE, 0, + fdev->fw_handle); + fdev->fw_handle = FUN_HCI_ID_INVALID; + } + + fun_disable_admin_queue(fdev); + + bitmap_free(fdev->irq_map); + pci_free_irq_vectors(pdev); + + pci_clear_master(pdev); + pci_disable_pcie_error_reporting(pdev); + pci_disable_device(pdev); + + fun_unmap_bars(fdev); +} +EXPORT_SYMBOL(fun_dev_disable); + +/* Perform basic initialization of a device, including + * - PCI config space setup and BAR0 mapping + * - interrupt management initialization + * - 1 admin queue setup + * - determination of some device limits, such as number of queues. + */ +int fun_dev_enable(struct fun_dev *fdev, struct pci_dev *pdev, + const struct fun_dev_params *areq, const char *name) +{ + int rc; + + fdev->dev = &pdev->dev; + rc = fun_map_bars(fdev, name); + if (rc) + return rc; + + rc = fun_set_dma_masks(fdev->dev); + if (rc) + goto unmap; + + rc = pci_enable_device_mem(pdev); + if (rc) { + dev_err(&pdev->dev, "Couldn't enable device, err %d\n", rc); + goto unmap; + } + + pci_enable_pcie_error_reporting(pdev); + + rc = sanitize_dev(fdev); + if (rc) + goto disable_dev; + + fdev->fw_handle = FUN_HCI_ID_INVALID; + fdev->q_depth = NVME_CAP_MQES(fdev->cap_reg) + 1; + fdev->db_stride = 1 << NVME_CAP_STRIDE(fdev->cap_reg); + fdev->dbs = fdev->bar + NVME_REG_DBS; + + INIT_WORK(&fdev->service_task, fun_serv_handler); + fdev->service_flags = FUN_SERV_DISABLED; + fdev->serv_cb = areq->serv_cb; + + rc = fun_alloc_irqs(pdev, areq->min_msix + 1); /* +1 for admin CQ */ + if (rc < 0) + goto disable_dev; + fdev->num_irqs = rc; + + rc = fun_alloc_irq_mgr(fdev); + if (rc) + goto free_irqs; + + pci_set_master(pdev); + rc = fun_enable_admin_queue(fdev, areq); + if (rc) + goto free_irq_mgr; + + rc = fun_get_dev_limits(fdev); + if (rc < 0) + goto disable_admin; + + pci_save_state(pdev); + pci_set_drvdata(pdev, fdev); + pcie_print_link_status(pdev); + dev_dbg(fdev->dev, "q_depth %u, db_stride %u, max qid %d kern_end_qid %d\n", + fdev->q_depth, fdev->db_stride, fdev->max_qid, + fdev->kern_end_qid); + return 0; + +disable_admin: + fun_disable_admin_queue(fdev); +free_irq_mgr: + pci_clear_master(pdev); + bitmap_free(fdev->irq_map); +free_irqs: + pci_free_irq_vectors(pdev); +disable_dev: + pci_disable_pcie_error_reporting(pdev); + pci_disable_device(pdev); +unmap: + fun_unmap_bars(fdev); + return rc; +} +EXPORT_SYMBOL(fun_dev_enable); + +MODULE_AUTHOR("Dimitris Michailidis <dmichail@fungible.com>"); +MODULE_DESCRIPTION("Core services driver for Fungible devices"); +MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/net/ethernet/fungible/funcore/fun_dev.h b/drivers/net/ethernet/fungible/funcore/fun_dev.h new file mode 100644 index 000000000..9e8c17ce8 --- /dev/null +++ b/drivers/net/ethernet/fungible/funcore/fun_dev.h @@ -0,0 +1,150 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ + +#ifndef _FUNDEV_H +#define _FUNDEV_H + +#include <linux/sbitmap.h> +#include <linux/spinlock_types.h> +#include <linux/workqueue.h> +#include "fun_hci.h" + +struct pci_dev; +struct fun_dev; +struct fun_queue; +struct fun_cmd_ctx; +struct fun_queue_alloc_req; + +/* doorbell fields */ +enum { + FUN_DB_QIDX_S = 0, + FUN_DB_INTCOAL_ENTRIES_S = 16, + FUN_DB_INTCOAL_ENTRIES_M = 0x7f, + FUN_DB_INTCOAL_USEC_S = 23, + FUN_DB_INTCOAL_USEC_M = 0x7f, + FUN_DB_IRQ_S = 30, + FUN_DB_IRQ_F = 1 << FUN_DB_IRQ_S, + FUN_DB_IRQ_ARM_S = 31, + FUN_DB_IRQ_ARM_F = 1U << FUN_DB_IRQ_ARM_S +}; + +/* Callback for asynchronous admin commands. + * Invoked on reception of command response. + */ +typedef void (*fun_admin_callback_t)(struct fun_dev *fdev, void *rsp, + void *cb_data); + +/* Callback for events/notifications received by an admin queue. */ +typedef void (*fun_admin_event_cb)(struct fun_dev *fdev, void *cqe); + +/* Callback for pending work handled by the service task. */ +typedef void (*fun_serv_cb)(struct fun_dev *fd); + +/* service task flags */ +enum { + FUN_SERV_DISABLED, /* service task is disabled */ + FUN_SERV_FIRST_AVAIL +}; + +/* Driver state associated with a PCI function. */ +struct fun_dev { + struct device *dev; + + void __iomem *bar; /* start of BAR0 mapping */ + u32 __iomem *dbs; /* start of doorbells in BAR0 mapping */ + + /* admin queue */ + struct fun_queue *admin_q; + struct sbitmap_queue admin_sbq; + struct fun_cmd_ctx *cmd_ctx; + fun_admin_event_cb adminq_cb; + bool suppress_cmds; /* if set don't write commands to SQ */ + + /* address increment between consecutive doorbells, in 4B units */ + unsigned int db_stride; + + /* SW versions of device registers */ + u32 cc_reg; /* CC register */ + u64 cap_reg; /* CAPability register */ + + unsigned int q_depth; /* max queue depth supported by device */ + unsigned int max_qid; /* = #queues - 1, separately for SQs and CQs */ + unsigned int kern_end_qid; /* last qid in the kernel range + 1 */ + + unsigned int fw_handle; + + /* IRQ manager */ + unsigned int num_irqs; + unsigned int irqs_avail; + spinlock_t irqmgr_lock; + unsigned long *irq_map; + + /* The service task handles work that needs a process context */ + struct work_struct service_task; + unsigned long service_flags; + fun_serv_cb serv_cb; +}; + +struct fun_dev_params { + u8 cqe_size_log2; /* admin q CQE size */ + u8 sqe_size_log2; /* admin q SQE size */ + + /* admin q depths */ + u16 cq_depth; + u16 sq_depth; + u16 rq_depth; + + u16 min_msix; /* min vectors needed by requesting driver */ + + fun_admin_event_cb event_cb; + fun_serv_cb serv_cb; +}; + +/* Return the BAR address of a doorbell. */ +static inline u32 __iomem *fun_db_addr(const struct fun_dev *fdev, + unsigned int db_index) +{ + return &fdev->dbs[db_index * fdev->db_stride]; +} + +/* Return the BAR address of an SQ doorbell. SQ and CQ DBs alternate, + * SQs have even DB indices. + */ +static inline u32 __iomem *fun_sq_db_addr(const struct fun_dev *fdev, + unsigned int sqid) +{ + return fun_db_addr(fdev, sqid * 2); +} + +static inline u32 __iomem *fun_cq_db_addr(const struct fun_dev *fdev, + unsigned int cqid) +{ + return fun_db_addr(fdev, cqid * 2 + 1); +} + +int fun_get_res_count(struct fun_dev *fdev, enum fun_admin_op res); +int fun_res_destroy(struct fun_dev *fdev, enum fun_admin_op res, + unsigned int flags, u32 id); +int fun_bind(struct fun_dev *fdev, enum fun_admin_bind_type type0, + unsigned int id0, enum fun_admin_bind_type type1, + unsigned int id1); + +int fun_submit_admin_cmd(struct fun_dev *fdev, struct fun_admin_req_common *cmd, + fun_admin_callback_t cb, void *cb_data, bool wait_ok); +int fun_submit_admin_sync_cmd(struct fun_dev *fdev, + struct fun_admin_req_common *cmd, void *rsp, + size_t rspsize, unsigned int timeout); + +int fun_dev_enable(struct fun_dev *fdev, struct pci_dev *pdev, + const struct fun_dev_params *areq, const char *name); +void fun_dev_disable(struct fun_dev *fdev); + +int fun_reserve_irqs(struct fun_dev *fdev, unsigned int nirqs, + u16 *irq_indices); +void fun_release_irqs(struct fun_dev *fdev, unsigned int nirqs, + u16 *irq_indices); + +void fun_serv_stop(struct fun_dev *fd); +void fun_serv_restart(struct fun_dev *fd); +void fun_serv_sched(struct fun_dev *fd); + +#endif /* _FUNDEV_H */ diff --git a/drivers/net/ethernet/fungible/funcore/fun_hci.h b/drivers/net/ethernet/fungible/funcore/fun_hci.h new file mode 100644 index 000000000..f21819670 --- /dev/null +++ b/drivers/net/ethernet/fungible/funcore/fun_hci.h @@ -0,0 +1,1242 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ + +#ifndef __FUN_HCI_H +#define __FUN_HCI_H + +enum { + FUN_HCI_ID_INVALID = 0xffffffff, +}; + +enum fun_admin_op { + FUN_ADMIN_OP_BIND = 0x1, + FUN_ADMIN_OP_EPCQ = 0x11, + FUN_ADMIN_OP_EPSQ = 0x12, + FUN_ADMIN_OP_PORT = 0x13, + FUN_ADMIN_OP_ETH = 0x14, + FUN_ADMIN_OP_VI = 0x15, + FUN_ADMIN_OP_SWUPGRADE = 0x1f, + FUN_ADMIN_OP_RSS = 0x21, + FUN_ADMIN_OP_ADI = 0x25, + FUN_ADMIN_OP_KTLS = 0x26, +}; + +enum { + FUN_REQ_COMMON_FLAG_RSP = 0x1, + FUN_REQ_COMMON_FLAG_HEAD_WB = 0x2, + FUN_REQ_COMMON_FLAG_INT = 0x4, + FUN_REQ_COMMON_FLAG_CQE_IN_RQBUF = 0x8, +}; + +struct fun_admin_req_common { + __u8 op; + __u8 len8; + __be16 flags; + __u8 suboff8; + __u8 rsvd0; + __be16 cid; +}; + +#define FUN_ADMIN_REQ_COMMON_INIT(_op, _len8, _flags, _suboff8, _cid) \ + (struct fun_admin_req_common) { \ + .op = (_op), .len8 = (_len8), .flags = cpu_to_be16(_flags), \ + .suboff8 = (_suboff8), .cid = cpu_to_be16(_cid), \ + } + +#define FUN_ADMIN_REQ_COMMON_INIT2(_op, _len) \ + (struct fun_admin_req_common) { \ + .op = (_op), .len8 = (_len) / 8, \ + } + +struct fun_admin_rsp_common { + __u8 op; + __u8 len8; + __be16 flags; + __u8 suboff8; + __u8 ret; + __be16 cid; +}; + +struct fun_admin_write48_req { + __be64 key_to_data; +}; + +#define FUN_ADMIN_WRITE48_REQ_KEY_S 56U +#define FUN_ADMIN_WRITE48_REQ_KEY_M 0xff +#define FUN_ADMIN_WRITE48_REQ_KEY_P_NOSWAP(x) \ + (((__u64)x) << FUN_ADMIN_WRITE48_REQ_KEY_S) + +#define FUN_ADMIN_WRITE48_REQ_DATA_S 0U +#define FUN_ADMIN_WRITE48_REQ_DATA_M 0xffffffffffff +#define FUN_ADMIN_WRITE48_REQ_DATA_P_NOSWAP(x) \ + (((__u64)x) << FUN_ADMIN_WRITE48_REQ_DATA_S) + +#define FUN_ADMIN_WRITE48_REQ_INIT(key, data) \ + (struct fun_admin_write48_req) { \ + .key_to_data = cpu_to_be64( \ + FUN_ADMIN_WRITE48_REQ_KEY_P_NOSWAP(key) | \ + FUN_ADMIN_WRITE48_REQ_DATA_P_NOSWAP(data)), \ + } + +struct fun_admin_write48_rsp { + __be64 key_to_data; +}; + +struct fun_admin_read48_req { + __be64 key_pack; +}; + +#define FUN_ADMIN_READ48_REQ_KEY_S 56U +#define FUN_ADMIN_READ48_REQ_KEY_M 0xff +#define FUN_ADMIN_READ48_REQ_KEY_P_NOSWAP(x) \ + (((__u64)x) << FUN_ADMIN_READ48_REQ_KEY_S) + +#define FUN_ADMIN_READ48_REQ_INIT(key) \ + (struct fun_admin_read48_req) { \ + .key_pack = \ + cpu_to_be64(FUN_ADMIN_READ48_REQ_KEY_P_NOSWAP(key)), \ + } + +struct fun_admin_read48_rsp { + __be64 key_to_data; +}; + +#define FUN_ADMIN_READ48_RSP_KEY_S 56U +#define FUN_ADMIN_READ48_RSP_KEY_M 0xff +#define FUN_ADMIN_READ48_RSP_KEY_G(x) \ + ((be64_to_cpu(x) >> FUN_ADMIN_READ48_RSP_KEY_S) & \ + FUN_ADMIN_READ48_RSP_KEY_M) + +#define FUN_ADMIN_READ48_RSP_RET_S 48U +#define FUN_ADMIN_READ48_RSP_RET_M 0xff +#define FUN_ADMIN_READ48_RSP_RET_G(x) \ + ((be64_to_cpu(x) >> FUN_ADMIN_READ48_RSP_RET_S) & \ + FUN_ADMIN_READ48_RSP_RET_M) + +#define FUN_ADMIN_READ48_RSP_DATA_S 0U +#define FUN_ADMIN_READ48_RSP_DATA_M 0xffffffffffff +#define FUN_ADMIN_READ48_RSP_DATA_G(x) \ + ((be64_to_cpu(x) >> FUN_ADMIN_READ48_RSP_DATA_S) & \ + FUN_ADMIN_READ48_RSP_DATA_M) + +enum fun_admin_bind_type { + FUN_ADMIN_BIND_TYPE_EPCQ = 0x1, + FUN_ADMIN_BIND_TYPE_EPSQ = 0x2, + FUN_ADMIN_BIND_TYPE_PORT = 0x3, + FUN_ADMIN_BIND_TYPE_RSS = 0x4, + FUN_ADMIN_BIND_TYPE_VI = 0x5, + FUN_ADMIN_BIND_TYPE_ETH = 0x6, +}; + +struct fun_admin_bind_entry { + __u8 type; + __u8 rsvd0[3]; + __be32 id; +}; + +#define FUN_ADMIN_BIND_ENTRY_INIT(_type, _id) \ + (struct fun_admin_bind_entry) { \ + .type = (_type), .id = cpu_to_be32(_id), \ + } + +struct fun_admin_bind_req { + struct fun_admin_req_common common; + struct fun_admin_bind_entry entry[]; +}; + +struct fun_admin_bind_rsp { + struct fun_admin_rsp_common bind_rsp_common; +}; + +struct fun_admin_simple_subop { + __u8 subop; + __u8 rsvd0; + __be16 flags; + __be32 data; +}; + +#define FUN_ADMIN_SIMPLE_SUBOP_INIT(_subop, _flags, _data) \ + (struct fun_admin_simple_subop) { \ + .subop = (_subop), .flags = cpu_to_be16(_flags), \ + .data = cpu_to_be32(_data), \ + } + +enum fun_admin_subop { + FUN_ADMIN_SUBOP_CREATE = 0x10, + FUN_ADMIN_SUBOP_DESTROY = 0x11, + FUN_ADMIN_SUBOP_MODIFY = 0x12, + FUN_ADMIN_SUBOP_RES_COUNT = 0x14, + FUN_ADMIN_SUBOP_READ = 0x15, + FUN_ADMIN_SUBOP_WRITE = 0x16, + FUN_ADMIN_SUBOP_NOTIFY = 0x17, +}; + +enum { + FUN_ADMIN_RES_CREATE_FLAG_ALLOCATOR = 0x1, +}; + +struct fun_admin_generic_destroy_req { + struct fun_admin_req_common common; + struct fun_admin_simple_subop destroy; +}; + +struct fun_admin_generic_create_rsp { + struct fun_admin_rsp_common common; + + __u8 subop; + __u8 rsvd0; + __be16 flags; + __be32 id; +}; + +struct fun_admin_res_count_req { + struct fun_admin_req_common common; + struct fun_admin_simple_subop count; +}; + +struct fun_admin_res_count_rsp { + struct fun_admin_rsp_common common; + struct fun_admin_simple_subop count; +}; + +enum { + FUN_ADMIN_EPCQ_CREATE_FLAG_INT_EPCQ = 0x2, + FUN_ADMIN_EPCQ_CREATE_FLAG_ENTRY_WR_TPH = 0x4, + FUN_ADMIN_EPCQ_CREATE_FLAG_SL_WR_TPH = 0x8, + FUN_ADMIN_EPCQ_CREATE_FLAG_RQ = 0x80, + FUN_ADMIN_EPCQ_CREATE_FLAG_INT_IQ = 0x100, + FUN_ADMIN_EPCQ_CREATE_FLAG_INT_NOARM = 0x200, + FUN_ADMIN_EPCQ_CREATE_FLAG_DROP_ON_OVERFLOW = 0x400, +}; + +struct fun_admin_epcq_req { + struct fun_admin_req_common common; + union epcq_req_subop { + struct fun_admin_epcq_create_req { + __u8 subop; + __u8 rsvd0; + __be16 flags; + __be32 id; + + __be32 epsqid; + __u8 rsvd1; + __u8 entry_size_log2; + __be16 nentries; + + __be64 address; + + __be16 tailroom; /* per packet tailroom in bytes */ + __u8 headroom; /* per packet headroom in 2B units */ + __u8 intcoal_kbytes; + __u8 intcoal_holdoff_nentries; + __u8 intcoal_holdoff_usecs; + __be16 intid; + + __be32 scan_start_id; + __be32 scan_end_id; + + __be16 tph_cpuid; + __u8 rsvd3[6]; + } create; + + struct fun_admin_epcq_modify_req { + __u8 subop; + __u8 rsvd0; + __be16 flags; + __be32 id; + + __be16 headroom; /* headroom in bytes */ + __u8 rsvd1[6]; + } modify; + } u; +}; + +#define FUN_ADMIN_EPCQ_CREATE_REQ_INIT( \ + _subop, _flags, _id, _epsqid, _entry_size_log2, _nentries, _address, \ + _tailroom, _headroom, _intcoal_kbytes, _intcoal_holdoff_nentries, \ + _intcoal_holdoff_usecs, _intid, _scan_start_id, _scan_end_id, \ + _tph_cpuid) \ + (struct fun_admin_epcq_create_req) { \ + .subop = (_subop), .flags = cpu_to_be16(_flags), \ + .id = cpu_to_be32(_id), .epsqid = cpu_to_be32(_epsqid), \ + .entry_size_log2 = _entry_size_log2, \ + .nentries = cpu_to_be16(_nentries), \ + .address = cpu_to_be64(_address), \ + .tailroom = cpu_to_be16(_tailroom), .headroom = _headroom, \ + .intcoal_kbytes = _intcoal_kbytes, \ + .intcoal_holdoff_nentries = _intcoal_holdoff_nentries, \ + .intcoal_holdoff_usecs = _intcoal_holdoff_usecs, \ + .intid = cpu_to_be16(_intid), \ + .scan_start_id = cpu_to_be32(_scan_start_id), \ + .scan_end_id = cpu_to_be32(_scan_end_id), \ + .tph_cpuid = cpu_to_be16(_tph_cpuid), \ + } + +#define FUN_ADMIN_EPCQ_MODIFY_REQ_INIT(_subop, _flags, _id, _headroom) \ + (struct fun_admin_epcq_modify_req) { \ + .subop = (_subop), .flags = cpu_to_be16(_flags), \ + .id = cpu_to_be32(_id), .headroom = cpu_to_be16(_headroom), \ + } + +enum { + FUN_ADMIN_EPSQ_CREATE_FLAG_INT_EPSQ = 0x2, + FUN_ADMIN_EPSQ_CREATE_FLAG_ENTRY_RD_TPH = 0x4, + FUN_ADMIN_EPSQ_CREATE_FLAG_GL_RD_TPH = 0x8, + FUN_ADMIN_EPSQ_CREATE_FLAG_HEAD_WB_ADDRESS = 0x10, + FUN_ADMIN_EPSQ_CREATE_FLAG_HEAD_WB_ADDRESS_TPH = 0x20, + FUN_ADMIN_EPSQ_CREATE_FLAG_HEAD_WB_EPCQ = 0x40, + FUN_ADMIN_EPSQ_CREATE_FLAG_RQ = 0x80, + FUN_ADMIN_EPSQ_CREATE_FLAG_INT_IQ = 0x100, + FUN_ADMIN_EPSQ_CREATE_FLAG_NO_CMPL = 0x200, +}; + +struct fun_admin_epsq_req { + struct fun_admin_req_common common; + + union epsq_req_subop { + struct fun_admin_epsq_create_req { + __u8 subop; + __u8 rsvd0; + __be16 flags; + __be32 id; + + __be32 epcqid; + __u8 rsvd1; + __u8 entry_size_log2; + __be16 nentries; + + __be64 address; /* DMA address of epsq */ + + __u8 rsvd2[3]; + __u8 intcoal_kbytes; + __u8 intcoal_holdoff_nentries; + __u8 intcoal_holdoff_usecs; + __be16 intid; + + __be32 scan_start_id; + __be32 scan_end_id; + + __u8 rsvd3[4]; + __be16 tph_cpuid; + __u8 buf_size_log2; /* log2 of RQ buffer size */ + __u8 head_wb_size_log2; /* log2 of head write back size */ + + __be64 head_wb_address; /* DMA address for head writeback */ + } create; + } u; +}; + +#define FUN_ADMIN_EPSQ_CREATE_REQ_INIT( \ + _subop, _flags, _id, _epcqid, _entry_size_log2, _nentries, _address, \ + _intcoal_kbytes, _intcoal_holdoff_nentries, _intcoal_holdoff_usecs, \ + _intid, _scan_start_id, _scan_end_id, _tph_cpuid, _buf_size_log2, \ + _head_wb_size_log2, _head_wb_address) \ + (struct fun_admin_epsq_create_req) { \ + .subop = (_subop), .flags = cpu_to_be16(_flags), \ + .id = cpu_to_be32(_id), .epcqid = cpu_to_be32(_epcqid), \ + .entry_size_log2 = _entry_size_log2, \ + .nentries = cpu_to_be16(_nentries), \ + .address = cpu_to_be64(_address), \ + .intcoal_kbytes = _intcoal_kbytes, \ + .intcoal_holdoff_nentries = _intcoal_holdoff_nentries, \ + .intcoal_holdoff_usecs = _intcoal_holdoff_usecs, \ + .intid = cpu_to_be16(_intid), \ + .scan_start_id = cpu_to_be32(_scan_start_id), \ + .scan_end_id = cpu_to_be32(_scan_end_id), \ + .tph_cpuid = cpu_to_be16(_tph_cpuid), \ + .buf_size_log2 = _buf_size_log2, \ + .head_wb_size_log2 = _head_wb_size_log2, \ + .head_wb_address = cpu_to_be64(_head_wb_address), \ + } + +enum { + FUN_PORT_CAP_OFFLOADS = 0x1, + FUN_PORT_CAP_STATS = 0x2, + FUN_PORT_CAP_LOOPBACK = 0x4, + FUN_PORT_CAP_VPORT = 0x8, + FUN_PORT_CAP_TX_PAUSE = 0x10, + FUN_PORT_CAP_RX_PAUSE = 0x20, + FUN_PORT_CAP_AUTONEG = 0x40, + FUN_PORT_CAP_RSS = 0x80, + FUN_PORT_CAP_VLAN_OFFLOADS = 0x100, + FUN_PORT_CAP_ENCAP_OFFLOADS = 0x200, + FUN_PORT_CAP_1000_X = 0x1000, + FUN_PORT_CAP_10G_R = 0x2000, + FUN_PORT_CAP_40G_R4 = 0x4000, + FUN_PORT_CAP_25G_R = 0x8000, + FUN_PORT_CAP_50G_R2 = 0x10000, + FUN_PORT_CAP_50G_R = 0x20000, + FUN_PORT_CAP_100G_R4 = 0x40000, + FUN_PORT_CAP_100G_R2 = 0x80000, + FUN_PORT_CAP_200G_R4 = 0x100000, + FUN_PORT_CAP_FEC_NONE = 0x10000000, + FUN_PORT_CAP_FEC_FC = 0x20000000, + FUN_PORT_CAP_FEC_RS = 0x40000000, +}; + +enum fun_port_brkout_mode { + FUN_PORT_BRKMODE_NA = 0x0, + FUN_PORT_BRKMODE_NONE = 0x1, + FUN_PORT_BRKMODE_2X = 0x2, + FUN_PORT_BRKMODE_4X = 0x3, +}; + +enum { + FUN_PORT_SPEED_AUTO = 0x0, + FUN_PORT_SPEED_10M = 0x1, + FUN_PORT_SPEED_100M = 0x2, + FUN_PORT_SPEED_1G = 0x4, + FUN_PORT_SPEED_10G = 0x8, + FUN_PORT_SPEED_25G = 0x10, + FUN_PORT_SPEED_40G = 0x20, + FUN_PORT_SPEED_50G = 0x40, + FUN_PORT_SPEED_100G = 0x80, + FUN_PORT_SPEED_200G = 0x100, +}; + +enum fun_port_duplex_mode { + FUN_PORT_FULL_DUPLEX = 0x0, + FUN_PORT_HALF_DUPLEX = 0x1, +}; + +enum { + FUN_PORT_FEC_NA = 0x0, + FUN_PORT_FEC_OFF = 0x1, + FUN_PORT_FEC_RS = 0x2, + FUN_PORT_FEC_FC = 0x4, + FUN_PORT_FEC_AUTO = 0x8, +}; + +enum fun_port_link_status { + FUN_PORT_LINK_UP = 0x0, + FUN_PORT_LINK_UP_WITH_ERR = 0x1, + FUN_PORT_LINK_DOWN = 0x2, +}; + +enum fun_port_led_type { + FUN_PORT_LED_OFF = 0x0, + FUN_PORT_LED_AMBER = 0x1, + FUN_PORT_LED_GREEN = 0x2, + FUN_PORT_LED_BEACON_ON = 0x3, + FUN_PORT_LED_BEACON_OFF = 0x4, +}; + +enum { + FUN_PORT_FLAG_MAC_DOWN = 0x1, + FUN_PORT_FLAG_MAC_UP = 0x2, + FUN_PORT_FLAG_NH_DOWN = 0x4, + FUN_PORT_FLAG_NH_UP = 0x8, +}; + +enum { + FUN_PORT_FLAG_ENABLE_NOTIFY = 0x1, +}; + +enum fun_port_lane_attr { + FUN_PORT_LANE_1 = 0x1, + FUN_PORT_LANE_2 = 0x2, + FUN_PORT_LANE_4 = 0x4, + FUN_PORT_LANE_SPEED_10G = 0x100, + FUN_PORT_LANE_SPEED_25G = 0x200, + FUN_PORT_LANE_SPEED_50G = 0x400, + FUN_PORT_LANE_SPLIT = 0x8000, +}; + +enum fun_admin_port_subop { + FUN_ADMIN_PORT_SUBOP_XCVR_READ = 0x23, + FUN_ADMIN_PORT_SUBOP_INETADDR_EVENT = 0x24, +}; + +enum fun_admin_port_key { + FUN_ADMIN_PORT_KEY_ILLEGAL = 0x0, + FUN_ADMIN_PORT_KEY_MTU = 0x1, + FUN_ADMIN_PORT_KEY_FEC = 0x2, + FUN_ADMIN_PORT_KEY_SPEED = 0x3, + FUN_ADMIN_PORT_KEY_DEBOUNCE = 0x4, + FUN_ADMIN_PORT_KEY_DUPLEX = 0x5, + FUN_ADMIN_PORT_KEY_MACADDR = 0x6, + FUN_ADMIN_PORT_KEY_LINKMODE = 0x7, + FUN_ADMIN_PORT_KEY_BREAKOUT = 0x8, + FUN_ADMIN_PORT_KEY_ENABLE = 0x9, + FUN_ADMIN_PORT_KEY_DISABLE = 0xa, + FUN_ADMIN_PORT_KEY_ERR_DISABLE = 0xb, + FUN_ADMIN_PORT_KEY_CAPABILITIES = 0xc, + FUN_ADMIN_PORT_KEY_LP_CAPABILITIES = 0xd, + FUN_ADMIN_PORT_KEY_STATS_DMA_LOW = 0xe, + FUN_ADMIN_PORT_KEY_STATS_DMA_HIGH = 0xf, + FUN_ADMIN_PORT_KEY_LANE_ATTRS = 0x10, + FUN_ADMIN_PORT_KEY_LED = 0x11, + FUN_ADMIN_PORT_KEY_ADVERT = 0x12, +}; + +struct fun_subop_imm { + __u8 subop; /* see fun_data_subop enum */ + __u8 flags; + __u8 nsgl; + __u8 rsvd0; + __be32 len; + + __u8 data[]; +}; + +enum fun_subop_sgl_flags { + FUN_SUBOP_SGL_USE_OFF8 = 0x1, + FUN_SUBOP_FLAG_FREE_BUF = 0x2, + FUN_SUBOP_FLAG_IS_REFBUF = 0x4, + FUN_SUBOP_SGL_FLAG_LOCAL = 0x8, +}; + +enum fun_data_op { + FUN_DATAOP_INVALID = 0x0, + FUN_DATAOP_SL = 0x1, /* scatter */ + FUN_DATAOP_GL = 0x2, /* gather */ + FUN_DATAOP_SGL = 0x3, /* scatter-gather */ + FUN_DATAOP_IMM = 0x4, /* immediate data */ + FUN_DATAOP_RQBUF = 0x8, /* rq buffer */ +}; + +struct fun_dataop_gl { + __u8 subop; + __u8 flags; + __be16 sgl_off; + __be32 sgl_len; + + __be64 sgl_data; +}; + +static inline void fun_dataop_gl_init(struct fun_dataop_gl *s, u8 flags, + u16 sgl_off, u32 sgl_len, u64 sgl_data) +{ + s->subop = FUN_DATAOP_GL; + s->flags = flags; + s->sgl_off = cpu_to_be16(sgl_off); + s->sgl_len = cpu_to_be32(sgl_len); + s->sgl_data = cpu_to_be64(sgl_data); +} + +struct fun_dataop_imm { + __u8 subop; + __u8 flags; + __be16 rsvd0; + __be32 sgl_len; +}; + +struct fun_subop_sgl { + __u8 subop; + __u8 flags; + __u8 nsgl; + __u8 rsvd0; + __be32 sgl_len; + + __be64 sgl_data; +}; + +#define FUN_SUBOP_SGL_INIT(_subop, _flags, _nsgl, _sgl_len, _sgl_data) \ + (struct fun_subop_sgl) { \ + .subop = (_subop), .flags = (_flags), .nsgl = (_nsgl), \ + .sgl_len = cpu_to_be32(_sgl_len), \ + .sgl_data = cpu_to_be64(_sgl_data), \ + } + +struct fun_dataop_rqbuf { + __u8 subop; + __u8 rsvd0; + __be16 cid; + __be32 bufoff; +}; + +struct fun_dataop_hdr { + __u8 nsgl; + __u8 flags; + __u8 ngather; + __u8 nscatter; + __be32 total_len; + + struct fun_dataop_imm imm[]; +}; + +#define FUN_DATAOP_HDR_INIT(_nsgl, _flags, _ngather, _nscatter, _total_len) \ + (struct fun_dataop_hdr) { \ + .nsgl = _nsgl, .flags = _flags, .ngather = _ngather, \ + .nscatter = _nscatter, .total_len = cpu_to_be32(_total_len), \ + } + +enum fun_port_inetaddr_event_type { + FUN_PORT_INETADDR_ADD = 0x1, + FUN_PORT_INETADDR_DEL = 0x2, +}; + +enum fun_port_inetaddr_addr_family { + FUN_PORT_INETADDR_IPV4 = 0x1, + FUN_PORT_INETADDR_IPV6 = 0x2, +}; + +struct fun_admin_port_req { + struct fun_admin_req_common common; + + union port_req_subop { + struct fun_admin_port_create_req { + __u8 subop; + __u8 rsvd0; + __be16 flags; + __be32 id; + } create; + struct fun_admin_port_write_req { + __u8 subop; + __u8 rsvd0; + __be16 flags; + __be32 id; /* portid */ + + struct fun_admin_write48_req write48[]; + } write; + struct fun_admin_port_read_req { + __u8 subop; + __u8 rsvd0; + __be16 flags; + __be32 id; /* portid */ + + struct fun_admin_read48_req read48[]; + } read; + struct fun_admin_port_xcvr_read_req { + u8 subop; + u8 rsvd0; + __be16 flags; + __be32 id; + + u8 bank; + u8 page; + u8 offset; + u8 length; + u8 dev_addr; + u8 rsvd1[3]; + } xcvr_read; + struct fun_admin_port_inetaddr_event_req { + __u8 subop; + __u8 rsvd0; + __u8 event_type; + __u8 addr_family; + __be32 id; + + __u8 addr[]; + } inetaddr_event; + } u; +}; + +#define FUN_ADMIN_PORT_CREATE_REQ_INIT(_subop, _flags, _id) \ + (struct fun_admin_port_create_req) { \ + .subop = (_subop), .flags = cpu_to_be16(_flags), \ + .id = cpu_to_be32(_id), \ + } + +#define FUN_ADMIN_PORT_WRITE_REQ_INIT(_subop, _flags, _id) \ + (struct fun_admin_port_write_req) { \ + .subop = (_subop), .flags = cpu_to_be16(_flags), \ + .id = cpu_to_be32(_id), \ + } + +#define FUN_ADMIN_PORT_READ_REQ_INIT(_subop, _flags, _id) \ + (struct fun_admin_port_read_req) { \ + .subop = (_subop), .flags = cpu_to_be16(_flags), \ + .id = cpu_to_be32(_id), \ + } + +#define FUN_ADMIN_PORT_XCVR_READ_REQ_INIT(_flags, _id, _bank, _page, \ + _offset, _length, _dev_addr) \ + ((struct fun_admin_port_xcvr_read_req) { \ + .subop = FUN_ADMIN_PORT_SUBOP_XCVR_READ, \ + .flags = cpu_to_be16(_flags), .id = cpu_to_be32(_id), \ + .bank = (_bank), .page = (_page), .offset = (_offset), \ + .length = (_length), .dev_addr = (_dev_addr), \ + }) + +struct fun_admin_port_rsp { + struct fun_admin_rsp_common common; + + union port_rsp_subop { + struct fun_admin_port_create_rsp { + __u8 subop; + __u8 rsvd0[3]; + __be32 id; + + __be16 lport; + __u8 rsvd1[6]; + } create; + struct fun_admin_port_write_rsp { + __u8 subop; + __u8 rsvd0[3]; + __be32 id; /* portid */ + + struct fun_admin_write48_rsp write48[]; + } write; + struct fun_admin_port_read_rsp { + __u8 subop; + __u8 rsvd0[3]; + __be32 id; /* portid */ + + struct fun_admin_read48_rsp read48[]; + } read; + struct fun_admin_port_inetaddr_event_rsp { + __u8 subop; + __u8 rsvd0[3]; + __be32 id; /* portid */ + } inetaddr_event; + } u; +}; + +struct fun_admin_port_xcvr_read_rsp { + struct fun_admin_rsp_common common; + + u8 subop; + u8 rsvd0[3]; + __be32 id; + + u8 bank; + u8 page; + u8 offset; + u8 length; + u8 dev_addr; + u8 rsvd1[3]; + + u8 data[128]; +}; + +enum fun_xcvr_type { + FUN_XCVR_BASET = 0x0, + FUN_XCVR_CU = 0x1, + FUN_XCVR_SMF = 0x2, + FUN_XCVR_MMF = 0x3, + FUN_XCVR_AOC = 0x4, + FUN_XCVR_SFPP = 0x10, /* SFP+ or later */ + FUN_XCVR_QSFPP = 0x11, /* QSFP+ or later */ + FUN_XCVR_QSFPDD = 0x12, /* QSFP-DD */ +}; + +struct fun_admin_port_notif { + struct fun_admin_rsp_common common; + + __u8 subop; + __u8 rsvd0; + __be16 id; + __be32 speed; /* in 10 Mbps units */ + + __u8 link_state; + __u8 missed_events; + __u8 link_down_reason; + __u8 xcvr_type; + __u8 flow_ctrl; + __u8 fec; + __u8 active_lanes; + __u8 rsvd1; + + __be64 advertising; + + __be64 lp_advertising; +}; + +enum fun_eth_rss_const { + FUN_ETH_RSS_MAX_KEY_SIZE = 0x28, + FUN_ETH_RSS_MAX_INDIR_ENT = 0x40, +}; + +enum fun_eth_hash_alg { + FUN_ETH_RSS_ALG_INVALID = 0x0, + FUN_ETH_RSS_ALG_TOEPLITZ = 0x1, + FUN_ETH_RSS_ALG_CRC32 = 0x2, +}; + +struct fun_admin_rss_req { + struct fun_admin_req_common common; + + union rss_req_subop { + struct fun_admin_rss_create_req { + __u8 subop; + __u8 rsvd0; + __be16 flags; + __be32 id; + + __be32 rsvd1; + __be32 viid; /* VI flow id */ + + __be64 metadata[1]; + + __u8 alg; + __u8 keylen; + __u8 indir_nent; + __u8 rsvd2; + __be16 key_off; + __be16 indir_off; + + struct fun_dataop_hdr dataop; + } create; + } u; +}; + +#define FUN_ADMIN_RSS_CREATE_REQ_INIT(_subop, _flags, _id, _viid, _alg, \ + _keylen, _indir_nent, _key_off, \ + _indir_off) \ + (struct fun_admin_rss_create_req) { \ + .subop = (_subop), .flags = cpu_to_be16(_flags), \ + .id = cpu_to_be32(_id), .viid = cpu_to_be32(_viid), \ + .alg = _alg, .keylen = _keylen, .indir_nent = _indir_nent, \ + .key_off = cpu_to_be16(_key_off), \ + .indir_off = cpu_to_be16(_indir_off), \ + } + +struct fun_admin_vi_req { + struct fun_admin_req_common common; + + union vi_req_subop { + struct fun_admin_vi_create_req { + __u8 subop; + __u8 rsvd0; + __be16 flags; + __be32 id; + + __be32 rsvd1; + __be32 portid; /* port flow id */ + } create; + } u; +}; + +#define FUN_ADMIN_VI_CREATE_REQ_INIT(_subop, _flags, _id, _portid) \ + (struct fun_admin_vi_create_req) { \ + .subop = (_subop), .flags = cpu_to_be16(_flags), \ + .id = cpu_to_be32(_id), .portid = cpu_to_be32(_portid), \ + } + +struct fun_admin_eth_req { + struct fun_admin_req_common common; + + union eth_req_subop { + struct fun_admin_eth_create_req { + __u8 subop; + __u8 rsvd0; + __be16 flags; + __be32 id; + + __be32 rsvd1; + __be32 portid; /* port flow id */ + } create; + } u; +}; + +#define FUN_ADMIN_ETH_CREATE_REQ_INIT(_subop, _flags, _id, _portid) \ + (struct fun_admin_eth_create_req) { \ + .subop = (_subop), .flags = cpu_to_be16(_flags), \ + .id = cpu_to_be32(_id), .portid = cpu_to_be32(_portid), \ + } + +enum { + FUN_ADMIN_SWU_UPGRADE_FLAG_INIT = 0x10, + FUN_ADMIN_SWU_UPGRADE_FLAG_COMPLETE = 0x20, + FUN_ADMIN_SWU_UPGRADE_FLAG_DOWNGRADE = 0x40, + FUN_ADMIN_SWU_UPGRADE_FLAG_ACTIVE_IMAGE = 0x80, + FUN_ADMIN_SWU_UPGRADE_FLAG_ASYNC = 0x1, +}; + +enum fun_admin_swu_subop { + FUN_ADMIN_SWU_SUBOP_GET_VERSION = 0x20, + FUN_ADMIN_SWU_SUBOP_UPGRADE = 0x21, + FUN_ADMIN_SWU_SUBOP_UPGRADE_DATA = 0x22, + FUN_ADMIN_SWU_SUBOP_GET_ALL_VERSIONS = 0x23, +}; + +struct fun_admin_swu_req { + struct fun_admin_req_common common; + + union swu_req_subop { + struct fun_admin_swu_create_req { + __u8 subop; + __u8 rsvd0; + __be16 flags; + __be32 id; + } create; + struct fun_admin_swu_upgrade_req { + __u8 subop; + __u8 rsvd0; + __be16 flags; + __be32 id; + + __be32 fourcc; + __be32 rsvd1; + + __be64 image_size; /* upgrade image length */ + } upgrade; + struct fun_admin_swu_upgrade_data_req { + __u8 subop; + __u8 rsvd0; + __be16 flags; + __be32 id; + + __be32 offset; /* offset of data in this command */ + __be32 size; /* total size of data in this command */ + } upgrade_data; + } u; + + struct fun_subop_sgl sgl[]; /* in, out buffers through sgl */ +}; + +#define FUN_ADMIN_SWU_CREATE_REQ_INIT(_subop, _flags, _id) \ + (struct fun_admin_swu_create_req) { \ + .subop = (_subop), .flags = cpu_to_be16(_flags), \ + .id = cpu_to_be32(_id), \ + } + +#define FUN_ADMIN_SWU_UPGRADE_REQ_INIT(_subop, _flags, _id, _fourcc, \ + _image_size) \ + (struct fun_admin_swu_upgrade_req) { \ + .subop = (_subop), .flags = cpu_to_be16(_flags), \ + .id = cpu_to_be32(_id), .fourcc = cpu_to_be32(_fourcc), \ + .image_size = cpu_to_be64(_image_size), \ + } + +#define FUN_ADMIN_SWU_UPGRADE_DATA_REQ_INIT(_subop, _flags, _id, _offset, \ + _size) \ + (struct fun_admin_swu_upgrade_data_req) { \ + .subop = (_subop), .flags = cpu_to_be16(_flags), \ + .id = cpu_to_be32(_id), .offset = cpu_to_be32(_offset), \ + .size = cpu_to_be32(_size), \ + } + +struct fun_admin_swu_rsp { + struct fun_admin_rsp_common common; + + union swu_rsp_subop { + struct fun_admin_swu_create_rsp { + __u8 subop; + __u8 rsvd0; + __be16 flags; + __be32 id; + } create; + struct fun_admin_swu_upgrade_rsp { + __u8 subop; + __u8 rsvd0[3]; + __be32 id; + + __be32 fourcc; + __be32 status; + + __be32 progress; + __be32 unused; + } upgrade; + struct fun_admin_swu_upgrade_data_rsp { + __u8 subop; + __u8 rsvd0; + __be16 flags; + __be32 id; + + __be32 offset; + __be32 size; + } upgrade_data; + } u; +}; + +enum fun_ktls_version { + FUN_KTLS_TLSV2 = 0x20, + FUN_KTLS_TLSV3 = 0x30, +}; + +enum fun_ktls_cipher { + FUN_KTLS_CIPHER_AES_GCM_128 = 0x33, + FUN_KTLS_CIPHER_AES_GCM_256 = 0x34, + FUN_KTLS_CIPHER_AES_CCM_128 = 0x35, + FUN_KTLS_CIPHER_CHACHA20_POLY1305 = 0x36, +}; + +enum fun_ktls_modify_flags { + FUN_KTLS_MODIFY_REMOVE = 0x1, +}; + +struct fun_admin_ktls_create_req { + struct fun_admin_req_common common; + + __u8 subop; + __u8 rsvd0; + __be16 flags; + __be32 id; +}; + +#define FUN_ADMIN_KTLS_CREATE_REQ_INIT(_subop, _flags, _id) \ + (struct fun_admin_ktls_create_req) { \ + .subop = (_subop), .flags = cpu_to_be16(_flags), \ + .id = cpu_to_be32(_id), \ + } + +struct fun_admin_ktls_create_rsp { + struct fun_admin_rsp_common common; + + __u8 subop; + __u8 rsvd0[3]; + __be32 id; +}; + +struct fun_admin_ktls_modify_req { + struct fun_admin_req_common common; + + __u8 subop; + __u8 rsvd0; + __be16 flags; + __be32 id; + + __be64 tlsid; + + __be32 tcp_seq; + __u8 version; + __u8 cipher; + __u8 rsvd1[2]; + + __u8 record_seq[8]; + + __u8 key[32]; + + __u8 iv[16]; + + __u8 salt[8]; +}; + +#define FUN_ADMIN_KTLS_MODIFY_REQ_INIT(_subop, _flags, _id, _tlsid, _tcp_seq, \ + _version, _cipher) \ + (struct fun_admin_ktls_modify_req) { \ + .subop = (_subop), .flags = cpu_to_be16(_flags), \ + .id = cpu_to_be32(_id), .tlsid = cpu_to_be64(_tlsid), \ + .tcp_seq = cpu_to_be32(_tcp_seq), .version = _version, \ + .cipher = _cipher, \ + } + +struct fun_admin_ktls_modify_rsp { + struct fun_admin_rsp_common common; + + __u8 subop; + __u8 rsvd0[3]; + __be32 id; + + __be64 tlsid; +}; + +struct fun_req_common { + __u8 op; + __u8 len8; + __be16 flags; + __u8 suboff8; + __u8 rsvd0; + __be16 cid; +}; + +struct fun_rsp_common { + __u8 op; + __u8 len8; + __be16 flags; + __u8 suboff8; + __u8 ret; + __be16 cid; +}; + +struct fun_cqe_info { + __be16 sqhd; + __be16 sqid; + __be16 cid; + __be16 sf_p; +}; + +enum fun_eprq_def { + FUN_EPRQ_PKT_ALIGN = 0x80, +}; + +struct fun_eprq_rqbuf { + __be64 bufaddr; +}; + +#define FUN_EPRQ_RQBUF_INIT(_bufaddr) \ + (struct fun_eprq_rqbuf) { \ + .bufaddr = cpu_to_be64(_bufaddr), \ + } + +enum fun_eth_op { + FUN_ETH_OP_TX = 0x1, + FUN_ETH_OP_RX = 0x2, +}; + +enum { + FUN_ETH_OFFLOAD_EN = 0x8000, + FUN_ETH_OUTER_EN = 0x4000, + FUN_ETH_INNER_LSO = 0x2000, + FUN_ETH_INNER_TSO = 0x1000, + FUN_ETH_OUTER_IPV6 = 0x800, + FUN_ETH_OUTER_UDP = 0x400, + FUN_ETH_INNER_IPV6 = 0x200, + FUN_ETH_INNER_UDP = 0x100, + FUN_ETH_UPDATE_OUTER_L3_LEN = 0x80, + FUN_ETH_UPDATE_OUTER_L3_CKSUM = 0x40, + FUN_ETH_UPDATE_OUTER_L4_LEN = 0x20, + FUN_ETH_UPDATE_OUTER_L4_CKSUM = 0x10, + FUN_ETH_UPDATE_INNER_L3_LEN = 0x8, + FUN_ETH_UPDATE_INNER_L3_CKSUM = 0x4, + FUN_ETH_UPDATE_INNER_L4_LEN = 0x2, + FUN_ETH_UPDATE_INNER_L4_CKSUM = 0x1, +}; + +struct fun_eth_offload { + __be16 flags; /* combination of above flags */ + __be16 mss; /* TSO max seg size */ + __be16 tcp_doff_flags; /* TCP data offset + flags 16b word */ + __be16 vlan; + + __be16 inner_l3_off; /* Inner L3 header offset */ + __be16 inner_l4_off; /* Inner L4 header offset */ + __be16 outer_l3_off; /* Outer L3 header offset */ + __be16 outer_l4_off; /* Outer L4 header offset */ +}; + +static inline void fun_eth_offload_init(struct fun_eth_offload *s, u16 flags, + u16 mss, __be16 tcp_doff_flags, + __be16 vlan, u16 inner_l3_off, + u16 inner_l4_off, u16 outer_l3_off, + u16 outer_l4_off) +{ + s->flags = cpu_to_be16(flags); + s->mss = cpu_to_be16(mss); + s->tcp_doff_flags = tcp_doff_flags; + s->vlan = vlan; + s->inner_l3_off = cpu_to_be16(inner_l3_off); + s->inner_l4_off = cpu_to_be16(inner_l4_off); + s->outer_l3_off = cpu_to_be16(outer_l3_off); + s->outer_l4_off = cpu_to_be16(outer_l4_off); +} + +struct fun_eth_tls { + __be64 tlsid; +}; + +enum { + FUN_ETH_TX_TLS = 0x8000, +}; + +struct fun_eth_tx_req { + __u8 op; + __u8 len8; + __be16 flags; + __u8 suboff8; + __u8 repr_idn; + __be16 encap_proto; + + struct fun_eth_offload offload; + + struct fun_dataop_hdr dataop; +}; + +struct fun_eth_rx_cv { + __be16 il4_prot_to_l2_type; +}; + +#define FUN_ETH_RX_CV_IL4_PROT_S 13U +#define FUN_ETH_RX_CV_IL4_PROT_M 0x3 + +#define FUN_ETH_RX_CV_IL3_PROT_S 11U +#define FUN_ETH_RX_CV_IL3_PROT_M 0x3 + +#define FUN_ETH_RX_CV_OL4_PROT_S 8U +#define FUN_ETH_RX_CV_OL4_PROT_M 0x7 + +#define FUN_ETH_RX_CV_ENCAP_TYPE_S 6U +#define FUN_ETH_RX_CV_ENCAP_TYPE_M 0x3 + +#define FUN_ETH_RX_CV_OL3_PROT_S 4U +#define FUN_ETH_RX_CV_OL3_PROT_M 0x3 + +#define FUN_ETH_RX_CV_VLAN_TYPE_S 3U +#define FUN_ETH_RX_CV_VLAN_TYPE_M 0x1 + +#define FUN_ETH_RX_CV_L2_TYPE_S 2U +#define FUN_ETH_RX_CV_L2_TYPE_M 0x1 + +enum fun_rx_cv { + FUN_RX_CV_NONE = 0x0, + FUN_RX_CV_IP = 0x2, + FUN_RX_CV_IP6 = 0x3, + FUN_RX_CV_TCP = 0x2, + FUN_RX_CV_UDP = 0x3, + FUN_RX_CV_VXLAN = 0x2, + FUN_RX_CV_MPLS = 0x3, +}; + +struct fun_eth_cqe { + __u8 op; + __u8 len8; + __u8 nsgl; + __u8 repr_idn; + __be32 pkt_len; + + __be64 timestamp; + + __be16 pkt_cv; + __be16 rsvd0; + __be32 hash; + + __be16 encap_proto; + __be16 vlan; + __be32 rsvd1; + + __be32 buf_offset; + __be16 headroom; + __be16 csum; +}; + +enum fun_admin_adi_attr { + FUN_ADMIN_ADI_ATTR_MACADDR = 0x1, + FUN_ADMIN_ADI_ATTR_VLAN = 0x2, + FUN_ADMIN_ADI_ATTR_RATE = 0x3, +}; + +struct fun_adi_param { + union adi_param { + struct fun_adi_mac { + __be64 addr; + } mac; + struct fun_adi_vlan { + __be32 rsvd; + __be16 eth_type; + __be16 tci; + } vlan; + struct fun_adi_rate { + __be32 rsvd; + __be32 tx_mbps; + } rate; + } u; +}; + +#define FUN_ADI_MAC_INIT(_addr) \ + (struct fun_adi_mac) { \ + .addr = cpu_to_be64(_addr), \ + } + +#define FUN_ADI_VLAN_INIT(_eth_type, _tci) \ + (struct fun_adi_vlan) { \ + .eth_type = cpu_to_be16(_eth_type), .tci = cpu_to_be16(_tci), \ + } + +#define FUN_ADI_RATE_INIT(_tx_mbps) \ + (struct fun_adi_rate) { \ + .tx_mbps = cpu_to_be32(_tx_mbps), \ + } + +struct fun_admin_adi_req { + struct fun_admin_req_common common; + + union adi_req_subop { + struct fun_admin_adi_write_req { + __u8 subop; + __u8 attribute; + __be16 rsvd; + __be32 id; + + struct fun_adi_param param; + } write; + } u; +}; + +#define FUN_ADMIN_ADI_WRITE_REQ_INIT(_subop, _attribute, _id) \ + (struct fun_admin_adi_write_req) { \ + .subop = (_subop), .attribute = (_attribute), \ + .id = cpu_to_be32(_id), \ + } + +#endif /* __FUN_HCI_H */ diff --git a/drivers/net/ethernet/fungible/funcore/fun_queue.c b/drivers/net/ethernet/fungible/funcore/fun_queue.c new file mode 100644 index 000000000..8ab9f6843 --- /dev/null +++ b/drivers/net/ethernet/fungible/funcore/fun_queue.c @@ -0,0 +1,601 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) + +#include <linux/dma-mapping.h> +#include <linux/interrupt.h> +#include <linux/log2.h> +#include <linux/mm.h> +#include <linux/netdevice.h> +#include <linux/pci.h> +#include <linux/slab.h> + +#include "fun_dev.h" +#include "fun_queue.h" + +/* Allocate memory for a queue. This includes the memory for the HW descriptor + * ring, an optional 64b HW write-back area, and an optional SW state ring. + * Returns the virtual and DMA addresses of the HW ring, the VA of the SW ring, + * and the VA of the write-back area. + */ +void *fun_alloc_ring_mem(struct device *dma_dev, size_t depth, + size_t hw_desc_sz, size_t sw_desc_sz, bool wb, + int numa_node, dma_addr_t *dma_addr, void **sw_va, + volatile __be64 **wb_va) +{ + int dev_node = dev_to_node(dma_dev); + size_t dma_sz; + void *va; + + if (numa_node == NUMA_NO_NODE) + numa_node = dev_node; + + /* Place optional write-back area at end of descriptor ring. */ + dma_sz = hw_desc_sz * depth; + if (wb) + dma_sz += sizeof(u64); + + set_dev_node(dma_dev, numa_node); + va = dma_alloc_coherent(dma_dev, dma_sz, dma_addr, GFP_KERNEL); + set_dev_node(dma_dev, dev_node); + if (!va) + return NULL; + + if (sw_desc_sz) { + *sw_va = kvzalloc_node(sw_desc_sz * depth, GFP_KERNEL, + numa_node); + if (!*sw_va) { + dma_free_coherent(dma_dev, dma_sz, va, *dma_addr); + return NULL; + } + } + + if (wb) + *wb_va = va + dma_sz - sizeof(u64); + return va; +} +EXPORT_SYMBOL_GPL(fun_alloc_ring_mem); + +void fun_free_ring_mem(struct device *dma_dev, size_t depth, size_t hw_desc_sz, + bool wb, void *hw_va, dma_addr_t dma_addr, void *sw_va) +{ + if (hw_va) { + size_t sz = depth * hw_desc_sz; + + if (wb) + sz += sizeof(u64); + dma_free_coherent(dma_dev, sz, hw_va, dma_addr); + } + kvfree(sw_va); +} +EXPORT_SYMBOL_GPL(fun_free_ring_mem); + +/* Prepare and issue an admin command to create an SQ on the device with the + * provided parameters. If the queue ID is auto-allocated by the device it is + * returned in *sqidp. + */ +int fun_sq_create(struct fun_dev *fdev, u16 flags, u32 sqid, u32 cqid, + u8 sqe_size_log2, u32 sq_depth, dma_addr_t dma_addr, + u8 coal_nentries, u8 coal_usec, u32 irq_num, + u32 scan_start_id, u32 scan_end_id, + u32 rq_buf_size_log2, u32 *sqidp, u32 __iomem **dbp) +{ + union { + struct fun_admin_epsq_req req; + struct fun_admin_generic_create_rsp rsp; + } cmd; + dma_addr_t wb_addr; + u32 hw_qid; + int rc; + + if (sq_depth > fdev->q_depth) + return -EINVAL; + if (flags & FUN_ADMIN_EPSQ_CREATE_FLAG_RQ) + sqe_size_log2 = ilog2(sizeof(struct fun_eprq_rqbuf)); + + wb_addr = dma_addr + (sq_depth << sqe_size_log2); + + cmd.req.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_EPSQ, + sizeof(cmd.req)); + cmd.req.u.create = + FUN_ADMIN_EPSQ_CREATE_REQ_INIT(FUN_ADMIN_SUBOP_CREATE, flags, + sqid, cqid, sqe_size_log2, + sq_depth - 1, dma_addr, 0, + coal_nentries, coal_usec, + irq_num, scan_start_id, + scan_end_id, 0, + rq_buf_size_log2, + ilog2(sizeof(u64)), wb_addr); + + rc = fun_submit_admin_sync_cmd(fdev, &cmd.req.common, + &cmd.rsp, sizeof(cmd.rsp), 0); + if (rc) + return rc; + + hw_qid = be32_to_cpu(cmd.rsp.id); + *dbp = fun_sq_db_addr(fdev, hw_qid); + if (flags & FUN_ADMIN_RES_CREATE_FLAG_ALLOCATOR) + *sqidp = hw_qid; + return rc; +} +EXPORT_SYMBOL_GPL(fun_sq_create); + +/* Prepare and issue an admin command to create a CQ on the device with the + * provided parameters. If the queue ID is auto-allocated by the device it is + * returned in *cqidp. + */ +int fun_cq_create(struct fun_dev *fdev, u16 flags, u32 cqid, u32 rqid, + u8 cqe_size_log2, u32 cq_depth, dma_addr_t dma_addr, + u16 headroom, u16 tailroom, u8 coal_nentries, u8 coal_usec, + u32 irq_num, u32 scan_start_id, u32 scan_end_id, u32 *cqidp, + u32 __iomem **dbp) +{ + union { + struct fun_admin_epcq_req req; + struct fun_admin_generic_create_rsp rsp; + } cmd; + u32 hw_qid; + int rc; + + if (cq_depth > fdev->q_depth) + return -EINVAL; + + cmd.req.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_EPCQ, + sizeof(cmd.req)); + cmd.req.u.create = + FUN_ADMIN_EPCQ_CREATE_REQ_INIT(FUN_ADMIN_SUBOP_CREATE, flags, + cqid, rqid, cqe_size_log2, + cq_depth - 1, dma_addr, tailroom, + headroom / 2, 0, coal_nentries, + coal_usec, irq_num, + scan_start_id, scan_end_id, 0); + + rc = fun_submit_admin_sync_cmd(fdev, &cmd.req.common, + &cmd.rsp, sizeof(cmd.rsp), 0); + if (rc) + return rc; + + hw_qid = be32_to_cpu(cmd.rsp.id); + *dbp = fun_cq_db_addr(fdev, hw_qid); + if (flags & FUN_ADMIN_RES_CREATE_FLAG_ALLOCATOR) + *cqidp = hw_qid; + return rc; +} +EXPORT_SYMBOL_GPL(fun_cq_create); + +static bool fun_sq_is_head_wb(const struct fun_queue *funq) +{ + return funq->sq_flags & FUN_ADMIN_EPSQ_CREATE_FLAG_HEAD_WB_ADDRESS; +} + +static void fun_clean_rq(struct fun_queue *funq) +{ + struct fun_dev *fdev = funq->fdev; + struct fun_rq_info *rqinfo; + unsigned int i; + + for (i = 0; i < funq->rq_depth; i++) { + rqinfo = &funq->rq_info[i]; + if (rqinfo->page) { + dma_unmap_page(fdev->dev, rqinfo->dma, PAGE_SIZE, + DMA_FROM_DEVICE); + put_page(rqinfo->page); + rqinfo->page = NULL; + } + } +} + +static int fun_fill_rq(struct fun_queue *funq) +{ + struct device *dev = funq->fdev->dev; + int i, node = dev_to_node(dev); + struct fun_rq_info *rqinfo; + + for (i = 0; i < funq->rq_depth; i++) { + rqinfo = &funq->rq_info[i]; + rqinfo->page = alloc_pages_node(node, GFP_KERNEL, 0); + if (unlikely(!rqinfo->page)) + return -ENOMEM; + + rqinfo->dma = dma_map_page(dev, rqinfo->page, 0, + PAGE_SIZE, DMA_FROM_DEVICE); + if (unlikely(dma_mapping_error(dev, rqinfo->dma))) { + put_page(rqinfo->page); + rqinfo->page = NULL; + return -ENOMEM; + } + + funq->rqes[i] = FUN_EPRQ_RQBUF_INIT(rqinfo->dma); + } + + funq->rq_tail = funq->rq_depth - 1; + return 0; +} + +static void fun_rq_update_pos(struct fun_queue *funq, int buf_offset) +{ + if (buf_offset <= funq->rq_buf_offset) { + struct fun_rq_info *rqinfo = &funq->rq_info[funq->rq_buf_idx]; + struct device *dev = funq->fdev->dev; + + dma_sync_single_for_device(dev, rqinfo->dma, PAGE_SIZE, + DMA_FROM_DEVICE); + funq->num_rqe_to_fill++; + if (++funq->rq_buf_idx == funq->rq_depth) + funq->rq_buf_idx = 0; + } + funq->rq_buf_offset = buf_offset; +} + +/* Given a command response with data scattered across >= 1 RQ buffers return + * a pointer to a contiguous buffer containing all the data. If the data is in + * one RQ buffer the start address within that buffer is returned, otherwise a + * new buffer is allocated and the data is gathered into it. + */ +static void *fun_data_from_rq(struct fun_queue *funq, + const struct fun_rsp_common *rsp, bool *need_free) +{ + u32 bufoff, total_len, remaining, fragsize, dataoff; + struct device *dma_dev = funq->fdev->dev; + const struct fun_dataop_rqbuf *databuf; + const struct fun_dataop_hdr *dataop; + const struct fun_rq_info *rqinfo; + void *data; + + dataop = (void *)rsp + rsp->suboff8 * 8; + total_len = be32_to_cpu(dataop->total_len); + + if (likely(dataop->nsgl == 1)) { + databuf = (struct fun_dataop_rqbuf *)dataop->imm; + bufoff = be32_to_cpu(databuf->bufoff); + fun_rq_update_pos(funq, bufoff); + rqinfo = &funq->rq_info[funq->rq_buf_idx]; + dma_sync_single_for_cpu(dma_dev, rqinfo->dma + bufoff, + total_len, DMA_FROM_DEVICE); + *need_free = false; + return page_address(rqinfo->page) + bufoff; + } + + /* For scattered completions gather the fragments into one buffer. */ + + data = kmalloc(total_len, GFP_ATOMIC); + /* NULL is OK here. In case of failure we still need to consume the data + * for proper buffer accounting but indicate an error in the response. + */ + if (likely(data)) + *need_free = true; + + dataoff = 0; + for (remaining = total_len; remaining; remaining -= fragsize) { + fun_rq_update_pos(funq, 0); + fragsize = min_t(unsigned int, PAGE_SIZE, remaining); + if (data) { + rqinfo = &funq->rq_info[funq->rq_buf_idx]; + dma_sync_single_for_cpu(dma_dev, rqinfo->dma, fragsize, + DMA_FROM_DEVICE); + memcpy(data + dataoff, page_address(rqinfo->page), + fragsize); + dataoff += fragsize; + } + } + return data; +} + +unsigned int __fun_process_cq(struct fun_queue *funq, unsigned int max) +{ + const struct fun_cqe_info *info; + struct fun_rsp_common *rsp; + unsigned int new_cqes; + u16 sf_p, flags; + bool need_free; + void *cqe; + + if (!max) + max = funq->cq_depth - 1; + + for (new_cqes = 0; new_cqes < max; new_cqes++) { + cqe = funq->cqes + (funq->cq_head << funq->cqe_size_log2); + info = funq_cqe_info(funq, cqe); + sf_p = be16_to_cpu(info->sf_p); + + if ((sf_p & 1) != funq->cq_phase) + break; + + /* ensure the phase tag is read before other CQE fields */ + dma_rmb(); + + if (++funq->cq_head == funq->cq_depth) { + funq->cq_head = 0; + funq->cq_phase = !funq->cq_phase; + } + + rsp = cqe; + flags = be16_to_cpu(rsp->flags); + + need_free = false; + if (unlikely(flags & FUN_REQ_COMMON_FLAG_CQE_IN_RQBUF)) { + rsp = fun_data_from_rq(funq, rsp, &need_free); + if (!rsp) { + rsp = cqe; + rsp->len8 = 1; + if (rsp->ret == 0) + rsp->ret = ENOMEM; + } + } + + if (funq->cq_cb) + funq->cq_cb(funq, funq->cb_data, rsp, info); + if (need_free) + kfree(rsp); + } + + dev_dbg(funq->fdev->dev, "CQ %u, new CQEs %u/%u, head %u, phase %u\n", + funq->cqid, new_cqes, max, funq->cq_head, funq->cq_phase); + return new_cqes; +} + +unsigned int fun_process_cq(struct fun_queue *funq, unsigned int max) +{ + unsigned int processed; + u32 db; + + processed = __fun_process_cq(funq, max); + + if (funq->num_rqe_to_fill) { + funq->rq_tail = (funq->rq_tail + funq->num_rqe_to_fill) % + funq->rq_depth; + funq->num_rqe_to_fill = 0; + writel(funq->rq_tail, funq->rq_db); + } + + db = funq->cq_head | FUN_DB_IRQ_ARM_F; + writel(db, funq->cq_db); + return processed; +} + +static int fun_alloc_sqes(struct fun_queue *funq) +{ + funq->sq_cmds = fun_alloc_ring_mem(funq->fdev->dev, funq->sq_depth, + 1 << funq->sqe_size_log2, 0, + fun_sq_is_head_wb(funq), + NUMA_NO_NODE, &funq->sq_dma_addr, + NULL, &funq->sq_head); + return funq->sq_cmds ? 0 : -ENOMEM; +} + +static int fun_alloc_cqes(struct fun_queue *funq) +{ + funq->cqes = fun_alloc_ring_mem(funq->fdev->dev, funq->cq_depth, + 1 << funq->cqe_size_log2, 0, false, + NUMA_NO_NODE, &funq->cq_dma_addr, NULL, + NULL); + return funq->cqes ? 0 : -ENOMEM; +} + +static int fun_alloc_rqes(struct fun_queue *funq) +{ + funq->rqes = fun_alloc_ring_mem(funq->fdev->dev, funq->rq_depth, + sizeof(*funq->rqes), + sizeof(*funq->rq_info), false, + NUMA_NO_NODE, &funq->rq_dma_addr, + (void **)&funq->rq_info, NULL); + return funq->rqes ? 0 : -ENOMEM; +} + +/* Free a queue's structures. */ +void fun_free_queue(struct fun_queue *funq) +{ + struct device *dev = funq->fdev->dev; + + fun_free_ring_mem(dev, funq->cq_depth, 1 << funq->cqe_size_log2, false, + funq->cqes, funq->cq_dma_addr, NULL); + fun_free_ring_mem(dev, funq->sq_depth, 1 << funq->sqe_size_log2, + fun_sq_is_head_wb(funq), funq->sq_cmds, + funq->sq_dma_addr, NULL); + + if (funq->rqes) { + fun_clean_rq(funq); + fun_free_ring_mem(dev, funq->rq_depth, sizeof(*funq->rqes), + false, funq->rqes, funq->rq_dma_addr, + funq->rq_info); + } + + kfree(funq); +} + +/* Allocate and initialize a funq's structures. */ +struct fun_queue *fun_alloc_queue(struct fun_dev *fdev, int qid, + const struct fun_queue_alloc_req *req) +{ + struct fun_queue *funq = kzalloc(sizeof(*funq), GFP_KERNEL); + + if (!funq) + return NULL; + + funq->fdev = fdev; + spin_lock_init(&funq->sq_lock); + + funq->qid = qid; + + /* Initial CQ/SQ/RQ ids */ + if (req->rq_depth) { + funq->cqid = 2 * qid; + if (funq->qid) { + /* I/O Q: use rqid = cqid, sqid = +1 */ + funq->rqid = funq->cqid; + funq->sqid = funq->rqid + 1; + } else { + /* Admin Q: sqid is always 0, use ID 1 for RQ */ + funq->sqid = 0; + funq->rqid = 1; + } + } else { + funq->cqid = qid; + funq->sqid = qid; + } + + funq->cq_flags = req->cq_flags; + funq->sq_flags = req->sq_flags; + + funq->cqe_size_log2 = req->cqe_size_log2; + funq->sqe_size_log2 = req->sqe_size_log2; + + funq->cq_depth = req->cq_depth; + funq->sq_depth = req->sq_depth; + + funq->cq_intcoal_nentries = req->cq_intcoal_nentries; + funq->cq_intcoal_usec = req->cq_intcoal_usec; + + funq->sq_intcoal_nentries = req->sq_intcoal_nentries; + funq->sq_intcoal_usec = req->sq_intcoal_usec; + + if (fun_alloc_cqes(funq)) + goto free_funq; + + funq->cq_phase = 1; + + if (fun_alloc_sqes(funq)) + goto free_funq; + + if (req->rq_depth) { + funq->rq_flags = req->rq_flags | FUN_ADMIN_EPSQ_CREATE_FLAG_RQ; + funq->rq_depth = req->rq_depth; + funq->rq_buf_offset = -1; + + if (fun_alloc_rqes(funq) || fun_fill_rq(funq)) + goto free_funq; + } + + funq->cq_vector = -1; + funq->cqe_info_offset = (1 << funq->cqe_size_log2) - sizeof(struct fun_cqe_info); + + /* SQ/CQ 0 are implicitly created, assign their doorbells now. + * Other queues are assigned doorbells at their explicit creation. + */ + if (funq->sqid == 0) + funq->sq_db = fun_sq_db_addr(fdev, 0); + if (funq->cqid == 0) + funq->cq_db = fun_cq_db_addr(fdev, 0); + + return funq; + +free_funq: + fun_free_queue(funq); + return NULL; +} + +/* Create a funq's CQ on the device. */ +static int fun_create_cq(struct fun_queue *funq) +{ + struct fun_dev *fdev = funq->fdev; + unsigned int rqid; + int rc; + + rqid = funq->cq_flags & FUN_ADMIN_EPCQ_CREATE_FLAG_RQ ? + funq->rqid : FUN_HCI_ID_INVALID; + rc = fun_cq_create(fdev, funq->cq_flags, funq->cqid, rqid, + funq->cqe_size_log2, funq->cq_depth, + funq->cq_dma_addr, 0, 0, funq->cq_intcoal_nentries, + funq->cq_intcoal_usec, funq->cq_vector, 0, 0, + &funq->cqid, &funq->cq_db); + if (!rc) + dev_dbg(fdev->dev, "created CQ %u\n", funq->cqid); + + return rc; +} + +/* Create a funq's SQ on the device. */ +static int fun_create_sq(struct fun_queue *funq) +{ + struct fun_dev *fdev = funq->fdev; + int rc; + + rc = fun_sq_create(fdev, funq->sq_flags, funq->sqid, funq->cqid, + funq->sqe_size_log2, funq->sq_depth, + funq->sq_dma_addr, funq->sq_intcoal_nentries, + funq->sq_intcoal_usec, funq->cq_vector, 0, 0, + 0, &funq->sqid, &funq->sq_db); + if (!rc) + dev_dbg(fdev->dev, "created SQ %u\n", funq->sqid); + + return rc; +} + +/* Create a funq's RQ on the device. */ +int fun_create_rq(struct fun_queue *funq) +{ + struct fun_dev *fdev = funq->fdev; + int rc; + + rc = fun_sq_create(fdev, funq->rq_flags, funq->rqid, funq->cqid, 0, + funq->rq_depth, funq->rq_dma_addr, 0, 0, + funq->cq_vector, 0, 0, PAGE_SHIFT, &funq->rqid, + &funq->rq_db); + if (!rc) + dev_dbg(fdev->dev, "created RQ %u\n", funq->rqid); + + return rc; +} + +static unsigned int funq_irq(struct fun_queue *funq) +{ + return pci_irq_vector(to_pci_dev(funq->fdev->dev), funq->cq_vector); +} + +int fun_request_irq(struct fun_queue *funq, const char *devname, + irq_handler_t handler, void *data) +{ + int rc; + + if (funq->cq_vector < 0) + return -EINVAL; + + funq->irq_handler = handler; + funq->irq_data = data; + + snprintf(funq->irqname, sizeof(funq->irqname), + funq->qid ? "%s-q[%d]" : "%s-adminq", devname, funq->qid); + + rc = request_irq(funq_irq(funq), handler, 0, funq->irqname, data); + if (rc) + funq->irq_handler = NULL; + + return rc; +} + +/* Create all component queues of a funq on the device. */ +int fun_create_queue(struct fun_queue *funq) +{ + int rc; + + rc = fun_create_cq(funq); + if (rc) + return rc; + + if (funq->rq_depth) { + rc = fun_create_rq(funq); + if (rc) + goto release_cq; + } + + rc = fun_create_sq(funq); + if (rc) + goto release_rq; + + return 0; + +release_rq: + fun_destroy_sq(funq->fdev, funq->rqid); +release_cq: + fun_destroy_cq(funq->fdev, funq->cqid); + return rc; +} + +void fun_free_irq(struct fun_queue *funq) +{ + if (funq->irq_handler) { + unsigned int vector = funq_irq(funq); + + free_irq(vector, funq->irq_data); + funq->irq_handler = NULL; + funq->irq_data = NULL; + } +} diff --git a/drivers/net/ethernet/fungible/funcore/fun_queue.h b/drivers/net/ethernet/fungible/funcore/fun_queue.h new file mode 100644 index 000000000..7fb53d0ae --- /dev/null +++ b/drivers/net/ethernet/fungible/funcore/fun_queue.h @@ -0,0 +1,175 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ + +#ifndef _FUN_QEUEUE_H +#define _FUN_QEUEUE_H + +#include <linux/interrupt.h> +#include <linux/io.h> + +struct device; +struct fun_dev; +struct fun_queue; +struct fun_cqe_info; +struct fun_rsp_common; + +typedef void (*cq_callback_t)(struct fun_queue *funq, void *data, void *msg, + const struct fun_cqe_info *info); + +struct fun_rq_info { + dma_addr_t dma; + struct page *page; +}; + +/* A queue group consisting of an SQ, a CQ, and an optional RQ. */ +struct fun_queue { + struct fun_dev *fdev; + spinlock_t sq_lock; + + dma_addr_t cq_dma_addr; + dma_addr_t sq_dma_addr; + dma_addr_t rq_dma_addr; + + u32 __iomem *cq_db; + u32 __iomem *sq_db; + u32 __iomem *rq_db; + + void *cqes; + void *sq_cmds; + struct fun_eprq_rqbuf *rqes; + struct fun_rq_info *rq_info; + + u32 cqid; + u32 sqid; + u32 rqid; + + u32 cq_depth; + u32 sq_depth; + u32 rq_depth; + + u16 cq_head; + u16 sq_tail; + u16 rq_tail; + + u8 cqe_size_log2; + u8 sqe_size_log2; + + u16 cqe_info_offset; + + u16 rq_buf_idx; + int rq_buf_offset; + u16 num_rqe_to_fill; + + u8 cq_intcoal_usec; + u8 cq_intcoal_nentries; + u8 sq_intcoal_usec; + u8 sq_intcoal_nentries; + + u16 cq_flags; + u16 sq_flags; + u16 rq_flags; + + /* SQ head writeback */ + u16 sq_comp; + + volatile __be64 *sq_head; + + cq_callback_t cq_cb; + void *cb_data; + + irq_handler_t irq_handler; + void *irq_data; + s16 cq_vector; + u8 cq_phase; + + /* I/O q index */ + u16 qid; + + char irqname[24]; +}; + +static inline void *fun_sqe_at(const struct fun_queue *funq, unsigned int pos) +{ + return funq->sq_cmds + (pos << funq->sqe_size_log2); +} + +static inline void funq_sq_post_tail(struct fun_queue *funq, u16 tail) +{ + if (++tail == funq->sq_depth) + tail = 0; + funq->sq_tail = tail; + writel(tail, funq->sq_db); +} + +static inline struct fun_cqe_info *funq_cqe_info(const struct fun_queue *funq, + void *cqe) +{ + return cqe + funq->cqe_info_offset; +} + +static inline void funq_rq_post(struct fun_queue *funq) +{ + writel(funq->rq_tail, funq->rq_db); +} + +struct fun_queue_alloc_req { + u8 cqe_size_log2; + u8 sqe_size_log2; + + u16 cq_flags; + u16 sq_flags; + u16 rq_flags; + + u32 cq_depth; + u32 sq_depth; + u32 rq_depth; + + u8 cq_intcoal_usec; + u8 cq_intcoal_nentries; + u8 sq_intcoal_usec; + u8 sq_intcoal_nentries; +}; + +int fun_sq_create(struct fun_dev *fdev, u16 flags, u32 sqid, u32 cqid, + u8 sqe_size_log2, u32 sq_depth, dma_addr_t dma_addr, + u8 coal_nentries, u8 coal_usec, u32 irq_num, + u32 scan_start_id, u32 scan_end_id, + u32 rq_buf_size_log2, u32 *sqidp, u32 __iomem **dbp); +int fun_cq_create(struct fun_dev *fdev, u16 flags, u32 cqid, u32 rqid, + u8 cqe_size_log2, u32 cq_depth, dma_addr_t dma_addr, + u16 headroom, u16 tailroom, u8 coal_nentries, u8 coal_usec, + u32 irq_num, u32 scan_start_id, u32 scan_end_id, + u32 *cqidp, u32 __iomem **dbp); +void *fun_alloc_ring_mem(struct device *dma_dev, size_t depth, + size_t hw_desc_sz, size_t sw_desc_size, bool wb, + int numa_node, dma_addr_t *dma_addr, void **sw_va, + volatile __be64 **wb_va); +void fun_free_ring_mem(struct device *dma_dev, size_t depth, size_t hw_desc_sz, + bool wb, void *hw_va, dma_addr_t dma_addr, void *sw_va); + +#define fun_destroy_sq(fdev, sqid) \ + fun_res_destroy((fdev), FUN_ADMIN_OP_EPSQ, 0, (sqid)) +#define fun_destroy_cq(fdev, cqid) \ + fun_res_destroy((fdev), FUN_ADMIN_OP_EPCQ, 0, (cqid)) + +struct fun_queue *fun_alloc_queue(struct fun_dev *fdev, int qid, + const struct fun_queue_alloc_req *req); +void fun_free_queue(struct fun_queue *funq); + +static inline void fun_set_cq_callback(struct fun_queue *funq, cq_callback_t cb, + void *cb_data) +{ + funq->cq_cb = cb; + funq->cb_data = cb_data; +} + +int fun_create_rq(struct fun_queue *funq); +int fun_create_queue(struct fun_queue *funq); + +void fun_free_irq(struct fun_queue *funq); +int fun_request_irq(struct fun_queue *funq, const char *devname, + irq_handler_t handler, void *data); + +unsigned int __fun_process_cq(struct fun_queue *funq, unsigned int max); +unsigned int fun_process_cq(struct fun_queue *funq, unsigned int max); + +#endif /* _FUN_QEUEUE_H */ diff --git a/drivers/net/ethernet/fungible/funeth/Kconfig b/drivers/net/ethernet/fungible/funeth/Kconfig new file mode 100644 index 000000000..c72ad9386 --- /dev/null +++ b/drivers/net/ethernet/fungible/funeth/Kconfig @@ -0,0 +1,17 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Fungible Ethernet driver configuration +# + +config FUN_ETH + tristate "Fungible Ethernet device driver" + depends on PCI && PCI_MSI + depends on TLS && TLS_DEVICE || TLS_DEVICE=n + select NET_DEVLINK + select FUN_CORE + help + This driver supports the Ethernet functionality of Fungible adapters. + It works with both physical and virtual functions. + + To compile this driver as a module, choose M here. The module + will be called funeth. diff --git a/drivers/net/ethernet/fungible/funeth/Makefile b/drivers/net/ethernet/fungible/funeth/Makefile new file mode 100644 index 000000000..646d69595 --- /dev/null +++ b/drivers/net/ethernet/fungible/funeth/Makefile @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) + +ccflags-y += -I$(srctree)/$(src)/../funcore -I$(srctree)/$(src) + +obj-$(CONFIG_FUN_ETH) += funeth.o + +funeth-y := funeth_main.o funeth_rx.o funeth_tx.o funeth_devlink.o \ + funeth_ethtool.o + +funeth-$(CONFIG_TLS_DEVICE) += funeth_ktls.o diff --git a/drivers/net/ethernet/fungible/funeth/fun_port.h b/drivers/net/ethernet/fungible/funeth/fun_port.h new file mode 100644 index 000000000..0f9da44e3 --- /dev/null +++ b/drivers/net/ethernet/fungible/funeth/fun_port.h @@ -0,0 +1,97 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ + +#ifndef _FUN_PORT_H +#define _FUN_PORT_H + +enum port_mac_rx_stats { + PORT_MAC_RX_etherStatsOctets = 0x0, + PORT_MAC_RX_OctetsReceivedOK = 0x1, + PORT_MAC_RX_aAlignmentErrors = 0x2, + PORT_MAC_RX_aPAUSEMACCtrlFramesReceived = 0x3, + PORT_MAC_RX_aFrameTooLongErrors = 0x4, + PORT_MAC_RX_aInRangeLengthErrors = 0x5, + PORT_MAC_RX_aFramesReceivedOK = 0x6, + PORT_MAC_RX_aFrameCheckSequenceErrors = 0x7, + PORT_MAC_RX_VLANReceivedOK = 0x8, + PORT_MAC_RX_ifInErrors = 0x9, + PORT_MAC_RX_ifInUcastPkts = 0xa, + PORT_MAC_RX_ifInMulticastPkts = 0xb, + PORT_MAC_RX_ifInBroadcastPkts = 0xc, + PORT_MAC_RX_etherStatsDropEvents = 0xd, + PORT_MAC_RX_etherStatsPkts = 0xe, + PORT_MAC_RX_etherStatsUndersizePkts = 0xf, + PORT_MAC_RX_etherStatsPkts64Octets = 0x10, + PORT_MAC_RX_etherStatsPkts65to127Octets = 0x11, + PORT_MAC_RX_etherStatsPkts128to255Octets = 0x12, + PORT_MAC_RX_etherStatsPkts256to511Octets = 0x13, + PORT_MAC_RX_etherStatsPkts512to1023Octets = 0x14, + PORT_MAC_RX_etherStatsPkts1024to1518Octets = 0x15, + PORT_MAC_RX_etherStatsPkts1519toMaxOctets = 0x16, + PORT_MAC_RX_etherStatsOversizePkts = 0x17, + PORT_MAC_RX_etherStatsJabbers = 0x18, + PORT_MAC_RX_etherStatsFragments = 0x19, + PORT_MAC_RX_CBFCPAUSEFramesReceived_0 = 0x1a, + PORT_MAC_RX_CBFCPAUSEFramesReceived_1 = 0x1b, + PORT_MAC_RX_CBFCPAUSEFramesReceived_2 = 0x1c, + PORT_MAC_RX_CBFCPAUSEFramesReceived_3 = 0x1d, + PORT_MAC_RX_CBFCPAUSEFramesReceived_4 = 0x1e, + PORT_MAC_RX_CBFCPAUSEFramesReceived_5 = 0x1f, + PORT_MAC_RX_CBFCPAUSEFramesReceived_6 = 0x20, + PORT_MAC_RX_CBFCPAUSEFramesReceived_7 = 0x21, + PORT_MAC_RX_CBFCPAUSEFramesReceived_8 = 0x22, + PORT_MAC_RX_CBFCPAUSEFramesReceived_9 = 0x23, + PORT_MAC_RX_CBFCPAUSEFramesReceived_10 = 0x24, + PORT_MAC_RX_CBFCPAUSEFramesReceived_11 = 0x25, + PORT_MAC_RX_CBFCPAUSEFramesReceived_12 = 0x26, + PORT_MAC_RX_CBFCPAUSEFramesReceived_13 = 0x27, + PORT_MAC_RX_CBFCPAUSEFramesReceived_14 = 0x28, + PORT_MAC_RX_CBFCPAUSEFramesReceived_15 = 0x29, + PORT_MAC_RX_MACControlFramesReceived = 0x2a, + PORT_MAC_RX_STATS_MAX = 0x2b, +}; + +enum port_mac_tx_stats { + PORT_MAC_TX_etherStatsOctets = 0x0, + PORT_MAC_TX_OctetsTransmittedOK = 0x1, + PORT_MAC_TX_aPAUSEMACCtrlFramesTransmitted = 0x2, + PORT_MAC_TX_aFramesTransmittedOK = 0x3, + PORT_MAC_TX_VLANTransmittedOK = 0x4, + PORT_MAC_TX_ifOutErrors = 0x5, + PORT_MAC_TX_ifOutUcastPkts = 0x6, + PORT_MAC_TX_ifOutMulticastPkts = 0x7, + PORT_MAC_TX_ifOutBroadcastPkts = 0x8, + PORT_MAC_TX_etherStatsPkts64Octets = 0x9, + PORT_MAC_TX_etherStatsPkts65to127Octets = 0xa, + PORT_MAC_TX_etherStatsPkts128to255Octets = 0xb, + PORT_MAC_TX_etherStatsPkts256to511Octets = 0xc, + PORT_MAC_TX_etherStatsPkts512to1023Octets = 0xd, + PORT_MAC_TX_etherStatsPkts1024to1518Octets = 0xe, + PORT_MAC_TX_etherStatsPkts1519toMaxOctets = 0xf, + PORT_MAC_TX_CBFCPAUSEFramesTransmitted_0 = 0x10, + PORT_MAC_TX_CBFCPAUSEFramesTransmitted_1 = 0x11, + PORT_MAC_TX_CBFCPAUSEFramesTransmitted_2 = 0x12, + PORT_MAC_TX_CBFCPAUSEFramesTransmitted_3 = 0x13, + PORT_MAC_TX_CBFCPAUSEFramesTransmitted_4 = 0x14, + PORT_MAC_TX_CBFCPAUSEFramesTransmitted_5 = 0x15, + PORT_MAC_TX_CBFCPAUSEFramesTransmitted_6 = 0x16, + PORT_MAC_TX_CBFCPAUSEFramesTransmitted_7 = 0x17, + PORT_MAC_TX_CBFCPAUSEFramesTransmitted_8 = 0x18, + PORT_MAC_TX_CBFCPAUSEFramesTransmitted_9 = 0x19, + PORT_MAC_TX_CBFCPAUSEFramesTransmitted_10 = 0x1a, + PORT_MAC_TX_CBFCPAUSEFramesTransmitted_11 = 0x1b, + PORT_MAC_TX_CBFCPAUSEFramesTransmitted_12 = 0x1c, + PORT_MAC_TX_CBFCPAUSEFramesTransmitted_13 = 0x1d, + PORT_MAC_TX_CBFCPAUSEFramesTransmitted_14 = 0x1e, + PORT_MAC_TX_CBFCPAUSEFramesTransmitted_15 = 0x1f, + PORT_MAC_TX_MACControlFramesTransmitted = 0x20, + PORT_MAC_TX_etherStatsPkts = 0x21, + PORT_MAC_TX_STATS_MAX = 0x22, +}; + +enum port_mac_fec_stats { + PORT_MAC_FEC_Correctable = 0x0, + PORT_MAC_FEC_Uncorrectable = 0x1, + PORT_MAC_FEC_STATS_MAX = 0x2, +}; + +#endif /* _FUN_PORT_H */ diff --git a/drivers/net/ethernet/fungible/funeth/funeth.h b/drivers/net/ethernet/fungible/funeth/funeth.h new file mode 100644 index 000000000..1250e10d2 --- /dev/null +++ b/drivers/net/ethernet/fungible/funeth/funeth.h @@ -0,0 +1,171 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ + +#ifndef _FUNETH_H +#define _FUNETH_H + +#include <uapi/linux/if_ether.h> +#include <uapi/linux/net_tstamp.h> +#include <linux/mutex.h> +#include <linux/seqlock.h> +#include <linux/xarray.h> +#include <net/devlink.h> +#include "fun_dev.h" + +#define ADMIN_SQE_SIZE SZ_128 +#define ADMIN_CQE_SIZE SZ_64 +#define ADMIN_RSP_MAX_LEN (ADMIN_CQE_SIZE - sizeof(struct fun_cqe_info)) + +#define FUN_MAX_MTU 9024 + +#define SQ_DEPTH 512U +#define CQ_DEPTH 1024U +#define RQ_DEPTH (512U / (PAGE_SIZE / 4096)) + +#define CQ_INTCOAL_USEC 10 +#define CQ_INTCOAL_NPKT 16 +#define SQ_INTCOAL_USEC 10 +#define SQ_INTCOAL_NPKT 16 + +#define INVALID_LPORT 0xffff + +#define FUN_PORT_CAP_PAUSE_MASK (FUN_PORT_CAP_TX_PAUSE | FUN_PORT_CAP_RX_PAUSE) + +struct fun_vport_info { + u8 mac[ETH_ALEN]; + u16 vlan; + __be16 vlan_proto; + u8 qos; + u8 spoofchk:1; + u8 trusted:1; + unsigned int max_rate; +}; + +/* "subclass" of fun_dev for Ethernet functions */ +struct fun_ethdev { + struct fun_dev fdev; + + /* the function's network ports */ + struct net_device **netdevs; + unsigned int num_ports; + + /* configuration for the function's virtual ports */ + unsigned int num_vports; + struct fun_vport_info *vport_info; + + struct mutex state_mutex; /* nests inside RTNL if both taken */ + + unsigned int nsqs_per_port; +}; + +static inline struct fun_ethdev *to_fun_ethdev(struct fun_dev *p) +{ + return container_of(p, struct fun_ethdev, fdev); +} + +struct fun_qset { + struct funeth_rxq **rxqs; + struct funeth_txq **txqs; + struct funeth_txq **xdpqs; + unsigned int nrxqs; + unsigned int ntxqs; + unsigned int nxdpqs; + unsigned int rxq_start; + unsigned int txq_start; + unsigned int xdpq_start; + unsigned int cq_depth; + unsigned int rq_depth; + unsigned int sq_depth; + int state; +}; + +/* Per netdevice driver state, i.e., netdev_priv. */ +struct funeth_priv { + struct fun_dev *fdev; + struct pci_dev *pdev; + struct net_device *netdev; + + struct funeth_rxq * __rcu *rxqs; + struct funeth_txq **txqs; + struct funeth_txq * __rcu *xdpqs; + + struct xarray irqs; + unsigned int num_tx_irqs; + unsigned int num_rx_irqs; + unsigned int rx_irq_ofst; + + unsigned int lane_attrs; + u16 lport; + + /* link settings */ + u64 port_caps; + u64 advertising; + u64 lp_advertising; + unsigned int link_speed; + u8 xcvr_type; + u8 active_fc; + u8 active_fec; + u8 link_down_reason; + seqcount_t link_seq; + + u32 msg_enable; + + unsigned int num_xdpqs; + + /* ethtool, etc. config parameters */ + unsigned int sq_depth; + unsigned int rq_depth; + unsigned int cq_depth; + unsigned int cq_irq_db; + u8 tx_coal_usec; + u8 tx_coal_count; + u8 rx_coal_usec; + u8 rx_coal_count; + + struct hwtstamp_config hwtstamp_cfg; + + /* cumulative queue stats from earlier queue instances */ + u64 tx_packets; + u64 tx_bytes; + u64 tx_dropped; + u64 rx_packets; + u64 rx_bytes; + u64 rx_dropped; + + /* RSS */ + unsigned int rss_hw_id; + enum fun_eth_hash_alg hash_algo; + u8 rss_key[FUN_ETH_RSS_MAX_KEY_SIZE]; + unsigned int indir_table_nentries; + u32 indir_table[FUN_ETH_RSS_MAX_INDIR_ENT]; + dma_addr_t rss_dma_addr; + void *rss_cfg; + + /* DMA area for port stats */ + dma_addr_t stats_dma_addr; + __be64 *stats; + + struct bpf_prog *xdp_prog; + + struct devlink_port dl_port; + + /* kTLS state */ + unsigned int ktls_id; + atomic64_t tx_tls_add; + atomic64_t tx_tls_del; + atomic64_t tx_tls_resync; +}; + +void fun_set_ethtool_ops(struct net_device *netdev); +int fun_port_write_cmd(struct funeth_priv *fp, int key, u64 data); +int fun_port_read_cmd(struct funeth_priv *fp, int key, u64 *data); +int fun_create_and_bind_tx(struct funeth_priv *fp, u32 sqid); +int fun_replace_queues(struct net_device *dev, struct fun_qset *newqs, + struct netlink_ext_ack *extack); +int fun_change_num_queues(struct net_device *dev, unsigned int ntx, + unsigned int nrx); +void fun_set_ring_count(struct net_device *netdev, unsigned int ntx, + unsigned int nrx); +int fun_config_rss(struct net_device *dev, int algo, const u8 *key, + const u32 *qtable, u8 op); + +#endif /* _FUNETH_H */ diff --git a/drivers/net/ethernet/fungible/funeth/funeth_devlink.c b/drivers/net/ethernet/fungible/funeth/funeth_devlink.c new file mode 100644 index 000000000..d50c22294 --- /dev/null +++ b/drivers/net/ethernet/fungible/funeth/funeth_devlink.c @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) + +#include "funeth.h" +#include "funeth_devlink.h" + +static int fun_dl_info_get(struct devlink *dl, struct devlink_info_req *req, + struct netlink_ext_ack *extack) +{ + return devlink_info_driver_name_put(req, KBUILD_MODNAME); +} + +static const struct devlink_ops fun_dl_ops = { + .info_get = fun_dl_info_get, +}; + +struct devlink *fun_devlink_alloc(struct device *dev) +{ + return devlink_alloc(&fun_dl_ops, sizeof(struct fun_ethdev), dev); +} + +void fun_devlink_free(struct devlink *devlink) +{ + devlink_free(devlink); +} + +void fun_devlink_register(struct devlink *devlink) +{ + devlink_register(devlink); +} + +void fun_devlink_unregister(struct devlink *devlink) +{ + devlink_unregister(devlink); +} diff --git a/drivers/net/ethernet/fungible/funeth/funeth_devlink.h b/drivers/net/ethernet/fungible/funeth/funeth_devlink.h new file mode 100644 index 000000000..e40464d57 --- /dev/null +++ b/drivers/net/ethernet/fungible/funeth/funeth_devlink.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ + +#ifndef __FUNETH_DEVLINK_H +#define __FUNETH_DEVLINK_H + +#include <net/devlink.h> + +struct devlink *fun_devlink_alloc(struct device *dev); +void fun_devlink_free(struct devlink *devlink); +void fun_devlink_register(struct devlink *devlink); +void fun_devlink_unregister(struct devlink *devlink); + +#endif /* __FUNETH_DEVLINK_H */ diff --git a/drivers/net/ethernet/fungible/funeth/funeth_ethtool.c b/drivers/net/ethernet/fungible/funeth/funeth_ethtool.c new file mode 100644 index 000000000..31aa185f4 --- /dev/null +++ b/drivers/net/ethernet/fungible/funeth/funeth_ethtool.c @@ -0,0 +1,1198 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) + +#include <linux/ethtool.h> +#include <linux/linkmode.h> +#include <linux/netdevice.h> +#include <linux/nvme.h> +#include <linux/io.h> +#include <linux/io-64-nonatomic-lo-hi.h> +#include <linux/pci.h> +#include <linux/rtnetlink.h> +#include "funeth.h" +#include "fun_port.h" +#include "funeth_txrx.h" + +/* Min queue depth. The smallest power-of-2 supporting jumbo frames with 4K + * pages is 8. Require it for all types of queues though some could work with + * fewer entries. + */ +#define FUNETH_MIN_QDEPTH 8 + +static const char mac_tx_stat_names[][ETH_GSTRING_LEN] = { + "mac_tx_octets_total", + "mac_tx_frames_total", + "mac_tx_vlan_frames_ok", + "mac_tx_unicast_frames", + "mac_tx_multicast_frames", + "mac_tx_broadcast_frames", + "mac_tx_errors", + "mac_tx_CBFCPAUSE0", + "mac_tx_CBFCPAUSE1", + "mac_tx_CBFCPAUSE2", + "mac_tx_CBFCPAUSE3", + "mac_tx_CBFCPAUSE4", + "mac_tx_CBFCPAUSE5", + "mac_tx_CBFCPAUSE6", + "mac_tx_CBFCPAUSE7", + "mac_tx_CBFCPAUSE8", + "mac_tx_CBFCPAUSE9", + "mac_tx_CBFCPAUSE10", + "mac_tx_CBFCPAUSE11", + "mac_tx_CBFCPAUSE12", + "mac_tx_CBFCPAUSE13", + "mac_tx_CBFCPAUSE14", + "mac_tx_CBFCPAUSE15", +}; + +static const char mac_rx_stat_names[][ETH_GSTRING_LEN] = { + "mac_rx_octets_total", + "mac_rx_frames_total", + "mac_rx_VLAN_frames_ok", + "mac_rx_unicast_frames", + "mac_rx_multicast_frames", + "mac_rx_broadcast_frames", + "mac_rx_drop_events", + "mac_rx_errors", + "mac_rx_alignment_errors", + "mac_rx_CBFCPAUSE0", + "mac_rx_CBFCPAUSE1", + "mac_rx_CBFCPAUSE2", + "mac_rx_CBFCPAUSE3", + "mac_rx_CBFCPAUSE4", + "mac_rx_CBFCPAUSE5", + "mac_rx_CBFCPAUSE6", + "mac_rx_CBFCPAUSE7", + "mac_rx_CBFCPAUSE8", + "mac_rx_CBFCPAUSE9", + "mac_rx_CBFCPAUSE10", + "mac_rx_CBFCPAUSE11", + "mac_rx_CBFCPAUSE12", + "mac_rx_CBFCPAUSE13", + "mac_rx_CBFCPAUSE14", + "mac_rx_CBFCPAUSE15", +}; + +static const char * const txq_stat_names[] = { + "tx_pkts", + "tx_bytes", + "tx_cso", + "tx_tso", + "tx_encapsulated_tso", + "tx_uso", + "tx_more", + "tx_queue_stops", + "tx_queue_restarts", + "tx_mapping_errors", + "tx_tls_encrypted_packets", + "tx_tls_encrypted_bytes", + "tx_tls_ooo", + "tx_tls_drop_no_sync_data", +}; + +static const char * const xdpq_stat_names[] = { + "tx_xdp_pkts", + "tx_xdp_bytes", + "tx_xdp_full", + "tx_xdp_mapping_errors", +}; + +static const char * const rxq_stat_names[] = { + "rx_pkts", + "rx_bytes", + "rx_cso", + "gro_pkts", + "gro_merged", + "rx_xdp_tx", + "rx_xdp_redir", + "rx_xdp_drops", + "rx_buffers", + "rx_page_allocs", + "rx_drops", + "rx_budget_exhausted", + "rx_mapping_errors", +}; + +static const char * const tls_stat_names[] = { + "tx_tls_ctx", + "tx_tls_del", + "tx_tls_resync", +}; + +static void fun_link_modes_to_ethtool(u64 modes, + unsigned long *ethtool_modes_map) +{ +#define ADD_LINK_MODE(mode) \ + __set_bit(ETHTOOL_LINK_MODE_ ## mode ## _BIT, ethtool_modes_map) + + if (modes & FUN_PORT_CAP_AUTONEG) + ADD_LINK_MODE(Autoneg); + if (modes & FUN_PORT_CAP_1000_X) + ADD_LINK_MODE(1000baseX_Full); + if (modes & FUN_PORT_CAP_10G_R) { + ADD_LINK_MODE(10000baseCR_Full); + ADD_LINK_MODE(10000baseSR_Full); + ADD_LINK_MODE(10000baseLR_Full); + ADD_LINK_MODE(10000baseER_Full); + } + if (modes & FUN_PORT_CAP_25G_R) { + ADD_LINK_MODE(25000baseCR_Full); + ADD_LINK_MODE(25000baseSR_Full); + } + if (modes & FUN_PORT_CAP_40G_R4) { + ADD_LINK_MODE(40000baseCR4_Full); + ADD_LINK_MODE(40000baseSR4_Full); + ADD_LINK_MODE(40000baseLR4_Full); + } + if (modes & FUN_PORT_CAP_50G_R2) { + ADD_LINK_MODE(50000baseCR2_Full); + ADD_LINK_MODE(50000baseSR2_Full); + } + if (modes & FUN_PORT_CAP_50G_R) { + ADD_LINK_MODE(50000baseCR_Full); + ADD_LINK_MODE(50000baseSR_Full); + ADD_LINK_MODE(50000baseLR_ER_FR_Full); + } + if (modes & FUN_PORT_CAP_100G_R4) { + ADD_LINK_MODE(100000baseCR4_Full); + ADD_LINK_MODE(100000baseSR4_Full); + ADD_LINK_MODE(100000baseLR4_ER4_Full); + } + if (modes & FUN_PORT_CAP_100G_R2) { + ADD_LINK_MODE(100000baseCR2_Full); + ADD_LINK_MODE(100000baseSR2_Full); + ADD_LINK_MODE(100000baseLR2_ER2_FR2_Full); + } + if (modes & FUN_PORT_CAP_FEC_NONE) + ADD_LINK_MODE(FEC_NONE); + if (modes & FUN_PORT_CAP_FEC_FC) + ADD_LINK_MODE(FEC_BASER); + if (modes & FUN_PORT_CAP_FEC_RS) + ADD_LINK_MODE(FEC_RS); + if (modes & FUN_PORT_CAP_RX_PAUSE) + ADD_LINK_MODE(Pause); + +#undef ADD_LINK_MODE +} + +static void set_asym_pause(u64 advertising, struct ethtool_link_ksettings *ks) +{ + bool rx_pause, tx_pause; + + rx_pause = advertising & FUN_PORT_CAP_RX_PAUSE; + tx_pause = advertising & FUN_PORT_CAP_TX_PAUSE; + if (tx_pause ^ rx_pause) + ethtool_link_ksettings_add_link_mode(ks, advertising, + Asym_Pause); +} + +static unsigned int fun_port_type(unsigned int xcvr) +{ + if (!xcvr) + return PORT_NONE; + + switch (xcvr & 7) { + case FUN_XCVR_BASET: + return PORT_TP; + case FUN_XCVR_CU: + return PORT_DA; + default: + return PORT_FIBRE; + } +} + +static int fun_get_link_ksettings(struct net_device *netdev, + struct ethtool_link_ksettings *ks) +{ + const struct funeth_priv *fp = netdev_priv(netdev); + unsigned int seq, speed, xcvr; + u64 lp_advertising; + bool link_up; + + ethtool_link_ksettings_zero_link_mode(ks, supported); + ethtool_link_ksettings_zero_link_mode(ks, advertising); + ethtool_link_ksettings_zero_link_mode(ks, lp_advertising); + + /* Link settings change asynchronously, take a consistent snapshot */ + do { + seq = read_seqcount_begin(&fp->link_seq); + link_up = netif_carrier_ok(netdev); + speed = fp->link_speed; + xcvr = fp->xcvr_type; + lp_advertising = fp->lp_advertising; + } while (read_seqcount_retry(&fp->link_seq, seq)); + + if (link_up) { + ks->base.speed = speed; + ks->base.duplex = DUPLEX_FULL; + fun_link_modes_to_ethtool(lp_advertising, + ks->link_modes.lp_advertising); + } else { + ks->base.speed = SPEED_UNKNOWN; + ks->base.duplex = DUPLEX_UNKNOWN; + } + + ks->base.autoneg = (fp->advertising & FUN_PORT_CAP_AUTONEG) ? + AUTONEG_ENABLE : AUTONEG_DISABLE; + ks->base.port = fun_port_type(xcvr); + + fun_link_modes_to_ethtool(fp->port_caps, ks->link_modes.supported); + if (fp->port_caps & (FUN_PORT_CAP_RX_PAUSE | FUN_PORT_CAP_TX_PAUSE)) + ethtool_link_ksettings_add_link_mode(ks, supported, Asym_Pause); + + fun_link_modes_to_ethtool(fp->advertising, ks->link_modes.advertising); + set_asym_pause(fp->advertising, ks); + return 0; +} + +static u64 fun_advert_modes(const struct ethtool_link_ksettings *ks) +{ + u64 modes = 0; + +#define HAS_MODE(mode) \ + ethtool_link_ksettings_test_link_mode(ks, advertising, mode) + + if (HAS_MODE(1000baseX_Full)) + modes |= FUN_PORT_CAP_1000_X; + if (HAS_MODE(10000baseCR_Full) || HAS_MODE(10000baseSR_Full) || + HAS_MODE(10000baseLR_Full) || HAS_MODE(10000baseER_Full)) + modes |= FUN_PORT_CAP_10G_R; + if (HAS_MODE(25000baseCR_Full) || HAS_MODE(25000baseSR_Full)) + modes |= FUN_PORT_CAP_25G_R; + if (HAS_MODE(40000baseCR4_Full) || HAS_MODE(40000baseSR4_Full) || + HAS_MODE(40000baseLR4_Full)) + modes |= FUN_PORT_CAP_40G_R4; + if (HAS_MODE(50000baseCR2_Full) || HAS_MODE(50000baseSR2_Full)) + modes |= FUN_PORT_CAP_50G_R2; + if (HAS_MODE(50000baseCR_Full) || HAS_MODE(50000baseSR_Full) || + HAS_MODE(50000baseLR_ER_FR_Full)) + modes |= FUN_PORT_CAP_50G_R; + if (HAS_MODE(100000baseCR4_Full) || HAS_MODE(100000baseSR4_Full) || + HAS_MODE(100000baseLR4_ER4_Full)) + modes |= FUN_PORT_CAP_100G_R4; + if (HAS_MODE(100000baseCR2_Full) || HAS_MODE(100000baseSR2_Full) || + HAS_MODE(100000baseLR2_ER2_FR2_Full)) + modes |= FUN_PORT_CAP_100G_R2; + + return modes; +#undef HAS_MODE +} + +static u64 fun_speed_to_link_mode(unsigned int speed) +{ + switch (speed) { + case SPEED_100000: + return FUN_PORT_CAP_100G_R4 | FUN_PORT_CAP_100G_R2; + case SPEED_50000: + return FUN_PORT_CAP_50G_R | FUN_PORT_CAP_50G_R2; + case SPEED_40000: + return FUN_PORT_CAP_40G_R4; + case SPEED_25000: + return FUN_PORT_CAP_25G_R; + case SPEED_10000: + return FUN_PORT_CAP_10G_R; + case SPEED_1000: + return FUN_PORT_CAP_1000_X; + default: + return 0; + } +} + +static int fun_change_advert(struct funeth_priv *fp, u64 new_advert) +{ + int err; + + if (new_advert == fp->advertising) + return 0; + + err = fun_port_write_cmd(fp, FUN_ADMIN_PORT_KEY_ADVERT, new_advert); + if (!err) + fp->advertising = new_advert; + return err; +} + +#define FUN_PORT_CAP_FEC_MASK \ + (FUN_PORT_CAP_FEC_NONE | FUN_PORT_CAP_FEC_FC | FUN_PORT_CAP_FEC_RS) + +static int fun_set_link_ksettings(struct net_device *netdev, + const struct ethtool_link_ksettings *ks) +{ + __ETHTOOL_DECLARE_LINK_MODE_MASK(supported) = {}; + struct funeth_priv *fp = netdev_priv(netdev); + u64 new_advert; + + /* eswitch ports don't support mode changes */ + if (fp->port_caps & FUN_PORT_CAP_VPORT) + return -EOPNOTSUPP; + + if (ks->base.duplex == DUPLEX_HALF) + return -EINVAL; + if (ks->base.autoneg == AUTONEG_ENABLE && + !(fp->port_caps & FUN_PORT_CAP_AUTONEG)) + return -EINVAL; + + if (ks->base.autoneg == AUTONEG_ENABLE) { + if (linkmode_empty(ks->link_modes.advertising)) + return -EINVAL; + + fun_link_modes_to_ethtool(fp->port_caps, supported); + if (!linkmode_subset(ks->link_modes.advertising, supported)) + return -EINVAL; + + new_advert = fun_advert_modes(ks) | FUN_PORT_CAP_AUTONEG; + } else { + new_advert = fun_speed_to_link_mode(ks->base.speed); + new_advert &= fp->port_caps; + if (!new_advert) + return -EINVAL; + } + new_advert |= fp->advertising & + (FUN_PORT_CAP_PAUSE_MASK | FUN_PORT_CAP_FEC_MASK); + + return fun_change_advert(fp, new_advert); +} + +static void fun_get_pauseparam(struct net_device *netdev, + struct ethtool_pauseparam *pause) +{ + const struct funeth_priv *fp = netdev_priv(netdev); + u8 active_pause = fp->active_fc; + + pause->rx_pause = !!(active_pause & FUN_PORT_CAP_RX_PAUSE); + pause->tx_pause = !!(active_pause & FUN_PORT_CAP_TX_PAUSE); + pause->autoneg = !!(fp->advertising & FUN_PORT_CAP_AUTONEG); +} + +static int fun_set_pauseparam(struct net_device *netdev, + struct ethtool_pauseparam *pause) +{ + struct funeth_priv *fp = netdev_priv(netdev); + u64 new_advert; + + if (fp->port_caps & FUN_PORT_CAP_VPORT) + return -EOPNOTSUPP; + /* Forcing PAUSE settings with AN enabled is unsupported. */ + if (!pause->autoneg && (fp->advertising & FUN_PORT_CAP_AUTONEG)) + return -EOPNOTSUPP; + if (pause->autoneg && !(fp->advertising & FUN_PORT_CAP_AUTONEG)) + return -EINVAL; + if (pause->tx_pause && !(fp->port_caps & FUN_PORT_CAP_TX_PAUSE)) + return -EINVAL; + if (pause->rx_pause && !(fp->port_caps & FUN_PORT_CAP_RX_PAUSE)) + return -EINVAL; + + new_advert = fp->advertising & ~FUN_PORT_CAP_PAUSE_MASK; + if (pause->tx_pause) + new_advert |= FUN_PORT_CAP_TX_PAUSE; + if (pause->rx_pause) + new_advert |= FUN_PORT_CAP_RX_PAUSE; + + return fun_change_advert(fp, new_advert); +} + +static int fun_restart_an(struct net_device *netdev) +{ + struct funeth_priv *fp = netdev_priv(netdev); + + if (!(fp->advertising & FUN_PORT_CAP_AUTONEG)) + return -EOPNOTSUPP; + + return fun_port_write_cmd(fp, FUN_ADMIN_PORT_KEY_ADVERT, + FUN_PORT_CAP_AUTONEG); +} + +static int fun_set_phys_id(struct net_device *netdev, + enum ethtool_phys_id_state state) +{ + struct funeth_priv *fp = netdev_priv(netdev); + unsigned int beacon; + + if (fp->port_caps & FUN_PORT_CAP_VPORT) + return -EOPNOTSUPP; + if (state != ETHTOOL_ID_ACTIVE && state != ETHTOOL_ID_INACTIVE) + return -EOPNOTSUPP; + + beacon = state == ETHTOOL_ID_ACTIVE ? FUN_PORT_LED_BEACON_ON : + FUN_PORT_LED_BEACON_OFF; + return fun_port_write_cmd(fp, FUN_ADMIN_PORT_KEY_LED, beacon); +} + +static void fun_get_drvinfo(struct net_device *netdev, + struct ethtool_drvinfo *info) +{ + const struct funeth_priv *fp = netdev_priv(netdev); + + strscpy(info->driver, KBUILD_MODNAME, sizeof(info->driver)); + strscpy(info->bus_info, pci_name(fp->pdev), sizeof(info->bus_info)); +} + +static u32 fun_get_msglevel(struct net_device *netdev) +{ + const struct funeth_priv *fp = netdev_priv(netdev); + + return fp->msg_enable; +} + +static void fun_set_msglevel(struct net_device *netdev, u32 value) +{ + struct funeth_priv *fp = netdev_priv(netdev); + + fp->msg_enable = value; +} + +static int fun_get_regs_len(struct net_device *dev) +{ + return NVME_REG_ACQ + sizeof(u64); +} + +static void fun_get_regs(struct net_device *dev, struct ethtool_regs *regs, + void *buf) +{ + const struct funeth_priv *fp = netdev_priv(dev); + void __iomem *bar = fp->fdev->bar; + + regs->version = 0; + *(u64 *)(buf + NVME_REG_CAP) = readq(bar + NVME_REG_CAP); + *(u32 *)(buf + NVME_REG_VS) = readl(bar + NVME_REG_VS); + *(u32 *)(buf + NVME_REG_INTMS) = readl(bar + NVME_REG_INTMS); + *(u32 *)(buf + NVME_REG_INTMC) = readl(bar + NVME_REG_INTMC); + *(u32 *)(buf + NVME_REG_CC) = readl(bar + NVME_REG_CC); + *(u32 *)(buf + NVME_REG_CSTS) = readl(bar + NVME_REG_CSTS); + *(u32 *)(buf + NVME_REG_AQA) = readl(bar + NVME_REG_AQA); + *(u64 *)(buf + NVME_REG_ASQ) = readq(bar + NVME_REG_ASQ); + *(u64 *)(buf + NVME_REG_ACQ) = readq(bar + NVME_REG_ACQ); +} + +static int fun_get_coalesce(struct net_device *netdev, + struct ethtool_coalesce *coal, + struct kernel_ethtool_coalesce *kcoal, + struct netlink_ext_ack *ext_ack) +{ + const struct funeth_priv *fp = netdev_priv(netdev); + + coal->rx_coalesce_usecs = fp->rx_coal_usec; + coal->rx_max_coalesced_frames = fp->rx_coal_count; + coal->use_adaptive_rx_coalesce = !fp->cq_irq_db; + coal->tx_coalesce_usecs = fp->tx_coal_usec; + coal->tx_max_coalesced_frames = fp->tx_coal_count; + return 0; +} + +static int fun_set_coalesce(struct net_device *netdev, + struct ethtool_coalesce *coal, + struct kernel_ethtool_coalesce *kcoal, + struct netlink_ext_ack *ext_ack) +{ + struct funeth_priv *fp = netdev_priv(netdev); + struct funeth_rxq **rxqs; + unsigned int i, db_val; + + if (coal->rx_coalesce_usecs > FUN_DB_INTCOAL_USEC_M || + coal->rx_max_coalesced_frames > FUN_DB_INTCOAL_ENTRIES_M || + (coal->rx_coalesce_usecs | coal->rx_max_coalesced_frames) == 0 || + coal->tx_coalesce_usecs > FUN_DB_INTCOAL_USEC_M || + coal->tx_max_coalesced_frames > FUN_DB_INTCOAL_ENTRIES_M || + (coal->tx_coalesce_usecs | coal->tx_max_coalesced_frames) == 0) + return -EINVAL; + + /* a timer is required if there's any coalescing */ + if ((coal->rx_max_coalesced_frames > 1 && !coal->rx_coalesce_usecs) || + (coal->tx_max_coalesced_frames > 1 && !coal->tx_coalesce_usecs)) + return -EINVAL; + + fp->rx_coal_usec = coal->rx_coalesce_usecs; + fp->rx_coal_count = coal->rx_max_coalesced_frames; + fp->tx_coal_usec = coal->tx_coalesce_usecs; + fp->tx_coal_count = coal->tx_max_coalesced_frames; + + db_val = FUN_IRQ_CQ_DB(fp->rx_coal_usec, fp->rx_coal_count); + WRITE_ONCE(fp->cq_irq_db, db_val); + + rxqs = rtnl_dereference(fp->rxqs); + if (!rxqs) + return 0; + + for (i = 0; i < netdev->real_num_rx_queues; i++) + WRITE_ONCE(rxqs[i]->irq_db_val, db_val); + + db_val = FUN_IRQ_SQ_DB(fp->tx_coal_usec, fp->tx_coal_count); + for (i = 0; i < netdev->real_num_tx_queues; i++) + WRITE_ONCE(fp->txqs[i]->irq_db_val, db_val); + + return 0; +} + +static void fun_get_channels(struct net_device *netdev, + struct ethtool_channels *chan) +{ + chan->max_rx = netdev->num_rx_queues; + chan->rx_count = netdev->real_num_rx_queues; + + chan->max_tx = netdev->num_tx_queues; + chan->tx_count = netdev->real_num_tx_queues; +} + +static int fun_set_channels(struct net_device *netdev, + struct ethtool_channels *chan) +{ + if (!chan->tx_count || !chan->rx_count) + return -EINVAL; + + if (chan->tx_count == netdev->real_num_tx_queues && + chan->rx_count == netdev->real_num_rx_queues) + return 0; + + if (netif_running(netdev)) + return fun_change_num_queues(netdev, chan->tx_count, + chan->rx_count); + + fun_set_ring_count(netdev, chan->tx_count, chan->rx_count); + return 0; +} + +static void fun_get_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ring, + struct kernel_ethtool_ringparam *kring, + struct netlink_ext_ack *extack) +{ + const struct funeth_priv *fp = netdev_priv(netdev); + unsigned int max_depth = fp->fdev->q_depth; + + /* We size CQs to be twice the RQ depth so max RQ depth is half the + * max queue depth. + */ + ring->rx_max_pending = max_depth / 2; + ring->tx_max_pending = max_depth; + + ring->rx_pending = fp->rq_depth; + ring->tx_pending = fp->sq_depth; + + kring->rx_buf_len = PAGE_SIZE; + kring->cqe_size = FUNETH_CQE_SIZE; +} + +static int fun_set_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ring, + struct kernel_ethtool_ringparam *kring, + struct netlink_ext_ack *extack) +{ + struct funeth_priv *fp = netdev_priv(netdev); + int rc; + + if (ring->rx_mini_pending || ring->rx_jumbo_pending) + return -EINVAL; + + /* queue depths must be powers-of-2 */ + if (!is_power_of_2(ring->rx_pending) || + !is_power_of_2(ring->tx_pending)) + return -EINVAL; + + if (ring->rx_pending < FUNETH_MIN_QDEPTH || + ring->tx_pending < FUNETH_MIN_QDEPTH) + return -EINVAL; + + if (fp->sq_depth == ring->tx_pending && + fp->rq_depth == ring->rx_pending) + return 0; + + if (netif_running(netdev)) { + struct fun_qset req = { + .cq_depth = 2 * ring->rx_pending, + .rq_depth = ring->rx_pending, + .sq_depth = ring->tx_pending + }; + + rc = fun_replace_queues(netdev, &req, extack); + if (rc) + return rc; + } + + fp->sq_depth = ring->tx_pending; + fp->rq_depth = ring->rx_pending; + fp->cq_depth = 2 * fp->rq_depth; + return 0; +} + +static int fun_get_sset_count(struct net_device *dev, int sset) +{ + const struct funeth_priv *fp = netdev_priv(dev); + int n; + + switch (sset) { + case ETH_SS_STATS: + n = (dev->real_num_tx_queues + 1) * ARRAY_SIZE(txq_stat_names) + + (dev->real_num_rx_queues + 1) * ARRAY_SIZE(rxq_stat_names) + + (fp->num_xdpqs + 1) * ARRAY_SIZE(xdpq_stat_names) + + ARRAY_SIZE(tls_stat_names); + if (fp->port_caps & FUN_PORT_CAP_STATS) { + n += ARRAY_SIZE(mac_tx_stat_names) + + ARRAY_SIZE(mac_rx_stat_names); + } + return n; + default: + break; + } + return 0; +} + +static void fun_get_strings(struct net_device *netdev, u32 sset, u8 *data) +{ + const struct funeth_priv *fp = netdev_priv(netdev); + unsigned int i, j; + u8 *p = data; + + switch (sset) { + case ETH_SS_STATS: + if (fp->port_caps & FUN_PORT_CAP_STATS) { + memcpy(p, mac_tx_stat_names, sizeof(mac_tx_stat_names)); + p += sizeof(mac_tx_stat_names); + memcpy(p, mac_rx_stat_names, sizeof(mac_rx_stat_names)); + p += sizeof(mac_rx_stat_names); + } + + for (i = 0; i < netdev->real_num_tx_queues; i++) { + for (j = 0; j < ARRAY_SIZE(txq_stat_names); j++) + ethtool_sprintf(&p, "%s[%u]", txq_stat_names[j], + i); + } + for (j = 0; j < ARRAY_SIZE(txq_stat_names); j++) + ethtool_sprintf(&p, txq_stat_names[j]); + + for (i = 0; i < fp->num_xdpqs; i++) { + for (j = 0; j < ARRAY_SIZE(xdpq_stat_names); j++) + ethtool_sprintf(&p, "%s[%u]", + xdpq_stat_names[j], i); + } + for (j = 0; j < ARRAY_SIZE(xdpq_stat_names); j++) + ethtool_sprintf(&p, xdpq_stat_names[j]); + + for (i = 0; i < netdev->real_num_rx_queues; i++) { + for (j = 0; j < ARRAY_SIZE(rxq_stat_names); j++) + ethtool_sprintf(&p, "%s[%u]", rxq_stat_names[j], + i); + } + for (j = 0; j < ARRAY_SIZE(rxq_stat_names); j++) + ethtool_sprintf(&p, rxq_stat_names[j]); + + for (j = 0; j < ARRAY_SIZE(tls_stat_names); j++) + ethtool_sprintf(&p, tls_stat_names[j]); + break; + default: + break; + } +} + +static u64 *get_mac_stats(const struct funeth_priv *fp, u64 *data) +{ +#define TX_STAT(s) \ + *data++ = be64_to_cpu(fp->stats[PORT_MAC_RX_STATS_MAX + PORT_MAC_TX_##s]) + + TX_STAT(etherStatsOctets); + TX_STAT(etherStatsPkts); + TX_STAT(VLANTransmittedOK); + TX_STAT(ifOutUcastPkts); + TX_STAT(ifOutMulticastPkts); + TX_STAT(ifOutBroadcastPkts); + TX_STAT(ifOutErrors); + TX_STAT(CBFCPAUSEFramesTransmitted_0); + TX_STAT(CBFCPAUSEFramesTransmitted_1); + TX_STAT(CBFCPAUSEFramesTransmitted_2); + TX_STAT(CBFCPAUSEFramesTransmitted_3); + TX_STAT(CBFCPAUSEFramesTransmitted_4); + TX_STAT(CBFCPAUSEFramesTransmitted_5); + TX_STAT(CBFCPAUSEFramesTransmitted_6); + TX_STAT(CBFCPAUSEFramesTransmitted_7); + TX_STAT(CBFCPAUSEFramesTransmitted_8); + TX_STAT(CBFCPAUSEFramesTransmitted_9); + TX_STAT(CBFCPAUSEFramesTransmitted_10); + TX_STAT(CBFCPAUSEFramesTransmitted_11); + TX_STAT(CBFCPAUSEFramesTransmitted_12); + TX_STAT(CBFCPAUSEFramesTransmitted_13); + TX_STAT(CBFCPAUSEFramesTransmitted_14); + TX_STAT(CBFCPAUSEFramesTransmitted_15); + +#define RX_STAT(s) *data++ = be64_to_cpu(fp->stats[PORT_MAC_RX_##s]) + + RX_STAT(etherStatsOctets); + RX_STAT(etherStatsPkts); + RX_STAT(VLANReceivedOK); + RX_STAT(ifInUcastPkts); + RX_STAT(ifInMulticastPkts); + RX_STAT(ifInBroadcastPkts); + RX_STAT(etherStatsDropEvents); + RX_STAT(ifInErrors); + RX_STAT(aAlignmentErrors); + RX_STAT(CBFCPAUSEFramesReceived_0); + RX_STAT(CBFCPAUSEFramesReceived_1); + RX_STAT(CBFCPAUSEFramesReceived_2); + RX_STAT(CBFCPAUSEFramesReceived_3); + RX_STAT(CBFCPAUSEFramesReceived_4); + RX_STAT(CBFCPAUSEFramesReceived_5); + RX_STAT(CBFCPAUSEFramesReceived_6); + RX_STAT(CBFCPAUSEFramesReceived_7); + RX_STAT(CBFCPAUSEFramesReceived_8); + RX_STAT(CBFCPAUSEFramesReceived_9); + RX_STAT(CBFCPAUSEFramesReceived_10); + RX_STAT(CBFCPAUSEFramesReceived_11); + RX_STAT(CBFCPAUSEFramesReceived_12); + RX_STAT(CBFCPAUSEFramesReceived_13); + RX_STAT(CBFCPAUSEFramesReceived_14); + RX_STAT(CBFCPAUSEFramesReceived_15); + + return data; + +#undef TX_STAT +#undef RX_STAT +} + +static void fun_get_ethtool_stats(struct net_device *netdev, + struct ethtool_stats *stats, u64 *data) +{ + const struct funeth_priv *fp = netdev_priv(netdev); + struct funeth_txq_stats txs; + struct funeth_rxq_stats rxs; + struct funeth_txq **xdpqs; + struct funeth_rxq **rxqs; + unsigned int i, start; + u64 *totals, *tot; + + if (fp->port_caps & FUN_PORT_CAP_STATS) + data = get_mac_stats(fp, data); + + rxqs = rtnl_dereference(fp->rxqs); + if (!rxqs) + return; + +#define ADD_STAT(cnt) do { \ + *data = (cnt); *tot++ += *data++; \ +} while (0) + + /* Tx queues */ + totals = data + netdev->real_num_tx_queues * ARRAY_SIZE(txq_stat_names); + + for (i = 0; i < netdev->real_num_tx_queues; i++) { + tot = totals; + + FUN_QSTAT_READ(fp->txqs[i], start, txs); + + ADD_STAT(txs.tx_pkts); + ADD_STAT(txs.tx_bytes); + ADD_STAT(txs.tx_cso); + ADD_STAT(txs.tx_tso); + ADD_STAT(txs.tx_encap_tso); + ADD_STAT(txs.tx_uso); + ADD_STAT(txs.tx_more); + ADD_STAT(txs.tx_nstops); + ADD_STAT(txs.tx_nrestarts); + ADD_STAT(txs.tx_map_err); + ADD_STAT(txs.tx_tls_pkts); + ADD_STAT(txs.tx_tls_bytes); + ADD_STAT(txs.tx_tls_fallback); + ADD_STAT(txs.tx_tls_drops); + } + data += ARRAY_SIZE(txq_stat_names); + + /* XDP Tx queues */ + xdpqs = rtnl_dereference(fp->xdpqs); + totals = data + fp->num_xdpqs * ARRAY_SIZE(xdpq_stat_names); + + for (i = 0; i < fp->num_xdpqs; i++) { + tot = totals; + + FUN_QSTAT_READ(xdpqs[i], start, txs); + + ADD_STAT(txs.tx_pkts); + ADD_STAT(txs.tx_bytes); + ADD_STAT(txs.tx_xdp_full); + ADD_STAT(txs.tx_map_err); + } + data += ARRAY_SIZE(xdpq_stat_names); + + /* Rx queues */ + totals = data + netdev->real_num_rx_queues * ARRAY_SIZE(rxq_stat_names); + + for (i = 0; i < netdev->real_num_rx_queues; i++) { + tot = totals; + + FUN_QSTAT_READ(rxqs[i], start, rxs); + + ADD_STAT(rxs.rx_pkts); + ADD_STAT(rxs.rx_bytes); + ADD_STAT(rxs.rx_cso); + ADD_STAT(rxs.gro_pkts); + ADD_STAT(rxs.gro_merged); + ADD_STAT(rxs.xdp_tx); + ADD_STAT(rxs.xdp_redir); + ADD_STAT(rxs.xdp_drops); + ADD_STAT(rxs.rx_bufs); + ADD_STAT(rxs.rx_page_alloc); + ADD_STAT(rxs.rx_mem_drops + rxs.xdp_err); + ADD_STAT(rxs.rx_budget); + ADD_STAT(rxs.rx_map_err); + } + data += ARRAY_SIZE(rxq_stat_names); +#undef ADD_STAT + + *data++ = atomic64_read(&fp->tx_tls_add); + *data++ = atomic64_read(&fp->tx_tls_del); + *data++ = atomic64_read(&fp->tx_tls_resync); +} + +#define RX_STAT(fp, s) be64_to_cpu((fp)->stats[PORT_MAC_RX_##s]) +#define TX_STAT(fp, s) \ + be64_to_cpu((fp)->stats[PORT_MAC_RX_STATS_MAX + PORT_MAC_TX_##s]) +#define FEC_STAT(fp, s) \ + be64_to_cpu((fp)->stats[PORT_MAC_RX_STATS_MAX + \ + PORT_MAC_TX_STATS_MAX + PORT_MAC_FEC_##s]) + +static void fun_get_pause_stats(struct net_device *netdev, + struct ethtool_pause_stats *stats) +{ + const struct funeth_priv *fp = netdev_priv(netdev); + + if (!(fp->port_caps & FUN_PORT_CAP_STATS)) + return; + + stats->tx_pause_frames = TX_STAT(fp, aPAUSEMACCtrlFramesTransmitted); + stats->rx_pause_frames = RX_STAT(fp, aPAUSEMACCtrlFramesReceived); +} + +static void fun_get_802_3_stats(struct net_device *netdev, + struct ethtool_eth_mac_stats *stats) +{ + const struct funeth_priv *fp = netdev_priv(netdev); + + if (!(fp->port_caps & FUN_PORT_CAP_STATS)) + return; + + stats->FramesTransmittedOK = TX_STAT(fp, aFramesTransmittedOK); + stats->FramesReceivedOK = RX_STAT(fp, aFramesReceivedOK); + stats->FrameCheckSequenceErrors = RX_STAT(fp, aFrameCheckSequenceErrors); + stats->OctetsTransmittedOK = TX_STAT(fp, OctetsTransmittedOK); + stats->OctetsReceivedOK = RX_STAT(fp, OctetsReceivedOK); + stats->InRangeLengthErrors = RX_STAT(fp, aInRangeLengthErrors); + stats->FrameTooLongErrors = RX_STAT(fp, aFrameTooLongErrors); +} + +static void fun_get_802_3_ctrl_stats(struct net_device *netdev, + struct ethtool_eth_ctrl_stats *stats) +{ + const struct funeth_priv *fp = netdev_priv(netdev); + + if (!(fp->port_caps & FUN_PORT_CAP_STATS)) + return; + + stats->MACControlFramesTransmitted = TX_STAT(fp, MACControlFramesTransmitted); + stats->MACControlFramesReceived = RX_STAT(fp, MACControlFramesReceived); +} + +static void fun_get_rmon_stats(struct net_device *netdev, + struct ethtool_rmon_stats *stats, + const struct ethtool_rmon_hist_range **ranges) +{ + static const struct ethtool_rmon_hist_range rmon_ranges[] = { + { 64, 64 }, + { 65, 127 }, + { 128, 255 }, + { 256, 511 }, + { 512, 1023 }, + { 1024, 1518 }, + { 1519, 32767 }, + {} + }; + + const struct funeth_priv *fp = netdev_priv(netdev); + + if (!(fp->port_caps & FUN_PORT_CAP_STATS)) + return; + + stats->undersize_pkts = RX_STAT(fp, etherStatsUndersizePkts); + stats->oversize_pkts = RX_STAT(fp, etherStatsOversizePkts); + stats->fragments = RX_STAT(fp, etherStatsFragments); + stats->jabbers = RX_STAT(fp, etherStatsJabbers); + + stats->hist[0] = RX_STAT(fp, etherStatsPkts64Octets); + stats->hist[1] = RX_STAT(fp, etherStatsPkts65to127Octets); + stats->hist[2] = RX_STAT(fp, etherStatsPkts128to255Octets); + stats->hist[3] = RX_STAT(fp, etherStatsPkts256to511Octets); + stats->hist[4] = RX_STAT(fp, etherStatsPkts512to1023Octets); + stats->hist[5] = RX_STAT(fp, etherStatsPkts1024to1518Octets); + stats->hist[6] = RX_STAT(fp, etherStatsPkts1519toMaxOctets); + + stats->hist_tx[0] = TX_STAT(fp, etherStatsPkts64Octets); + stats->hist_tx[1] = TX_STAT(fp, etherStatsPkts65to127Octets); + stats->hist_tx[2] = TX_STAT(fp, etherStatsPkts128to255Octets); + stats->hist_tx[3] = TX_STAT(fp, etherStatsPkts256to511Octets); + stats->hist_tx[4] = TX_STAT(fp, etherStatsPkts512to1023Octets); + stats->hist_tx[5] = TX_STAT(fp, etherStatsPkts1024to1518Octets); + stats->hist_tx[6] = TX_STAT(fp, etherStatsPkts1519toMaxOctets); + + *ranges = rmon_ranges; +} + +static void fun_get_fec_stats(struct net_device *netdev, + struct ethtool_fec_stats *stats) +{ + const struct funeth_priv *fp = netdev_priv(netdev); + + if (!(fp->port_caps & FUN_PORT_CAP_STATS)) + return; + + stats->corrected_blocks.total = FEC_STAT(fp, Correctable); + stats->uncorrectable_blocks.total = FEC_STAT(fp, Uncorrectable); +} + +#undef RX_STAT +#undef TX_STAT +#undef FEC_STAT + +static int fun_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd, + u32 *rule_locs) +{ + switch (cmd->cmd) { + case ETHTOOL_GRXRINGS: + cmd->data = netdev->real_num_rx_queues; + return 0; + default: + break; + } + return -EOPNOTSUPP; +} + +static int fun_set_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *info) +{ + return 0; +} + +static u32 fun_get_rxfh_indir_size(struct net_device *netdev) +{ + const struct funeth_priv *fp = netdev_priv(netdev); + + return fp->indir_table_nentries; +} + +static u32 fun_get_rxfh_key_size(struct net_device *netdev) +{ + const struct funeth_priv *fp = netdev_priv(netdev); + + return sizeof(fp->rss_key); +} + +static int fun_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, + u8 *hfunc) +{ + const struct funeth_priv *fp = netdev_priv(netdev); + + if (!fp->rss_cfg) + return -EOPNOTSUPP; + + if (indir) + memcpy(indir, fp->indir_table, + sizeof(u32) * fp->indir_table_nentries); + + if (key) + memcpy(key, fp->rss_key, sizeof(fp->rss_key)); + + if (hfunc) + *hfunc = fp->hash_algo == FUN_ETH_RSS_ALG_TOEPLITZ ? + ETH_RSS_HASH_TOP : ETH_RSS_HASH_CRC32; + + return 0; +} + +static int fun_set_rxfh(struct net_device *netdev, const u32 *indir, + const u8 *key, const u8 hfunc) +{ + struct funeth_priv *fp = netdev_priv(netdev); + const u32 *rss_indir = indir ? indir : fp->indir_table; + const u8 *rss_key = key ? key : fp->rss_key; + enum fun_eth_hash_alg algo; + + if (!fp->rss_cfg) + return -EOPNOTSUPP; + + if (hfunc == ETH_RSS_HASH_NO_CHANGE) + algo = fp->hash_algo; + else if (hfunc == ETH_RSS_HASH_CRC32) + algo = FUN_ETH_RSS_ALG_CRC32; + else if (hfunc == ETH_RSS_HASH_TOP) + algo = FUN_ETH_RSS_ALG_TOEPLITZ; + else + return -EINVAL; + + /* If the port is enabled try to reconfigure RSS and keep the new + * settings if successful. If it is down we update the RSS settings + * and apply them at the next UP time. + */ + if (netif_running(netdev)) { + int rc = fun_config_rss(netdev, algo, rss_key, rss_indir, + FUN_ADMIN_SUBOP_MODIFY); + if (rc) + return rc; + } + + fp->hash_algo = algo; + if (key) + memcpy(fp->rss_key, key, sizeof(fp->rss_key)); + if (indir) + memcpy(fp->indir_table, indir, + sizeof(u32) * fp->indir_table_nentries); + return 0; +} + +static int fun_get_ts_info(struct net_device *netdev, + struct ethtool_ts_info *info) +{ + info->so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE | + SOF_TIMESTAMPING_RX_HARDWARE | + SOF_TIMESTAMPING_TX_SOFTWARE | + SOF_TIMESTAMPING_SOFTWARE | + SOF_TIMESTAMPING_RAW_HARDWARE; + info->phc_index = -1; + info->tx_types = BIT(HWTSTAMP_TX_OFF); + info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) | BIT(HWTSTAMP_FILTER_ALL); + return 0; +} + +static unsigned int to_ethtool_fec(unsigned int fun_fec) +{ + unsigned int fec = 0; + + if (fun_fec == FUN_PORT_FEC_NA) + fec |= ETHTOOL_FEC_NONE; + if (fun_fec & FUN_PORT_FEC_OFF) + fec |= ETHTOOL_FEC_OFF; + if (fun_fec & FUN_PORT_FEC_RS) + fec |= ETHTOOL_FEC_RS; + if (fun_fec & FUN_PORT_FEC_FC) + fec |= ETHTOOL_FEC_BASER; + if (fun_fec & FUN_PORT_FEC_AUTO) + fec |= ETHTOOL_FEC_AUTO; + return fec; +} + +static int fun_get_fecparam(struct net_device *netdev, + struct ethtool_fecparam *fec) +{ + struct funeth_priv *fp = netdev_priv(netdev); + u64 fec_data; + int rc; + + rc = fun_port_read_cmd(fp, FUN_ADMIN_PORT_KEY_FEC, &fec_data); + if (rc) + return rc; + + fec->active_fec = to_ethtool_fec(fec_data & 0xff); + fec->fec = to_ethtool_fec(fec_data >> 8); + return 0; +} + +static int fun_set_fecparam(struct net_device *netdev, + struct ethtool_fecparam *fec) +{ + struct funeth_priv *fp = netdev_priv(netdev); + u64 fec_mode; + + switch (fec->fec) { + case ETHTOOL_FEC_AUTO: + fec_mode = FUN_PORT_FEC_AUTO; + break; + case ETHTOOL_FEC_OFF: + if (!(fp->port_caps & FUN_PORT_CAP_FEC_NONE)) + return -EINVAL; + fec_mode = FUN_PORT_FEC_OFF; + break; + case ETHTOOL_FEC_BASER: + if (!(fp->port_caps & FUN_PORT_CAP_FEC_FC)) + return -EINVAL; + fec_mode = FUN_PORT_FEC_FC; + break; + case ETHTOOL_FEC_RS: + if (!(fp->port_caps & FUN_PORT_CAP_FEC_RS)) + return -EINVAL; + fec_mode = FUN_PORT_FEC_RS; + break; + default: + return -EINVAL; + } + + return fun_port_write_cmd(fp, FUN_ADMIN_PORT_KEY_FEC, fec_mode); +} + +static int fun_get_port_module_page(struct net_device *netdev, + const struct ethtool_module_eeprom *req, + struct netlink_ext_ack *extack) +{ + union { + struct fun_admin_port_req req; + struct fun_admin_port_xcvr_read_rsp rsp; + } cmd; + struct funeth_priv *fp = netdev_priv(netdev); + int rc; + + if (fp->port_caps & FUN_PORT_CAP_VPORT) { + NL_SET_ERR_MSG_MOD(extack, + "Specified port is virtual, only physical ports have modules"); + return -EOPNOTSUPP; + } + + cmd.req.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_PORT, + sizeof(cmd.req)); + cmd.req.u.xcvr_read = + FUN_ADMIN_PORT_XCVR_READ_REQ_INIT(0, netdev->dev_port, + req->bank, req->page, + req->offset, req->length, + req->i2c_address); + rc = fun_submit_admin_sync_cmd(fp->fdev, &cmd.req.common, &cmd.rsp, + sizeof(cmd.rsp), 0); + if (rc) + return rc; + + memcpy(req->data, cmd.rsp.data, req->length); + return req->length; +} + +static const struct ethtool_ops fun_ethtool_ops = { + .supported_coalesce_params = ETHTOOL_COALESCE_USECS | + ETHTOOL_COALESCE_MAX_FRAMES, + .get_link_ksettings = fun_get_link_ksettings, + .set_link_ksettings = fun_set_link_ksettings, + .set_phys_id = fun_set_phys_id, + .get_drvinfo = fun_get_drvinfo, + .get_msglevel = fun_get_msglevel, + .set_msglevel = fun_set_msglevel, + .get_regs_len = fun_get_regs_len, + .get_regs = fun_get_regs, + .get_link = ethtool_op_get_link, + .get_coalesce = fun_get_coalesce, + .set_coalesce = fun_set_coalesce, + .get_ts_info = fun_get_ts_info, + .get_ringparam = fun_get_ringparam, + .set_ringparam = fun_set_ringparam, + .get_sset_count = fun_get_sset_count, + .get_strings = fun_get_strings, + .get_ethtool_stats = fun_get_ethtool_stats, + .get_rxnfc = fun_get_rxnfc, + .set_rxnfc = fun_set_rxnfc, + .get_rxfh_indir_size = fun_get_rxfh_indir_size, + .get_rxfh_key_size = fun_get_rxfh_key_size, + .get_rxfh = fun_get_rxfh, + .set_rxfh = fun_set_rxfh, + .get_channels = fun_get_channels, + .set_channels = fun_set_channels, + .get_fecparam = fun_get_fecparam, + .set_fecparam = fun_set_fecparam, + .get_pauseparam = fun_get_pauseparam, + .set_pauseparam = fun_set_pauseparam, + .nway_reset = fun_restart_an, + .get_pause_stats = fun_get_pause_stats, + .get_fec_stats = fun_get_fec_stats, + .get_eth_mac_stats = fun_get_802_3_stats, + .get_eth_ctrl_stats = fun_get_802_3_ctrl_stats, + .get_rmon_stats = fun_get_rmon_stats, + .get_module_eeprom_by_page = fun_get_port_module_page, +}; + +void fun_set_ethtool_ops(struct net_device *netdev) +{ + netdev->ethtool_ops = &fun_ethtool_ops; +} diff --git a/drivers/net/ethernet/fungible/funeth/funeth_ktls.c b/drivers/net/ethernet/fungible/funeth/funeth_ktls.c new file mode 100644 index 000000000..f871def70 --- /dev/null +++ b/drivers/net/ethernet/fungible/funeth/funeth_ktls.c @@ -0,0 +1,155 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) + +#include "funeth.h" +#include "funeth_ktls.h" + +static int fun_admin_ktls_create(struct funeth_priv *fp, unsigned int id) +{ + struct fun_admin_ktls_create_req req = { + .common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_KTLS, + sizeof(req)), + .subop = FUN_ADMIN_SUBOP_CREATE, + .id = cpu_to_be32(id), + }; + + return fun_submit_admin_sync_cmd(fp->fdev, &req.common, NULL, 0, 0); +} + +static int fun_ktls_add(struct net_device *netdev, struct sock *sk, + enum tls_offload_ctx_dir direction, + struct tls_crypto_info *crypto_info, + u32 start_offload_tcp_sn) +{ + struct funeth_priv *fp = netdev_priv(netdev); + struct fun_admin_ktls_modify_req req = { + .common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_KTLS, + sizeof(req)), + .subop = FUN_ADMIN_SUBOP_MODIFY, + .id = cpu_to_be32(fp->ktls_id), + .tcp_seq = cpu_to_be32(start_offload_tcp_sn), + }; + struct fun_admin_ktls_modify_rsp rsp; + struct fun_ktls_tx_ctx *tx_ctx; + int rc; + + if (direction != TLS_OFFLOAD_CTX_DIR_TX) + return -EOPNOTSUPP; + + if (crypto_info->version == TLS_1_2_VERSION) + req.version = FUN_KTLS_TLSV2; + else + return -EOPNOTSUPP; + + switch (crypto_info->cipher_type) { + case TLS_CIPHER_AES_GCM_128: { + struct tls12_crypto_info_aes_gcm_128 *c = (void *)crypto_info; + + req.cipher = FUN_KTLS_CIPHER_AES_GCM_128; + memcpy(req.key, c->key, sizeof(c->key)); + memcpy(req.iv, c->iv, sizeof(c->iv)); + memcpy(req.salt, c->salt, sizeof(c->salt)); + memcpy(req.record_seq, c->rec_seq, sizeof(c->rec_seq)); + break; + } + default: + return -EOPNOTSUPP; + } + + rc = fun_submit_admin_sync_cmd(fp->fdev, &req.common, &rsp, + sizeof(rsp), 0); + memzero_explicit(&req, sizeof(req)); + if (rc) + return rc; + + tx_ctx = tls_driver_ctx(sk, direction); + tx_ctx->tlsid = rsp.tlsid; + tx_ctx->next_seq = start_offload_tcp_sn; + atomic64_inc(&fp->tx_tls_add); + return 0; +} + +static void fun_ktls_del(struct net_device *netdev, + struct tls_context *tls_ctx, + enum tls_offload_ctx_dir direction) +{ + struct funeth_priv *fp = netdev_priv(netdev); + struct fun_admin_ktls_modify_req req; + struct fun_ktls_tx_ctx *tx_ctx; + + if (direction != TLS_OFFLOAD_CTX_DIR_TX) + return; + + tx_ctx = __tls_driver_ctx(tls_ctx, direction); + + req.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_KTLS, + offsetof(struct fun_admin_ktls_modify_req, tcp_seq)); + req.subop = FUN_ADMIN_SUBOP_MODIFY; + req.flags = cpu_to_be16(FUN_KTLS_MODIFY_REMOVE); + req.id = cpu_to_be32(fp->ktls_id); + req.tlsid = tx_ctx->tlsid; + + fun_submit_admin_sync_cmd(fp->fdev, &req.common, NULL, 0, 0); + atomic64_inc(&fp->tx_tls_del); +} + +static int fun_ktls_resync(struct net_device *netdev, struct sock *sk, u32 seq, + u8 *rcd_sn, enum tls_offload_ctx_dir direction) +{ + struct funeth_priv *fp = netdev_priv(netdev); + struct fun_admin_ktls_modify_req req; + struct fun_ktls_tx_ctx *tx_ctx; + int rc; + + if (direction != TLS_OFFLOAD_CTX_DIR_TX) + return -EOPNOTSUPP; + + tx_ctx = tls_driver_ctx(sk, direction); + + req.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_KTLS, + offsetof(struct fun_admin_ktls_modify_req, key)); + req.subop = FUN_ADMIN_SUBOP_MODIFY; + req.flags = 0; + req.id = cpu_to_be32(fp->ktls_id); + req.tlsid = tx_ctx->tlsid; + req.tcp_seq = cpu_to_be32(seq); + req.version = 0; + req.cipher = 0; + memcpy(req.record_seq, rcd_sn, sizeof(req.record_seq)); + + atomic64_inc(&fp->tx_tls_resync); + rc = fun_submit_admin_sync_cmd(fp->fdev, &req.common, NULL, 0, 0); + if (!rc) + tx_ctx->next_seq = seq; + return rc; +} + +static const struct tlsdev_ops fun_ktls_ops = { + .tls_dev_add = fun_ktls_add, + .tls_dev_del = fun_ktls_del, + .tls_dev_resync = fun_ktls_resync, +}; + +int fun_ktls_init(struct net_device *netdev) +{ + struct funeth_priv *fp = netdev_priv(netdev); + int rc; + + rc = fun_admin_ktls_create(fp, netdev->dev_port); + if (rc) + return rc; + + fp->ktls_id = netdev->dev_port; + netdev->tlsdev_ops = &fun_ktls_ops; + netdev->hw_features |= NETIF_F_HW_TLS_TX; + netdev->features |= NETIF_F_HW_TLS_TX; + return 0; +} + +void fun_ktls_cleanup(struct funeth_priv *fp) +{ + if (fp->ktls_id == FUN_HCI_ID_INVALID) + return; + + fun_res_destroy(fp->fdev, FUN_ADMIN_OP_KTLS, 0, fp->ktls_id); + fp->ktls_id = FUN_HCI_ID_INVALID; +} diff --git a/drivers/net/ethernet/fungible/funeth/funeth_ktls.h b/drivers/net/ethernet/fungible/funeth/funeth_ktls.h new file mode 100644 index 000000000..9d6f2141a --- /dev/null +++ b/drivers/net/ethernet/fungible/funeth/funeth_ktls.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ + +#ifndef _FUN_KTLS_H +#define _FUN_KTLS_H + +#include <net/tls.h> + +struct funeth_priv; + +struct fun_ktls_tx_ctx { + __be64 tlsid; + u32 next_seq; +}; + +#if IS_ENABLED(CONFIG_TLS_DEVICE) +int fun_ktls_init(struct net_device *netdev); +void fun_ktls_cleanup(struct funeth_priv *fp); + +#else + +static inline void fun_ktls_init(struct net_device *netdev) +{ +} + +static inline void fun_ktls_cleanup(struct funeth_priv *fp) +{ +} +#endif + +#endif /* _FUN_KTLS_H */ diff --git a/drivers/net/ethernet/fungible/funeth/funeth_main.c b/drivers/net/ethernet/fungible/funeth/funeth_main.c new file mode 100644 index 000000000..095f51c4d --- /dev/null +++ b/drivers/net/ethernet/fungible/funeth/funeth_main.c @@ -0,0 +1,2086 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) + +#include <linux/bpf.h> +#include <linux/crash_dump.h> +#include <linux/etherdevice.h> +#include <linux/ethtool.h> +#include <linux/filter.h> +#include <linux/idr.h> +#include <linux/if_vlan.h> +#include <linux/module.h> +#include <linux/netdevice.h> +#include <linux/pci.h> +#include <linux/rtnetlink.h> +#include <linux/inetdevice.h> + +#include "funeth.h" +#include "funeth_devlink.h" +#include "funeth_ktls.h" +#include "fun_port.h" +#include "fun_queue.h" +#include "funeth_txrx.h" + +#define ADMIN_SQ_DEPTH 32 +#define ADMIN_CQ_DEPTH 64 +#define ADMIN_RQ_DEPTH 16 + +/* Default number of Tx/Rx queues. */ +#define FUN_DFLT_QUEUES 16U + +enum { + FUN_SERV_RES_CHANGE = FUN_SERV_FIRST_AVAIL, + FUN_SERV_DEL_PORTS, +}; + +static const struct pci_device_id funeth_id_table[] = { + { PCI_VDEVICE(FUNGIBLE, 0x0101) }, + { PCI_VDEVICE(FUNGIBLE, 0x0181) }, + { 0, } +}; + +/* Issue a port write admin command with @n key/value pairs. */ +static int fun_port_write_cmds(struct funeth_priv *fp, unsigned int n, + const int *keys, const u64 *data) +{ + unsigned int cmd_size, i; + union { + struct fun_admin_port_req req; + struct fun_admin_port_rsp rsp; + u8 v[ADMIN_SQE_SIZE]; + } cmd; + + cmd_size = offsetof(struct fun_admin_port_req, u.write.write48) + + n * sizeof(struct fun_admin_write48_req); + if (cmd_size > sizeof(cmd) || cmd_size > ADMIN_RSP_MAX_LEN) + return -EINVAL; + + cmd.req.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_PORT, + cmd_size); + cmd.req.u.write = + FUN_ADMIN_PORT_WRITE_REQ_INIT(FUN_ADMIN_SUBOP_WRITE, 0, + fp->netdev->dev_port); + for (i = 0; i < n; i++) + cmd.req.u.write.write48[i] = + FUN_ADMIN_WRITE48_REQ_INIT(keys[i], data[i]); + + return fun_submit_admin_sync_cmd(fp->fdev, &cmd.req.common, + &cmd.rsp, cmd_size, 0); +} + +int fun_port_write_cmd(struct funeth_priv *fp, int key, u64 data) +{ + return fun_port_write_cmds(fp, 1, &key, &data); +} + +/* Issue a port read admin command with @n key/value pairs. */ +static int fun_port_read_cmds(struct funeth_priv *fp, unsigned int n, + const int *keys, u64 *data) +{ + const struct fun_admin_read48_rsp *r48rsp; + unsigned int cmd_size, i; + int rc; + union { + struct fun_admin_port_req req; + struct fun_admin_port_rsp rsp; + u8 v[ADMIN_SQE_SIZE]; + } cmd; + + cmd_size = offsetof(struct fun_admin_port_req, u.read.read48) + + n * sizeof(struct fun_admin_read48_req); + if (cmd_size > sizeof(cmd) || cmd_size > ADMIN_RSP_MAX_LEN) + return -EINVAL; + + cmd.req.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_PORT, + cmd_size); + cmd.req.u.read = + FUN_ADMIN_PORT_READ_REQ_INIT(FUN_ADMIN_SUBOP_READ, 0, + fp->netdev->dev_port); + for (i = 0; i < n; i++) + cmd.req.u.read.read48[i] = FUN_ADMIN_READ48_REQ_INIT(keys[i]); + + rc = fun_submit_admin_sync_cmd(fp->fdev, &cmd.req.common, + &cmd.rsp, cmd_size, 0); + if (rc) + return rc; + + for (r48rsp = cmd.rsp.u.read.read48, i = 0; i < n; i++, r48rsp++) { + data[i] = FUN_ADMIN_READ48_RSP_DATA_G(r48rsp->key_to_data); + dev_dbg(fp->fdev->dev, + "port_read_rsp lport=%u (key_to_data=0x%llx) key=%d data:%lld retval:%lld", + fp->lport, r48rsp->key_to_data, keys[i], data[i], + FUN_ADMIN_READ48_RSP_RET_G(r48rsp->key_to_data)); + } + return 0; +} + +int fun_port_read_cmd(struct funeth_priv *fp, int key, u64 *data) +{ + return fun_port_read_cmds(fp, 1, &key, data); +} + +static void fun_report_link(struct net_device *netdev) +{ + if (netif_carrier_ok(netdev)) { + const struct funeth_priv *fp = netdev_priv(netdev); + const char *fec = "", *pause = ""; + int speed = fp->link_speed; + char unit = 'M'; + + if (fp->link_speed >= SPEED_1000) { + speed /= 1000; + unit = 'G'; + } + + if (fp->active_fec & FUN_PORT_FEC_RS) + fec = ", RS-FEC"; + else if (fp->active_fec & FUN_PORT_FEC_FC) + fec = ", BASER-FEC"; + + if ((fp->active_fc & FUN_PORT_CAP_PAUSE_MASK) == FUN_PORT_CAP_PAUSE_MASK) + pause = ", Tx/Rx PAUSE"; + else if (fp->active_fc & FUN_PORT_CAP_RX_PAUSE) + pause = ", Rx PAUSE"; + else if (fp->active_fc & FUN_PORT_CAP_TX_PAUSE) + pause = ", Tx PAUSE"; + + netdev_info(netdev, "Link up at %d %cb/s full-duplex%s%s\n", + speed, unit, pause, fec); + } else { + netdev_info(netdev, "Link down\n"); + } +} + +static int fun_adi_write(struct fun_dev *fdev, enum fun_admin_adi_attr attr, + unsigned int adi_id, const struct fun_adi_param *param) +{ + struct fun_admin_adi_req req = { + .common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_ADI, + sizeof(req)), + .u.write.subop = FUN_ADMIN_SUBOP_WRITE, + .u.write.attribute = attr, + .u.write.id = cpu_to_be32(adi_id), + .u.write.param = *param + }; + + return fun_submit_admin_sync_cmd(fdev, &req.common, NULL, 0, 0); +} + +/* Configure RSS for the given port. @op determines whether a new RSS context + * is to be created or whether an existing one should be reconfigured. The + * remaining parameters specify the hashing algorithm, key, and indirection + * table. + * + * This initiates packet delivery to the Rx queues set in the indirection + * table. + */ +int fun_config_rss(struct net_device *dev, int algo, const u8 *key, + const u32 *qtable, u8 op) +{ + struct funeth_priv *fp = netdev_priv(dev); + unsigned int table_len = fp->indir_table_nentries; + unsigned int len = FUN_ETH_RSS_MAX_KEY_SIZE + sizeof(u32) * table_len; + struct funeth_rxq **rxqs = rtnl_dereference(fp->rxqs); + union { + struct { + struct fun_admin_rss_req req; + struct fun_dataop_gl gl; + }; + struct fun_admin_generic_create_rsp rsp; + } cmd; + __be32 *indir_tab; + u16 flags; + int rc; + + if (op != FUN_ADMIN_SUBOP_CREATE && fp->rss_hw_id == FUN_HCI_ID_INVALID) + return -EINVAL; + + flags = op == FUN_ADMIN_SUBOP_CREATE ? + FUN_ADMIN_RES_CREATE_FLAG_ALLOCATOR : 0; + cmd.req.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_RSS, + sizeof(cmd)); + cmd.req.u.create = + FUN_ADMIN_RSS_CREATE_REQ_INIT(op, flags, fp->rss_hw_id, + dev->dev_port, algo, + FUN_ETH_RSS_MAX_KEY_SIZE, + table_len, 0, + FUN_ETH_RSS_MAX_KEY_SIZE); + cmd.req.u.create.dataop = FUN_DATAOP_HDR_INIT(1, 0, 1, 0, len); + fun_dataop_gl_init(&cmd.gl, 0, 0, len, fp->rss_dma_addr); + + /* write the key and indirection table into the RSS DMA area */ + memcpy(fp->rss_cfg, key, FUN_ETH_RSS_MAX_KEY_SIZE); + indir_tab = fp->rss_cfg + FUN_ETH_RSS_MAX_KEY_SIZE; + for (rc = 0; rc < table_len; rc++) + *indir_tab++ = cpu_to_be32(rxqs[*qtable++]->hw_cqid); + + rc = fun_submit_admin_sync_cmd(fp->fdev, &cmd.req.common, + &cmd.rsp, sizeof(cmd.rsp), 0); + if (!rc && op == FUN_ADMIN_SUBOP_CREATE) + fp->rss_hw_id = be32_to_cpu(cmd.rsp.id); + return rc; +} + +/* Destroy the HW RSS conntext associated with the given port. This also stops + * all packet delivery to our Rx queues. + */ +static void fun_destroy_rss(struct funeth_priv *fp) +{ + if (fp->rss_hw_id != FUN_HCI_ID_INVALID) { + fun_res_destroy(fp->fdev, FUN_ADMIN_OP_RSS, 0, fp->rss_hw_id); + fp->rss_hw_id = FUN_HCI_ID_INVALID; + } +} + +static void fun_irq_aff_notify(struct irq_affinity_notify *notify, + const cpumask_t *mask) +{ + struct fun_irq *p = container_of(notify, struct fun_irq, aff_notify); + + cpumask_copy(&p->affinity_mask, mask); +} + +static void fun_irq_aff_release(struct kref __always_unused *ref) +{ +} + +/* Allocate an IRQ structure, assign an MSI-X index and initial affinity to it, + * and add it to the IRQ XArray. + */ +static struct fun_irq *fun_alloc_qirq(struct funeth_priv *fp, unsigned int idx, + int node, unsigned int xa_idx_offset) +{ + struct fun_irq *irq; + int cpu, res; + + cpu = cpumask_local_spread(idx, node); + node = cpu_to_mem(cpu); + + irq = kzalloc_node(sizeof(*irq), GFP_KERNEL, node); + if (!irq) + return ERR_PTR(-ENOMEM); + + res = fun_reserve_irqs(fp->fdev, 1, &irq->irq_idx); + if (res != 1) + goto free_irq; + + res = xa_insert(&fp->irqs, idx + xa_idx_offset, irq, GFP_KERNEL); + if (res) + goto release_irq; + + irq->irq = pci_irq_vector(fp->pdev, irq->irq_idx); + cpumask_set_cpu(cpu, &irq->affinity_mask); + irq->aff_notify.notify = fun_irq_aff_notify; + irq->aff_notify.release = fun_irq_aff_release; + irq->state = FUN_IRQ_INIT; + return irq; + +release_irq: + fun_release_irqs(fp->fdev, 1, &irq->irq_idx); +free_irq: + kfree(irq); + return ERR_PTR(res); +} + +static void fun_free_qirq(struct funeth_priv *fp, struct fun_irq *irq) +{ + netif_napi_del(&irq->napi); + fun_release_irqs(fp->fdev, 1, &irq->irq_idx); + kfree(irq); +} + +/* Release the IRQs reserved for Tx/Rx queues that aren't being used. */ +static void fun_prune_queue_irqs(struct net_device *dev) +{ + struct funeth_priv *fp = netdev_priv(dev); + unsigned int nreleased = 0; + struct fun_irq *irq; + unsigned long idx; + + xa_for_each(&fp->irqs, idx, irq) { + if (irq->txq || irq->rxq) /* skip those in use */ + continue; + + xa_erase(&fp->irqs, idx); + fun_free_qirq(fp, irq); + nreleased++; + if (idx < fp->rx_irq_ofst) + fp->num_tx_irqs--; + else + fp->num_rx_irqs--; + } + netif_info(fp, intr, dev, "Released %u queue IRQs\n", nreleased); +} + +/* Reserve IRQs, one per queue, to acommodate the requested queue numbers @ntx + * and @nrx. IRQs are added incrementally to those we already have. + * We hold on to allocated IRQs until garbage collection of unused IRQs is + * separately requested. + */ +static int fun_alloc_queue_irqs(struct net_device *dev, unsigned int ntx, + unsigned int nrx) +{ + struct funeth_priv *fp = netdev_priv(dev); + int node = dev_to_node(&fp->pdev->dev); + struct fun_irq *irq; + unsigned int i; + + for (i = fp->num_tx_irqs; i < ntx; i++) { + irq = fun_alloc_qirq(fp, i, node, 0); + if (IS_ERR(irq)) + return PTR_ERR(irq); + + fp->num_tx_irqs++; + netif_napi_add_tx(dev, &irq->napi, fun_txq_napi_poll); + } + + for (i = fp->num_rx_irqs; i < nrx; i++) { + irq = fun_alloc_qirq(fp, i, node, fp->rx_irq_ofst); + if (IS_ERR(irq)) + return PTR_ERR(irq); + + fp->num_rx_irqs++; + netif_napi_add(dev, &irq->napi, fun_rxq_napi_poll); + } + + netif_info(fp, intr, dev, "Reserved %u/%u IRQs for Tx/Rx queues\n", + ntx, nrx); + return 0; +} + +static void free_txqs(struct funeth_txq **txqs, unsigned int nqs, + unsigned int start, int state) +{ + unsigned int i; + + for (i = start; i < nqs && txqs[i]; i++) + txqs[i] = funeth_txq_free(txqs[i], state); +} + +static int alloc_txqs(struct net_device *dev, struct funeth_txq **txqs, + unsigned int nqs, unsigned int depth, unsigned int start, + int state) +{ + struct funeth_priv *fp = netdev_priv(dev); + unsigned int i; + int err; + + for (i = start; i < nqs; i++) { + err = funeth_txq_create(dev, i, depth, xa_load(&fp->irqs, i), + state, &txqs[i]); + if (err) { + free_txqs(txqs, nqs, start, FUN_QSTATE_DESTROYED); + return err; + } + } + return 0; +} + +static void free_rxqs(struct funeth_rxq **rxqs, unsigned int nqs, + unsigned int start, int state) +{ + unsigned int i; + + for (i = start; i < nqs && rxqs[i]; i++) + rxqs[i] = funeth_rxq_free(rxqs[i], state); +} + +static int alloc_rxqs(struct net_device *dev, struct funeth_rxq **rxqs, + unsigned int nqs, unsigned int ncqe, unsigned int nrqe, + unsigned int start, int state) +{ + struct funeth_priv *fp = netdev_priv(dev); + unsigned int i; + int err; + + for (i = start; i < nqs; i++) { + err = funeth_rxq_create(dev, i, ncqe, nrqe, + xa_load(&fp->irqs, i + fp->rx_irq_ofst), + state, &rxqs[i]); + if (err) { + free_rxqs(rxqs, nqs, start, FUN_QSTATE_DESTROYED); + return err; + } + } + return 0; +} + +static void free_xdpqs(struct funeth_txq **xdpqs, unsigned int nqs, + unsigned int start, int state) +{ + unsigned int i; + + for (i = start; i < nqs && xdpqs[i]; i++) + xdpqs[i] = funeth_txq_free(xdpqs[i], state); + + if (state == FUN_QSTATE_DESTROYED) + kfree(xdpqs); +} + +static struct funeth_txq **alloc_xdpqs(struct net_device *dev, unsigned int nqs, + unsigned int depth, unsigned int start, + int state) +{ + struct funeth_txq **xdpqs; + unsigned int i; + int err; + + xdpqs = kcalloc(nqs, sizeof(*xdpqs), GFP_KERNEL); + if (!xdpqs) + return ERR_PTR(-ENOMEM); + + for (i = start; i < nqs; i++) { + err = funeth_txq_create(dev, i, depth, NULL, state, &xdpqs[i]); + if (err) { + free_xdpqs(xdpqs, nqs, start, FUN_QSTATE_DESTROYED); + return ERR_PTR(err); + } + } + return xdpqs; +} + +static void fun_free_rings(struct net_device *netdev, struct fun_qset *qset) +{ + struct funeth_priv *fp = netdev_priv(netdev); + struct funeth_txq **xdpqs = qset->xdpqs; + struct funeth_rxq **rxqs = qset->rxqs; + + /* qset may not specify any queues to operate on. In that case the + * currently installed queues are implied. + */ + if (!rxqs) { + rxqs = rtnl_dereference(fp->rxqs); + xdpqs = rtnl_dereference(fp->xdpqs); + qset->txqs = fp->txqs; + qset->nrxqs = netdev->real_num_rx_queues; + qset->ntxqs = netdev->real_num_tx_queues; + qset->nxdpqs = fp->num_xdpqs; + } + if (!rxqs) + return; + + if (rxqs == rtnl_dereference(fp->rxqs)) { + rcu_assign_pointer(fp->rxqs, NULL); + rcu_assign_pointer(fp->xdpqs, NULL); + synchronize_net(); + fp->txqs = NULL; + } + + free_rxqs(rxqs, qset->nrxqs, qset->rxq_start, qset->state); + free_txqs(qset->txqs, qset->ntxqs, qset->txq_start, qset->state); + free_xdpqs(xdpqs, qset->nxdpqs, qset->xdpq_start, qset->state); + if (qset->state == FUN_QSTATE_DESTROYED) + kfree(rxqs); + + /* Tell the caller which queues were operated on. */ + qset->rxqs = rxqs; + qset->xdpqs = xdpqs; +} + +static int fun_alloc_rings(struct net_device *netdev, struct fun_qset *qset) +{ + struct funeth_txq **xdpqs = NULL, **txqs; + struct funeth_rxq **rxqs; + int err; + + err = fun_alloc_queue_irqs(netdev, qset->ntxqs, qset->nrxqs); + if (err) + return err; + + rxqs = kcalloc(qset->ntxqs + qset->nrxqs, sizeof(*rxqs), GFP_KERNEL); + if (!rxqs) + return -ENOMEM; + + if (qset->nxdpqs) { + xdpqs = alloc_xdpqs(netdev, qset->nxdpqs, qset->sq_depth, + qset->xdpq_start, qset->state); + if (IS_ERR(xdpqs)) { + err = PTR_ERR(xdpqs); + goto free_qvec; + } + } + + txqs = (struct funeth_txq **)&rxqs[qset->nrxqs]; + err = alloc_txqs(netdev, txqs, qset->ntxqs, qset->sq_depth, + qset->txq_start, qset->state); + if (err) + goto free_xdpqs; + + err = alloc_rxqs(netdev, rxqs, qset->nrxqs, qset->cq_depth, + qset->rq_depth, qset->rxq_start, qset->state); + if (err) + goto free_txqs; + + qset->rxqs = rxqs; + qset->txqs = txqs; + qset->xdpqs = xdpqs; + return 0; + +free_txqs: + free_txqs(txqs, qset->ntxqs, qset->txq_start, FUN_QSTATE_DESTROYED); +free_xdpqs: + free_xdpqs(xdpqs, qset->nxdpqs, qset->xdpq_start, FUN_QSTATE_DESTROYED); +free_qvec: + kfree(rxqs); + return err; +} + +/* Take queues to the next level. Presently this means creating them on the + * device. + */ +static int fun_advance_ring_state(struct net_device *dev, struct fun_qset *qset) +{ + struct funeth_priv *fp = netdev_priv(dev); + int i, err; + + for (i = 0; i < qset->nrxqs; i++) { + err = fun_rxq_create_dev(qset->rxqs[i], + xa_load(&fp->irqs, + i + fp->rx_irq_ofst)); + if (err) + goto out; + } + + for (i = 0; i < qset->ntxqs; i++) { + err = fun_txq_create_dev(qset->txqs[i], xa_load(&fp->irqs, i)); + if (err) + goto out; + } + + for (i = 0; i < qset->nxdpqs; i++) { + err = fun_txq_create_dev(qset->xdpqs[i], NULL); + if (err) + goto out; + } + + return 0; + +out: + fun_free_rings(dev, qset); + return err; +} + +static int fun_port_create(struct net_device *netdev) +{ + struct funeth_priv *fp = netdev_priv(netdev); + union { + struct fun_admin_port_req req; + struct fun_admin_port_rsp rsp; + } cmd; + int rc; + + if (fp->lport != INVALID_LPORT) + return 0; + + cmd.req.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_PORT, + sizeof(cmd.req)); + cmd.req.u.create = + FUN_ADMIN_PORT_CREATE_REQ_INIT(FUN_ADMIN_SUBOP_CREATE, 0, + netdev->dev_port); + + rc = fun_submit_admin_sync_cmd(fp->fdev, &cmd.req.common, &cmd.rsp, + sizeof(cmd.rsp), 0); + + if (!rc) + fp->lport = be16_to_cpu(cmd.rsp.u.create.lport); + return rc; +} + +static int fun_port_destroy(struct net_device *netdev) +{ + struct funeth_priv *fp = netdev_priv(netdev); + + if (fp->lport == INVALID_LPORT) + return 0; + + fp->lport = INVALID_LPORT; + return fun_res_destroy(fp->fdev, FUN_ADMIN_OP_PORT, 0, + netdev->dev_port); +} + +static int fun_eth_create(struct funeth_priv *fp) +{ + union { + struct fun_admin_eth_req req; + struct fun_admin_generic_create_rsp rsp; + } cmd; + int rc; + + cmd.req.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_ETH, + sizeof(cmd.req)); + cmd.req.u.create = FUN_ADMIN_ETH_CREATE_REQ_INIT( + FUN_ADMIN_SUBOP_CREATE, + FUN_ADMIN_RES_CREATE_FLAG_ALLOCATOR, + 0, fp->netdev->dev_port); + + rc = fun_submit_admin_sync_cmd(fp->fdev, &cmd.req.common, &cmd.rsp, + sizeof(cmd.rsp), 0); + return rc ? rc : be32_to_cpu(cmd.rsp.id); +} + +static int fun_vi_create(struct funeth_priv *fp) +{ + struct fun_admin_vi_req req = { + .common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_VI, + sizeof(req)), + .u.create = FUN_ADMIN_VI_CREATE_REQ_INIT(FUN_ADMIN_SUBOP_CREATE, + 0, + fp->netdev->dev_port, + fp->netdev->dev_port) + }; + + return fun_submit_admin_sync_cmd(fp->fdev, &req.common, NULL, 0, 0); +} + +/* Helper to create an ETH flow and bind an SQ to it. + * Returns the ETH id (>= 0) on success or a negative error. + */ +int fun_create_and_bind_tx(struct funeth_priv *fp, u32 sqid) +{ + int rc, ethid; + + ethid = fun_eth_create(fp); + if (ethid >= 0) { + rc = fun_bind(fp->fdev, FUN_ADMIN_BIND_TYPE_EPSQ, sqid, + FUN_ADMIN_BIND_TYPE_ETH, ethid); + if (rc) { + fun_res_destroy(fp->fdev, FUN_ADMIN_OP_ETH, 0, ethid); + ethid = rc; + } + } + return ethid; +} + +static irqreturn_t fun_queue_irq_handler(int irq, void *data) +{ + struct fun_irq *p = data; + + if (p->rxq) { + prefetch(p->rxq->next_cqe_info); + p->rxq->irq_cnt++; + } + napi_schedule_irqoff(&p->napi); + return IRQ_HANDLED; +} + +static int fun_enable_irqs(struct net_device *dev) +{ + struct funeth_priv *fp = netdev_priv(dev); + unsigned long idx, last; + unsigned int qidx; + struct fun_irq *p; + const char *qtype; + int err; + + xa_for_each(&fp->irqs, idx, p) { + if (p->txq) { + qtype = "tx"; + qidx = p->txq->qidx; + } else if (p->rxq) { + qtype = "rx"; + qidx = p->rxq->qidx; + } else { + continue; + } + + if (p->state != FUN_IRQ_INIT) + continue; + + snprintf(p->name, sizeof(p->name) - 1, "%s-%s-%u", dev->name, + qtype, qidx); + err = request_irq(p->irq, fun_queue_irq_handler, 0, p->name, p); + if (err) { + netdev_err(dev, "Failed to allocate IRQ %u, err %d\n", + p->irq, err); + goto unroll; + } + p->state = FUN_IRQ_REQUESTED; + } + + xa_for_each(&fp->irqs, idx, p) { + if (p->state != FUN_IRQ_REQUESTED) + continue; + irq_set_affinity_notifier(p->irq, &p->aff_notify); + irq_set_affinity_and_hint(p->irq, &p->affinity_mask); + napi_enable(&p->napi); + p->state = FUN_IRQ_ENABLED; + } + + return 0; + +unroll: + last = idx - 1; + xa_for_each_range(&fp->irqs, idx, p, 0, last) + if (p->state == FUN_IRQ_REQUESTED) { + free_irq(p->irq, p); + p->state = FUN_IRQ_INIT; + } + + return err; +} + +static void fun_disable_one_irq(struct fun_irq *irq) +{ + napi_disable(&irq->napi); + irq_set_affinity_notifier(irq->irq, NULL); + irq_update_affinity_hint(irq->irq, NULL); + free_irq(irq->irq, irq); + irq->state = FUN_IRQ_INIT; +} + +static void fun_disable_irqs(struct net_device *dev) +{ + struct funeth_priv *fp = netdev_priv(dev); + struct fun_irq *p; + unsigned long idx; + + xa_for_each(&fp->irqs, idx, p) + if (p->state == FUN_IRQ_ENABLED) + fun_disable_one_irq(p); +} + +static void fun_down(struct net_device *dev, struct fun_qset *qset) +{ + struct funeth_priv *fp = netdev_priv(dev); + + /* If we don't have queues the data path is already down. + * Note netif_running(dev) may be true. + */ + if (!rcu_access_pointer(fp->rxqs)) + return; + + /* It is also down if the queues aren't on the device. */ + if (fp->txqs[0]->init_state >= FUN_QSTATE_INIT_FULL) { + netif_info(fp, ifdown, dev, + "Tearing down data path on device\n"); + fun_port_write_cmd(fp, FUN_ADMIN_PORT_KEY_DISABLE, 0); + + netif_carrier_off(dev); + netif_tx_disable(dev); + + fun_destroy_rss(fp); + fun_res_destroy(fp->fdev, FUN_ADMIN_OP_VI, 0, dev->dev_port); + fun_disable_irqs(dev); + } + + fun_free_rings(dev, qset); +} + +static int fun_up(struct net_device *dev, struct fun_qset *qset) +{ + static const int port_keys[] = { + FUN_ADMIN_PORT_KEY_STATS_DMA_LOW, + FUN_ADMIN_PORT_KEY_STATS_DMA_HIGH, + FUN_ADMIN_PORT_KEY_ENABLE + }; + + struct funeth_priv *fp = netdev_priv(dev); + u64 vals[] = { + lower_32_bits(fp->stats_dma_addr), + upper_32_bits(fp->stats_dma_addr), + FUN_PORT_FLAG_ENABLE_NOTIFY + }; + int err; + + netif_info(fp, ifup, dev, "Setting up data path on device\n"); + + if (qset->rxqs[0]->init_state < FUN_QSTATE_INIT_FULL) { + err = fun_advance_ring_state(dev, qset); + if (err) + return err; + } + + err = fun_vi_create(fp); + if (err) + goto free_queues; + + fp->txqs = qset->txqs; + rcu_assign_pointer(fp->rxqs, qset->rxqs); + rcu_assign_pointer(fp->xdpqs, qset->xdpqs); + + err = fun_enable_irqs(dev); + if (err) + goto destroy_vi; + + if (fp->rss_cfg) { + err = fun_config_rss(dev, fp->hash_algo, fp->rss_key, + fp->indir_table, FUN_ADMIN_SUBOP_CREATE); + } else { + /* The non-RSS case has only 1 queue. */ + err = fun_bind(fp->fdev, FUN_ADMIN_BIND_TYPE_VI, dev->dev_port, + FUN_ADMIN_BIND_TYPE_EPCQ, + qset->rxqs[0]->hw_cqid); + } + if (err) + goto disable_irqs; + + err = fun_port_write_cmds(fp, 3, port_keys, vals); + if (err) + goto free_rss; + + netif_tx_start_all_queues(dev); + return 0; + +free_rss: + fun_destroy_rss(fp); +disable_irqs: + fun_disable_irqs(dev); +destroy_vi: + fun_res_destroy(fp->fdev, FUN_ADMIN_OP_VI, 0, dev->dev_port); +free_queues: + fun_free_rings(dev, qset); + return err; +} + +static int funeth_open(struct net_device *netdev) +{ + struct funeth_priv *fp = netdev_priv(netdev); + struct fun_qset qset = { + .nrxqs = netdev->real_num_rx_queues, + .ntxqs = netdev->real_num_tx_queues, + .nxdpqs = fp->num_xdpqs, + .cq_depth = fp->cq_depth, + .rq_depth = fp->rq_depth, + .sq_depth = fp->sq_depth, + .state = FUN_QSTATE_INIT_FULL, + }; + int rc; + + rc = fun_alloc_rings(netdev, &qset); + if (rc) + return rc; + + rc = fun_up(netdev, &qset); + if (rc) { + qset.state = FUN_QSTATE_DESTROYED; + fun_free_rings(netdev, &qset); + } + + return rc; +} + +static int funeth_close(struct net_device *netdev) +{ + struct fun_qset qset = { .state = FUN_QSTATE_DESTROYED }; + + fun_down(netdev, &qset); + return 0; +} + +static void fun_get_stats64(struct net_device *netdev, + struct rtnl_link_stats64 *stats) +{ + struct funeth_priv *fp = netdev_priv(netdev); + struct funeth_txq **xdpqs; + struct funeth_rxq **rxqs; + unsigned int i, start; + + stats->tx_packets = fp->tx_packets; + stats->tx_bytes = fp->tx_bytes; + stats->tx_dropped = fp->tx_dropped; + + stats->rx_packets = fp->rx_packets; + stats->rx_bytes = fp->rx_bytes; + stats->rx_dropped = fp->rx_dropped; + + rcu_read_lock(); + rxqs = rcu_dereference(fp->rxqs); + if (!rxqs) + goto unlock; + + for (i = 0; i < netdev->real_num_tx_queues; i++) { + struct funeth_txq_stats txs; + + FUN_QSTAT_READ(fp->txqs[i], start, txs); + stats->tx_packets += txs.tx_pkts; + stats->tx_bytes += txs.tx_bytes; + stats->tx_dropped += txs.tx_map_err; + } + + for (i = 0; i < netdev->real_num_rx_queues; i++) { + struct funeth_rxq_stats rxs; + + FUN_QSTAT_READ(rxqs[i], start, rxs); + stats->rx_packets += rxs.rx_pkts; + stats->rx_bytes += rxs.rx_bytes; + stats->rx_dropped += rxs.rx_map_err + rxs.rx_mem_drops; + } + + xdpqs = rcu_dereference(fp->xdpqs); + if (!xdpqs) + goto unlock; + + for (i = 0; i < fp->num_xdpqs; i++) { + struct funeth_txq_stats txs; + + FUN_QSTAT_READ(xdpqs[i], start, txs); + stats->tx_packets += txs.tx_pkts; + stats->tx_bytes += txs.tx_bytes; + } +unlock: + rcu_read_unlock(); +} + +static int fun_change_mtu(struct net_device *netdev, int new_mtu) +{ + struct funeth_priv *fp = netdev_priv(netdev); + int rc; + + rc = fun_port_write_cmd(fp, FUN_ADMIN_PORT_KEY_MTU, new_mtu); + if (!rc) + netdev->mtu = new_mtu; + return rc; +} + +static int fun_set_macaddr(struct net_device *netdev, void *addr) +{ + struct funeth_priv *fp = netdev_priv(netdev); + struct sockaddr *saddr = addr; + int rc; + + if (!is_valid_ether_addr(saddr->sa_data)) + return -EADDRNOTAVAIL; + + if (ether_addr_equal(netdev->dev_addr, saddr->sa_data)) + return 0; + + rc = fun_port_write_cmd(fp, FUN_ADMIN_PORT_KEY_MACADDR, + ether_addr_to_u64(saddr->sa_data)); + if (!rc) + eth_hw_addr_set(netdev, saddr->sa_data); + return rc; +} + +static int fun_get_port_attributes(struct net_device *netdev) +{ + static const int keys[] = { + FUN_ADMIN_PORT_KEY_MACADDR, FUN_ADMIN_PORT_KEY_CAPABILITIES, + FUN_ADMIN_PORT_KEY_ADVERT, FUN_ADMIN_PORT_KEY_MTU + }; + static const int phys_keys[] = { + FUN_ADMIN_PORT_KEY_LANE_ATTRS, + }; + + struct funeth_priv *fp = netdev_priv(netdev); + u64 data[ARRAY_SIZE(keys)]; + u8 mac[ETH_ALEN]; + int i, rc; + + rc = fun_port_read_cmds(fp, ARRAY_SIZE(keys), keys, data); + if (rc) + return rc; + + for (i = 0; i < ARRAY_SIZE(keys); i++) { + switch (keys[i]) { + case FUN_ADMIN_PORT_KEY_MACADDR: + u64_to_ether_addr(data[i], mac); + if (is_zero_ether_addr(mac)) { + eth_hw_addr_random(netdev); + } else if (is_valid_ether_addr(mac)) { + eth_hw_addr_set(netdev, mac); + } else { + netdev_err(netdev, + "device provided a bad MAC address %pM\n", + mac); + return -EINVAL; + } + break; + + case FUN_ADMIN_PORT_KEY_CAPABILITIES: + fp->port_caps = data[i]; + break; + + case FUN_ADMIN_PORT_KEY_ADVERT: + fp->advertising = data[i]; + break; + + case FUN_ADMIN_PORT_KEY_MTU: + netdev->mtu = data[i]; + break; + } + } + + if (!(fp->port_caps & FUN_PORT_CAP_VPORT)) { + rc = fun_port_read_cmds(fp, ARRAY_SIZE(phys_keys), phys_keys, + data); + if (rc) + return rc; + + fp->lane_attrs = data[0]; + } + + if (netdev->addr_assign_type == NET_ADDR_RANDOM) + return fun_port_write_cmd(fp, FUN_ADMIN_PORT_KEY_MACADDR, + ether_addr_to_u64(netdev->dev_addr)); + return 0; +} + +static int fun_hwtstamp_get(struct net_device *dev, struct ifreq *ifr) +{ + const struct funeth_priv *fp = netdev_priv(dev); + + return copy_to_user(ifr->ifr_data, &fp->hwtstamp_cfg, + sizeof(fp->hwtstamp_cfg)) ? -EFAULT : 0; +} + +static int fun_hwtstamp_set(struct net_device *dev, struct ifreq *ifr) +{ + struct funeth_priv *fp = netdev_priv(dev); + struct hwtstamp_config cfg; + + if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg))) + return -EFAULT; + + /* no TX HW timestamps */ + cfg.tx_type = HWTSTAMP_TX_OFF; + + switch (cfg.rx_filter) { + case HWTSTAMP_FILTER_NONE: + break; + case HWTSTAMP_FILTER_ALL: + case HWTSTAMP_FILTER_SOME: + case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: + case HWTSTAMP_FILTER_NTP_ALL: + cfg.rx_filter = HWTSTAMP_FILTER_ALL; + break; + default: + return -ERANGE; + } + + fp->hwtstamp_cfg = cfg; + return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? -EFAULT : 0; +} + +static int fun_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +{ + switch (cmd) { + case SIOCSHWTSTAMP: + return fun_hwtstamp_set(dev, ifr); + case SIOCGHWTSTAMP: + return fun_hwtstamp_get(dev, ifr); + default: + return -EOPNOTSUPP; + } +} + +/* Prepare the queues for XDP. */ +static int fun_enter_xdp(struct net_device *dev, struct bpf_prog *prog) +{ + struct funeth_priv *fp = netdev_priv(dev); + unsigned int i, nqs = num_online_cpus(); + struct funeth_txq **xdpqs; + struct funeth_rxq **rxqs; + int err; + + xdpqs = alloc_xdpqs(dev, nqs, fp->sq_depth, 0, FUN_QSTATE_INIT_FULL); + if (IS_ERR(xdpqs)) + return PTR_ERR(xdpqs); + + rxqs = rtnl_dereference(fp->rxqs); + for (i = 0; i < dev->real_num_rx_queues; i++) { + err = fun_rxq_set_bpf(rxqs[i], prog); + if (err) + goto out; + } + + fp->num_xdpqs = nqs; + rcu_assign_pointer(fp->xdpqs, xdpqs); + return 0; +out: + while (i--) + fun_rxq_set_bpf(rxqs[i], NULL); + + free_xdpqs(xdpqs, nqs, 0, FUN_QSTATE_DESTROYED); + return err; +} + +/* Set the queues for non-XDP operation. */ +static void fun_end_xdp(struct net_device *dev) +{ + struct funeth_priv *fp = netdev_priv(dev); + struct funeth_txq **xdpqs; + struct funeth_rxq **rxqs; + unsigned int i; + + xdpqs = rtnl_dereference(fp->xdpqs); + rcu_assign_pointer(fp->xdpqs, NULL); + synchronize_net(); + /* at this point both Rx and Tx XDP processing has ended */ + + free_xdpqs(xdpqs, fp->num_xdpqs, 0, FUN_QSTATE_DESTROYED); + fp->num_xdpqs = 0; + + rxqs = rtnl_dereference(fp->rxqs); + for (i = 0; i < dev->real_num_rx_queues; i++) + fun_rxq_set_bpf(rxqs[i], NULL); +} + +#define XDP_MAX_MTU \ + (PAGE_SIZE - FUN_XDP_HEADROOM - VLAN_ETH_HLEN - FUN_RX_TAILROOM) + +static int fun_xdp_setup(struct net_device *dev, struct netdev_bpf *xdp) +{ + struct bpf_prog *old_prog, *prog = xdp->prog; + struct funeth_priv *fp = netdev_priv(dev); + int i, err; + + /* XDP uses at most one buffer */ + if (prog && dev->mtu > XDP_MAX_MTU) { + netdev_err(dev, "device MTU %u too large for XDP\n", dev->mtu); + NL_SET_ERR_MSG_MOD(xdp->extack, + "Device MTU too large for XDP"); + return -EINVAL; + } + + if (!netif_running(dev)) { + fp->num_xdpqs = prog ? num_online_cpus() : 0; + } else if (prog && !fp->xdp_prog) { + err = fun_enter_xdp(dev, prog); + if (err) { + NL_SET_ERR_MSG_MOD(xdp->extack, + "Failed to set queues for XDP."); + return err; + } + } else if (!prog && fp->xdp_prog) { + fun_end_xdp(dev); + } else { + struct funeth_rxq **rxqs = rtnl_dereference(fp->rxqs); + + for (i = 0; i < dev->real_num_rx_queues; i++) + WRITE_ONCE(rxqs[i]->xdp_prog, prog); + } + + dev->max_mtu = prog ? XDP_MAX_MTU : FUN_MAX_MTU; + old_prog = xchg(&fp->xdp_prog, prog); + if (old_prog) + bpf_prog_put(old_prog); + + return 0; +} + +static int fun_xdp(struct net_device *dev, struct netdev_bpf *xdp) +{ + switch (xdp->command) { + case XDP_SETUP_PROG: + return fun_xdp_setup(dev, xdp); + default: + return -EINVAL; + } +} + +static struct devlink_port *fun_get_devlink_port(struct net_device *netdev) +{ + struct funeth_priv *fp = netdev_priv(netdev); + + return &fp->dl_port; +} + +static int fun_init_vports(struct fun_ethdev *ed, unsigned int n) +{ + if (ed->num_vports) + return -EINVAL; + + ed->vport_info = kvcalloc(n, sizeof(*ed->vport_info), GFP_KERNEL); + if (!ed->vport_info) + return -ENOMEM; + ed->num_vports = n; + return 0; +} + +static void fun_free_vports(struct fun_ethdev *ed) +{ + kvfree(ed->vport_info); + ed->vport_info = NULL; + ed->num_vports = 0; +} + +static struct fun_vport_info *fun_get_vport(struct fun_ethdev *ed, + unsigned int vport) +{ + if (!ed->vport_info || vport >= ed->num_vports) + return NULL; + + return ed->vport_info + vport; +} + +static int fun_set_vf_mac(struct net_device *dev, int vf, u8 *mac) +{ + struct funeth_priv *fp = netdev_priv(dev); + struct fun_adi_param mac_param = {}; + struct fun_dev *fdev = fp->fdev; + struct fun_ethdev *ed = to_fun_ethdev(fdev); + struct fun_vport_info *vi; + int rc = -EINVAL; + + if (is_multicast_ether_addr(mac)) + return -EINVAL; + + mutex_lock(&ed->state_mutex); + vi = fun_get_vport(ed, vf); + if (!vi) + goto unlock; + + mac_param.u.mac = FUN_ADI_MAC_INIT(ether_addr_to_u64(mac)); + rc = fun_adi_write(fdev, FUN_ADMIN_ADI_ATTR_MACADDR, vf + 1, + &mac_param); + if (!rc) + ether_addr_copy(vi->mac, mac); +unlock: + mutex_unlock(&ed->state_mutex); + return rc; +} + +static int fun_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos, + __be16 vlan_proto) +{ + struct funeth_priv *fp = netdev_priv(dev); + struct fun_adi_param vlan_param = {}; + struct fun_dev *fdev = fp->fdev; + struct fun_ethdev *ed = to_fun_ethdev(fdev); + struct fun_vport_info *vi; + int rc = -EINVAL; + + if (vlan > 4095 || qos > 7) + return -EINVAL; + if (vlan_proto && vlan_proto != htons(ETH_P_8021Q) && + vlan_proto != htons(ETH_P_8021AD)) + return -EINVAL; + + mutex_lock(&ed->state_mutex); + vi = fun_get_vport(ed, vf); + if (!vi) + goto unlock; + + vlan_param.u.vlan = FUN_ADI_VLAN_INIT(be16_to_cpu(vlan_proto), + ((u16)qos << VLAN_PRIO_SHIFT) | vlan); + rc = fun_adi_write(fdev, FUN_ADMIN_ADI_ATTR_VLAN, vf + 1, &vlan_param); + if (!rc) { + vi->vlan = vlan; + vi->qos = qos; + vi->vlan_proto = vlan_proto; + } +unlock: + mutex_unlock(&ed->state_mutex); + return rc; +} + +static int fun_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate, + int max_tx_rate) +{ + struct funeth_priv *fp = netdev_priv(dev); + struct fun_adi_param rate_param = {}; + struct fun_dev *fdev = fp->fdev; + struct fun_ethdev *ed = to_fun_ethdev(fdev); + struct fun_vport_info *vi; + int rc = -EINVAL; + + if (min_tx_rate) + return -EINVAL; + + mutex_lock(&ed->state_mutex); + vi = fun_get_vport(ed, vf); + if (!vi) + goto unlock; + + rate_param.u.rate = FUN_ADI_RATE_INIT(max_tx_rate); + rc = fun_adi_write(fdev, FUN_ADMIN_ADI_ATTR_RATE, vf + 1, &rate_param); + if (!rc) + vi->max_rate = max_tx_rate; +unlock: + mutex_unlock(&ed->state_mutex); + return rc; +} + +static int fun_get_vf_config(struct net_device *dev, int vf, + struct ifla_vf_info *ivi) +{ + struct funeth_priv *fp = netdev_priv(dev); + struct fun_ethdev *ed = to_fun_ethdev(fp->fdev); + const struct fun_vport_info *vi; + + mutex_lock(&ed->state_mutex); + vi = fun_get_vport(ed, vf); + if (!vi) + goto unlock; + + memset(ivi, 0, sizeof(*ivi)); + ivi->vf = vf; + ether_addr_copy(ivi->mac, vi->mac); + ivi->vlan = vi->vlan; + ivi->qos = vi->qos; + ivi->vlan_proto = vi->vlan_proto; + ivi->max_tx_rate = vi->max_rate; + ivi->spoofchk = vi->spoofchk; +unlock: + mutex_unlock(&ed->state_mutex); + return vi ? 0 : -EINVAL; +} + +static void fun_uninit(struct net_device *dev) +{ + struct funeth_priv *fp = netdev_priv(dev); + + fun_prune_queue_irqs(dev); + xa_destroy(&fp->irqs); +} + +static const struct net_device_ops fun_netdev_ops = { + .ndo_open = funeth_open, + .ndo_stop = funeth_close, + .ndo_start_xmit = fun_start_xmit, + .ndo_get_stats64 = fun_get_stats64, + .ndo_change_mtu = fun_change_mtu, + .ndo_set_mac_address = fun_set_macaddr, + .ndo_validate_addr = eth_validate_addr, + .ndo_eth_ioctl = fun_ioctl, + .ndo_uninit = fun_uninit, + .ndo_bpf = fun_xdp, + .ndo_xdp_xmit = fun_xdp_xmit_frames, + .ndo_set_vf_mac = fun_set_vf_mac, + .ndo_set_vf_vlan = fun_set_vf_vlan, + .ndo_set_vf_rate = fun_set_vf_rate, + .ndo_get_vf_config = fun_get_vf_config, + .ndo_get_devlink_port = fun_get_devlink_port, +}; + +#define GSO_ENCAP_FLAGS (NETIF_F_GSO_GRE | NETIF_F_GSO_IPXIP4 | \ + NETIF_F_GSO_IPXIP6 | NETIF_F_GSO_UDP_TUNNEL | \ + NETIF_F_GSO_UDP_TUNNEL_CSUM) +#define TSO_FLAGS (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN | \ + NETIF_F_GSO_UDP_L4) +#define VLAN_FEAT (NETIF_F_SG | NETIF_F_HW_CSUM | TSO_FLAGS | \ + GSO_ENCAP_FLAGS | NETIF_F_HIGHDMA) + +static void fun_dflt_rss_indir(struct funeth_priv *fp, unsigned int nrx) +{ + unsigned int i; + + for (i = 0; i < fp->indir_table_nentries; i++) + fp->indir_table[i] = ethtool_rxfh_indir_default(i, nrx); +} + +/* Reset the RSS indirection table to equal distribution across the current + * number of Rx queues. Called at init time and whenever the number of Rx + * queues changes subsequently. Note that this may also resize the indirection + * table. + */ +static void fun_reset_rss_indir(struct net_device *dev, unsigned int nrx) +{ + struct funeth_priv *fp = netdev_priv(dev); + + if (!fp->rss_cfg) + return; + + /* Set the table size to the max possible that allows an equal number + * of occurrences of each CQ. + */ + fp->indir_table_nentries = rounddown(FUN_ETH_RSS_MAX_INDIR_ENT, nrx); + fun_dflt_rss_indir(fp, nrx); +} + +/* Update the RSS LUT to contain only queues in [0, nrx). Normally this will + * update the LUT to an equal distribution among nrx queues, If @only_if_needed + * is set the LUT is left unchanged if it already does not reference any queues + * >= nrx. + */ +static int fun_rss_set_qnum(struct net_device *dev, unsigned int nrx, + bool only_if_needed) +{ + struct funeth_priv *fp = netdev_priv(dev); + u32 old_lut[FUN_ETH_RSS_MAX_INDIR_ENT]; + unsigned int i, oldsz; + int err; + + if (!fp->rss_cfg) + return 0; + + if (only_if_needed) { + for (i = 0; i < fp->indir_table_nentries; i++) + if (fp->indir_table[i] >= nrx) + break; + + if (i >= fp->indir_table_nentries) + return 0; + } + + memcpy(old_lut, fp->indir_table, sizeof(old_lut)); + oldsz = fp->indir_table_nentries; + fun_reset_rss_indir(dev, nrx); + + err = fun_config_rss(dev, fp->hash_algo, fp->rss_key, + fp->indir_table, FUN_ADMIN_SUBOP_MODIFY); + if (!err) + return 0; + + memcpy(fp->indir_table, old_lut, sizeof(old_lut)); + fp->indir_table_nentries = oldsz; + return err; +} + +/* Allocate the DMA area for the RSS configuration commands to the device, and + * initialize the hash, hash key, indirection table size and its entries to + * their defaults. The indirection table defaults to equal distribution across + * the Rx queues. + */ +static int fun_init_rss(struct net_device *dev) +{ + struct funeth_priv *fp = netdev_priv(dev); + size_t size = sizeof(fp->rss_key) + sizeof(fp->indir_table); + + fp->rss_hw_id = FUN_HCI_ID_INVALID; + if (!(fp->port_caps & FUN_PORT_CAP_OFFLOADS)) + return 0; + + fp->rss_cfg = dma_alloc_coherent(&fp->pdev->dev, size, + &fp->rss_dma_addr, GFP_KERNEL); + if (!fp->rss_cfg) + return -ENOMEM; + + fp->hash_algo = FUN_ETH_RSS_ALG_TOEPLITZ; + netdev_rss_key_fill(fp->rss_key, sizeof(fp->rss_key)); + fun_reset_rss_indir(dev, dev->real_num_rx_queues); + return 0; +} + +static void fun_free_rss(struct funeth_priv *fp) +{ + if (fp->rss_cfg) { + dma_free_coherent(&fp->pdev->dev, + sizeof(fp->rss_key) + sizeof(fp->indir_table), + fp->rss_cfg, fp->rss_dma_addr); + fp->rss_cfg = NULL; + } +} + +void fun_set_ring_count(struct net_device *netdev, unsigned int ntx, + unsigned int nrx) +{ + netif_set_real_num_tx_queues(netdev, ntx); + if (nrx != netdev->real_num_rx_queues) { + netif_set_real_num_rx_queues(netdev, nrx); + fun_reset_rss_indir(netdev, nrx); + } +} + +static int fun_init_stats_area(struct funeth_priv *fp) +{ + unsigned int nstats; + + if (!(fp->port_caps & FUN_PORT_CAP_STATS)) + return 0; + + nstats = PORT_MAC_RX_STATS_MAX + PORT_MAC_TX_STATS_MAX + + PORT_MAC_FEC_STATS_MAX; + + fp->stats = dma_alloc_coherent(&fp->pdev->dev, nstats * sizeof(u64), + &fp->stats_dma_addr, GFP_KERNEL); + if (!fp->stats) + return -ENOMEM; + return 0; +} + +static void fun_free_stats_area(struct funeth_priv *fp) +{ + unsigned int nstats; + + if (fp->stats) { + nstats = PORT_MAC_RX_STATS_MAX + PORT_MAC_TX_STATS_MAX; + dma_free_coherent(&fp->pdev->dev, nstats * sizeof(u64), + fp->stats, fp->stats_dma_addr); + fp->stats = NULL; + } +} + +static int fun_dl_port_register(struct net_device *netdev) +{ + struct funeth_priv *fp = netdev_priv(netdev); + struct devlink *dl = priv_to_devlink(fp->fdev); + struct devlink_port_attrs attrs = {}; + unsigned int idx; + + if (fp->port_caps & FUN_PORT_CAP_VPORT) { + attrs.flavour = DEVLINK_PORT_FLAVOUR_VIRTUAL; + idx = fp->lport; + } else { + idx = netdev->dev_port; + attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL; + attrs.lanes = fp->lane_attrs & 7; + if (fp->lane_attrs & FUN_PORT_LANE_SPLIT) { + attrs.split = 1; + attrs.phys.port_number = fp->lport & ~3; + attrs.phys.split_subport_number = fp->lport & 3; + } else { + attrs.phys.port_number = fp->lport; + } + } + + devlink_port_attrs_set(&fp->dl_port, &attrs); + + return devlink_port_register(dl, &fp->dl_port, idx); +} + +/* Determine the max Tx/Rx queues for a port. */ +static int fun_max_qs(struct fun_ethdev *ed, unsigned int *ntx, + unsigned int *nrx) +{ + int neth; + + if (ed->num_ports > 1 || is_kdump_kernel()) { + *ntx = 1; + *nrx = 1; + return 0; + } + + neth = fun_get_res_count(&ed->fdev, FUN_ADMIN_OP_ETH); + if (neth < 0) + return neth; + + /* We determine the max number of queues based on the CPU + * cores, device interrupts and queues, RSS size, and device Tx flows. + * + * - At least 1 Rx and 1 Tx queues. + * - At most 1 Rx/Tx queue per core. + * - Each Rx/Tx queue needs 1 SQ. + */ + *ntx = min(ed->nsqs_per_port - 1, num_online_cpus()); + *nrx = *ntx; + if (*ntx > neth) + *ntx = neth; + if (*nrx > FUN_ETH_RSS_MAX_INDIR_ENT) + *nrx = FUN_ETH_RSS_MAX_INDIR_ENT; + return 0; +} + +static void fun_queue_defaults(struct net_device *dev, unsigned int nsqs) +{ + unsigned int ntx, nrx; + + ntx = min(dev->num_tx_queues, FUN_DFLT_QUEUES); + nrx = min(dev->num_rx_queues, FUN_DFLT_QUEUES); + if (ntx <= nrx) { + ntx = min(ntx, nsqs / 2); + nrx = min(nrx, nsqs - ntx); + } else { + nrx = min(nrx, nsqs / 2); + ntx = min(ntx, nsqs - nrx); + } + + netif_set_real_num_tx_queues(dev, ntx); + netif_set_real_num_rx_queues(dev, nrx); +} + +/* Replace the existing Rx/Tx/XDP queues with equal number of queues with + * different settings, e.g. depth. This is a disruptive replacement that + * temporarily shuts down the data path and should be limited to changes that + * can't be applied to live queues. The old queues are always discarded. + */ +int fun_replace_queues(struct net_device *dev, struct fun_qset *newqs, + struct netlink_ext_ack *extack) +{ + struct fun_qset oldqs = { .state = FUN_QSTATE_DESTROYED }; + struct funeth_priv *fp = netdev_priv(dev); + int err; + + newqs->nrxqs = dev->real_num_rx_queues; + newqs->ntxqs = dev->real_num_tx_queues; + newqs->nxdpqs = fp->num_xdpqs; + newqs->state = FUN_QSTATE_INIT_SW; + err = fun_alloc_rings(dev, newqs); + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "Unable to allocate memory for new queues, keeping current settings"); + return err; + } + + fun_down(dev, &oldqs); + + err = fun_up(dev, newqs); + if (!err) + return 0; + + /* The new queues couldn't be installed. We do not retry the old queues + * as they are the same to the device as the new queues and would + * similarly fail. + */ + newqs->state = FUN_QSTATE_DESTROYED; + fun_free_rings(dev, newqs); + NL_SET_ERR_MSG_MOD(extack, "Unable to restore the data path with the new queues."); + return err; +} + +/* Change the number of Rx/Tx queues of a device while it is up. This is done + * by incrementally adding/removing queues to meet the new requirements while + * handling ongoing traffic. + */ +int fun_change_num_queues(struct net_device *dev, unsigned int ntx, + unsigned int nrx) +{ + unsigned int keep_tx = min(dev->real_num_tx_queues, ntx); + unsigned int keep_rx = min(dev->real_num_rx_queues, nrx); + struct funeth_priv *fp = netdev_priv(dev); + struct fun_qset oldqs = { + .rxqs = rtnl_dereference(fp->rxqs), + .txqs = fp->txqs, + .nrxqs = dev->real_num_rx_queues, + .ntxqs = dev->real_num_tx_queues, + .rxq_start = keep_rx, + .txq_start = keep_tx, + .state = FUN_QSTATE_DESTROYED + }; + struct fun_qset newqs = { + .nrxqs = nrx, + .ntxqs = ntx, + .rxq_start = keep_rx, + .txq_start = keep_tx, + .cq_depth = fp->cq_depth, + .rq_depth = fp->rq_depth, + .sq_depth = fp->sq_depth, + .state = FUN_QSTATE_INIT_FULL + }; + int i, err; + + err = fun_alloc_rings(dev, &newqs); + if (err) + goto free_irqs; + + err = fun_enable_irqs(dev); /* of any newly added queues */ + if (err) + goto free_rings; + + /* copy the queues we are keeping to the new set */ + memcpy(newqs.rxqs, oldqs.rxqs, keep_rx * sizeof(*oldqs.rxqs)); + memcpy(newqs.txqs, fp->txqs, keep_tx * sizeof(*fp->txqs)); + + if (nrx < dev->real_num_rx_queues) { + err = fun_rss_set_qnum(dev, nrx, true); + if (err) + goto disable_tx_irqs; + + for (i = nrx; i < dev->real_num_rx_queues; i++) + fun_disable_one_irq(container_of(oldqs.rxqs[i]->napi, + struct fun_irq, napi)); + + netif_set_real_num_rx_queues(dev, nrx); + } + + if (ntx < dev->real_num_tx_queues) + netif_set_real_num_tx_queues(dev, ntx); + + rcu_assign_pointer(fp->rxqs, newqs.rxqs); + fp->txqs = newqs.txqs; + synchronize_net(); + + if (ntx > dev->real_num_tx_queues) + netif_set_real_num_tx_queues(dev, ntx); + + if (nrx > dev->real_num_rx_queues) { + netif_set_real_num_rx_queues(dev, nrx); + fun_rss_set_qnum(dev, nrx, false); + } + + /* disable interrupts of any excess Tx queues */ + for (i = keep_tx; i < oldqs.ntxqs; i++) + fun_disable_one_irq(oldqs.txqs[i]->irq); + + fun_free_rings(dev, &oldqs); + fun_prune_queue_irqs(dev); + return 0; + +disable_tx_irqs: + for (i = oldqs.ntxqs; i < ntx; i++) + fun_disable_one_irq(newqs.txqs[i]->irq); +free_rings: + newqs.state = FUN_QSTATE_DESTROYED; + fun_free_rings(dev, &newqs); +free_irqs: + fun_prune_queue_irqs(dev); + return err; +} + +static int fun_create_netdev(struct fun_ethdev *ed, unsigned int portid) +{ + struct fun_dev *fdev = &ed->fdev; + struct net_device *netdev; + struct funeth_priv *fp; + unsigned int ntx, nrx; + int rc; + + rc = fun_max_qs(ed, &ntx, &nrx); + if (rc) + return rc; + + netdev = alloc_etherdev_mqs(sizeof(*fp), ntx, nrx); + if (!netdev) { + rc = -ENOMEM; + goto done; + } + + netdev->dev_port = portid; + fun_queue_defaults(netdev, ed->nsqs_per_port); + + fp = netdev_priv(netdev); + fp->fdev = fdev; + fp->pdev = to_pci_dev(fdev->dev); + fp->netdev = netdev; + xa_init(&fp->irqs); + fp->rx_irq_ofst = ntx; + seqcount_init(&fp->link_seq); + + fp->lport = INVALID_LPORT; + rc = fun_port_create(netdev); + if (rc) + goto free_netdev; + + /* bind port to admin CQ for async events */ + rc = fun_bind(fdev, FUN_ADMIN_BIND_TYPE_PORT, portid, + FUN_ADMIN_BIND_TYPE_EPCQ, 0); + if (rc) + goto destroy_port; + + rc = fun_get_port_attributes(netdev); + if (rc) + goto destroy_port; + + rc = fun_init_rss(netdev); + if (rc) + goto destroy_port; + + rc = fun_init_stats_area(fp); + if (rc) + goto free_rss; + + SET_NETDEV_DEV(netdev, fdev->dev); + netdev->netdev_ops = &fun_netdev_ops; + + netdev->hw_features = NETIF_F_SG | NETIF_F_RXHASH | NETIF_F_RXCSUM; + if (fp->port_caps & FUN_PORT_CAP_OFFLOADS) + netdev->hw_features |= NETIF_F_HW_CSUM | TSO_FLAGS; + if (fp->port_caps & FUN_PORT_CAP_ENCAP_OFFLOADS) + netdev->hw_features |= GSO_ENCAP_FLAGS; + + netdev->features |= netdev->hw_features | NETIF_F_HIGHDMA; + netdev->vlan_features = netdev->features & VLAN_FEAT; + netdev->mpls_features = netdev->vlan_features; + netdev->hw_enc_features = netdev->hw_features; + + netdev->min_mtu = ETH_MIN_MTU; + netdev->max_mtu = FUN_MAX_MTU; + + fun_set_ethtool_ops(netdev); + + /* configurable parameters */ + fp->sq_depth = min(SQ_DEPTH, fdev->q_depth); + fp->cq_depth = min(CQ_DEPTH, fdev->q_depth); + fp->rq_depth = min_t(unsigned int, RQ_DEPTH, fdev->q_depth); + fp->rx_coal_usec = CQ_INTCOAL_USEC; + fp->rx_coal_count = CQ_INTCOAL_NPKT; + fp->tx_coal_usec = SQ_INTCOAL_USEC; + fp->tx_coal_count = SQ_INTCOAL_NPKT; + fp->cq_irq_db = FUN_IRQ_CQ_DB(fp->rx_coal_usec, fp->rx_coal_count); + + rc = fun_dl_port_register(netdev); + if (rc) + goto free_stats; + + fp->ktls_id = FUN_HCI_ID_INVALID; + fun_ktls_init(netdev); /* optional, failure OK */ + + netif_carrier_off(netdev); + ed->netdevs[portid] = netdev; + rc = register_netdev(netdev); + if (rc) + goto unreg_devlink; + + devlink_port_type_eth_set(&fp->dl_port, netdev); + + return 0; + +unreg_devlink: + ed->netdevs[portid] = NULL; + fun_ktls_cleanup(fp); + devlink_port_unregister(&fp->dl_port); +free_stats: + fun_free_stats_area(fp); +free_rss: + fun_free_rss(fp); +destroy_port: + fun_port_destroy(netdev); +free_netdev: + free_netdev(netdev); +done: + dev_err(fdev->dev, "couldn't allocate port %u, error %d", portid, rc); + return rc; +} + +static void fun_destroy_netdev(struct net_device *netdev) +{ + struct funeth_priv *fp; + + fp = netdev_priv(netdev); + devlink_port_type_clear(&fp->dl_port); + unregister_netdev(netdev); + devlink_port_unregister(&fp->dl_port); + fun_ktls_cleanup(fp); + fun_free_stats_area(fp); + fun_free_rss(fp); + fun_port_destroy(netdev); + free_netdev(netdev); +} + +static int fun_create_ports(struct fun_ethdev *ed, unsigned int nports) +{ + struct fun_dev *fd = &ed->fdev; + int i, rc; + + /* The admin queue takes 1 IRQ and 2 SQs. */ + ed->nsqs_per_port = min(fd->num_irqs - 1, + fd->kern_end_qid - 2) / nports; + if (ed->nsqs_per_port < 2) { + dev_err(fd->dev, "Too few SQs for %u ports", nports); + return -EINVAL; + } + + ed->netdevs = kcalloc(nports, sizeof(*ed->netdevs), GFP_KERNEL); + if (!ed->netdevs) + return -ENOMEM; + + ed->num_ports = nports; + for (i = 0; i < nports; i++) { + rc = fun_create_netdev(ed, i); + if (rc) + goto free_netdevs; + } + + return 0; + +free_netdevs: + while (i) + fun_destroy_netdev(ed->netdevs[--i]); + kfree(ed->netdevs); + ed->netdevs = NULL; + ed->num_ports = 0; + return rc; +} + +static void fun_destroy_ports(struct fun_ethdev *ed) +{ + unsigned int i; + + for (i = 0; i < ed->num_ports; i++) + fun_destroy_netdev(ed->netdevs[i]); + + kfree(ed->netdevs); + ed->netdevs = NULL; + ed->num_ports = 0; +} + +static void fun_update_link_state(const struct fun_ethdev *ed, + const struct fun_admin_port_notif *notif) +{ + unsigned int port_idx = be16_to_cpu(notif->id); + struct net_device *netdev; + struct funeth_priv *fp; + + if (port_idx >= ed->num_ports) + return; + + netdev = ed->netdevs[port_idx]; + fp = netdev_priv(netdev); + + write_seqcount_begin(&fp->link_seq); + fp->link_speed = be32_to_cpu(notif->speed) * 10; /* 10 Mbps->Mbps */ + fp->active_fc = notif->flow_ctrl; + fp->active_fec = notif->fec; + fp->xcvr_type = notif->xcvr_type; + fp->link_down_reason = notif->link_down_reason; + fp->lp_advertising = be64_to_cpu(notif->lp_advertising); + + if ((notif->link_state | notif->missed_events) & FUN_PORT_FLAG_MAC_DOWN) + netif_carrier_off(netdev); + if (notif->link_state & FUN_PORT_FLAG_MAC_UP) + netif_carrier_on(netdev); + + write_seqcount_end(&fp->link_seq); + fun_report_link(netdev); +} + +/* handler for async events delivered through the admin CQ */ +static void fun_event_cb(struct fun_dev *fdev, void *entry) +{ + u8 op = ((struct fun_admin_rsp_common *)entry)->op; + + if (op == FUN_ADMIN_OP_PORT) { + const struct fun_admin_port_notif *rsp = entry; + + if (rsp->subop == FUN_ADMIN_SUBOP_NOTIFY) { + fun_update_link_state(to_fun_ethdev(fdev), rsp); + } else if (rsp->subop == FUN_ADMIN_SUBOP_RES_COUNT) { + const struct fun_admin_res_count_rsp *r = entry; + + if (r->count.data) + set_bit(FUN_SERV_RES_CHANGE, &fdev->service_flags); + else + set_bit(FUN_SERV_DEL_PORTS, &fdev->service_flags); + fun_serv_sched(fdev); + } else { + dev_info(fdev->dev, "adminq event unexpected op %u subop %u", + op, rsp->subop); + } + } else { + dev_info(fdev->dev, "adminq event unexpected op %u", op); + } +} + +/* handler for pending work managed by the service task */ +static void fun_service_cb(struct fun_dev *fdev) +{ + struct fun_ethdev *ed = to_fun_ethdev(fdev); + int rc; + + if (test_and_clear_bit(FUN_SERV_DEL_PORTS, &fdev->service_flags)) + fun_destroy_ports(ed); + + if (!test_and_clear_bit(FUN_SERV_RES_CHANGE, &fdev->service_flags)) + return; + + rc = fun_get_res_count(fdev, FUN_ADMIN_OP_PORT); + if (rc < 0 || rc == ed->num_ports) + return; + + if (ed->num_ports) + fun_destroy_ports(ed); + if (rc) + fun_create_ports(ed, rc); +} + +static int funeth_sriov_configure(struct pci_dev *pdev, int nvfs) +{ + struct fun_dev *fdev = pci_get_drvdata(pdev); + struct fun_ethdev *ed = to_fun_ethdev(fdev); + int rc; + + if (nvfs == 0) { + if (pci_vfs_assigned(pdev)) { + dev_warn(&pdev->dev, + "Cannot disable SR-IOV while VFs are assigned\n"); + return -EPERM; + } + + mutex_lock(&ed->state_mutex); + fun_free_vports(ed); + mutex_unlock(&ed->state_mutex); + pci_disable_sriov(pdev); + return 0; + } + + rc = pci_enable_sriov(pdev, nvfs); + if (rc) + return rc; + + mutex_lock(&ed->state_mutex); + rc = fun_init_vports(ed, nvfs); + mutex_unlock(&ed->state_mutex); + if (rc) { + pci_disable_sriov(pdev); + return rc; + } + + return nvfs; +} + +static int funeth_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + struct fun_dev_params aqreq = { + .cqe_size_log2 = ilog2(ADMIN_CQE_SIZE), + .sqe_size_log2 = ilog2(ADMIN_SQE_SIZE), + .cq_depth = ADMIN_CQ_DEPTH, + .sq_depth = ADMIN_SQ_DEPTH, + .rq_depth = ADMIN_RQ_DEPTH, + .min_msix = 2, /* 1 Rx + 1 Tx */ + .event_cb = fun_event_cb, + .serv_cb = fun_service_cb, + }; + struct devlink *devlink; + struct fun_ethdev *ed; + struct fun_dev *fdev; + int rc; + + devlink = fun_devlink_alloc(&pdev->dev); + if (!devlink) { + dev_err(&pdev->dev, "devlink alloc failed\n"); + return -ENOMEM; + } + + ed = devlink_priv(devlink); + mutex_init(&ed->state_mutex); + + fdev = &ed->fdev; + rc = fun_dev_enable(fdev, pdev, &aqreq, KBUILD_MODNAME); + if (rc) + goto free_devlink; + + rc = fun_get_res_count(fdev, FUN_ADMIN_OP_PORT); + if (rc > 0) + rc = fun_create_ports(ed, rc); + if (rc < 0) + goto disable_dev; + + fun_serv_restart(fdev); + fun_devlink_register(devlink); + return 0; + +disable_dev: + fun_dev_disable(fdev); +free_devlink: + mutex_destroy(&ed->state_mutex); + fun_devlink_free(devlink); + return rc; +} + +static void funeth_remove(struct pci_dev *pdev) +{ + struct fun_dev *fdev = pci_get_drvdata(pdev); + struct devlink *devlink; + struct fun_ethdev *ed; + + ed = to_fun_ethdev(fdev); + devlink = priv_to_devlink(ed); + fun_devlink_unregister(devlink); + +#ifdef CONFIG_PCI_IOV + funeth_sriov_configure(pdev, 0); +#endif + + fun_serv_stop(fdev); + fun_destroy_ports(ed); + fun_dev_disable(fdev); + mutex_destroy(&ed->state_mutex); + + fun_devlink_free(devlink); +} + +static struct pci_driver funeth_driver = { + .name = KBUILD_MODNAME, + .id_table = funeth_id_table, + .probe = funeth_probe, + .remove = funeth_remove, + .shutdown = funeth_remove, + .sriov_configure = funeth_sriov_configure, +}; + +module_pci_driver(funeth_driver); + +MODULE_AUTHOR("Dimitris Michailidis <dmichail@fungible.com>"); +MODULE_DESCRIPTION("Fungible Ethernet Network Driver"); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_DEVICE_TABLE(pci, funeth_id_table); diff --git a/drivers/net/ethernet/fungible/funeth/funeth_rx.c b/drivers/net/ethernet/fungible/funeth/funeth_rx.c new file mode 100644 index 000000000..29a6c2ede --- /dev/null +++ b/drivers/net/ethernet/fungible/funeth/funeth_rx.c @@ -0,0 +1,829 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) + +#include <linux/bpf_trace.h> +#include <linux/dma-mapping.h> +#include <linux/etherdevice.h> +#include <linux/filter.h> +#include <linux/irq.h> +#include <linux/pci.h> +#include <linux/skbuff.h> +#include "funeth_txrx.h" +#include "funeth.h" +#include "fun_queue.h" + +#define CREATE_TRACE_POINTS +#include "funeth_trace.h" + +/* Given the device's max supported MTU and pages of at least 4KB a packet can + * be scattered into at most 4 buffers. + */ +#define RX_MAX_FRAGS 4 + +/* Per packet headroom in non-XDP mode. Present only for 1-frag packets. */ +#define FUN_RX_HEADROOM (NET_SKB_PAD + NET_IP_ALIGN) + +/* We try to reuse pages for our buffers. To avoid frequent page ref writes we + * take EXTRA_PAGE_REFS references at once and then hand them out one per packet + * occupying the buffer. + */ +#define EXTRA_PAGE_REFS 1000000 +#define MIN_PAGE_REFS 1000 + +enum { + FUN_XDP_FLUSH_REDIR = 1, + FUN_XDP_FLUSH_TX = 2, +}; + +/* See if a page is running low on refs we are holding and if so take more. */ +static void refresh_refs(struct funeth_rxbuf *buf) +{ + if (unlikely(buf->pg_refs < MIN_PAGE_REFS)) { + buf->pg_refs += EXTRA_PAGE_REFS; + page_ref_add(buf->page, EXTRA_PAGE_REFS); + } +} + +/* Offer a buffer to the Rx buffer cache. The cache will hold the buffer if its + * page is worth retaining and there's room for it. Otherwise the page is + * unmapped and our references released. + */ +static void cache_offer(struct funeth_rxq *q, const struct funeth_rxbuf *buf) +{ + struct funeth_rx_cache *c = &q->cache; + + if (c->prod_cnt - c->cons_cnt <= c->mask && buf->node == numa_mem_id()) { + c->bufs[c->prod_cnt & c->mask] = *buf; + c->prod_cnt++; + } else { + dma_unmap_page_attrs(q->dma_dev, buf->dma_addr, PAGE_SIZE, + DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); + __page_frag_cache_drain(buf->page, buf->pg_refs); + } +} + +/* Get a page from the Rx buffer cache. We only consider the next available + * page and return it if we own all its references. + */ +static bool cache_get(struct funeth_rxq *q, struct funeth_rxbuf *rb) +{ + struct funeth_rx_cache *c = &q->cache; + struct funeth_rxbuf *buf; + + if (c->prod_cnt == c->cons_cnt) + return false; /* empty cache */ + + buf = &c->bufs[c->cons_cnt & c->mask]; + if (page_ref_count(buf->page) == buf->pg_refs) { + dma_sync_single_for_device(q->dma_dev, buf->dma_addr, + PAGE_SIZE, DMA_FROM_DEVICE); + *rb = *buf; + buf->page = NULL; + refresh_refs(rb); + c->cons_cnt++; + return true; + } + + /* Page can't be reused. If the cache is full drop this page. */ + if (c->prod_cnt - c->cons_cnt > c->mask) { + dma_unmap_page_attrs(q->dma_dev, buf->dma_addr, PAGE_SIZE, + DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); + __page_frag_cache_drain(buf->page, buf->pg_refs); + buf->page = NULL; + c->cons_cnt++; + } + return false; +} + +/* Allocate and DMA-map a page for receive. */ +static int funeth_alloc_page(struct funeth_rxq *q, struct funeth_rxbuf *rb, + int node, gfp_t gfp) +{ + struct page *p; + + if (cache_get(q, rb)) + return 0; + + p = __alloc_pages_node(node, gfp | __GFP_NOWARN, 0); + if (unlikely(!p)) + return -ENOMEM; + + rb->dma_addr = dma_map_page(q->dma_dev, p, 0, PAGE_SIZE, + DMA_FROM_DEVICE); + if (unlikely(dma_mapping_error(q->dma_dev, rb->dma_addr))) { + FUN_QSTAT_INC(q, rx_map_err); + __free_page(p); + return -ENOMEM; + } + + FUN_QSTAT_INC(q, rx_page_alloc); + + rb->page = p; + rb->pg_refs = 1; + refresh_refs(rb); + rb->node = page_is_pfmemalloc(p) ? -1 : page_to_nid(p); + return 0; +} + +static void funeth_free_page(struct funeth_rxq *q, struct funeth_rxbuf *rb) +{ + if (rb->page) { + dma_unmap_page(q->dma_dev, rb->dma_addr, PAGE_SIZE, + DMA_FROM_DEVICE); + __page_frag_cache_drain(rb->page, rb->pg_refs); + rb->page = NULL; + } +} + +/* Run the XDP program assigned to an Rx queue. + * Return %NULL if the buffer is consumed, or the virtual address of the packet + * to turn into an skb. + */ +static void *fun_run_xdp(struct funeth_rxq *q, skb_frag_t *frags, void *buf_va, + int ref_ok, struct funeth_txq *xdp_q) +{ + struct bpf_prog *xdp_prog; + struct xdp_frame *xdpf; + struct xdp_buff xdp; + u32 act; + + /* VA includes the headroom, frag size includes headroom + tailroom */ + xdp_init_buff(&xdp, ALIGN(skb_frag_size(frags), FUN_EPRQ_PKT_ALIGN), + &q->xdp_rxq); + xdp_prepare_buff(&xdp, buf_va, FUN_XDP_HEADROOM, skb_frag_size(frags) - + (FUN_RX_TAILROOM + FUN_XDP_HEADROOM), false); + + xdp_prog = READ_ONCE(q->xdp_prog); + act = bpf_prog_run_xdp(xdp_prog, &xdp); + + switch (act) { + case XDP_PASS: + /* remove headroom, which may not be FUN_XDP_HEADROOM now */ + skb_frag_size_set(frags, xdp.data_end - xdp.data); + skb_frag_off_add(frags, xdp.data - xdp.data_hard_start); + goto pass; + case XDP_TX: + if (unlikely(!ref_ok)) + goto pass; + + xdpf = xdp_convert_buff_to_frame(&xdp); + if (!xdpf || !fun_xdp_tx(xdp_q, xdpf)) + goto xdp_error; + FUN_QSTAT_INC(q, xdp_tx); + q->xdp_flush |= FUN_XDP_FLUSH_TX; + break; + case XDP_REDIRECT: + if (unlikely(!ref_ok)) + goto pass; + if (unlikely(xdp_do_redirect(q->netdev, &xdp, xdp_prog))) + goto xdp_error; + FUN_QSTAT_INC(q, xdp_redir); + q->xdp_flush |= FUN_XDP_FLUSH_REDIR; + break; + default: + bpf_warn_invalid_xdp_action(q->netdev, xdp_prog, act); + fallthrough; + case XDP_ABORTED: + trace_xdp_exception(q->netdev, xdp_prog, act); +xdp_error: + q->cur_buf->pg_refs++; /* return frags' page reference */ + FUN_QSTAT_INC(q, xdp_err); + break; + case XDP_DROP: + q->cur_buf->pg_refs++; + FUN_QSTAT_INC(q, xdp_drops); + break; + } + return NULL; + +pass: + return xdp.data; +} + +/* A CQE contains a fixed completion structure along with optional metadata and + * even packet data. Given the start address of a CQE return the start of the + * contained fixed structure, which lies at the end. + */ +static const void *cqe_to_info(const void *cqe) +{ + return cqe + FUNETH_CQE_INFO_OFFSET; +} + +/* The inverse of cqe_to_info(). */ +static const void *info_to_cqe(const void *cqe_info) +{ + return cqe_info - FUNETH_CQE_INFO_OFFSET; +} + +/* Return the type of hash provided by the device based on the L3 and L4 + * protocols it parsed for the packet. + */ +static enum pkt_hash_types cqe_to_pkt_hash_type(u16 pkt_parse) +{ + static const enum pkt_hash_types htype_map[] = { + PKT_HASH_TYPE_NONE, PKT_HASH_TYPE_L3, + PKT_HASH_TYPE_NONE, PKT_HASH_TYPE_L4, + PKT_HASH_TYPE_NONE, PKT_HASH_TYPE_L3, + PKT_HASH_TYPE_NONE, PKT_HASH_TYPE_L3 + }; + u16 key; + + /* Build the key from the TCP/UDP and IP/IPv6 bits */ + key = ((pkt_parse >> FUN_ETH_RX_CV_OL4_PROT_S) & 6) | + ((pkt_parse >> (FUN_ETH_RX_CV_OL3_PROT_S + 1)) & 1); + + return htype_map[key]; +} + +/* Each received packet can be scattered across several Rx buffers or can + * share a buffer with previously received packets depending on the buffer + * and packet sizes and the room available in the most recently used buffer. + * + * The rules are: + * - If the buffer at the head of an RQ has not been used it gets (part of) the + * next incoming packet. + * - Otherwise, if the packet fully fits in the buffer's remaining space the + * packet is written there. + * - Otherwise, the packet goes into the next Rx buffer. + * + * This function returns the Rx buffer for a packet or fragment thereof of the + * given length. If it isn't @buf it either recycles or frees that buffer + * before advancing the queue to the next buffer. + * + * If called repeatedly with the remaining length of a packet it will walk + * through all the buffers containing the packet. + */ +static struct funeth_rxbuf * +get_buf(struct funeth_rxq *q, struct funeth_rxbuf *buf, unsigned int len) +{ + if (q->buf_offset + len <= PAGE_SIZE || !q->buf_offset) + return buf; /* @buf holds (part of) the packet */ + + /* The packet occupies part of the next buffer. Move there after + * replenishing the current buffer slot either with the spare page or + * by reusing the slot's existing page. Note that if a spare page isn't + * available and the current packet occupies @buf it is a multi-frag + * packet that will be dropped leaving @buf available for reuse. + */ + if ((page_ref_count(buf->page) == buf->pg_refs && + buf->node == numa_mem_id()) || !q->spare_buf.page) { + dma_sync_single_for_device(q->dma_dev, buf->dma_addr, + PAGE_SIZE, DMA_FROM_DEVICE); + refresh_refs(buf); + } else { + cache_offer(q, buf); + *buf = q->spare_buf; + q->spare_buf.page = NULL; + q->rqes[q->rq_cons & q->rq_mask] = + FUN_EPRQ_RQBUF_INIT(buf->dma_addr); + } + q->buf_offset = 0; + q->rq_cons++; + return &q->bufs[q->rq_cons & q->rq_mask]; +} + +/* Gather the page fragments making up the first Rx packet on @q. Its total + * length @tot_len includes optional head- and tail-rooms. + * + * Return 0 if the device retains ownership of at least some of the pages. + * In this case the caller may only copy the packet. + * + * A non-zero return value gives the caller permission to use references to the + * pages, e.g., attach them to skbs. Additionally, if the value is <0 at least + * one of the pages is PF_MEMALLOC. + * + * Regardless of outcome the caller is granted a reference to each of the pages. + */ +static int fun_gather_pkt(struct funeth_rxq *q, unsigned int tot_len, + skb_frag_t *frags) +{ + struct funeth_rxbuf *buf = q->cur_buf; + unsigned int frag_len; + int ref_ok = 1; + + for (;;) { + buf = get_buf(q, buf, tot_len); + + /* We always keep the RQ full of buffers so before we can give + * one of our pages to the stack we require that we can obtain + * a replacement page. If we can't the packet will either be + * copied or dropped so we can retain ownership of the page and + * reuse it. + */ + if (!q->spare_buf.page && + funeth_alloc_page(q, &q->spare_buf, numa_mem_id(), + GFP_ATOMIC | __GFP_MEMALLOC)) + ref_ok = 0; + + frag_len = min_t(unsigned int, tot_len, + PAGE_SIZE - q->buf_offset); + dma_sync_single_for_cpu(q->dma_dev, + buf->dma_addr + q->buf_offset, + frag_len, DMA_FROM_DEVICE); + buf->pg_refs--; + if (ref_ok) + ref_ok |= buf->node; + + __skb_frag_set_page(frags, buf->page); + skb_frag_off_set(frags, q->buf_offset); + skb_frag_size_set(frags++, frag_len); + + tot_len -= frag_len; + if (!tot_len) + break; + + q->buf_offset = PAGE_SIZE; + } + q->buf_offset = ALIGN(q->buf_offset + frag_len, FUN_EPRQ_PKT_ALIGN); + q->cur_buf = buf; + return ref_ok; +} + +static bool rx_hwtstamp_enabled(const struct net_device *dev) +{ + const struct funeth_priv *d = netdev_priv(dev); + + return d->hwtstamp_cfg.rx_filter == HWTSTAMP_FILTER_ALL; +} + +/* Advance the CQ pointers and phase tag to the next CQE. */ +static void advance_cq(struct funeth_rxq *q) +{ + if (unlikely(q->cq_head == q->cq_mask)) { + q->cq_head = 0; + q->phase ^= 1; + q->next_cqe_info = cqe_to_info(q->cqes); + } else { + q->cq_head++; + q->next_cqe_info += FUNETH_CQE_SIZE; + } + prefetch(q->next_cqe_info); +} + +/* Process the packet represented by the head CQE of @q. Gather the packet's + * fragments, run it through the optional XDP program, and if needed construct + * an skb and pass it to the stack. + */ +static void fun_handle_cqe_pkt(struct funeth_rxq *q, struct funeth_txq *xdp_q) +{ + const struct fun_eth_cqe *rxreq = info_to_cqe(q->next_cqe_info); + unsigned int i, tot_len, pkt_len = be32_to_cpu(rxreq->pkt_len); + struct net_device *ndev = q->netdev; + skb_frag_t frags[RX_MAX_FRAGS]; + struct skb_shared_info *si; + unsigned int headroom; + gro_result_t gro_res; + struct sk_buff *skb; + int ref_ok; + void *va; + u16 cv; + + u64_stats_update_begin(&q->syncp); + q->stats.rx_pkts++; + q->stats.rx_bytes += pkt_len; + u64_stats_update_end(&q->syncp); + + advance_cq(q); + + /* account for head- and tail-room, present only for 1-buffer packets */ + tot_len = pkt_len; + headroom = be16_to_cpu(rxreq->headroom); + if (likely(headroom)) + tot_len += FUN_RX_TAILROOM + headroom; + + ref_ok = fun_gather_pkt(q, tot_len, frags); + va = skb_frag_address(frags); + if (xdp_q && headroom == FUN_XDP_HEADROOM) { + va = fun_run_xdp(q, frags, va, ref_ok, xdp_q); + if (!va) + return; + headroom = 0; /* XDP_PASS trims it */ + } + if (unlikely(!ref_ok)) + goto no_mem; + + if (likely(headroom)) { + /* headroom is either FUN_RX_HEADROOM or FUN_XDP_HEADROOM */ + prefetch(va + headroom); + skb = napi_build_skb(va, ALIGN(tot_len, FUN_EPRQ_PKT_ALIGN)); + if (unlikely(!skb)) + goto no_mem; + + skb_reserve(skb, headroom); + __skb_put(skb, pkt_len); + skb->protocol = eth_type_trans(skb, ndev); + } else { + prefetch(va); + skb = napi_get_frags(q->napi); + if (unlikely(!skb)) + goto no_mem; + + if (ref_ok < 0) + skb->pfmemalloc = 1; + + si = skb_shinfo(skb); + si->nr_frags = rxreq->nsgl; + for (i = 0; i < si->nr_frags; i++) + si->frags[i] = frags[i]; + + skb->len = pkt_len; + skb->data_len = pkt_len; + skb->truesize += round_up(pkt_len, FUN_EPRQ_PKT_ALIGN); + } + + skb_record_rx_queue(skb, q->qidx); + cv = be16_to_cpu(rxreq->pkt_cv); + if (likely((q->netdev->features & NETIF_F_RXHASH) && rxreq->hash)) + skb_set_hash(skb, be32_to_cpu(rxreq->hash), + cqe_to_pkt_hash_type(cv)); + if (likely((q->netdev->features & NETIF_F_RXCSUM) && rxreq->csum)) { + FUN_QSTAT_INC(q, rx_cso); + skb->ip_summed = CHECKSUM_UNNECESSARY; + skb->csum_level = be16_to_cpu(rxreq->csum) - 1; + } + if (unlikely(rx_hwtstamp_enabled(q->netdev))) + skb_hwtstamps(skb)->hwtstamp = be64_to_cpu(rxreq->timestamp); + + trace_funeth_rx(q, rxreq->nsgl, pkt_len, skb->hash, cv); + + gro_res = skb->data_len ? napi_gro_frags(q->napi) : + napi_gro_receive(q->napi, skb); + if (gro_res == GRO_MERGED || gro_res == GRO_MERGED_FREE) + FUN_QSTAT_INC(q, gro_merged); + else if (gro_res == GRO_HELD) + FUN_QSTAT_INC(q, gro_pkts); + return; + +no_mem: + FUN_QSTAT_INC(q, rx_mem_drops); + + /* Release the references we've been granted for the frag pages. + * We return the ref of the last frag and free the rest. + */ + q->cur_buf->pg_refs++; + for (i = 0; i < rxreq->nsgl - 1; i++) + __free_page(skb_frag_page(frags + i)); +} + +/* Return 0 if the phase tag of the CQE at the CQ's head matches expectations + * indicating the CQE is new. + */ +static u16 cqe_phase_mismatch(const struct fun_cqe_info *ci, u16 phase) +{ + u16 sf_p = be16_to_cpu(ci->sf_p); + + return (sf_p & 1) ^ phase; +} + +/* Walk through a CQ identifying and processing fresh CQEs up to the given + * budget. Return the remaining budget. + */ +static int fun_process_cqes(struct funeth_rxq *q, int budget) +{ + struct funeth_priv *fp = netdev_priv(q->netdev); + struct funeth_txq **xdpqs, *xdp_q = NULL; + + xdpqs = rcu_dereference_bh(fp->xdpqs); + if (xdpqs) + xdp_q = xdpqs[smp_processor_id()]; + + while (budget && !cqe_phase_mismatch(q->next_cqe_info, q->phase)) { + /* access other descriptor fields after the phase check */ + dma_rmb(); + + fun_handle_cqe_pkt(q, xdp_q); + budget--; + } + + if (unlikely(q->xdp_flush)) { + if (q->xdp_flush & FUN_XDP_FLUSH_TX) + fun_txq_wr_db(xdp_q); + if (q->xdp_flush & FUN_XDP_FLUSH_REDIR) + xdp_do_flush(); + q->xdp_flush = 0; + } + + return budget; +} + +/* NAPI handler for Rx queues. Calls the CQE processing loop and writes RQ/CQ + * doorbells as needed. + */ +int fun_rxq_napi_poll(struct napi_struct *napi, int budget) +{ + struct fun_irq *irq = container_of(napi, struct fun_irq, napi); + struct funeth_rxq *q = irq->rxq; + int work_done = budget - fun_process_cqes(q, budget); + u32 cq_db_val = q->cq_head; + + if (unlikely(work_done >= budget)) + FUN_QSTAT_INC(q, rx_budget); + else if (napi_complete_done(napi, work_done)) + cq_db_val |= q->irq_db_val; + + /* check whether to post new Rx buffers */ + if (q->rq_cons - q->rq_cons_db >= q->rq_db_thres) { + u64_stats_update_begin(&q->syncp); + q->stats.rx_bufs += q->rq_cons - q->rq_cons_db; + u64_stats_update_end(&q->syncp); + q->rq_cons_db = q->rq_cons; + writel((q->rq_cons - 1) & q->rq_mask, q->rq_db); + } + + writel(cq_db_val, q->cq_db); + return work_done; +} + +/* Free the Rx buffers of an Rx queue. */ +static void fun_rxq_free_bufs(struct funeth_rxq *q) +{ + struct funeth_rxbuf *b = q->bufs; + unsigned int i; + + for (i = 0; i <= q->rq_mask; i++, b++) + funeth_free_page(q, b); + + funeth_free_page(q, &q->spare_buf); + q->cur_buf = NULL; +} + +/* Initially provision an Rx queue with Rx buffers. */ +static int fun_rxq_alloc_bufs(struct funeth_rxq *q, int node) +{ + struct funeth_rxbuf *b = q->bufs; + unsigned int i; + + for (i = 0; i <= q->rq_mask; i++, b++) { + if (funeth_alloc_page(q, b, node, GFP_KERNEL)) { + fun_rxq_free_bufs(q); + return -ENOMEM; + } + q->rqes[i] = FUN_EPRQ_RQBUF_INIT(b->dma_addr); + } + q->cur_buf = q->bufs; + return 0; +} + +/* Initialize a used-buffer cache of the given depth. */ +static int fun_rxq_init_cache(struct funeth_rx_cache *c, unsigned int depth, + int node) +{ + c->mask = depth - 1; + c->bufs = kvzalloc_node(depth * sizeof(*c->bufs), GFP_KERNEL, node); + return c->bufs ? 0 : -ENOMEM; +} + +/* Deallocate an Rx queue's used-buffer cache and its contents. */ +static void fun_rxq_free_cache(struct funeth_rxq *q) +{ + struct funeth_rxbuf *b = q->cache.bufs; + unsigned int i; + + for (i = 0; i <= q->cache.mask; i++, b++) + funeth_free_page(q, b); + + kvfree(q->cache.bufs); + q->cache.bufs = NULL; +} + +int fun_rxq_set_bpf(struct funeth_rxq *q, struct bpf_prog *prog) +{ + struct funeth_priv *fp = netdev_priv(q->netdev); + struct fun_admin_epcq_req cmd; + u16 headroom; + int err; + + headroom = prog ? FUN_XDP_HEADROOM : FUN_RX_HEADROOM; + if (headroom != q->headroom) { + cmd.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_EPCQ, + sizeof(cmd)); + cmd.u.modify = + FUN_ADMIN_EPCQ_MODIFY_REQ_INIT(FUN_ADMIN_SUBOP_MODIFY, + 0, q->hw_cqid, headroom); + err = fun_submit_admin_sync_cmd(fp->fdev, &cmd.common, NULL, 0, + 0); + if (err) + return err; + q->headroom = headroom; + } + + WRITE_ONCE(q->xdp_prog, prog); + return 0; +} + +/* Create an Rx queue, allocating the host memory it needs. */ +static struct funeth_rxq *fun_rxq_create_sw(struct net_device *dev, + unsigned int qidx, + unsigned int ncqe, + unsigned int nrqe, + struct fun_irq *irq) +{ + struct funeth_priv *fp = netdev_priv(dev); + struct funeth_rxq *q; + int err = -ENOMEM; + int numa_node; + + numa_node = fun_irq_node(irq); + q = kzalloc_node(sizeof(*q), GFP_KERNEL, numa_node); + if (!q) + goto err; + + q->qidx = qidx; + q->netdev = dev; + q->cq_mask = ncqe - 1; + q->rq_mask = nrqe - 1; + q->numa_node = numa_node; + q->rq_db_thres = nrqe / 4; + u64_stats_init(&q->syncp); + q->dma_dev = &fp->pdev->dev; + + q->rqes = fun_alloc_ring_mem(q->dma_dev, nrqe, sizeof(*q->rqes), + sizeof(*q->bufs), false, numa_node, + &q->rq_dma_addr, (void **)&q->bufs, NULL); + if (!q->rqes) + goto free_q; + + q->cqes = fun_alloc_ring_mem(q->dma_dev, ncqe, FUNETH_CQE_SIZE, 0, + false, numa_node, &q->cq_dma_addr, NULL, + NULL); + if (!q->cqes) + goto free_rqes; + + err = fun_rxq_init_cache(&q->cache, nrqe, numa_node); + if (err) + goto free_cqes; + + err = fun_rxq_alloc_bufs(q, numa_node); + if (err) + goto free_cache; + + q->stats.rx_bufs = q->rq_mask; + q->init_state = FUN_QSTATE_INIT_SW; + return q; + +free_cache: + fun_rxq_free_cache(q); +free_cqes: + dma_free_coherent(q->dma_dev, ncqe * FUNETH_CQE_SIZE, q->cqes, + q->cq_dma_addr); +free_rqes: + fun_free_ring_mem(q->dma_dev, nrqe, sizeof(*q->rqes), false, q->rqes, + q->rq_dma_addr, q->bufs); +free_q: + kfree(q); +err: + netdev_err(dev, "Unable to allocate memory for Rx queue %u\n", qidx); + return ERR_PTR(err); +} + +static void fun_rxq_free_sw(struct funeth_rxq *q) +{ + struct funeth_priv *fp = netdev_priv(q->netdev); + + fun_rxq_free_cache(q); + fun_rxq_free_bufs(q); + fun_free_ring_mem(q->dma_dev, q->rq_mask + 1, sizeof(*q->rqes), false, + q->rqes, q->rq_dma_addr, q->bufs); + dma_free_coherent(q->dma_dev, (q->cq_mask + 1) * FUNETH_CQE_SIZE, + q->cqes, q->cq_dma_addr); + + /* Before freeing the queue transfer key counters to the device. */ + fp->rx_packets += q->stats.rx_pkts; + fp->rx_bytes += q->stats.rx_bytes; + fp->rx_dropped += q->stats.rx_map_err + q->stats.rx_mem_drops; + + kfree(q); +} + +/* Create an Rx queue's resources on the device. */ +int fun_rxq_create_dev(struct funeth_rxq *q, struct fun_irq *irq) +{ + struct funeth_priv *fp = netdev_priv(q->netdev); + unsigned int ncqe = q->cq_mask + 1; + unsigned int nrqe = q->rq_mask + 1; + int err; + + err = xdp_rxq_info_reg(&q->xdp_rxq, q->netdev, q->qidx, + irq->napi.napi_id); + if (err) + goto out; + + err = xdp_rxq_info_reg_mem_model(&q->xdp_rxq, MEM_TYPE_PAGE_SHARED, + NULL); + if (err) + goto xdp_unreg; + + q->phase = 1; + q->irq_cnt = 0; + q->cq_head = 0; + q->rq_cons = 0; + q->rq_cons_db = 0; + q->buf_offset = 0; + q->napi = &irq->napi; + q->irq_db_val = fp->cq_irq_db; + q->next_cqe_info = cqe_to_info(q->cqes); + + q->xdp_prog = fp->xdp_prog; + q->headroom = fp->xdp_prog ? FUN_XDP_HEADROOM : FUN_RX_HEADROOM; + + err = fun_sq_create(fp->fdev, FUN_ADMIN_RES_CREATE_FLAG_ALLOCATOR | + FUN_ADMIN_EPSQ_CREATE_FLAG_RQ, 0, + FUN_HCI_ID_INVALID, 0, nrqe, q->rq_dma_addr, 0, 0, + 0, 0, fp->fdev->kern_end_qid, PAGE_SHIFT, + &q->hw_sqid, &q->rq_db); + if (err) + goto xdp_unreg; + + err = fun_cq_create(fp->fdev, FUN_ADMIN_RES_CREATE_FLAG_ALLOCATOR | + FUN_ADMIN_EPCQ_CREATE_FLAG_RQ, 0, + q->hw_sqid, ilog2(FUNETH_CQE_SIZE), ncqe, + q->cq_dma_addr, q->headroom, FUN_RX_TAILROOM, 0, 0, + irq->irq_idx, 0, fp->fdev->kern_end_qid, + &q->hw_cqid, &q->cq_db); + if (err) + goto free_rq; + + irq->rxq = q; + writel(q->rq_mask, q->rq_db); + q->init_state = FUN_QSTATE_INIT_FULL; + + netif_info(fp, ifup, q->netdev, + "Rx queue %u, depth %u/%u, HW qid %u/%u, IRQ idx %u, node %d, headroom %u\n", + q->qidx, ncqe, nrqe, q->hw_cqid, q->hw_sqid, irq->irq_idx, + q->numa_node, q->headroom); + return 0; + +free_rq: + fun_destroy_sq(fp->fdev, q->hw_sqid); +xdp_unreg: + xdp_rxq_info_unreg(&q->xdp_rxq); +out: + netdev_err(q->netdev, + "Failed to create Rx queue %u on device, error %d\n", + q->qidx, err); + return err; +} + +static void fun_rxq_free_dev(struct funeth_rxq *q) +{ + struct funeth_priv *fp = netdev_priv(q->netdev); + struct fun_irq *irq; + + if (q->init_state < FUN_QSTATE_INIT_FULL) + return; + + irq = container_of(q->napi, struct fun_irq, napi); + netif_info(fp, ifdown, q->netdev, + "Freeing Rx queue %u (id %u/%u), IRQ %u\n", + q->qidx, q->hw_cqid, q->hw_sqid, irq->irq_idx); + + irq->rxq = NULL; + xdp_rxq_info_unreg(&q->xdp_rxq); + fun_destroy_sq(fp->fdev, q->hw_sqid); + fun_destroy_cq(fp->fdev, q->hw_cqid); + q->init_state = FUN_QSTATE_INIT_SW; +} + +/* Create or advance an Rx queue, allocating all the host and device resources + * needed to reach the target state. + */ +int funeth_rxq_create(struct net_device *dev, unsigned int qidx, + unsigned int ncqe, unsigned int nrqe, struct fun_irq *irq, + int state, struct funeth_rxq **qp) +{ + struct funeth_rxq *q = *qp; + int err; + + if (!q) { + q = fun_rxq_create_sw(dev, qidx, ncqe, nrqe, irq); + if (IS_ERR(q)) + return PTR_ERR(q); + } + + if (q->init_state >= state) + goto out; + + err = fun_rxq_create_dev(q, irq); + if (err) { + if (!*qp) + fun_rxq_free_sw(q); + return err; + } + +out: + *qp = q; + return 0; +} + +/* Free Rx queue resources until it reaches the target state. */ +struct funeth_rxq *funeth_rxq_free(struct funeth_rxq *q, int state) +{ + if (state < FUN_QSTATE_INIT_FULL) + fun_rxq_free_dev(q); + + if (state == FUN_QSTATE_DESTROYED) { + fun_rxq_free_sw(q); + q = NULL; + } + + return q; +} diff --git a/drivers/net/ethernet/fungible/funeth/funeth_trace.h b/drivers/net/ethernet/fungible/funeth/funeth_trace.h new file mode 100644 index 000000000..9e58dfec1 --- /dev/null +++ b/drivers/net/ethernet/fungible/funeth/funeth_trace.h @@ -0,0 +1,117 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM funeth + +#if !defined(_TRACE_FUNETH_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_FUNETH_H + +#include <linux/tracepoint.h> + +#include "funeth_txrx.h" + +TRACE_EVENT(funeth_tx, + + TP_PROTO(const struct funeth_txq *txq, + u32 len, + u32 sqe_idx, + u32 ngle), + + TP_ARGS(txq, len, sqe_idx, ngle), + + TP_STRUCT__entry( + __field(u32, qidx) + __field(u32, len) + __field(u32, sqe_idx) + __field(u32, ngle) + __string(devname, txq->netdev->name) + ), + + TP_fast_assign( + __entry->qidx = txq->qidx; + __entry->len = len; + __entry->sqe_idx = sqe_idx; + __entry->ngle = ngle; + __assign_str(devname, txq->netdev->name); + ), + + TP_printk("%s: Txq %u, SQE idx %u, len %u, num GLEs %u", + __get_str(devname), __entry->qidx, __entry->sqe_idx, + __entry->len, __entry->ngle) +); + +TRACE_EVENT(funeth_tx_free, + + TP_PROTO(const struct funeth_txq *txq, + u32 sqe_idx, + u32 num_sqes, + u32 hw_head), + + TP_ARGS(txq, sqe_idx, num_sqes, hw_head), + + TP_STRUCT__entry( + __field(u32, qidx) + __field(u32, sqe_idx) + __field(u32, num_sqes) + __field(u32, hw_head) + __string(devname, txq->netdev->name) + ), + + TP_fast_assign( + __entry->qidx = txq->qidx; + __entry->sqe_idx = sqe_idx; + __entry->num_sqes = num_sqes; + __entry->hw_head = hw_head; + __assign_str(devname, txq->netdev->name); + ), + + TP_printk("%s: Txq %u, SQE idx %u, SQEs %u, HW head %u", + __get_str(devname), __entry->qidx, __entry->sqe_idx, + __entry->num_sqes, __entry->hw_head) +); + +TRACE_EVENT(funeth_rx, + + TP_PROTO(const struct funeth_rxq *rxq, + u32 num_rqes, + u32 pkt_len, + u32 hash, + u32 cls_vec), + + TP_ARGS(rxq, num_rqes, pkt_len, hash, cls_vec), + + TP_STRUCT__entry( + __field(u32, qidx) + __field(u32, cq_head) + __field(u32, num_rqes) + __field(u32, len) + __field(u32, hash) + __field(u32, cls_vec) + __string(devname, rxq->netdev->name) + ), + + TP_fast_assign( + __entry->qidx = rxq->qidx; + __entry->cq_head = rxq->cq_head; + __entry->num_rqes = num_rqes; + __entry->len = pkt_len; + __entry->hash = hash; + __entry->cls_vec = cls_vec; + __assign_str(devname, rxq->netdev->name); + ), + + TP_printk("%s: Rxq %u, CQ head %u, RQEs %u, len %u, hash %u, CV %#x", + __get_str(devname), __entry->qidx, __entry->cq_head, + __entry->num_rqes, __entry->len, __entry->hash, + __entry->cls_vec) +); + +#endif /* _TRACE_FUNETH_H */ + +/* Below must be outside protection. */ +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE funeth_trace + +#include <trace/define_trace.h> diff --git a/drivers/net/ethernet/fungible/funeth/funeth_tx.c b/drivers/net/ethernet/fungible/funeth/funeth_tx.c new file mode 100644 index 000000000..706d81e39 --- /dev/null +++ b/drivers/net/ethernet/fungible/funeth/funeth_tx.c @@ -0,0 +1,801 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) + +#include <linux/dma-mapping.h> +#include <linux/ip.h> +#include <linux/pci.h> +#include <linux/skbuff.h> +#include <linux/tcp.h> +#include <uapi/linux/udp.h> +#include "funeth.h" +#include "funeth_ktls.h" +#include "funeth_txrx.h" +#include "funeth_trace.h" +#include "fun_queue.h" + +#define FUN_XDP_CLEAN_THRES 32 +#define FUN_XDP_CLEAN_BATCH 16 + +/* DMA-map a packet and return the (length, DMA_address) pairs for its + * segments. If a mapping error occurs -ENOMEM is returned. The packet + * consists of an skb_shared_info and one additional address/length pair. + */ +static int fun_map_pkt(struct device *dev, const struct skb_shared_info *si, + void *data, unsigned int data_len, + dma_addr_t *addr, unsigned int *len) +{ + const skb_frag_t *fp, *end; + + *len = data_len; + *addr = dma_map_single(dev, data, *len, DMA_TO_DEVICE); + if (dma_mapping_error(dev, *addr)) + return -ENOMEM; + + if (!si) + return 0; + + for (fp = si->frags, end = fp + si->nr_frags; fp < end; fp++) { + *++len = skb_frag_size(fp); + *++addr = skb_frag_dma_map(dev, fp, 0, *len, DMA_TO_DEVICE); + if (dma_mapping_error(dev, *addr)) + goto unwind; + } + return 0; + +unwind: + while (fp-- > si->frags) + dma_unmap_page(dev, *--addr, skb_frag_size(fp), DMA_TO_DEVICE); + + dma_unmap_single(dev, addr[-1], data_len, DMA_TO_DEVICE); + return -ENOMEM; +} + +/* Return the address just past the end of a Tx queue's descriptor ring. + * It exploits the fact that the HW writeback area is just after the end + * of the descriptor ring. + */ +static void *txq_end(const struct funeth_txq *q) +{ + return (void *)q->hw_wb; +} + +/* Return the amount of space within a Tx ring from the given address to the + * end. + */ +static unsigned int txq_to_end(const struct funeth_txq *q, void *p) +{ + return txq_end(q) - p; +} + +/* Return the number of Tx descriptors occupied by a Tx request. */ +static unsigned int tx_req_ndesc(const struct fun_eth_tx_req *req) +{ + return DIV_ROUND_UP(req->len8, FUNETH_SQE_SIZE / 8); +} + +/* Write a gather list to the Tx descriptor at @req from @ngle address/length + * pairs. + */ +static struct fun_dataop_gl *fun_write_gl(const struct funeth_txq *q, + struct fun_eth_tx_req *req, + const dma_addr_t *addrs, + const unsigned int *lens, + unsigned int ngle) +{ + struct fun_dataop_gl *gle; + unsigned int i; + + req->len8 = (sizeof(*req) + ngle * sizeof(*gle)) / 8; + + for (i = 0, gle = (struct fun_dataop_gl *)req->dataop.imm; + i < ngle && txq_to_end(q, gle); i++, gle++) + fun_dataop_gl_init(gle, 0, 0, lens[i], addrs[i]); + + if (txq_to_end(q, gle) == 0) { + gle = (struct fun_dataop_gl *)q->desc; + for ( ; i < ngle; i++, gle++) + fun_dataop_gl_init(gle, 0, 0, lens[i], addrs[i]); + } + + return gle; +} + +static __be16 tcp_hdr_doff_flags(const struct tcphdr *th) +{ + return *(__be16 *)&tcp_flag_word(th); +} + +static struct sk_buff *fun_tls_tx(struct sk_buff *skb, struct funeth_txq *q, + unsigned int *tls_len) +{ +#if IS_ENABLED(CONFIG_TLS_DEVICE) + const struct fun_ktls_tx_ctx *tls_ctx; + u32 datalen, seq; + + datalen = skb->len - skb_tcp_all_headers(skb); + if (!datalen) + return skb; + + if (likely(!tls_offload_tx_resync_pending(skb->sk))) { + seq = ntohl(tcp_hdr(skb)->seq); + tls_ctx = tls_driver_ctx(skb->sk, TLS_OFFLOAD_CTX_DIR_TX); + + if (likely(tls_ctx->next_seq == seq)) { + *tls_len = datalen; + return skb; + } + if (seq - tls_ctx->next_seq < U32_MAX / 4) { + tls_offload_tx_resync_request(skb->sk, seq, + tls_ctx->next_seq); + } + } + + FUN_QSTAT_INC(q, tx_tls_fallback); + skb = tls_encrypt_skb(skb); + if (!skb) + FUN_QSTAT_INC(q, tx_tls_drops); + + return skb; +#else + return NULL; +#endif +} + +/* Write as many descriptors as needed for the supplied skb starting at the + * current producer location. The caller has made certain enough descriptors + * are available. + * + * Returns the number of descriptors written, 0 on error. + */ +static unsigned int write_pkt_desc(struct sk_buff *skb, struct funeth_txq *q, + unsigned int tls_len) +{ + unsigned int extra_bytes = 0, extra_pkts = 0; + unsigned int idx = q->prod_cnt & q->mask; + const struct skb_shared_info *shinfo; + unsigned int lens[MAX_SKB_FRAGS + 1]; + dma_addr_t addrs[MAX_SKB_FRAGS + 1]; + struct fun_eth_tx_req *req; + struct fun_dataop_gl *gle; + const struct tcphdr *th; + unsigned int l4_hlen; + unsigned int ngle; + u16 flags; + + shinfo = skb_shinfo(skb); + if (unlikely(fun_map_pkt(q->dma_dev, shinfo, skb->data, + skb_headlen(skb), addrs, lens))) { + FUN_QSTAT_INC(q, tx_map_err); + return 0; + } + + req = fun_tx_desc_addr(q, idx); + req->op = FUN_ETH_OP_TX; + req->len8 = 0; + req->flags = 0; + req->suboff8 = offsetof(struct fun_eth_tx_req, dataop); + req->repr_idn = 0; + req->encap_proto = 0; + + if (likely(shinfo->gso_size)) { + if (skb->encapsulation) { + u16 ol4_ofst; + + flags = FUN_ETH_OUTER_EN | FUN_ETH_INNER_LSO | + FUN_ETH_UPDATE_INNER_L4_CKSUM | + FUN_ETH_UPDATE_OUTER_L3_LEN; + if (shinfo->gso_type & (SKB_GSO_UDP_TUNNEL | + SKB_GSO_UDP_TUNNEL_CSUM)) { + flags |= FUN_ETH_UPDATE_OUTER_L4_LEN | + FUN_ETH_OUTER_UDP; + if (shinfo->gso_type & SKB_GSO_UDP_TUNNEL_CSUM) + flags |= FUN_ETH_UPDATE_OUTER_L4_CKSUM; + ol4_ofst = skb_transport_offset(skb); + } else { + ol4_ofst = skb_inner_network_offset(skb); + } + + if (ip_hdr(skb)->version == 4) + flags |= FUN_ETH_UPDATE_OUTER_L3_CKSUM; + else + flags |= FUN_ETH_OUTER_IPV6; + + if (skb->inner_network_header) { + if (inner_ip_hdr(skb)->version == 4) + flags |= FUN_ETH_UPDATE_INNER_L3_CKSUM | + FUN_ETH_UPDATE_INNER_L3_LEN; + else + flags |= FUN_ETH_INNER_IPV6 | + FUN_ETH_UPDATE_INNER_L3_LEN; + } + th = inner_tcp_hdr(skb); + l4_hlen = __tcp_hdrlen(th); + fun_eth_offload_init(&req->offload, flags, + shinfo->gso_size, + tcp_hdr_doff_flags(th), 0, + skb_inner_network_offset(skb), + skb_inner_transport_offset(skb), + skb_network_offset(skb), ol4_ofst); + FUN_QSTAT_INC(q, tx_encap_tso); + } else if (shinfo->gso_type & SKB_GSO_UDP_L4) { + flags = FUN_ETH_INNER_LSO | FUN_ETH_INNER_UDP | + FUN_ETH_UPDATE_INNER_L4_CKSUM | + FUN_ETH_UPDATE_INNER_L4_LEN | + FUN_ETH_UPDATE_INNER_L3_LEN; + + if (ip_hdr(skb)->version == 4) + flags |= FUN_ETH_UPDATE_INNER_L3_CKSUM; + else + flags |= FUN_ETH_INNER_IPV6; + + l4_hlen = sizeof(struct udphdr); + fun_eth_offload_init(&req->offload, flags, + shinfo->gso_size, + cpu_to_be16(l4_hlen << 10), 0, + skb_network_offset(skb), + skb_transport_offset(skb), 0, 0); + FUN_QSTAT_INC(q, tx_uso); + } else { + /* HW considers one set of headers as inner */ + flags = FUN_ETH_INNER_LSO | + FUN_ETH_UPDATE_INNER_L4_CKSUM | + FUN_ETH_UPDATE_INNER_L3_LEN; + if (shinfo->gso_type & SKB_GSO_TCPV6) + flags |= FUN_ETH_INNER_IPV6; + else + flags |= FUN_ETH_UPDATE_INNER_L3_CKSUM; + th = tcp_hdr(skb); + l4_hlen = __tcp_hdrlen(th); + fun_eth_offload_init(&req->offload, flags, + shinfo->gso_size, + tcp_hdr_doff_flags(th), 0, + skb_network_offset(skb), + skb_transport_offset(skb), 0, 0); + FUN_QSTAT_INC(q, tx_tso); + } + + u64_stats_update_begin(&q->syncp); + q->stats.tx_cso += shinfo->gso_segs; + u64_stats_update_end(&q->syncp); + + extra_pkts = shinfo->gso_segs - 1; + extra_bytes = (be16_to_cpu(req->offload.inner_l4_off) + + l4_hlen) * extra_pkts; + } else if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) { + flags = FUN_ETH_UPDATE_INNER_L4_CKSUM; + if (skb->csum_offset == offsetof(struct udphdr, check)) + flags |= FUN_ETH_INNER_UDP; + fun_eth_offload_init(&req->offload, flags, 0, 0, 0, 0, + skb_checksum_start_offset(skb), 0, 0); + FUN_QSTAT_INC(q, tx_cso); + } else { + fun_eth_offload_init(&req->offload, 0, 0, 0, 0, 0, 0, 0, 0); + } + + ngle = shinfo->nr_frags + 1; + req->dataop = FUN_DATAOP_HDR_INIT(ngle, 0, ngle, 0, skb->len); + + gle = fun_write_gl(q, req, addrs, lens, ngle); + + if (IS_ENABLED(CONFIG_TLS_DEVICE) && unlikely(tls_len)) { + struct fun_eth_tls *tls = (struct fun_eth_tls *)gle; + struct fun_ktls_tx_ctx *tls_ctx; + + req->len8 += FUNETH_TLS_SZ / 8; + req->flags = cpu_to_be16(FUN_ETH_TX_TLS); + + tls_ctx = tls_driver_ctx(skb->sk, TLS_OFFLOAD_CTX_DIR_TX); + tls->tlsid = tls_ctx->tlsid; + tls_ctx->next_seq += tls_len; + + u64_stats_update_begin(&q->syncp); + q->stats.tx_tls_bytes += tls_len; + q->stats.tx_tls_pkts += 1 + extra_pkts; + u64_stats_update_end(&q->syncp); + } + + u64_stats_update_begin(&q->syncp); + q->stats.tx_bytes += skb->len + extra_bytes; + q->stats.tx_pkts += 1 + extra_pkts; + u64_stats_update_end(&q->syncp); + + q->info[idx].skb = skb; + + trace_funeth_tx(q, skb->len, idx, req->dataop.ngather); + return tx_req_ndesc(req); +} + +/* Return the number of available descriptors of a Tx queue. + * HW assumes head==tail means the ring is empty so we need to keep one + * descriptor unused. + */ +static unsigned int fun_txq_avail(const struct funeth_txq *q) +{ + return q->mask - q->prod_cnt + q->cons_cnt; +} + +/* Stop a queue if it can't handle another worst-case packet. */ +static void fun_tx_check_stop(struct funeth_txq *q) +{ + if (likely(fun_txq_avail(q) >= FUNETH_MAX_PKT_DESC)) + return; + + netif_tx_stop_queue(q->ndq); + + /* NAPI reclaim is freeing packets in parallel with us and we may race. + * We have stopped the queue but check again after synchronizing with + * reclaim. + */ + smp_mb(); + if (likely(fun_txq_avail(q) < FUNETH_MAX_PKT_DESC)) + FUN_QSTAT_INC(q, tx_nstops); + else + netif_tx_start_queue(q->ndq); +} + +/* Return true if a queue has enough space to restart. Current condition is + * that the queue must be >= 1/4 empty. + */ +static bool fun_txq_may_restart(struct funeth_txq *q) +{ + return fun_txq_avail(q) >= q->mask / 4; +} + +netdev_tx_t fun_start_xmit(struct sk_buff *skb, struct net_device *netdev) +{ + struct funeth_priv *fp = netdev_priv(netdev); + unsigned int qid = skb_get_queue_mapping(skb); + struct funeth_txq *q = fp->txqs[qid]; + unsigned int tls_len = 0; + unsigned int ndesc; + + if (IS_ENABLED(CONFIG_TLS_DEVICE) && skb->sk && + tls_is_sk_tx_device_offloaded(skb->sk)) { + skb = fun_tls_tx(skb, q, &tls_len); + if (unlikely(!skb)) + goto dropped; + } + + ndesc = write_pkt_desc(skb, q, tls_len); + if (unlikely(!ndesc)) { + dev_kfree_skb_any(skb); + goto dropped; + } + + q->prod_cnt += ndesc; + fun_tx_check_stop(q); + + skb_tx_timestamp(skb); + + if (__netdev_tx_sent_queue(q->ndq, skb->len, netdev_xmit_more())) + fun_txq_wr_db(q); + else + FUN_QSTAT_INC(q, tx_more); + + return NETDEV_TX_OK; + +dropped: + /* A dropped packet may be the last one in a xmit_more train, + * ring the doorbell just in case. + */ + if (!netdev_xmit_more()) + fun_txq_wr_db(q); + return NETDEV_TX_OK; +} + +/* Return a Tx queue's HW head index written back to host memory. */ +static u16 txq_hw_head(const struct funeth_txq *q) +{ + return (u16)be64_to_cpu(*q->hw_wb); +} + +/* Unmap the Tx packet starting at the given descriptor index and + * return the number of Tx descriptors it occupied. + */ +static unsigned int fun_unmap_pkt(const struct funeth_txq *q, unsigned int idx) +{ + const struct fun_eth_tx_req *req = fun_tx_desc_addr(q, idx); + unsigned int ngle = req->dataop.ngather; + struct fun_dataop_gl *gle; + + if (ngle) { + gle = (struct fun_dataop_gl *)req->dataop.imm; + dma_unmap_single(q->dma_dev, be64_to_cpu(gle->sgl_data), + be32_to_cpu(gle->sgl_len), DMA_TO_DEVICE); + + for (gle++; --ngle && txq_to_end(q, gle); gle++) + dma_unmap_page(q->dma_dev, be64_to_cpu(gle->sgl_data), + be32_to_cpu(gle->sgl_len), + DMA_TO_DEVICE); + + for (gle = (struct fun_dataop_gl *)q->desc; ngle; ngle--, gle++) + dma_unmap_page(q->dma_dev, be64_to_cpu(gle->sgl_data), + be32_to_cpu(gle->sgl_len), + DMA_TO_DEVICE); + } + + return tx_req_ndesc(req); +} + +/* Reclaim completed Tx descriptors and free their packets. Restart a stopped + * queue if we freed enough descriptors. + * + * Return true if we exhausted the budget while there is more work to be done. + */ +static bool fun_txq_reclaim(struct funeth_txq *q, int budget) +{ + unsigned int npkts = 0, nbytes = 0, ndesc = 0; + unsigned int head, limit, reclaim_idx; + + /* budget may be 0, e.g., netpoll */ + limit = budget ? budget : UINT_MAX; + + for (head = txq_hw_head(q), reclaim_idx = q->cons_cnt & q->mask; + head != reclaim_idx && npkts < limit; head = txq_hw_head(q)) { + /* The HW head is continually updated, ensure we don't read + * descriptor state before the head tells us to reclaim it. + * On the enqueue side the doorbell is an implicit write + * barrier. + */ + rmb(); + + do { + unsigned int pkt_desc = fun_unmap_pkt(q, reclaim_idx); + struct sk_buff *skb = q->info[reclaim_idx].skb; + + trace_funeth_tx_free(q, reclaim_idx, pkt_desc, head); + + nbytes += skb->len; + napi_consume_skb(skb, budget); + ndesc += pkt_desc; + reclaim_idx = (reclaim_idx + pkt_desc) & q->mask; + npkts++; + } while (reclaim_idx != head && npkts < limit); + } + + q->cons_cnt += ndesc; + netdev_tx_completed_queue(q->ndq, npkts, nbytes); + smp_mb(); /* pairs with the one in fun_tx_check_stop() */ + + if (unlikely(netif_tx_queue_stopped(q->ndq) && + fun_txq_may_restart(q))) { + netif_tx_wake_queue(q->ndq); + FUN_QSTAT_INC(q, tx_nrestarts); + } + + return reclaim_idx != head; +} + +/* The NAPI handler for Tx queues. */ +int fun_txq_napi_poll(struct napi_struct *napi, int budget) +{ + struct fun_irq *irq = container_of(napi, struct fun_irq, napi); + struct funeth_txq *q = irq->txq; + unsigned int db_val; + + if (fun_txq_reclaim(q, budget)) + return budget; /* exhausted budget */ + + napi_complete(napi); /* exhausted pending work */ + db_val = READ_ONCE(q->irq_db_val) | (q->cons_cnt & q->mask); + writel(db_val, q->db); + return 0; +} + +/* Reclaim up to @budget completed Tx packets from a TX XDP queue. */ +static unsigned int fun_xdpq_clean(struct funeth_txq *q, unsigned int budget) +{ + unsigned int npkts = 0, ndesc = 0, head, reclaim_idx; + + for (head = txq_hw_head(q), reclaim_idx = q->cons_cnt & q->mask; + head != reclaim_idx && npkts < budget; head = txq_hw_head(q)) { + /* The HW head is continually updated, ensure we don't read + * descriptor state before the head tells us to reclaim it. + * On the enqueue side the doorbell is an implicit write + * barrier. + */ + rmb(); + + do { + unsigned int pkt_desc = fun_unmap_pkt(q, reclaim_idx); + + xdp_return_frame(q->info[reclaim_idx].xdpf); + + trace_funeth_tx_free(q, reclaim_idx, pkt_desc, head); + + reclaim_idx = (reclaim_idx + pkt_desc) & q->mask; + ndesc += pkt_desc; + npkts++; + } while (reclaim_idx != head && npkts < budget); + } + + q->cons_cnt += ndesc; + return npkts; +} + +bool fun_xdp_tx(struct funeth_txq *q, struct xdp_frame *xdpf) +{ + unsigned int idx, nfrags = 1, ndesc = 1, tot_len = xdpf->len; + const struct skb_shared_info *si = NULL; + unsigned int lens[MAX_SKB_FRAGS + 1]; + dma_addr_t dma[MAX_SKB_FRAGS + 1]; + struct fun_eth_tx_req *req; + + if (fun_txq_avail(q) < FUN_XDP_CLEAN_THRES) + fun_xdpq_clean(q, FUN_XDP_CLEAN_BATCH); + + if (unlikely(xdp_frame_has_frags(xdpf))) { + si = xdp_get_shared_info_from_frame(xdpf); + tot_len = xdp_get_frame_len(xdpf); + nfrags += si->nr_frags; + ndesc = DIV_ROUND_UP((sizeof(*req) + nfrags * + sizeof(struct fun_dataop_gl)), + FUNETH_SQE_SIZE); + } + + if (unlikely(fun_txq_avail(q) < ndesc)) { + FUN_QSTAT_INC(q, tx_xdp_full); + return false; + } + + if (unlikely(fun_map_pkt(q->dma_dev, si, xdpf->data, xdpf->len, dma, + lens))) { + FUN_QSTAT_INC(q, tx_map_err); + return false; + } + + idx = q->prod_cnt & q->mask; + req = fun_tx_desc_addr(q, idx); + req->op = FUN_ETH_OP_TX; + req->len8 = 0; + req->flags = 0; + req->suboff8 = offsetof(struct fun_eth_tx_req, dataop); + req->repr_idn = 0; + req->encap_proto = 0; + fun_eth_offload_init(&req->offload, 0, 0, 0, 0, 0, 0, 0, 0); + req->dataop = FUN_DATAOP_HDR_INIT(nfrags, 0, nfrags, 0, tot_len); + + fun_write_gl(q, req, dma, lens, nfrags); + + q->info[idx].xdpf = xdpf; + + u64_stats_update_begin(&q->syncp); + q->stats.tx_bytes += tot_len; + q->stats.tx_pkts++; + u64_stats_update_end(&q->syncp); + + trace_funeth_tx(q, tot_len, idx, nfrags); + q->prod_cnt += ndesc; + + return true; +} + +int fun_xdp_xmit_frames(struct net_device *dev, int n, + struct xdp_frame **frames, u32 flags) +{ + struct funeth_priv *fp = netdev_priv(dev); + struct funeth_txq *q, **xdpqs; + int i, q_idx; + + if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) + return -EINVAL; + + xdpqs = rcu_dereference_bh(fp->xdpqs); + if (unlikely(!xdpqs)) + return -ENETDOWN; + + q_idx = smp_processor_id(); + if (unlikely(q_idx >= fp->num_xdpqs)) + return -ENXIO; + + for (q = xdpqs[q_idx], i = 0; i < n; i++) + if (!fun_xdp_tx(q, frames[i])) + break; + + if (unlikely(flags & XDP_XMIT_FLUSH)) + fun_txq_wr_db(q); + return i; +} + +/* Purge a Tx queue of any queued packets. Should be called once HW access + * to the packets has been revoked, e.g., after the queue has been disabled. + */ +static void fun_txq_purge(struct funeth_txq *q) +{ + while (q->cons_cnt != q->prod_cnt) { + unsigned int idx = q->cons_cnt & q->mask; + + q->cons_cnt += fun_unmap_pkt(q, idx); + dev_kfree_skb_any(q->info[idx].skb); + } + netdev_tx_reset_queue(q->ndq); +} + +static void fun_xdpq_purge(struct funeth_txq *q) +{ + while (q->cons_cnt != q->prod_cnt) { + unsigned int idx = q->cons_cnt & q->mask; + + q->cons_cnt += fun_unmap_pkt(q, idx); + xdp_return_frame(q->info[idx].xdpf); + } +} + +/* Create a Tx queue, allocating all the host resources needed. */ +static struct funeth_txq *fun_txq_create_sw(struct net_device *dev, + unsigned int qidx, + unsigned int ndesc, + struct fun_irq *irq) +{ + struct funeth_priv *fp = netdev_priv(dev); + struct funeth_txq *q; + int numa_node; + + if (irq) + numa_node = fun_irq_node(irq); /* skb Tx queue */ + else + numa_node = cpu_to_node(qidx); /* XDP Tx queue */ + + q = kzalloc_node(sizeof(*q), GFP_KERNEL, numa_node); + if (!q) + goto err; + + q->dma_dev = &fp->pdev->dev; + q->desc = fun_alloc_ring_mem(q->dma_dev, ndesc, FUNETH_SQE_SIZE, + sizeof(*q->info), true, numa_node, + &q->dma_addr, (void **)&q->info, + &q->hw_wb); + if (!q->desc) + goto free_q; + + q->netdev = dev; + q->mask = ndesc - 1; + q->qidx = qidx; + q->numa_node = numa_node; + u64_stats_init(&q->syncp); + q->init_state = FUN_QSTATE_INIT_SW; + return q; + +free_q: + kfree(q); +err: + netdev_err(dev, "Can't allocate memory for %s queue %u\n", + irq ? "Tx" : "XDP", qidx); + return NULL; +} + +static void fun_txq_free_sw(struct funeth_txq *q) +{ + struct funeth_priv *fp = netdev_priv(q->netdev); + + fun_free_ring_mem(q->dma_dev, q->mask + 1, FUNETH_SQE_SIZE, true, + q->desc, q->dma_addr, q->info); + + fp->tx_packets += q->stats.tx_pkts; + fp->tx_bytes += q->stats.tx_bytes; + fp->tx_dropped += q->stats.tx_map_err; + + kfree(q); +} + +/* Allocate the device portion of a Tx queue. */ +int fun_txq_create_dev(struct funeth_txq *q, struct fun_irq *irq) +{ + struct funeth_priv *fp = netdev_priv(q->netdev); + unsigned int irq_idx, ndesc = q->mask + 1; + int err; + + q->irq = irq; + *q->hw_wb = 0; + q->prod_cnt = 0; + q->cons_cnt = 0; + irq_idx = irq ? irq->irq_idx : 0; + + err = fun_sq_create(fp->fdev, + FUN_ADMIN_EPSQ_CREATE_FLAG_HEAD_WB_ADDRESS | + FUN_ADMIN_RES_CREATE_FLAG_ALLOCATOR, 0, + FUN_HCI_ID_INVALID, ilog2(FUNETH_SQE_SIZE), ndesc, + q->dma_addr, fp->tx_coal_count, fp->tx_coal_usec, + irq_idx, 0, fp->fdev->kern_end_qid, 0, + &q->hw_qid, &q->db); + if (err) + goto out; + + err = fun_create_and_bind_tx(fp, q->hw_qid); + if (err < 0) + goto free_devq; + q->ethid = err; + + if (irq) { + irq->txq = q; + q->ndq = netdev_get_tx_queue(q->netdev, q->qidx); + q->irq_db_val = FUN_IRQ_SQ_DB(fp->tx_coal_usec, + fp->tx_coal_count); + writel(q->irq_db_val, q->db); + } + + q->init_state = FUN_QSTATE_INIT_FULL; + netif_info(fp, ifup, q->netdev, + "%s queue %u, depth %u, HW qid %u, IRQ idx %u, eth id %u, node %d\n", + irq ? "Tx" : "XDP", q->qidx, ndesc, q->hw_qid, irq_idx, + q->ethid, q->numa_node); + return 0; + +free_devq: + fun_destroy_sq(fp->fdev, q->hw_qid); +out: + netdev_err(q->netdev, + "Failed to create %s queue %u on device, error %d\n", + irq ? "Tx" : "XDP", q->qidx, err); + return err; +} + +static void fun_txq_free_dev(struct funeth_txq *q) +{ + struct funeth_priv *fp = netdev_priv(q->netdev); + + if (q->init_state < FUN_QSTATE_INIT_FULL) + return; + + netif_info(fp, ifdown, q->netdev, + "Freeing %s queue %u (id %u), IRQ %u, ethid %u\n", + q->irq ? "Tx" : "XDP", q->qidx, q->hw_qid, + q->irq ? q->irq->irq_idx : 0, q->ethid); + + fun_destroy_sq(fp->fdev, q->hw_qid); + fun_res_destroy(fp->fdev, FUN_ADMIN_OP_ETH, 0, q->ethid); + + if (q->irq) { + q->irq->txq = NULL; + fun_txq_purge(q); + } else { + fun_xdpq_purge(q); + } + + q->init_state = FUN_QSTATE_INIT_SW; +} + +/* Create or advance a Tx queue, allocating all the host and device resources + * needed to reach the target state. + */ +int funeth_txq_create(struct net_device *dev, unsigned int qidx, + unsigned int ndesc, struct fun_irq *irq, int state, + struct funeth_txq **qp) +{ + struct funeth_txq *q = *qp; + int err; + + if (!q) + q = fun_txq_create_sw(dev, qidx, ndesc, irq); + if (!q) + return -ENOMEM; + + if (q->init_state >= state) + goto out; + + err = fun_txq_create_dev(q, irq); + if (err) { + if (!*qp) + fun_txq_free_sw(q); + return err; + } + +out: + *qp = q; + return 0; +} + +/* Free Tx queue resources until it reaches the target state. + * The queue must be already disconnected from the stack. + */ +struct funeth_txq *funeth_txq_free(struct funeth_txq *q, int state) +{ + if (state < FUN_QSTATE_INIT_FULL) + fun_txq_free_dev(q); + + if (state == FUN_QSTATE_DESTROYED) { + fun_txq_free_sw(q); + q = NULL; + } + + return q; +} diff --git a/drivers/net/ethernet/fungible/funeth/funeth_txrx.h b/drivers/net/ethernet/fungible/funeth/funeth_txrx.h new file mode 100644 index 000000000..671f51135 --- /dev/null +++ b/drivers/net/ethernet/fungible/funeth/funeth_txrx.h @@ -0,0 +1,265 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */ + +#ifndef _FUNETH_TXRX_H +#define _FUNETH_TXRX_H + +#include <linux/netdevice.h> +#include <linux/u64_stats_sync.h> + +/* Tx descriptor size */ +#define FUNETH_SQE_SIZE 64U + +/* Size of device headers per Tx packet */ +#define FUNETH_FUNOS_HDR_SZ (sizeof(struct fun_eth_tx_req)) + +/* Number of gather list entries per Tx descriptor */ +#define FUNETH_GLE_PER_DESC (FUNETH_SQE_SIZE / sizeof(struct fun_dataop_gl)) + +/* Max gather list size in bytes for an sk_buff. */ +#define FUNETH_MAX_GL_SZ ((MAX_SKB_FRAGS + 1) * sizeof(struct fun_dataop_gl)) + +#if IS_ENABLED(CONFIG_TLS_DEVICE) +# define FUNETH_TLS_SZ sizeof(struct fun_eth_tls) +#else +# define FUNETH_TLS_SZ 0 +#endif + +/* Max number of Tx descriptors for an sk_buff using a gather list. */ +#define FUNETH_MAX_GL_DESC \ + DIV_ROUND_UP((FUNETH_FUNOS_HDR_SZ + FUNETH_MAX_GL_SZ + FUNETH_TLS_SZ), \ + FUNETH_SQE_SIZE) + +/* Max number of Tx descriptors for any packet. */ +#define FUNETH_MAX_PKT_DESC FUNETH_MAX_GL_DESC + +/* Rx CQ descriptor size. */ +#define FUNETH_CQE_SIZE 64U + +/* Offset of cqe_info within a CQE. */ +#define FUNETH_CQE_INFO_OFFSET (FUNETH_CQE_SIZE - sizeof(struct fun_cqe_info)) + +/* Construct the IRQ portion of a CQ doorbell. The resulting value arms the + * interrupt with the supplied time delay and packet count moderation settings. + */ +#define FUN_IRQ_CQ_DB(usec, pkts) \ + (FUN_DB_IRQ_ARM_F | ((usec) << FUN_DB_INTCOAL_USEC_S) | \ + ((pkts) << FUN_DB_INTCOAL_ENTRIES_S)) + +/* As above for SQ doorbells. */ +#define FUN_IRQ_SQ_DB(usec, pkts) \ + (FUN_DB_IRQ_ARM_F | \ + ((usec) << FUN_DB_INTCOAL_USEC_S) | \ + ((pkts) << FUN_DB_INTCOAL_ENTRIES_S)) + +/* Per packet tailroom. Present only for 1-frag packets. */ +#define FUN_RX_TAILROOM SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) + +/* Per packet headroom for XDP. Preferred over XDP_PACKET_HEADROOM to + * accommodate two packets per buffer for 4K pages and 1500B MTUs. + */ +#define FUN_XDP_HEADROOM 192 + +/* Initialization state of a queue. */ +enum { + FUN_QSTATE_DESTROYED, /* what queue? */ + FUN_QSTATE_INIT_SW, /* exists in SW, not on the device */ + FUN_QSTATE_INIT_FULL, /* exists both in SW and on device */ +}; + +/* Initialization state of an interrupt. */ +enum { + FUN_IRQ_INIT, /* initialized and in the XArray but inactive */ + FUN_IRQ_REQUESTED, /* request_irq() done */ + FUN_IRQ_ENABLED, /* processing enabled */ + FUN_IRQ_DISABLED, /* processing disabled */ +}; + +struct bpf_prog; + +struct funeth_txq_stats { /* per Tx queue SW counters */ + u64 tx_pkts; /* # of Tx packets */ + u64 tx_bytes; /* total bytes of Tx packets */ + u64 tx_cso; /* # of packets with checksum offload */ + u64 tx_tso; /* # of non-encapsulated TSO super-packets */ + u64 tx_encap_tso; /* # of encapsulated TSO super-packets */ + u64 tx_uso; /* # of non-encapsulated UDP LSO super-packets */ + u64 tx_more; /* # of DBs elided due to xmit_more */ + u64 tx_nstops; /* # of times the queue has stopped */ + u64 tx_nrestarts; /* # of times the queue has restarted */ + u64 tx_map_err; /* # of packets dropped due to DMA mapping errors */ + u64 tx_xdp_full; /* # of XDP packets that could not be enqueued */ + u64 tx_tls_pkts; /* # of Tx TLS packets offloaded to HW */ + u64 tx_tls_bytes; /* Tx bytes of HW-handled TLS payload */ + u64 tx_tls_fallback; /* attempted Tx TLS offloads punted to SW */ + u64 tx_tls_drops; /* attempted Tx TLS offloads dropped */ +}; + +struct funeth_tx_info { /* per Tx descriptor state */ + union { + struct sk_buff *skb; /* associated packet (sk_buff path) */ + struct xdp_frame *xdpf; /* associated XDP frame (XDP path) */ + }; +}; + +struct funeth_txq { + /* RO cacheline of frequently accessed data */ + u32 mask; /* queue depth - 1 */ + u32 hw_qid; /* device ID of the queue */ + void *desc; /* base address of descriptor ring */ + struct funeth_tx_info *info; + struct device *dma_dev; /* device for DMA mappings */ + volatile __be64 *hw_wb; /* HW write-back location */ + u32 __iomem *db; /* SQ doorbell register address */ + struct netdev_queue *ndq; + dma_addr_t dma_addr; /* DMA address of descriptor ring */ + /* producer R/W cacheline */ + u16 qidx; /* queue index within net_device */ + u16 ethid; + u32 prod_cnt; /* producer counter */ + struct funeth_txq_stats stats; + /* shared R/W cacheline, primarily accessed by consumer */ + u32 irq_db_val; /* value written to IRQ doorbell */ + u32 cons_cnt; /* consumer (cleanup) counter */ + struct net_device *netdev; + struct fun_irq *irq; + int numa_node; + u8 init_state; /* queue initialization state */ + struct u64_stats_sync syncp; +}; + +struct funeth_rxq_stats { /* per Rx queue SW counters */ + u64 rx_pkts; /* # of received packets, including SW drops */ + u64 rx_bytes; /* total size of received packets */ + u64 rx_cso; /* # of packets with checksum offload */ + u64 rx_bufs; /* total # of Rx buffers provided to device */ + u64 gro_pkts; /* # of GRO superpackets */ + u64 gro_merged; /* # of pkts merged into existing GRO superpackets */ + u64 rx_page_alloc; /* # of page allocations for Rx buffers */ + u64 rx_budget; /* NAPI iterations that exhausted their budget */ + u64 rx_mem_drops; /* # of packets dropped due to memory shortage */ + u64 rx_map_err; /* # of page DMA mapping errors */ + u64 xdp_drops; /* XDP_DROPped packets */ + u64 xdp_tx; /* successful XDP transmits */ + u64 xdp_redir; /* successful XDP redirects */ + u64 xdp_err; /* packets dropped due to XDP errors */ +}; + +struct funeth_rxbuf { /* per Rx buffer state */ + struct page *page; /* associated page */ + dma_addr_t dma_addr; /* DMA address of page start */ + int pg_refs; /* page refs held by driver */ + int node; /* page node, or -1 if it is PF_MEMALLOC */ +}; + +struct funeth_rx_cache { /* cache of DMA-mapped previously used buffers */ + struct funeth_rxbuf *bufs; /* base of Rx buffer state ring */ + unsigned int prod_cnt; /* producer counter */ + unsigned int cons_cnt; /* consumer counter */ + unsigned int mask; /* depth - 1 */ +}; + +/* An Rx queue consists of a CQ and an SQ used to provide Rx buffers. */ +struct funeth_rxq { + struct net_device *netdev; + struct napi_struct *napi; + struct device *dma_dev; /* device for DMA mappings */ + void *cqes; /* base of CQ descriptor ring */ + const void *next_cqe_info; /* fun_cqe_info of next CQE */ + u32 __iomem *cq_db; /* CQ doorbell register address */ + unsigned int cq_head; /* CQ head index */ + unsigned int cq_mask; /* CQ depth - 1 */ + u16 phase; /* CQ phase tag */ + u16 qidx; /* queue index within net_device */ + unsigned int irq_db_val; /* IRQ info for CQ doorbell */ + struct fun_eprq_rqbuf *rqes; /* base of RQ descriptor ring */ + struct funeth_rxbuf *bufs; /* base of Rx buffer state ring */ + struct funeth_rxbuf *cur_buf; /* currently active buffer */ + u32 __iomem *rq_db; /* RQ doorbell register address */ + unsigned int rq_cons; /* RQ consumer counter */ + unsigned int rq_mask; /* RQ depth - 1 */ + unsigned int buf_offset; /* offset of next pkt in head buffer */ + u8 xdp_flush; /* XDP flush types needed at NAPI end */ + u8 init_state; /* queue initialization state */ + u16 headroom; /* per packet headroom */ + unsigned int rq_cons_db; /* value of rq_cons at last RQ db */ + unsigned int rq_db_thres; /* # of new buffers needed to write RQ db */ + struct funeth_rxbuf spare_buf; /* spare for next buffer replacement */ + struct funeth_rx_cache cache; /* used buffer cache */ + struct bpf_prog *xdp_prog; /* optional XDP BPF program */ + struct funeth_rxq_stats stats; + dma_addr_t cq_dma_addr; /* DMA address of CQE ring */ + dma_addr_t rq_dma_addr; /* DMA address of RQE ring */ + u16 irq_cnt; + u32 hw_cqid; /* device ID of the queue's CQ */ + u32 hw_sqid; /* device ID of the queue's SQ */ + int numa_node; + struct u64_stats_sync syncp; + struct xdp_rxq_info xdp_rxq; +}; + +#define FUN_QSTAT_INC(q, counter) \ + do { \ + u64_stats_update_begin(&(q)->syncp); \ + (q)->stats.counter++; \ + u64_stats_update_end(&(q)->syncp); \ + } while (0) + +#define FUN_QSTAT_READ(q, seq, stats_copy) \ + do { \ + seq = u64_stats_fetch_begin_irq(&(q)->syncp); \ + stats_copy = (q)->stats; \ + } while (u64_stats_fetch_retry_irq(&(q)->syncp, (seq))) + +#define FUN_INT_NAME_LEN (IFNAMSIZ + 16) + +struct fun_irq { + struct napi_struct napi; + struct funeth_txq *txq; + struct funeth_rxq *rxq; + u8 state; + u16 irq_idx; /* index of MSI-X interrupt */ + int irq; /* Linux IRQ vector */ + cpumask_t affinity_mask; /* IRQ affinity */ + struct irq_affinity_notify aff_notify; + char name[FUN_INT_NAME_LEN]; +} ____cacheline_internodealigned_in_smp; + +/* Return the start address of the idx-th Tx descriptor. */ +static inline void *fun_tx_desc_addr(const struct funeth_txq *q, + unsigned int idx) +{ + return q->desc + idx * FUNETH_SQE_SIZE; +} + +static inline void fun_txq_wr_db(const struct funeth_txq *q) +{ + unsigned int tail = q->prod_cnt & q->mask; + + writel(tail, q->db); +} + +static inline int fun_irq_node(const struct fun_irq *p) +{ + return cpu_to_mem(cpumask_first(&p->affinity_mask)); +} + +int fun_rxq_napi_poll(struct napi_struct *napi, int budget); +int fun_txq_napi_poll(struct napi_struct *napi, int budget); +netdev_tx_t fun_start_xmit(struct sk_buff *skb, struct net_device *netdev); +bool fun_xdp_tx(struct funeth_txq *q, struct xdp_frame *xdpf); +int fun_xdp_xmit_frames(struct net_device *dev, int n, + struct xdp_frame **frames, u32 flags); + +int funeth_txq_create(struct net_device *dev, unsigned int qidx, + unsigned int ndesc, struct fun_irq *irq, int state, + struct funeth_txq **qp); +int fun_txq_create_dev(struct funeth_txq *q, struct fun_irq *irq); +struct funeth_txq *funeth_txq_free(struct funeth_txq *q, int state); +int funeth_rxq_create(struct net_device *dev, unsigned int qidx, + unsigned int ncqe, unsigned int nrqe, struct fun_irq *irq, + int state, struct funeth_rxq **qp); +int fun_rxq_create_dev(struct funeth_rxq *q, struct fun_irq *irq); +struct funeth_rxq *funeth_rxq_free(struct funeth_rxq *q, int state); +int fun_rxq_set_bpf(struct funeth_rxq *q, struct bpf_prog *prog); + +#endif /* _FUNETH_TXRX_H */ |