summary refs log tree commit diff stats
path: root/drivers/net/ethernet/fungible
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/net/ethernet/fungible')
-rw-r--r-- drivers/net/ethernet/fungible/Kconfig 28
-rw-r--r-- drivers/net/ethernet/fungible/Makefile 7
-rw-r--r-- drivers/net/ethernet/fungible/funcore/Makefile 5
-rw-r--r-- drivers/net/ethernet/fungible/funcore/fun_dev.c 843
-rw-r--r-- drivers/net/ethernet/fungible/funcore/fun_dev.h 150
-rw-r--r-- drivers/net/ethernet/fungible/funcore/fun_hci.h 1242
-rw-r--r-- drivers/net/ethernet/fungible/funcore/fun_queue.c 601
-rw-r--r-- drivers/net/ethernet/fungible/funcore/fun_queue.h 175
-rw-r--r-- drivers/net/ethernet/fungible/funeth/Kconfig 17
-rw-r--r-- drivers/net/ethernet/fungible/funeth/Makefile 10
-rw-r--r-- drivers/net/ethernet/fungible/funeth/fun_port.h 97
-rw-r--r-- drivers/net/ethernet/fungible/funeth/funeth.h 171
-rw-r--r-- drivers/net/ethernet/fungible/funeth/funeth_devlink.c 34
-rw-r--r-- drivers/net/ethernet/fungible/funeth/funeth_devlink.h 13
-rw-r--r-- drivers/net/ethernet/fungible/funeth/funeth_ethtool.c 1198
-rw-r--r-- drivers/net/ethernet/fungible/funeth/funeth_ktls.c 155
-rw-r--r-- drivers/net/ethernet/fungible/funeth/funeth_ktls.h 30
-rw-r--r-- drivers/net/ethernet/fungible/funeth/funeth_main.c 2086
-rw-r--r-- drivers/net/ethernet/fungible/funeth/funeth_rx.c 829
-rw-r--r-- drivers/net/ethernet/fungible/funeth/funeth_trace.h 117
-rw-r--r-- drivers/net/ethernet/fungible/funeth/funeth_tx.c 801
-rw-r--r-- drivers/net/ethernet/fungible/funeth/funeth_txrx.h 265
22 files changed, 8874 insertions, 0 deletions
diff --git a/drivers/net/ethernet/fungible/Kconfig b/drivers/net/ethernet/fungible/Kconfig
new file mode 100644
index 000000000..1ecedecc0
--- /dev/null
+++ b/drivers/net/ethernet/fungible/Kconfig
@@ -0,0 +1,28 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Fungible network driver configuration
+#
+
+config NET_VENDOR_FUNGIBLE
+ bool "Fungible devices"
+ default y
+ help
+ If you have a Fungible network device, say Y.
+
+ Note that the answer to this question doesn't directly affect the
+ kernel: saying N will just cause the configurator to skip all
+ the questions about Fungible cards. If you say Y, you will be asked
+ for your specific card in the following questions.
+
+if NET_VENDOR_FUNGIBLE
+
+# FUN_CORE has no prompt string, so it is not offered to the user directly;
+# it is enabled only when another symbol selects it.
+config FUN_CORE
+ tristate
+ select SBITMAP
+ help
+ A service module offering basic common services to Fungible
+ device drivers.
+
+source "drivers/net/ethernet/fungible/funeth/Kconfig"
+
+endif # NET_VENDOR_FUNGIBLE
diff --git a/drivers/net/ethernet/fungible/Makefile b/drivers/net/ethernet/fungible/Makefile
new file mode 100644
index 000000000..df759f158
--- /dev/null
+++ b/drivers/net/ethernet/fungible/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause)
+#
+# Makefile for the Fungible network device drivers.
+#
+
+obj-$(CONFIG_FUN_CORE) += funcore/
+obj-$(CONFIG_FUN_ETH) += funeth/
diff --git a/drivers/net/ethernet/fungible/funcore/Makefile b/drivers/net/ethernet/fungible/funcore/Makefile
new file mode 100644
index 000000000..bc16b264b
--- /dev/null
+++ b/drivers/net/ethernet/fungible/funcore/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause)
+
+obj-$(CONFIG_FUN_CORE) += funcore.o
+
+funcore-y := fun_dev.o fun_queue.o
diff --git a/drivers/net/ethernet/fungible/funcore/fun_dev.c b/drivers/net/ethernet/fungible/funcore/fun_dev.c
new file mode 100644
index 000000000..fb5120d90
--- /dev/null
+++ b/drivers/net/ethernet/fungible/funcore/fun_dev.c
@@ -0,0 +1,843 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause)
+
+#include <linux/aer.h>
+#include <linux/bitmap.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/nvme.h>
+#include <linux/pci.h>
+#include <linux/wait.h>
+#include <linux/sched/signal.h>
+
+#include "fun_queue.h"
+#include "fun_dev.h"
+
+/* Default timeout, in ms, for synchronous admin commands. It is applied by
+ * fun_submit_admin_sync_cmd() when the caller passes a timeout of 0.
+ */
+#define FUN_ADMIN_CMD_TO_MS 3000
+
+/* Layout of the value written to the AQA register and the admin queue depth
+ * limits enforced by fun_enable_admin_queue().
+ */
+enum {
+ AQA_ASQS_SHIFT = 0, /* position of the admin SQ size field */
+ AQA_ACQS_SHIFT = 16, /* position of the admin CQ size field */
+ AQA_MIN_QUEUE_SIZE = 2, /* smallest accepted admin SQ/CQ depth */
+ AQA_MAX_QUEUE_SIZE = 4096 /* largest accepted admin SQ/CQ depth */
+};
+
+/* Per-tag context for admin commands. The array fun_dev.cmd_ctx holds one
+ * entry per admin command tag; the tag is also used as the command's CID.
+ */
+struct fun_cmd_ctx {
+ fun_admin_callback_t cb; /* callback to invoke on completion */
+ void *cb_data; /* user data provided to callback */
+ int cpu; /* CPU where the cmd's tag was allocated, < 0 if no cmd is pending */
+};
+
+/* Context for synchronous admin commands. An instance lives on the stack of
+ * fun_submit_admin_sync_cmd() and is passed to fun_admin_cmd_sync_cb() as the
+ * callback data.
+ */
+struct fun_sync_cmd_ctx {
+ struct completion compl; /* signalled when the response has been handled */
+ u8 *rsp_buf; /* caller provided response buffer */
+ unsigned int rsp_len; /* response buffer size */
+ u8 rsp_status; /* command response status */
+};
+
+/* Wait for the CSTS.RDY bit to match @enabled.
+ *
+ * CSTS is polled every 100ms until a deadline derived from the CAP.TO field of
+ * the cached CAP register. Returns 0 once RDY matches, -EIO if CSTS reads as
+ * all 1s, and -ETIMEDOUT if the deadline passes without a match.
+ */
+static int fun_wait_ready(struct fun_dev *fdev, bool enabled)
+{
+ unsigned int cap_to = NVME_CAP_TIMEOUT(fdev->cap_reg);
+ u32 bit = enabled ? NVME_CSTS_RDY : 0;
+ unsigned long deadline;
+
+ deadline = ((cap_to + 1) * HZ / 2) + jiffies; /* CAP.TO is in 500ms */
+
+ for (;;) {
+ u32 csts = readl(fdev->bar + NVME_REG_CSTS);
+
+ /* an all-1s value is treated as a failed register read */
+ if (csts == ~0) {
+ dev_err(fdev->dev, "CSTS register read %#x\n", csts);
+ return -EIO;
+ }
+
+ if ((csts & NVME_CSTS_RDY) == bit)
+ return 0;
+
+ /* RDY was sampled above, so it gets a final check after the
+ * last sleep before we give up.
+ */
+ if (time_is_before_jiffies(deadline))
+ break;
+
+ msleep(100);
+ }
+
+ dev_err(fdev->dev,
+ "Timed out waiting for device to indicate RDY %u; aborting %s\n",
+ enabled, enabled ? "initialization" : "reset");
+ return -ETIMEDOUT;
+}
+
+/* Read CSTS once and verify that its RDY bit equals @expected_rdy.
+ * Returns 0 on a match, -EIO if the register reads as all 1s, and -EINVAL if
+ * RDY has a different value.
+ */
+static int fun_check_csts_rdy(struct fun_dev *fdev, unsigned int expected_rdy)
+{
+	u32 status = readl(fdev->bar + NVME_REG_CSTS);
+	u32 rdy = status & NVME_CSTS_RDY;
+
+	if (status == ~0) {
+		dev_err(fdev->dev, "CSTS register read %#x\n", status);
+		return -EIO;
+	}
+
+	if (rdy == expected_rdy)
+		return 0;
+
+	dev_err(fdev->dev, "Unexpected CSTS RDY %u\n", rdy);
+	return -EINVAL;
+}
+
+/* Check that CSTS RDY has the expected value. Then write a new value to the CC
+ * register and wait for CSTS RDY to match the new CC ENABLE state.
+ *
+ * The value written is the cached fdev->cc_reg, which the caller must have
+ * updated beforehand. Returns 0 on success or the error from the CSTS check
+ * or from the wait.
+ */
+static int fun_update_cc_enable(struct fun_dev *fdev, unsigned int initial_rdy)
+{
+ int rc = fun_check_csts_rdy(fdev, initial_rdy);
+
+ if (rc)
+ return rc;
+ writel(fdev->cc_reg, fdev->bar + NVME_REG_CC);
+ return fun_wait_ready(fdev, !!(fdev->cc_reg & NVME_CC_ENABLE));
+}
+
+/* Clear the ENABLE bit and the shutdown notification field in CC and wait for
+ * CSTS.RDY to clear. Fails without writing CC unless CSTS.RDY is currently 1.
+ */
+static int fun_disable_ctrl(struct fun_dev *fdev)
+{
+ fdev->cc_reg &= ~(NVME_CC_SHN_MASK | NVME_CC_ENABLE);
+ return fun_update_cc_enable(fdev, 1);
+}
+
+/* Build a new CC value from the admin queue entry sizes (log2) and the host
+ * page size, set ENABLE, write it to the device, and wait for CSTS.RDY to be
+ * set. Fails without writing CC unless CSTS.RDY is currently 0.
+ */
+static int fun_enable_ctrl(struct fun_dev *fdev, u32 admin_cqesz_log2,
+ u32 admin_sqesz_log2)
+{
+ fdev->cc_reg = (admin_cqesz_log2 << NVME_CC_IOCQES_SHIFT) |
+ (admin_sqesz_log2 << NVME_CC_IOSQES_SHIFT) |
+ ((PAGE_SHIFT - 12) << NVME_CC_MPS_SHIFT) |
+ NVME_CC_ENABLE;
+
+ return fun_update_cc_enable(fdev, 0);
+}
+
+/* Reserve the device's PCI memory regions under @name and map BAR 0 into
+ * fdev->bar. Returns 0 on success or a negative errno; on failure nothing is
+ * left reserved.
+ */
+static int fun_map_bars(struct fun_dev *fdev, const char *name)
+{
+	struct pci_dev *pdev = to_pci_dev(fdev->dev);
+	int rc = pci_request_mem_regions(pdev, name);
+
+	if (rc) {
+		dev_err(&pdev->dev,
+			"Couldn't get PCI memory resources, err %d\n", rc);
+		return rc;
+	}
+
+	fdev->bar = pci_ioremap_bar(pdev, 0);
+	if (fdev->bar)
+		return 0;
+
+	dev_err(&pdev->dev, "Couldn't map BAR 0\n");
+	pci_release_mem_regions(pdev);
+	return -ENOMEM;
+}
+
+/* Undo fun_map_bars(): unmap BAR 0 and release the PCI memory regions.
+ * Does nothing if BAR 0 is not currently mapped.
+ */
+static void fun_unmap_bars(struct fun_dev *fdev)
+{
+	struct pci_dev *pdev = to_pci_dev(fdev->dev);
+
+	if (!fdev->bar)
+		return;
+
+	iounmap(fdev->bar);
+	fdev->bar = NULL;
+	pci_release_mem_regions(pdev);
+}
+
+/* Configure 64-bit streaming and coherent DMA masks for @dev.
+ * Returns 0 on success or the error from the DMA layer, which is also logged.
+ */
+static int fun_set_dma_masks(struct device *dev)
+{
+	int rc = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
+
+	if (!rc)
+		return 0;
+
+	dev_err(dev, "DMA mask configuration failed, err %d\n", rc);
+	return rc;
+}
+
+/* Interrupt handler for the admin queue. @data is the admin fun_queue.
+ * Reports IRQ_HANDLED only if CQ processing returned non-zero.
+ */
+static irqreturn_t fun_admin_irq(int irq, void *data)
+{
+	struct fun_queue *adminq = data;
+
+	if (fun_process_cq(adminq, 0))
+		return IRQ_HANDLED;
+
+	return IRQ_NONE;
+}
+
+/* CQ callback of the admin queue; handles one admin CQE.
+ *
+ * A CQE whose sqhd is 0xffff is an event rather than a command response and
+ * is passed to the registered event callback, if any. For any other CQE the
+ * CID selects the command context: the issuer's callback is invoked with the
+ * callback data (which is atomically cleared so that a racing
+ * fun_abandon_admin_cmd() fails), then the context is marked idle and the tag
+ * is returned to the tag allocator.
+ *
+ * @funq: the admin queue
+ * @data: callback data registered with the CQ callback (unused)
+ * @entry: the CQE payload
+ * @info: CQE trailer information
+ */
+static void fun_complete_admin_cmd(struct fun_queue *funq, void *data,
+				   void *entry, const struct fun_cqe_info *info)
+{
+	const struct fun_admin_rsp_common *rsp_common = entry;
+	struct fun_dev *fdev = funq->fdev;
+	struct fun_cmd_ctx *cmd_ctx;
+	int cpu;
+	u16 cid;
+
+	if (info->sqhd == cpu_to_be16(0xffff)) {
+		dev_dbg(fdev->dev, "adminq event\n");
+		if (fdev->adminq_cb)
+			fdev->adminq_cb(fdev, entry);
+		return;
+	}
+
+	cid = be16_to_cpu(rsp_common->cid);
+	dev_dbg(fdev->dev, "admin CQE cid %u, op %u, ret %u\n", cid,
+		rsp_common->op, rsp_common->ret);
+
+	/* The CID is supplied by the device. cmd_ctx[] has one entry per tag
+	 * of admin_sbq, so reject anything outside that range instead of
+	 * indexing past the end of the array.
+	 */
+	if (unlikely(cid >= fdev->admin_sbq.sb.depth)) {
+		dev_err(fdev->dev,
+			"admin CQE with out-of-range CID=%u, op=%u\n",
+			cid, rsp_common->op);
+		return;
+	}
+
+	cmd_ctx = &fdev->cmd_ctx[cid];
+	if (cmd_ctx->cpu < 0) {
+		dev_err(fdev->dev,
+			"admin CQE with CID=%u, op=%u does not match a pending command\n",
+			cid, rsp_common->op);
+		return;
+	}
+
+	if (cmd_ctx->cb)
+		cmd_ctx->cb(fdev, entry, xchg(&cmd_ctx->cb_data, NULL));
+
+	/* Mark the context idle before releasing the tag so the tag's next
+	 * user starts from a clean context.
+	 */
+	cpu = cmd_ctx->cpu;
+	cmd_ctx->cpu = -1;
+	sbitmap_queue_clear(&fdev->admin_sbq, cid, cpu);
+}
+
+/* Allocate the array of @ntags admin command contexts and mark every entry as
+ * having no pending command (cpu = -1). Returns 0 or -ENOMEM.
+ */
+static int fun_init_cmd_ctx(struct fun_dev *fdev, unsigned int ntags)
+{
+	struct fun_cmd_ctx *ctx;
+	unsigned int tag;
+
+	ctx = kvcalloc(ntags, sizeof(*fdev->cmd_ctx), GFP_KERNEL);
+	fdev->cmd_ctx = ctx;
+	if (!ctx)
+		return -ENOMEM;
+
+	for (tag = 0; tag < ntags; tag++)
+		ctx[tag].cpu = -1;
+
+	return 0;
+}
+
+/* Allocate and enable an admin queue and assign it the first IRQ vector.
+ *
+ * Steps, in order: allocate the queue, the per-tag command contexts and the
+ * tag allocator; request the IRQ; program the AQA/ASQ/ACQ registers; enable
+ * the controller; and, if an RQ depth was requested, create and post the RQ.
+ *
+ * Returns 0 on success, -EEXIST if an admin queue already exists, -EINVAL if
+ * the requested SQ or CQ depth is out of range, or another negative errno.
+ * On failure everything acquired here is released.
+ */
+static int fun_enable_admin_queue(struct fun_dev *fdev,
+ const struct fun_dev_params *areq)
+{
+ struct fun_queue_alloc_req qreq = {
+ .cqe_size_log2 = areq->cqe_size_log2,
+ .sqe_size_log2 = areq->sqe_size_log2,
+ .cq_depth = areq->cq_depth,
+ .sq_depth = areq->sq_depth,
+ .rq_depth = areq->rq_depth,
+ };
+ /* one fewer tag than SQ entries; only used after sq_depth is validated */
+ unsigned int ntags = areq->sq_depth - 1;
+ struct fun_queue *funq;
+ int rc;
+
+ if (fdev->admin_q)
+ return -EEXIST;
+
+ if (areq->sq_depth < AQA_MIN_QUEUE_SIZE ||
+ areq->sq_depth > AQA_MAX_QUEUE_SIZE ||
+ areq->cq_depth < AQA_MIN_QUEUE_SIZE ||
+ areq->cq_depth > AQA_MAX_QUEUE_SIZE)
+ return -EINVAL;
+
+ fdev->admin_q = fun_alloc_queue(fdev, 0, &qreq);
+ if (!fdev->admin_q)
+ return -ENOMEM;
+
+ rc = fun_init_cmd_ctx(fdev, ntags);
+ if (rc)
+ goto free_q;
+
+ rc = sbitmap_queue_init_node(&fdev->admin_sbq, ntags, -1, false,
+ GFP_KERNEL, dev_to_node(fdev->dev));
+ if (rc)
+ goto free_cmd_ctx;
+
+ funq = fdev->admin_q;
+ funq->cq_vector = 0;
+ rc = fun_request_irq(funq, dev_name(fdev->dev), fun_admin_irq, funq);
+ if (rc)
+ goto free_sbq;
+
+ fun_set_cq_callback(funq, fun_complete_admin_cmd, NULL);
+ fdev->adminq_cb = areq->event_cb;
+
+ /* AQA takes the queue depths minus 1 */
+ writel((funq->sq_depth - 1) << AQA_ASQS_SHIFT |
+ (funq->cq_depth - 1) << AQA_ACQS_SHIFT,
+ fdev->bar + NVME_REG_AQA);
+
+ writeq(funq->sq_dma_addr, fdev->bar + NVME_REG_ASQ);
+ writeq(funq->cq_dma_addr, fdev->bar + NVME_REG_ACQ);
+
+ rc = fun_enable_ctrl(fdev, areq->cqe_size_log2, areq->sqe_size_log2);
+ if (rc)
+ goto free_irq;
+
+ /* the RQ is created only after the controller has been enabled */
+ if (areq->rq_depth) {
+ rc = fun_create_rq(funq);
+ if (rc)
+ goto disable_ctrl;
+
+ funq_rq_post(funq);
+ }
+
+ return 0;
+
+ /* unwind in reverse order of acquisition */
+disable_ctrl:
+ fun_disable_ctrl(fdev);
+free_irq:
+ fun_free_irq(funq);
+free_sbq:
+ sbitmap_queue_free(&fdev->admin_sbq);
+free_cmd_ctx:
+ kvfree(fdev->cmd_ctx);
+ fdev->cmd_ctx = NULL;
+free_q:
+ fun_free_queue(fdev->admin_q);
+ fdev->admin_q = NULL;
+ return rc;
+}
+
+/* Tear down the admin queue set up by fun_enable_admin_queue(). Does nothing
+ * if there is no admin queue. The result of disabling the controller is
+ * ignored; teardown proceeds regardless.
+ */
+static void fun_disable_admin_queue(struct fun_dev *fdev)
+{
+ struct fun_queue *admq = fdev->admin_q;
+
+ if (!admq)
+ return;
+
+ fun_disable_ctrl(fdev);
+
+ fun_free_irq(admq);
+ /* with the IRQ gone, process any CQEs still in the CQ by hand */
+ __fun_process_cq(admq, 0);
+
+ sbitmap_queue_free(&fdev->admin_sbq);
+
+ kvfree(fdev->cmd_ctx);
+ fdev->cmd_ctx = NULL;
+
+ fun_free_queue(admq);
+ fdev->admin_q = NULL;
+}
+
+/* Report whether the device registers indicate that the admin queue no longer
+ * services commands, i.e. CSTS shows anything other than "RDY set, CFS clear".
+ * This check isn't exhaustive: a %false result does not guarantee the queue
+ * is operational.
+ */
+static bool fun_adminq_stopped(struct fun_dev *fdev)
+{
+	u32 state = readl(fdev->bar + NVME_REG_CSTS);
+
+	state &= NVME_CSTS_CFS | NVME_CSTS_RDY;
+	return state != NVME_CSTS_RDY;
+}
+
+/* Sleep, uninterruptibly, until an admin command tag can be allocated.
+ *
+ * Returns the allocated tag (>= 0) and stores in @cpup the CPU reported by
+ * sbitmap_queue_get(), or returns -ESHUTDOWN if command submission has been
+ * suppressed (see fun_admin_stop()).
+ */
+static int fun_wait_for_tag(struct fun_dev *fdev, int *cpup)
+{
+ struct sbitmap_queue *sbq = &fdev->admin_sbq;
+ struct sbq_wait_state *ws = &sbq->ws[0];
+ DEFINE_SBQ_WAIT(wait);
+ int tag;
+
+ for (;;) {
+ /* register as a waiter before testing the conditions below */
+ sbitmap_prepare_to_wait(sbq, ws, &wait, TASK_UNINTERRUPTIBLE);
+ if (fdev->suppress_cmds) {
+ tag = -ESHUTDOWN;
+ break;
+ }
+ tag = sbitmap_queue_get(sbq, cpup);
+ if (tag >= 0)
+ break;
+ schedule();
+ }
+
+ sbitmap_finish_wait(sbq, ws, &wait);
+ return tag;
+}
+
+/* Submit an asynchronous admin command. Caller is responsible for implementing
+ * any waiting or timeout. Upon command completion the callback @cb is called.
+ *
+ * @fdev: the device
+ * @cmd: the command; its size is taken from its len8 field and its cid field
+ * is overwritten with the allocated tag
+ * @cb: completion callback, may be NULL
+ * @cb_data: data passed to @cb
+ * @wait_ok: if no tag is free, sleep until one is instead of failing
+ *
+ * Returns 0 if the command was written to the SQ, -EMSGSIZE if the command
+ * is larger than an SQE, -EAGAIN if no tag is free and @wait_ok is false, or
+ * -ESHUTDOWN if command submission has been suppressed.
+ */
+int fun_submit_admin_cmd(struct fun_dev *fdev, struct fun_admin_req_common *cmd,
+ fun_admin_callback_t cb, void *cb_data, bool wait_ok)
+{
+ struct fun_queue *funq = fdev->admin_q;
+ unsigned int cmdsize = cmd->len8 * 8;
+ struct fun_cmd_ctx *cmd_ctx;
+ int tag, cpu, rc = 0;
+
+ if (WARN_ON(cmdsize > (1 << funq->sqe_size_log2)))
+ return -EMSGSIZE;
+
+ tag = sbitmap_queue_get(&fdev->admin_sbq, &cpu);
+ if (tag < 0) {
+ if (!wait_ok)
+ return -EAGAIN;
+ tag = fun_wait_for_tag(fdev, &cpu);
+ if (tag < 0)
+ return tag;
+ }
+
+ /* the tag serves as the command ID and as the index of its context */
+ cmd->cid = cpu_to_be16(tag);
+
+ cmd_ctx = &fdev->cmd_ctx[tag];
+ cmd_ctx->cb = cb;
+ cmd_ctx->cb_data = cb_data;
+
+ spin_lock(&funq->sq_lock);
+
+ /* suppress_cmds is set under sq_lock by fun_admin_stop() */
+ if (unlikely(fdev->suppress_cmds)) {
+ rc = -ESHUTDOWN;
+ sbitmap_queue_clear(&fdev->admin_sbq, tag, cpu);
+ } else {
+ /* a non-negative cpu marks the context as having a pending cmd */
+ cmd_ctx->cpu = cpu;
+ memcpy(fun_sqe_at(funq, funq->sq_tail), cmd, cmdsize);
+
+ dev_dbg(fdev->dev, "admin cmd @ %u: %8ph\n", funq->sq_tail,
+ cmd);
+
+ if (++funq->sq_tail == funq->sq_depth)
+ funq->sq_tail = 0;
+ /* ring the SQ doorbell with the new tail */
+ writel(funq->sq_tail, funq->sq_db);
+ }
+ spin_unlock(&funq->sq_lock);
+ return rc;
+}
+
+/* Abandon a pending admin command by clearing the issuer's callback data.
+ * Failure indicates that the command either has already completed or its
+ * completion is racing with this call.
+ *
+ * The context is located through the CID stored in @cmd at submission. Its
+ * callback data is replaced with NULL only if it still equals @cb_data.
+ * Returns %true if this call cleared it.
+ */
+static bool fun_abandon_admin_cmd(struct fun_dev *fd,
+ const struct fun_admin_req_common *cmd,
+ void *cb_data)
+{
+ u16 cid = be16_to_cpu(cmd->cid);
+ struct fun_cmd_ctx *cmd_ctx = &fd->cmd_ctx[cid];
+
+ /* pairs with the xchg() of cb_data in fun_complete_admin_cmd() */
+ return cmpxchg(&cmd_ctx->cb_data, cb_data, NULL) == cb_data;
+}
+
+/* Stop submission of new admin commands and wake up any processes waiting for
+ * tags. Already submitted commands are left to complete or time out.
+ *
+ * Nothing in this file clears suppress_cmds again once it has been set.
+ */
+static void fun_admin_stop(struct fun_dev *fdev)
+{
+ /* taken so the flag cannot change while a submitter is writing an SQE */
+ spin_lock(&fdev->admin_q->sq_lock);
+ fdev->suppress_cmds = true;
+ spin_unlock(&fdev->admin_q->sq_lock);
+ sbitmap_queue_wake_all(&fdev->admin_sbq);
+}
+
+/* The callback for synchronous execution of admin commands. It copies the
+ * command response to the caller's buffer and signals completion.
+ *
+ * @rsp: the response CQE
+ * @cb_data: the issuer's struct fun_sync_cmd_ctx, or NULL if the issuer
+ * abandoned the command
+ *
+ * A response longer than the caller's buffer is logged and truncated to the
+ * buffer size.
+ */
+static void fun_admin_cmd_sync_cb(struct fun_dev *fd, void *rsp, void *cb_data)
+{
+ const struct fun_admin_rsp_common *rsp_common = rsp;
+ struct fun_sync_cmd_ctx *ctx = cb_data;
+
+ if (!ctx)
+ return; /* command issuer timed out and left */
+ if (ctx->rsp_buf) {
+ unsigned int rsp_len = rsp_common->len8 * 8;
+
+ if (unlikely(rsp_len > ctx->rsp_len)) {
+ dev_err(fd->dev,
+ "response for op %u is %uB > response buffer %uB\n",
+ rsp_common->op, rsp_len, ctx->rsp_len);
+ rsp_len = ctx->rsp_len;
+ }
+ memcpy(ctx->rsp_buf, rsp, rsp_len);
+ }
+ ctx->rsp_status = rsp_common->ret;
+ /* must come last: ctx lives on the waiter's stack */
+ complete(&ctx->compl);
+}
+
+/* Submit a synchronous admin command and wait for its response.
+ *
+ * @fdev: the device
+ * @cmd: the command to submit
+ * @rsp: buffer for the response, or NULL if the response isn't needed
+ * @rspsize: size of @rsp in bytes
+ * @timeout: time to wait for the response in ms, 0 selects
+ * FUN_ADMIN_CMD_TO_MS
+ *
+ * May sleep, both for a free tag and for the response.
+ *
+ * Returns 0 on success, a negative errno if the command could not be
+ * submitted, -ETIMEDOUT if no response arrived in time, or the negated
+ * status reported in the response. A timeout also stops all further admin
+ * command submission on the device via fun_admin_stop().
+ */
+int fun_submit_admin_sync_cmd(struct fun_dev *fdev,
+ struct fun_admin_req_common *cmd, void *rsp,
+ size_t rspsize, unsigned int timeout)
+{
+ struct fun_sync_cmd_ctx ctx = {
+ .compl = COMPLETION_INITIALIZER_ONSTACK(ctx.compl),
+ .rsp_buf = rsp,
+ .rsp_len = rspsize,
+ };
+ unsigned int cmdlen = cmd->len8 * 8;
+ unsigned long jiffies_left;
+ int ret;
+
+ ret = fun_submit_admin_cmd(fdev, cmd, fun_admin_cmd_sync_cb, &ctx,
+ true);
+ if (ret)
+ return ret;
+
+ if (!timeout)
+ timeout = FUN_ADMIN_CMD_TO_MS;
+
+ jiffies_left = wait_for_completion_timeout(&ctx.compl,
+ msecs_to_jiffies(timeout));
+ if (!jiffies_left) {
+ /* The command timed out. Attempt to cancel it so we can return.
+ * But if the command is in the process of completing we'll
+ * wait for it.
+ */
+ if (fun_abandon_admin_cmd(fdev, cmd, &ctx)) {
+ dev_err(fdev->dev, "admin command timed out: %*ph\n",
+ cmdlen, cmd);
+ fun_admin_stop(fdev);
+ /* see if the timeout was due to a queue failure */
+ if (fun_adminq_stopped(fdev))
+ dev_err(fdev->dev,
+ "device does not accept admin commands\n");
+
+ return -ETIMEDOUT;
+ }
+ /* the completion callback already holds &ctx, so it must be
+ * allowed to finish before ctx goes out of scope
+ */
+ wait_for_completion(&ctx.compl);
+ }
+
+ if (ctx.rsp_status) {
+ dev_err(fdev->dev, "admin command failed, err %d: %*ph\n",
+ ctx.rsp_status, cmdlen, cmd);
+ }
+
+ return -ctx.rsp_status;
+}
+EXPORT_SYMBOL_GPL(fun_submit_admin_sync_cmd);
+
+/* Query the device for how many resources of type @res it has.
+ * Returns the count on success or a negative error from the admin command.
+ */
+int fun_get_res_count(struct fun_dev *fdev, enum fun_admin_op res)
+{
+	/* request and response share storage */
+	union {
+		struct fun_admin_res_count_req req;
+		struct fun_admin_res_count_rsp rsp;
+	} cmd;
+	int rc;
+
+	cmd.req.common = FUN_ADMIN_REQ_COMMON_INIT2(res, sizeof(cmd.req));
+	cmd.req.count = FUN_ADMIN_SIMPLE_SUBOP_INIT(FUN_ADMIN_SUBOP_RES_COUNT,
+						    0, 0);
+
+	rc = fun_submit_admin_sync_cmd(fdev, &cmd.req.common, &cmd.rsp,
+				       sizeof(cmd), 0);
+	if (rc)
+		return rc;
+
+	return be32_to_cpu(cmd.rsp.count.data);
+}
+EXPORT_SYMBOL_GPL(fun_get_res_count);
+
+/* Request that the instance of resource @res with the given id be deleted.
+ *
+ * @flags is passed through in the destroy sub-operation. The command is
+ * issued synchronously with the default timeout and no response buffer.
+ * Returns the result of fun_submit_admin_sync_cmd().
+ */
+int fun_res_destroy(struct fun_dev *fdev, enum fun_admin_op res,
+ unsigned int flags, u32 id)
+{
+ struct fun_admin_generic_destroy_req req = {
+ .common = FUN_ADMIN_REQ_COMMON_INIT2(res, sizeof(req)),
+ .destroy = FUN_ADMIN_SIMPLE_SUBOP_INIT(FUN_ADMIN_SUBOP_DESTROY,
+ flags, id)
+ };
+
+ return fun_submit_admin_sync_cmd(fdev, &req.common, NULL, 0, 0);
+}
+EXPORT_SYMBOL_GPL(fun_res_destroy);
+
+/* Bind two entities of the given types and IDs.
+ *
+ * Issues a synchronous FUN_ADMIN_OP_BIND command carrying two bind entries,
+ * (@type0, @id0) and (@type1, @id1), with the default timeout. Returns the
+ * result of fun_submit_admin_sync_cmd().
+ */
+int fun_bind(struct fun_dev *fdev, enum fun_admin_bind_type type0,
+ unsigned int id0, enum fun_admin_bind_type type1,
+ unsigned int id1)
+{
+ /* the two entries immediately follow the request header */
+ struct {
+ struct fun_admin_bind_req req;
+ struct fun_admin_bind_entry entry[2];
+ } cmd = {
+ .req.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_BIND,
+ sizeof(cmd)),
+ .entry[0] = FUN_ADMIN_BIND_ENTRY_INIT(type0, id0),
+ .entry[1] = FUN_ADMIN_BIND_ENTRY_INIT(type1, id1),
+ };
+
+ return fun_submit_admin_sync_cmd(fdev, &cmd.req.common, NULL, 0, 0);
+}
+EXPORT_SYMBOL_GPL(fun_bind);
+
+/* Query the device's CQ and SQ counts and derive fdev->max_qid and
+ * fdev->kern_end_qid from them and from the number of doorbells that fit in
+ * BAR 0. Requires a working admin queue.
+ *
+ * Returns 0 on success, a negative error from the count queries, or -EINVAL
+ * if the device has too few queues to be usable.
+ */
+static int fun_get_dev_limits(struct fun_dev *fdev)
+{
+ struct pci_dev *pdev = to_pci_dev(fdev->dev);
+ unsigned int cq_count, sq_count, num_dbs;
+ int rc;
+
+ rc = fun_get_res_count(fdev, FUN_ADMIN_OP_EPCQ);
+ if (rc < 0)
+ return rc;
+ cq_count = rc;
+
+ rc = fun_get_res_count(fdev, FUN_ADMIN_OP_EPSQ);
+ if (rc < 0)
+ return rc;
+ sq_count = rc;
+
+ /* The admin queue consumes 1 CQ and at least 1 SQ. To be usable the
+ * device must provide additional queues.
+ */
+ if (cq_count < 2 || sq_count < 2 + !!fdev->admin_q->rq_depth)
+ return -EINVAL;
+
+ /* Calculate the max QID based on SQ/CQ/doorbell counts.
+ * SQ/CQ doorbells alternate.
+ */
+ /* doorbells are spaced 4 << stride bytes apart, starting at NVME_REG_DBS */
+ num_dbs = (pci_resource_len(pdev, 0) - NVME_REG_DBS) >>
+ (2 + NVME_CAP_STRIDE(fdev->cap_reg));
+ fdev->max_qid = min3(cq_count, sq_count, num_dbs / 2) - 1;
+ fdev->kern_end_qid = fdev->max_qid + 1;
+ return 0;
+}
+
+/* Allocate as many MSI-X vectors as the function supports, but no fewer than
+ * @min_vecs. Returns the number of vectors allocated, -ERANGE if the function
+ * has fewer than @min_vecs vectors, or another negative errno.
+ */
+static int fun_alloc_irqs(struct pci_dev *pdev, unsigned int min_vecs)
+{
+	int num_msix = pci_msix_vec_count(pdev);
+	int vecs;
+
+	if (num_msix < 0)
+		return num_msix;
+	if (min_vecs > num_msix)
+		return -ERANGE;
+
+	vecs = pci_alloc_irq_vectors(pdev, min_vecs, num_msix, PCI_IRQ_MSIX);
+	if (vecs <= 0) {
+		dev_err(&pdev->dev,
+			"Unable to allocate at least %u IRQ vectors\n",
+			min_vecs);
+		return vecs;
+	}
+
+	dev_info(&pdev->dev,
+		 "Allocated %d IRQ vectors of %d requested\n",
+		 vecs, num_msix);
+	return vecs;
+}
+
+/* Allocate and initialize the IRQ manager state: a bitmap with one bit per
+ * allocated IRQ vector (set = in use), its lock, and the count of available
+ * vectors. fdev->num_irqs must already be set. Returns 0 or -ENOMEM.
+ */
+static int fun_alloc_irq_mgr(struct fun_dev *fdev)
+{
+ fdev->irq_map = bitmap_zalloc(fdev->num_irqs, GFP_KERNEL);
+ if (!fdev->irq_map)
+ return -ENOMEM;
+
+ spin_lock_init(&fdev->irqmgr_lock);
+ /* mark IRQ 0 allocated, it is used by the admin queue */
+ __set_bit(0, fdev->irq_map);
+ fdev->irqs_avail = fdev->num_irqs - 1;
+ return 0;
+}
+
+/* Reserve @nirqs of the currently available IRQs and return their indices.
+ *
+ * @irq_indices must have room for @nirqs entries; it receives the indices of
+ * the reserved IRQs in ascending order. The reservation is all-or-nothing.
+ *
+ * Returns the number of IRQs reserved, which is 0 if @nirqs is 0, or -ENOSPC
+ * if fewer than @nirqs IRQs are available.
+ */
+int fun_reserve_irqs(struct fun_dev *fdev, unsigned int nirqs, u16 *irq_indices)
+{
+ unsigned int b, n = 0;
+ int err = -ENOSPC;
+
+ if (!nirqs)
+ return 0;
+
+ spin_lock(&fdev->irqmgr_lock);
+ if (nirqs > fdev->irqs_avail)
+ goto unlock;
+
+ /* claim the lowest-numbered free IRQs */
+ for_each_clear_bit(b, fdev->irq_map, fdev->num_irqs) {
+ __set_bit(b, fdev->irq_map);
+ irq_indices[n++] = b;
+ if (n >= nirqs)
+ break;
+ }
+
+ /* would mean irqs_avail and the bitmap disagree */
+ WARN_ON(n < nirqs);
+ fdev->irqs_avail -= n;
+ err = n;
+unlock:
+ spin_unlock(&fdev->irqmgr_lock);
+ return err;
+}
+EXPORT_SYMBOL(fun_reserve_irqs);
+
+/* Release @nirqs previously allocated IRQs with the supplied indices.
+ *
+ * The indices are not validated: the caller must pass indices it obtained
+ * from fun_reserve_irqs() and release each of them only once, otherwise
+ * irqs_avail no longer matches the bitmap.
+ */
+void fun_release_irqs(struct fun_dev *fdev, unsigned int nirqs,
+ u16 *irq_indices)
+{
+ unsigned int i;
+
+ spin_lock(&fdev->irqmgr_lock);
+ for (i = 0; i < nirqs; i++)
+ __clear_bit(irq_indices[i], fdev->irq_map);
+ fdev->irqs_avail += nirqs;
+ spin_unlock(&fdev->irqmgr_lock);
+}
+EXPORT_SYMBOL(fun_release_irqs);
+
+/* Work function of the service task. Invokes the driver's service callback
+ * unless the service task is disabled or no callback is registered.
+ */
+static void fun_serv_handler(struct work_struct *work)
+{
+	struct fun_dev *fdev = container_of(work, struct fun_dev, service_task);
+	bool disabled = test_bit(FUN_SERV_DISABLED, &fdev->service_flags);
+
+	if (!disabled && fdev->serv_cb)
+		fdev->serv_cb(fdev);
+}
+
+/* Disable the service task and wait for a running instance to finish. The
+ * flag is set first so that work scheduled concurrently becomes a no-op.
+ */
+void fun_serv_stop(struct fun_dev *fd)
+{
+ set_bit(FUN_SERV_DISABLED, &fd->service_flags);
+ cancel_work_sync(&fd->service_task);
+}
+EXPORT_SYMBOL_GPL(fun_serv_stop);
+
+/* Re-enable the service task. If any other service flag is set once the
+ * disabled flag has been cleared, the task is scheduled to run.
+ */
+void fun_serv_restart(struct fun_dev *fd)
+{
+ clear_bit(FUN_SERV_DISABLED, &fd->service_flags);
+ if (fd->service_flags)
+ schedule_work(&fd->service_task);
+}
+EXPORT_SYMBOL_GPL(fun_serv_restart);
+
+/* Schedule the service task to run, unless it is currently disabled. */
+void fun_serv_sched(struct fun_dev *fd)
+{
+ if (!test_bit(FUN_SERV_DISABLED, &fd->service_flags))
+ schedule_work(&fd->service_task);
+}
+EXPORT_SYMBOL_GPL(fun_serv_sched);
+
+/* Bring the device into the state required for initialization, namely
+ * CSTS.RDY = CC.EN = 0. As a side effect the CAP and CC registers are cached
+ * in @fdev. Returns 0 on success or a negative errno.
+ */
+static int sanitize_dev(struct fun_dev *fdev)
+{
+	int rc;
+
+	fdev->cap_reg = readq(fdev->bar + NVME_REG_CAP);
+	fdev->cc_reg = readl(fdev->bar + NVME_REG_CC);
+
+	/* An EN change may have been made recently, so first let RDY catch
+	 * up with the current EN.
+	 */
+	rc = fun_wait_ready(fdev, fdev->cc_reg & NVME_CC_ENABLE);
+	if (rc)
+		return rc;
+
+	/* Nothing more to do if the device is already disabled. */
+	if (!(fdev->cc_reg & NVME_CC_ENABLE))
+		return 0;
+
+	/* EN is 1, reset the device. */
+	return fun_disable_ctrl(fdev);
+}
+
+/* Undo the device initialization of fun_dev_enable().
+ *
+ * If a FW handle is held it is destroyed first, while the admin queue still
+ * works. The rest is released in reverse order of setup. fdev->irq_map is
+ * freed but not reset, so this must not be called twice for one enable.
+ */
+void fun_dev_disable(struct fun_dev *fdev)
+{
+ struct pci_dev *pdev = to_pci_dev(fdev->dev);
+
+ pci_set_drvdata(pdev, NULL);
+
+ if (fdev->fw_handle != FUN_HCI_ID_INVALID) {
+ /* best effort, the result is ignored */
+ fun_res_destroy(fdev, FUN_ADMIN_OP_SWUPGRADE, 0,
+ fdev->fw_handle);
+ fdev->fw_handle = FUN_HCI_ID_INVALID;
+ }
+
+ fun_disable_admin_queue(fdev);
+
+ bitmap_free(fdev->irq_map);
+ pci_free_irq_vectors(pdev);
+
+ pci_clear_master(pdev);
+ pci_disable_pcie_error_reporting(pdev);
+ pci_disable_device(pdev);
+
+ fun_unmap_bars(fdev);
+}
+EXPORT_SYMBOL(fun_dev_disable);
+
+/* Perform basic initialization of a device, including
+ * - PCI config space setup and BAR0 mapping
+ * - interrupt management initialization
+ * - 1 admin queue setup
+ * - determination of some device limits, such as number of queues.
+ *
+ * @fdev: device state to initialize
+ * @pdev: the PCI function
+ * @areq: admin queue parameters, minimum MSI-X vector count, and callbacks
+ * @name: name under which the PCI memory regions are requested
+ *
+ * The service task is left disabled; the caller enables it with
+ * fun_serv_restart() when it is ready to handle service work.
+ *
+ * Returns 0 on success or a negative errno. On failure everything set up
+ * here has been released. Undone by fun_dev_disable().
+ */
+int fun_dev_enable(struct fun_dev *fdev, struct pci_dev *pdev,
+		   const struct fun_dev_params *areq, const char *name)
+{
+	int rc;
+
+	fdev->dev = &pdev->dev;
+	rc = fun_map_bars(fdev, name);
+	if (rc)
+		return rc;
+
+	rc = fun_set_dma_masks(fdev->dev);
+	if (rc)
+		goto unmap;
+
+	rc = pci_enable_device_mem(pdev);
+	if (rc) {
+		dev_err(&pdev->dev, "Couldn't enable device, err %d\n", rc);
+		goto unmap;
+	}
+
+	pci_enable_pcie_error_reporting(pdev);
+
+	/* also caches CAP, which the limits below are derived from */
+	rc = sanitize_dev(fdev);
+	if (rc)
+		goto disable_dev;
+
+	fdev->fw_handle = FUN_HCI_ID_INVALID;
+	fdev->q_depth = NVME_CAP_MQES(fdev->cap_reg) + 1;
+	fdev->db_stride = 1 << NVME_CAP_STRIDE(fdev->cap_reg);
+	fdev->dbs = fdev->bar + NVME_REG_DBS;
+
+	INIT_WORK(&fdev->service_task, fun_serv_handler);
+	/* FUN_SERV_DISABLED is a bit number, not a mask. Assigning it directly
+	 * would store 0 and leave the service task enabled.
+	 */
+	fdev->service_flags = BIT(FUN_SERV_DISABLED);
+	fdev->serv_cb = areq->serv_cb;
+
+	rc = fun_alloc_irqs(pdev, areq->min_msix + 1); /* +1 for admin CQ */
+	if (rc < 0)
+		goto disable_dev;
+	fdev->num_irqs = rc;
+
+	rc = fun_alloc_irq_mgr(fdev);
+	if (rc)
+		goto free_irqs;
+
+	pci_set_master(pdev);
+	rc = fun_enable_admin_queue(fdev, areq);
+	if (rc)
+		goto free_irq_mgr;
+
+	rc = fun_get_dev_limits(fdev);
+	if (rc < 0)
+		goto disable_admin;
+
+	pci_save_state(pdev);
+	pci_set_drvdata(pdev, fdev);
+	pcie_print_link_status(pdev);
+	dev_dbg(fdev->dev, "q_depth %u, db_stride %u, max qid %d kern_end_qid %d\n",
+		fdev->q_depth, fdev->db_stride, fdev->max_qid,
+		fdev->kern_end_qid);
+	return 0;
+
+	/* unwind in reverse order of setup */
+disable_admin:
+	fun_disable_admin_queue(fdev);
+free_irq_mgr:
+	pci_clear_master(pdev);
+	bitmap_free(fdev->irq_map);
+free_irqs:
+	pci_free_irq_vectors(pdev);
+disable_dev:
+	pci_disable_pcie_error_reporting(pdev);
+	pci_disable_device(pdev);
+unmap:
+	fun_unmap_bars(fdev);
+	return rc;
+}
+EXPORT_SYMBOL(fun_dev_enable);
+
+MODULE_AUTHOR("Dimitris Michailidis <dmichail@fungible.com>");
+MODULE_DESCRIPTION("Core services driver for Fungible devices");
+MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/ethernet/fungible/funcore/fun_dev.h b/drivers/net/ethernet/fungible/funcore/fun_dev.h
new file mode 100644
index 000000000..9e8c17ce8
--- /dev/null
+++ b/drivers/net/ethernet/fungible/funcore/fun_dev.h
@@ -0,0 +1,150 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */
+
+#ifndef _FUNDEV_H
+#define _FUNDEV_H
+
+#include <linux/sbitmap.h>
+#include <linux/spinlock_types.h>
+#include <linux/workqueue.h>
+#include "fun_hci.h"
+
+struct pci_dev;
+struct fun_dev;
+struct fun_queue;
+struct fun_cmd_ctx;
+struct fun_queue_alloc_req;
+
+/* Doorbell fields. Going by the values, _S is a field's bit position, _M its
+ * unshifted mask, and _F a single-bit flag already shifted into place.
+ */
+enum {
+ FUN_DB_QIDX_S = 0,
+ FUN_DB_INTCOAL_ENTRIES_S = 16,
+ FUN_DB_INTCOAL_ENTRIES_M = 0x7f,
+ FUN_DB_INTCOAL_USEC_S = 23,
+ FUN_DB_INTCOAL_USEC_M = 0x7f,
+ FUN_DB_IRQ_S = 30,
+ FUN_DB_IRQ_F = 1 << FUN_DB_IRQ_S,
+ FUN_DB_IRQ_ARM_S = 31,
+ FUN_DB_IRQ_ARM_F = 1U << FUN_DB_IRQ_ARM_S /* 1U: bit 31 doesn't fit a signed int */
+};
+
+/* Callback for asynchronous admin commands.
+ * Invoked on reception of command response.
+ * @rsp is the response CQE and @cb_data the data supplied at submission, or
+ * NULL if the issuer has abandoned the command in the meantime.
+ */
+typedef void (*fun_admin_callback_t)(struct fun_dev *fdev, void *rsp,
+ void *cb_data);
+
+/* Callback for events/notifications received by an admin queue.
+ * @cqe is the CQE carrying the event.
+ */
+typedef void (*fun_admin_event_cb)(struct fun_dev *fdev, void *cqe);
+
+/* Callback for pending work handled by the service task. It is called from
+ * the service task's work item.
+ */
+typedef void (*fun_serv_cb)(struct fun_dev *fd);
+
+/* Service task flags. These are bit numbers within fun_dev.service_flags, for
+ * use with set_bit()/test_bit() and friends, not bit masks.
+ */
+enum {
+ FUN_SERV_DISABLED, /* service task is disabled */
+ FUN_SERV_FIRST_AVAIL /* presumably the first bit free for users of the service task; confirm with callers */
+};
+
+/* Driver state associated with a PCI function. Initialized by
+ * fun_dev_enable() and torn down by fun_dev_disable().
+ */
+struct fun_dev {
+ struct device *dev; /* the PCI function's struct device */
+
+ void __iomem *bar; /* start of BAR0 mapping */
+ u32 __iomem *dbs; /* start of doorbells in BAR0 mapping */
+
+ /* admin queue */
+ struct fun_queue *admin_q;
+ struct sbitmap_queue admin_sbq; /* allocator of admin command tags */
+ struct fun_cmd_ctx *cmd_ctx; /* per-tag command contexts, indexed by tag */
+ fun_admin_event_cb adminq_cb; /* handler of admin queue events, may be NULL */
+ bool suppress_cmds; /* if set don't write commands to SQ */
+
+ /* address increment between consecutive doorbells, in 4B units */
+ unsigned int db_stride;
+
+ /* SW versions of device registers */
+ u32 cc_reg; /* CC register */
+ u64 cap_reg; /* CAPability register */
+
+ unsigned int q_depth; /* max queue depth supported by device */
+ unsigned int max_qid; /* = #queues - 1, separately for SQs and CQs */
+ unsigned int kern_end_qid; /* last qid in the kernel range + 1 */
+
+ unsigned int fw_handle; /* FUN_HCI_ID_INVALID when no handle is held */
+
+ /* IRQ manager */
+ unsigned int num_irqs; /* number of IRQ vectors allocated */
+ unsigned int irqs_avail; /* number of vectors not yet reserved */
+ spinlock_t irqmgr_lock; /* protects irq_map and irqs_avail */
+ unsigned long *irq_map; /* bitmap of reserved vectors */
+
+ /* The service task handles work that needs a process context */
+ struct work_struct service_task;
+ unsigned long service_flags; /* FUN_SERV_* bits */
+ fun_serv_cb serv_cb; /* called by the service task, may be NULL */
+};
+
+/* Parameters supplied by a driver to fun_dev_enable(). */
+struct fun_dev_params {
+ u8 cqe_size_log2; /* admin q CQE size */
+ u8 sqe_size_log2; /* admin q SQE size */
+
+ /* admin q depths */
+ u16 cq_depth;
+ u16 sq_depth;
+ u16 rq_depth; /* 0 if the admin queue should have no RQ */
+
+ u16 min_msix; /* min vectors needed by requesting driver */
+
+ fun_admin_event_cb event_cb; /* handler of admin queue events, may be NULL */
+ fun_serv_cb serv_cb; /* service task callback, may be NULL */
+};
+
+/* Return the BAR address of the doorbell with index @db_index. Consecutive
+ * doorbells are db_stride 32-bit words apart.
+ */
+static inline u32 __iomem *fun_db_addr(const struct fun_dev *fdev,
+				       unsigned int db_index)
+{
+	return fdev->dbs + db_index * fdev->db_stride;
+}
+
+/* Return the BAR address of the doorbell of SQ @sqid. SQ and CQ doorbells
+ * are interleaved, with SQs at the even doorbell indices.
+ */
+static inline u32 __iomem *fun_sq_db_addr(const struct fun_dev *fdev,
+					  unsigned int sqid)
+{
+	unsigned int db_index = sqid * 2;
+
+	return fun_db_addr(fdev, db_index);
+}
+
+/* Return the BAR address of the doorbell of CQ @cqid. CQs use the odd
+ * doorbell indices.
+ */
+static inline u32 __iomem *fun_cq_db_addr(const struct fun_dev *fdev,
+					  unsigned int cqid)
+{
+	unsigned int db_index = cqid * 2 + 1;
+
+	return fun_db_addr(fdev, db_index);
+}
+
+/* Synchronous admin helpers. They return a negative value on failure. */
+int fun_get_res_count(struct fun_dev *fdev, enum fun_admin_op res);
+int fun_res_destroy(struct fun_dev *fdev, enum fun_admin_op res,
+ unsigned int flags, u32 id);
+int fun_bind(struct fun_dev *fdev, enum fun_admin_bind_type type0,
+ unsigned int id0, enum fun_admin_bind_type type1,
+ unsigned int id1);
+
+/* Admin command submission, asynchronous and synchronous. */
+int fun_submit_admin_cmd(struct fun_dev *fdev, struct fun_admin_req_common *cmd,
+ fun_admin_callback_t cb, void *cb_data, bool wait_ok);
+int fun_submit_admin_sync_cmd(struct fun_dev *fdev,
+ struct fun_admin_req_common *cmd, void *rsp,
+ size_t rspsize, unsigned int timeout);
+
+/* Device bring-up and teardown. */
+int fun_dev_enable(struct fun_dev *fdev, struct pci_dev *pdev,
+ const struct fun_dev_params *areq, const char *name);
+void fun_dev_disable(struct fun_dev *fdev);
+
+/* IRQ vector reservation. */
+int fun_reserve_irqs(struct fun_dev *fdev, unsigned int nirqs,
+ u16 *irq_indices);
+void fun_release_irqs(struct fun_dev *fdev, unsigned int nirqs,
+ u16 *irq_indices);
+
+/* Service task control. */
+void fun_serv_stop(struct fun_dev *fd);
+void fun_serv_restart(struct fun_dev *fd);
+void fun_serv_sched(struct fun_dev *fd);
+
+#endif /* _FUNDEV_H */
diff --git a/drivers/net/ethernet/fungible/funcore/fun_hci.h b/drivers/net/ethernet/fungible/funcore/fun_hci.h
new file mode 100644
index 000000000..f21819670
--- /dev/null
+++ b/drivers/net/ethernet/fungible/funcore/fun_hci.h
@@ -0,0 +1,1242 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */
+
+#ifndef __FUN_HCI_H
+#define __FUN_HCI_H
+
+enum {
+ FUN_HCI_ID_INVALID = 0xffffffff,
+};
+
+enum fun_admin_op {
+ FUN_ADMIN_OP_BIND = 0x1,
+ FUN_ADMIN_OP_EPCQ = 0x11,
+ FUN_ADMIN_OP_EPSQ = 0x12,
+ FUN_ADMIN_OP_PORT = 0x13,
+ FUN_ADMIN_OP_ETH = 0x14,
+ FUN_ADMIN_OP_VI = 0x15,
+ FUN_ADMIN_OP_SWUPGRADE = 0x1f,
+ FUN_ADMIN_OP_RSS = 0x21,
+ FUN_ADMIN_OP_ADI = 0x25,
+ FUN_ADMIN_OP_KTLS = 0x26,
+};
+
+enum {
+ FUN_REQ_COMMON_FLAG_RSP = 0x1,
+ FUN_REQ_COMMON_FLAG_HEAD_WB = 0x2,
+ FUN_REQ_COMMON_FLAG_INT = 0x4,
+ FUN_REQ_COMMON_FLAG_CQE_IN_RQBUF = 0x8,
+};
+
+/* Common 8-byte header of an admin request. It is embedded as the first
+ * member of the admin request structures in this header. Multi-byte fields
+ * are big-endian.
+ */
+struct fun_admin_req_common {
+ __u8 op; /* presumably an enum fun_admin_op value -- confirm */
+ __u8 len8; /* FUN_ADMIN_REQ_COMMON_INIT2 stores byte length / 8 here */
+ __be16 flags;
+ __u8 suboff8;
+ __u8 rsvd0;
+ __be16 cid;
+};
+
+/* Build a struct fun_admin_req_common compound literal. @_flags and @_cid are
+ * given in CPU byte order and converted to big-endian; members not named
+ * (rsvd0) are zero-initialized.
+ */
+#define FUN_ADMIN_REQ_COMMON_INIT(_op, _len8, _flags, _suboff8, _cid) \
+ (struct fun_admin_req_common) { \
+ .op = (_op), .len8 = (_len8), .flags = cpu_to_be16(_flags), \
+ .suboff8 = (_suboff8), .cid = cpu_to_be16(_cid), \
+ }
+
+/* Shorter form: only the opcode and length are set. @_len is a length in
+ * bytes and is stored divided by 8; all other members are zero-initialized.
+ */
+#define FUN_ADMIN_REQ_COMMON_INIT2(_op, _len) \
+ (struct fun_admin_req_common) { \
+ .op = (_op), .len8 = (_len) / 8, \
+ }
+
+/* Common 8-byte header of an admin response. The layout mirrors
+ * struct fun_admin_req_common, with the request's reserved byte replaced by
+ * @ret.
+ */
+struct fun_admin_rsp_common {
+ __u8 op;
+ __u8 len8;
+ __be16 flags;
+ __u8 suboff8;
+ __u8 ret;
+ __be16 cid;
+};
+
+/* A write48 request packs an 8-bit key and 48 bits of data into one
+ * big-endian 64-bit word: key in bits 63:56, data in bits 47:0.
+ */
+struct fun_admin_write48_req {
+ __be64 key_to_data;
+};
+
+#define FUN_ADMIN_WRITE48_REQ_KEY_S 56U
+#define FUN_ADMIN_WRITE48_REQ_KEY_M 0xff
+/* Place a key at its position in a CPU-order word. The argument is
+ * parenthesized before the cast so that a compound expression (e.g. a | b)
+ * is widened to 64 bits as a whole. No masking is applied.
+ */
+#define FUN_ADMIN_WRITE48_REQ_KEY_P_NOSWAP(x) \
+ (((__u64)(x)) << FUN_ADMIN_WRITE48_REQ_KEY_S)
+
+#define FUN_ADMIN_WRITE48_REQ_DATA_S 0U
+#define FUN_ADMIN_WRITE48_REQ_DATA_M 0xffffffffffff
+/* Place the data at its position in a CPU-order word. No masking is applied,
+ * so bits above bit 47 of the argument land outside the data field.
+ */
+#define FUN_ADMIN_WRITE48_REQ_DATA_P_NOSWAP(x) \
+ (((__u64)(x)) << FUN_ADMIN_WRITE48_REQ_DATA_S)
+
+/* Build a write48 request from a CPU-order key and data value. */
+#define FUN_ADMIN_WRITE48_REQ_INIT(key, data) \
+ (struct fun_admin_write48_req) { \
+ .key_to_data = cpu_to_be64( \
+ FUN_ADMIN_WRITE48_REQ_KEY_P_NOSWAP(key) | \
+ FUN_ADMIN_WRITE48_REQ_DATA_P_NOSWAP(data)), \
+ }
+
+struct fun_admin_write48_rsp {
+ __be64 key_to_data;
+};
+
+/* A read48 request carries an 8-bit key in bits 63:56 of one big-endian
+ * 64-bit word; FUN_ADMIN_READ48_REQ_INIT leaves the remaining bits zero.
+ */
+struct fun_admin_read48_req {
+ __be64 key_pack;
+};
+
+#define FUN_ADMIN_READ48_REQ_KEY_S 56U
+#define FUN_ADMIN_READ48_REQ_KEY_M 0xff
+/* Place a key at its position in a CPU-order word. The argument is
+ * parenthesized before the cast so that a compound expression is widened to
+ * 64 bits as a whole. No masking is applied.
+ */
+#define FUN_ADMIN_READ48_REQ_KEY_P_NOSWAP(x) \
+ (((__u64)(x)) << FUN_ADMIN_READ48_REQ_KEY_S)
+
+/* Build a read48 request from a CPU-order key. */
+#define FUN_ADMIN_READ48_REQ_INIT(key) \
+ (struct fun_admin_read48_req) { \
+ .key_pack = \
+ cpu_to_be64(FUN_ADMIN_READ48_REQ_KEY_P_NOSWAP(key)), \
+ }
+
+/* A read48 response is one big-endian 64-bit word holding three fields:
+ * KEY in bits 63:56, RET in bits 55:48 and DATA in bits 47:0.
+ * Each *_G() accessor takes the big-endian word, converts it to CPU byte
+ * order and returns the corresponding field, shifted down and masked.
+ */
+struct fun_admin_read48_rsp {
+ __be64 key_to_data;
+};
+
+#define FUN_ADMIN_READ48_RSP_KEY_S 56U
+#define FUN_ADMIN_READ48_RSP_KEY_M 0xff
+#define FUN_ADMIN_READ48_RSP_KEY_G(x) \
+ ((be64_to_cpu(x) >> FUN_ADMIN_READ48_RSP_KEY_S) & \
+ FUN_ADMIN_READ48_RSP_KEY_M)
+
+#define FUN_ADMIN_READ48_RSP_RET_S 48U
+#define FUN_ADMIN_READ48_RSP_RET_M 0xff
+#define FUN_ADMIN_READ48_RSP_RET_G(x) \
+ ((be64_to_cpu(x) >> FUN_ADMIN_READ48_RSP_RET_S) & \
+ FUN_ADMIN_READ48_RSP_RET_M)
+
+#define FUN_ADMIN_READ48_RSP_DATA_S 0U
+#define FUN_ADMIN_READ48_RSP_DATA_M 0xffffffffffff
+#define FUN_ADMIN_READ48_RSP_DATA_G(x) \
+ ((be64_to_cpu(x) >> FUN_ADMIN_READ48_RSP_DATA_S) & \
+ FUN_ADMIN_READ48_RSP_DATA_M)
+
+enum fun_admin_bind_type {
+ FUN_ADMIN_BIND_TYPE_EPCQ = 0x1,
+ FUN_ADMIN_BIND_TYPE_EPSQ = 0x2,
+ FUN_ADMIN_BIND_TYPE_PORT = 0x3,
+ FUN_ADMIN_BIND_TYPE_RSS = 0x4,
+ FUN_ADMIN_BIND_TYPE_VI = 0x5,
+ FUN_ADMIN_BIND_TYPE_ETH = 0x6,
+};
+
+struct fun_admin_bind_entry {
+ __u8 type;
+ __u8 rsvd0[3];
+ __be32 id;
+};
+
+#define FUN_ADMIN_BIND_ENTRY_INIT(_type, _id) \
+ (struct fun_admin_bind_entry) { \
+ .type = (_type), .id = cpu_to_be32(_id), \
+ }
+
+struct fun_admin_bind_req {
+ struct fun_admin_req_common common;
+ struct fun_admin_bind_entry entry[];
+};
+
+struct fun_admin_bind_rsp {
+ struct fun_admin_rsp_common bind_rsp_common;
+};
+
+struct fun_admin_simple_subop {
+ __u8 subop;
+ __u8 rsvd0;
+ __be16 flags;
+ __be32 data;
+};
+
+#define FUN_ADMIN_SIMPLE_SUBOP_INIT(_subop, _flags, _data) \
+ (struct fun_admin_simple_subop) { \
+ .subop = (_subop), .flags = cpu_to_be16(_flags), \
+ .data = cpu_to_be32(_data), \
+ }
+
+enum fun_admin_subop {
+ FUN_ADMIN_SUBOP_CREATE = 0x10,
+ FUN_ADMIN_SUBOP_DESTROY = 0x11,
+ FUN_ADMIN_SUBOP_MODIFY = 0x12,
+ FUN_ADMIN_SUBOP_RES_COUNT = 0x14,
+ FUN_ADMIN_SUBOP_READ = 0x15,
+ FUN_ADMIN_SUBOP_WRITE = 0x16,
+ FUN_ADMIN_SUBOP_NOTIFY = 0x17,
+};
+
+enum {
+ FUN_ADMIN_RES_CREATE_FLAG_ALLOCATOR = 0x1,
+};
+
+struct fun_admin_generic_destroy_req {
+ struct fun_admin_req_common common;
+ struct fun_admin_simple_subop destroy;
+};
+
+struct fun_admin_generic_create_rsp {
+ struct fun_admin_rsp_common common;
+
+ __u8 subop;
+ __u8 rsvd0;
+ __be16 flags;
+ __be32 id;
+};
+
+struct fun_admin_res_count_req {
+ struct fun_admin_req_common common;
+ struct fun_admin_simple_subop count;
+};
+
+struct fun_admin_res_count_rsp {
+ struct fun_admin_rsp_common common;
+ struct fun_admin_simple_subop count;
+};
+
+enum {
+ FUN_ADMIN_EPCQ_CREATE_FLAG_INT_EPCQ = 0x2,
+ FUN_ADMIN_EPCQ_CREATE_FLAG_ENTRY_WR_TPH = 0x4,
+ FUN_ADMIN_EPCQ_CREATE_FLAG_SL_WR_TPH = 0x8,
+ FUN_ADMIN_EPCQ_CREATE_FLAG_RQ = 0x80,
+ FUN_ADMIN_EPCQ_CREATE_FLAG_INT_IQ = 0x100,
+ FUN_ADMIN_EPCQ_CREATE_FLAG_INT_NOARM = 0x200,
+ FUN_ADMIN_EPCQ_CREATE_FLAG_DROP_ON_OVERFLOW = 0x400,
+};
+
+struct fun_admin_epcq_req {
+ struct fun_admin_req_common common;
+ union epcq_req_subop {
+ struct fun_admin_epcq_create_req {
+ __u8 subop;
+ __u8 rsvd0;
+ __be16 flags;
+ __be32 id;
+
+ __be32 epsqid;
+ __u8 rsvd1;
+ __u8 entry_size_log2;
+ __be16 nentries;
+
+ __be64 address;
+
+ __be16 tailroom; /* per packet tailroom in bytes */
+ __u8 headroom; /* per packet headroom in 2B units */
+ __u8 intcoal_kbytes;
+ __u8 intcoal_holdoff_nentries;
+ __u8 intcoal_holdoff_usecs;
+ __be16 intid;
+
+ __be32 scan_start_id;
+ __be32 scan_end_id;
+
+ __be16 tph_cpuid;
+ __u8 rsvd3[6];
+ } create;
+
+ struct fun_admin_epcq_modify_req {
+ __u8 subop;
+ __u8 rsvd0;
+ __be16 flags;
+ __be32 id;
+
+ __be16 headroom; /* headroom in bytes */
+ __u8 rsvd1[6];
+ } modify;
+ } u;
+};
+
+#define FUN_ADMIN_EPCQ_CREATE_REQ_INIT( \
+ _subop, _flags, _id, _epsqid, _entry_size_log2, _nentries, _address, \
+ _tailroom, _headroom, _intcoal_kbytes, _intcoal_holdoff_nentries, \
+ _intcoal_holdoff_usecs, _intid, _scan_start_id, _scan_end_id, \
+ _tph_cpuid) \
+ (struct fun_admin_epcq_create_req) { \
+ .subop = (_subop), .flags = cpu_to_be16(_flags), \
+ .id = cpu_to_be32(_id), .epsqid = cpu_to_be32(_epsqid), \
+ .entry_size_log2 = _entry_size_log2, \
+ .nentries = cpu_to_be16(_nentries), \
+ .address = cpu_to_be64(_address), \
+ .tailroom = cpu_to_be16(_tailroom), .headroom = _headroom, \
+ .intcoal_kbytes = _intcoal_kbytes, \
+ .intcoal_holdoff_nentries = _intcoal_holdoff_nentries, \
+ .intcoal_holdoff_usecs = _intcoal_holdoff_usecs, \
+ .intid = cpu_to_be16(_intid), \
+ .scan_start_id = cpu_to_be32(_scan_start_id), \
+ .scan_end_id = cpu_to_be32(_scan_end_id), \
+ .tph_cpuid = cpu_to_be16(_tph_cpuid), \
+ }
+
+#define FUN_ADMIN_EPCQ_MODIFY_REQ_INIT(_subop, _flags, _id, _headroom) \
+ (struct fun_admin_epcq_modify_req) { \
+ .subop = (_subop), .flags = cpu_to_be16(_flags), \
+ .id = cpu_to_be32(_id), .headroom = cpu_to_be16(_headroom), \
+ }
+
+enum {
+ FUN_ADMIN_EPSQ_CREATE_FLAG_INT_EPSQ = 0x2,
+ FUN_ADMIN_EPSQ_CREATE_FLAG_ENTRY_RD_TPH = 0x4,
+ FUN_ADMIN_EPSQ_CREATE_FLAG_GL_RD_TPH = 0x8,
+ FUN_ADMIN_EPSQ_CREATE_FLAG_HEAD_WB_ADDRESS = 0x10,
+ FUN_ADMIN_EPSQ_CREATE_FLAG_HEAD_WB_ADDRESS_TPH = 0x20,
+ FUN_ADMIN_EPSQ_CREATE_FLAG_HEAD_WB_EPCQ = 0x40,
+ FUN_ADMIN_EPSQ_CREATE_FLAG_RQ = 0x80,
+ FUN_ADMIN_EPSQ_CREATE_FLAG_INT_IQ = 0x100,
+ FUN_ADMIN_EPSQ_CREATE_FLAG_NO_CMPL = 0x200,
+};
+
+struct fun_admin_epsq_req {
+ struct fun_admin_req_common common;
+
+ union epsq_req_subop {
+ struct fun_admin_epsq_create_req {
+ __u8 subop;
+ __u8 rsvd0;
+ __be16 flags;
+ __be32 id;
+
+ __be32 epcqid;
+ __u8 rsvd1;
+ __u8 entry_size_log2;
+ __be16 nentries;
+
+ __be64 address; /* DMA address of epsq */
+
+ __u8 rsvd2[3];
+ __u8 intcoal_kbytes;
+ __u8 intcoal_holdoff_nentries;
+ __u8 intcoal_holdoff_usecs;
+ __be16 intid;
+
+ __be32 scan_start_id;
+ __be32 scan_end_id;
+
+ __u8 rsvd3[4];
+ __be16 tph_cpuid;
+ __u8 buf_size_log2; /* log2 of RQ buffer size */
+ __u8 head_wb_size_log2; /* log2 of head write back size */
+
+ __be64 head_wb_address; /* DMA address for head writeback */
+ } create;
+ } u;
+};
+
+#define FUN_ADMIN_EPSQ_CREATE_REQ_INIT( \
+ _subop, _flags, _id, _epcqid, _entry_size_log2, _nentries, _address, \
+ _intcoal_kbytes, _intcoal_holdoff_nentries, _intcoal_holdoff_usecs, \
+ _intid, _scan_start_id, _scan_end_id, _tph_cpuid, _buf_size_log2, \
+ _head_wb_size_log2, _head_wb_address) \
+ (struct fun_admin_epsq_create_req) { \
+ .subop = (_subop), .flags = cpu_to_be16(_flags), \
+ .id = cpu_to_be32(_id), .epcqid = cpu_to_be32(_epcqid), \
+ .entry_size_log2 = _entry_size_log2, \
+ .nentries = cpu_to_be16(_nentries), \
+ .address = cpu_to_be64(_address), \
+ .intcoal_kbytes = _intcoal_kbytes, \
+ .intcoal_holdoff_nentries = _intcoal_holdoff_nentries, \
+ .intcoal_holdoff_usecs = _intcoal_holdoff_usecs, \
+ .intid = cpu_to_be16(_intid), \
+ .scan_start_id = cpu_to_be32(_scan_start_id), \
+ .scan_end_id = cpu_to_be32(_scan_end_id), \
+ .tph_cpuid = cpu_to_be16(_tph_cpuid), \
+ .buf_size_log2 = _buf_size_log2, \
+ .head_wb_size_log2 = _head_wb_size_log2, \
+ .head_wb_address = cpu_to_be64(_head_wb_address), \
+ }
+
+enum {
+ FUN_PORT_CAP_OFFLOADS = 0x1,
+ FUN_PORT_CAP_STATS = 0x2,
+ FUN_PORT_CAP_LOOPBACK = 0x4,
+ FUN_PORT_CAP_VPORT = 0x8,
+ FUN_PORT_CAP_TX_PAUSE = 0x10,
+ FUN_PORT_CAP_RX_PAUSE = 0x20,
+ FUN_PORT_CAP_AUTONEG = 0x40,
+ FUN_PORT_CAP_RSS = 0x80,
+ FUN_PORT_CAP_VLAN_OFFLOADS = 0x100,
+ FUN_PORT_CAP_ENCAP_OFFLOADS = 0x200,
+ FUN_PORT_CAP_1000_X = 0x1000,
+ FUN_PORT_CAP_10G_R = 0x2000,
+ FUN_PORT_CAP_40G_R4 = 0x4000,
+ FUN_PORT_CAP_25G_R = 0x8000,
+ FUN_PORT_CAP_50G_R2 = 0x10000,
+ FUN_PORT_CAP_50G_R = 0x20000,
+ FUN_PORT_CAP_100G_R4 = 0x40000,
+ FUN_PORT_CAP_100G_R2 = 0x80000,
+ FUN_PORT_CAP_200G_R4 = 0x100000,
+ FUN_PORT_CAP_FEC_NONE = 0x10000000,
+ FUN_PORT_CAP_FEC_FC = 0x20000000,
+ FUN_PORT_CAP_FEC_RS = 0x40000000,
+};
+
+enum fun_port_brkout_mode {
+ FUN_PORT_BRKMODE_NA = 0x0,
+ FUN_PORT_BRKMODE_NONE = 0x1,
+ FUN_PORT_BRKMODE_2X = 0x2,
+ FUN_PORT_BRKMODE_4X = 0x3,
+};
+
+enum {
+ FUN_PORT_SPEED_AUTO = 0x0,
+ FUN_PORT_SPEED_10M = 0x1,
+ FUN_PORT_SPEED_100M = 0x2,
+ FUN_PORT_SPEED_1G = 0x4,
+ FUN_PORT_SPEED_10G = 0x8,
+ FUN_PORT_SPEED_25G = 0x10,
+ FUN_PORT_SPEED_40G = 0x20,
+ FUN_PORT_SPEED_50G = 0x40,
+ FUN_PORT_SPEED_100G = 0x80,
+ FUN_PORT_SPEED_200G = 0x100,
+};
+
+enum fun_port_duplex_mode {
+ FUN_PORT_FULL_DUPLEX = 0x0,
+ FUN_PORT_HALF_DUPLEX = 0x1,
+};
+
+enum {
+ FUN_PORT_FEC_NA = 0x0,
+ FUN_PORT_FEC_OFF = 0x1,
+ FUN_PORT_FEC_RS = 0x2,
+ FUN_PORT_FEC_FC = 0x4,
+ FUN_PORT_FEC_AUTO = 0x8,
+};
+
+enum fun_port_link_status {
+ FUN_PORT_LINK_UP = 0x0,
+ FUN_PORT_LINK_UP_WITH_ERR = 0x1,
+ FUN_PORT_LINK_DOWN = 0x2,
+};
+
+enum fun_port_led_type {
+ FUN_PORT_LED_OFF = 0x0,
+ FUN_PORT_LED_AMBER = 0x1,
+ FUN_PORT_LED_GREEN = 0x2,
+ FUN_PORT_LED_BEACON_ON = 0x3,
+ FUN_PORT_LED_BEACON_OFF = 0x4,
+};
+
+enum {
+ FUN_PORT_FLAG_MAC_DOWN = 0x1,
+ FUN_PORT_FLAG_MAC_UP = 0x2,
+ FUN_PORT_FLAG_NH_DOWN = 0x4,
+ FUN_PORT_FLAG_NH_UP = 0x8,
+};
+
+enum {
+ FUN_PORT_FLAG_ENABLE_NOTIFY = 0x1,
+};
+
+enum fun_port_lane_attr {
+ FUN_PORT_LANE_1 = 0x1,
+ FUN_PORT_LANE_2 = 0x2,
+ FUN_PORT_LANE_4 = 0x4,
+ FUN_PORT_LANE_SPEED_10G = 0x100,
+ FUN_PORT_LANE_SPEED_25G = 0x200,
+ FUN_PORT_LANE_SPEED_50G = 0x400,
+ FUN_PORT_LANE_SPLIT = 0x8000,
+};
+
+enum fun_admin_port_subop {
+ FUN_ADMIN_PORT_SUBOP_XCVR_READ = 0x23,
+ FUN_ADMIN_PORT_SUBOP_INETADDR_EVENT = 0x24,
+};
+
+enum fun_admin_port_key {
+ FUN_ADMIN_PORT_KEY_ILLEGAL = 0x0,
+ FUN_ADMIN_PORT_KEY_MTU = 0x1,
+ FUN_ADMIN_PORT_KEY_FEC = 0x2,
+ FUN_ADMIN_PORT_KEY_SPEED = 0x3,
+ FUN_ADMIN_PORT_KEY_DEBOUNCE = 0x4,
+ FUN_ADMIN_PORT_KEY_DUPLEX = 0x5,
+ FUN_ADMIN_PORT_KEY_MACADDR = 0x6,
+ FUN_ADMIN_PORT_KEY_LINKMODE = 0x7,
+ FUN_ADMIN_PORT_KEY_BREAKOUT = 0x8,
+ FUN_ADMIN_PORT_KEY_ENABLE = 0x9,
+ FUN_ADMIN_PORT_KEY_DISABLE = 0xa,
+ FUN_ADMIN_PORT_KEY_ERR_DISABLE = 0xb,
+ FUN_ADMIN_PORT_KEY_CAPABILITIES = 0xc,
+ FUN_ADMIN_PORT_KEY_LP_CAPABILITIES = 0xd,
+ FUN_ADMIN_PORT_KEY_STATS_DMA_LOW = 0xe,
+ FUN_ADMIN_PORT_KEY_STATS_DMA_HIGH = 0xf,
+ FUN_ADMIN_PORT_KEY_LANE_ATTRS = 0x10,
+ FUN_ADMIN_PORT_KEY_LED = 0x11,
+ FUN_ADMIN_PORT_KEY_ADVERT = 0x12,
+};
+
+/* Sub-operation header followed by a variable amount of inline data. */
+struct fun_subop_imm {
+ __u8 subop; /* see enum fun_data_op */
+ __u8 flags;
+ __u8 nsgl;
+ __u8 rsvd0;
+ __be32 len;
+
+ __u8 data[];
+};
+
+enum fun_subop_sgl_flags {
+ FUN_SUBOP_SGL_USE_OFF8 = 0x1,
+ FUN_SUBOP_FLAG_FREE_BUF = 0x2,
+ FUN_SUBOP_FLAG_IS_REFBUF = 0x4,
+ FUN_SUBOP_SGL_FLAG_LOCAL = 0x8,
+};
+
+enum fun_data_op {
+ FUN_DATAOP_INVALID = 0x0,
+ FUN_DATAOP_SL = 0x1, /* scatter */
+ FUN_DATAOP_GL = 0x2, /* gather */
+ FUN_DATAOP_SGL = 0x3, /* scatter-gather */
+ FUN_DATAOP_IMM = 0x4, /* immediate data */
+ FUN_DATAOP_RQBUF = 0x8, /* rq buffer */
+};
+
+struct fun_dataop_gl {
+ __u8 subop;
+ __u8 flags;
+ __be16 sgl_off;
+ __be32 sgl_len;
+
+ __be64 sgl_data;
+};
+
+/* Fill in a gather-list data operation. The subop is always FUN_DATAOP_GL;
+ * @sgl_off, @sgl_len and @sgl_data are given in CPU byte order and stored
+ * big-endian. Every member of *@s is written.
+ */
+static inline void fun_dataop_gl_init(struct fun_dataop_gl *s, u8 flags,
+ u16 sgl_off, u32 sgl_len, u64 sgl_data)
+{
+ *s = (struct fun_dataop_gl) {
+ .subop = FUN_DATAOP_GL,
+ .flags = flags,
+ .sgl_off = cpu_to_be16(sgl_off),
+ .sgl_len = cpu_to_be32(sgl_len),
+ .sgl_data = cpu_to_be64(sgl_data),
+ };
+}
+
+struct fun_dataop_imm {
+ __u8 subop;
+ __u8 flags;
+ __be16 rsvd0;
+ __be32 sgl_len;
+};
+
+struct fun_subop_sgl {
+ __u8 subop;
+ __u8 flags;
+ __u8 nsgl;
+ __u8 rsvd0;
+ __be32 sgl_len;
+
+ __be64 sgl_data;
+};
+
+#define FUN_SUBOP_SGL_INIT(_subop, _flags, _nsgl, _sgl_len, _sgl_data) \
+ (struct fun_subop_sgl) { \
+ .subop = (_subop), .flags = (_flags), .nsgl = (_nsgl), \
+ .sgl_len = cpu_to_be32(_sgl_len), \
+ .sgl_data = cpu_to_be64(_sgl_data), \
+ }
+
+struct fun_dataop_rqbuf {
+ __u8 subop;
+ __u8 rsvd0;
+ __be16 cid;
+ __be32 bufoff;
+};
+
+struct fun_dataop_hdr {
+ __u8 nsgl;
+ __u8 flags;
+ __u8 ngather;
+ __u8 nscatter;
+ __be32 total_len;
+
+ struct fun_dataop_imm imm[];
+};
+
+#define FUN_DATAOP_HDR_INIT(_nsgl, _flags, _ngather, _nscatter, _total_len) \
+ (struct fun_dataop_hdr) { \
+ .nsgl = _nsgl, .flags = _flags, .ngather = _ngather, \
+ .nscatter = _nscatter, .total_len = cpu_to_be32(_total_len), \
+ }
+
+enum fun_port_inetaddr_event_type {
+ FUN_PORT_INETADDR_ADD = 0x1,
+ FUN_PORT_INETADDR_DEL = 0x2,
+};
+
+enum fun_port_inetaddr_addr_family {
+ FUN_PORT_INETADDR_IPV4 = 0x1,
+ FUN_PORT_INETADDR_IPV6 = 0x2,
+};
+
+/* Port admin request: the common request header followed by one of the
+ * sub-operation payloads below. Every payload starts with a subop byte and
+ * carries a 32-bit big-endian id.
+ */
+struct fun_admin_port_req {
+ struct fun_admin_req_common common;
+
+ union port_req_subop {
+ struct fun_admin_port_create_req {
+ __u8 subop;
+ __u8 rsvd0;
+ __be16 flags;
+ __be32 id;
+ } create;
+ /* Header for a variable number of write48 key/data words. */
+ struct fun_admin_port_write_req {
+ __u8 subop;
+ __u8 rsvd0;
+ __be16 flags;
+ __be32 id; /* portid */
+
+ struct fun_admin_write48_req write48[];
+ } write;
+ /* Header for a variable number of read48 key words. */
+ struct fun_admin_port_read_req {
+ __u8 subop;
+ __u8 rsvd0;
+ __be16 flags;
+ __be32 id; /* portid */
+
+ struct fun_admin_read48_req read48[];
+ } read;
+ /* Built by FUN_ADMIN_PORT_XCVR_READ_REQ_INIT. Uses the __u8
+ * spelling like the other wire structures in this header.
+ */
+ struct fun_admin_port_xcvr_read_req {
+ __u8 subop;
+ __u8 rsvd0;
+ __be16 flags;
+ __be32 id;
+
+ __u8 bank;
+ __u8 page;
+ __u8 offset;
+ __u8 length;
+ __u8 dev_addr;
+ __u8 rsvd1[3];
+ } xcvr_read;
+ /* Header followed by a variable-length address. */
+ struct fun_admin_port_inetaddr_event_req {
+ __u8 subop;
+ __u8 rsvd0;
+ __u8 event_type;
+ __u8 addr_family;
+ __be32 id;
+
+ __u8 addr[];
+ } inetaddr_event;
+ } u;
+};
+
+#define FUN_ADMIN_PORT_CREATE_REQ_INIT(_subop, _flags, _id) \
+ (struct fun_admin_port_create_req) { \
+ .subop = (_subop), .flags = cpu_to_be16(_flags), \
+ .id = cpu_to_be32(_id), \
+ }
+
+#define FUN_ADMIN_PORT_WRITE_REQ_INIT(_subop, _flags, _id) \
+ (struct fun_admin_port_write_req) { \
+ .subop = (_subop), .flags = cpu_to_be16(_flags), \
+ .id = cpu_to_be32(_id), \
+ }
+
+#define FUN_ADMIN_PORT_READ_REQ_INIT(_subop, _flags, _id) \
+ (struct fun_admin_port_read_req) { \
+ .subop = (_subop), .flags = cpu_to_be16(_flags), \
+ .id = cpu_to_be32(_id), \
+ }
+
+#define FUN_ADMIN_PORT_XCVR_READ_REQ_INIT(_flags, _id, _bank, _page, \
+ _offset, _length, _dev_addr) \
+ ((struct fun_admin_port_xcvr_read_req) { \
+ .subop = FUN_ADMIN_PORT_SUBOP_XCVR_READ, \
+ .flags = cpu_to_be16(_flags), .id = cpu_to_be32(_id), \
+ .bank = (_bank), .page = (_page), .offset = (_offset), \
+ .length = (_length), .dev_addr = (_dev_addr), \
+ })
+
+struct fun_admin_port_rsp {
+ struct fun_admin_rsp_common common;
+
+ union port_rsp_subop {
+ struct fun_admin_port_create_rsp {
+ __u8 subop;
+ __u8 rsvd0[3];
+ __be32 id;
+
+ __be16 lport;
+ __u8 rsvd1[6];
+ } create;
+ struct fun_admin_port_write_rsp {
+ __u8 subop;
+ __u8 rsvd0[3];
+ __be32 id; /* portid */
+
+ struct fun_admin_write48_rsp write48[];
+ } write;
+ struct fun_admin_port_read_rsp {
+ __u8 subop;
+ __u8 rsvd0[3];
+ __be32 id; /* portid */
+
+ struct fun_admin_read48_rsp read48[];
+ } read;
+ struct fun_admin_port_inetaddr_event_rsp {
+ __u8 subop;
+ __u8 rsvd0[3];
+ __be32 id; /* portid */
+ } inetaddr_event;
+ } u;
+};
+
+/* Response to a port transceiver read: the common response header, fields
+ * with the same names as those of struct fun_admin_port_xcvr_read_req, and a
+ * fixed 128-byte data buffer. Uses the __u8 spelling like the other wire
+ * structures in this header.
+ */
+struct fun_admin_port_xcvr_read_rsp {
+ struct fun_admin_rsp_common common;
+
+ __u8 subop;
+ __u8 rsvd0[3];
+ __be32 id;
+
+ __u8 bank;
+ __u8 page;
+ __u8 offset;
+ __u8 length;
+ __u8 dev_addr;
+ __u8 rsvd1[3];
+
+ __u8 data[128];
+};
+
+enum fun_xcvr_type {
+ FUN_XCVR_BASET = 0x0,
+ FUN_XCVR_CU = 0x1,
+ FUN_XCVR_SMF = 0x2,
+ FUN_XCVR_MMF = 0x3,
+ FUN_XCVR_AOC = 0x4,
+ FUN_XCVR_SFPP = 0x10, /* SFP+ or later */
+ FUN_XCVR_QSFPP = 0x11, /* QSFP+ or later */
+ FUN_XCVR_QSFPDD = 0x12, /* QSFP-DD */
+};
+
+struct fun_admin_port_notif {
+ struct fun_admin_rsp_common common;
+
+ __u8 subop;
+ __u8 rsvd0;
+ __be16 id;
+ __be32 speed; /* in 10 Mbps units */
+
+ __u8 link_state;
+ __u8 missed_events;
+ __u8 link_down_reason;
+ __u8 xcvr_type;
+ __u8 flow_ctrl;
+ __u8 fec;
+ __u8 active_lanes;
+ __u8 rsvd1;
+
+ __be64 advertising;
+
+ __be64 lp_advertising;
+};
+
+enum fun_eth_rss_const {
+ FUN_ETH_RSS_MAX_KEY_SIZE = 0x28,
+ FUN_ETH_RSS_MAX_INDIR_ENT = 0x40,
+};
+
+enum fun_eth_hash_alg {
+ FUN_ETH_RSS_ALG_INVALID = 0x0,
+ FUN_ETH_RSS_ALG_TOEPLITZ = 0x1,
+ FUN_ETH_RSS_ALG_CRC32 = 0x2,
+};
+
+struct fun_admin_rss_req {
+ struct fun_admin_req_common common;
+
+ union rss_req_subop {
+ struct fun_admin_rss_create_req {
+ __u8 subop;
+ __u8 rsvd0;
+ __be16 flags;
+ __be32 id;
+
+ __be32 rsvd1;
+ __be32 viid; /* VI flow id */
+
+ __be64 metadata[1];
+
+ __u8 alg;
+ __u8 keylen;
+ __u8 indir_nent;
+ __u8 rsvd2;
+ __be16 key_off;
+ __be16 indir_off;
+
+ struct fun_dataop_hdr dataop;
+ } create;
+ } u;
+};
+
+#define FUN_ADMIN_RSS_CREATE_REQ_INIT(_subop, _flags, _id, _viid, _alg, \
+ _keylen, _indir_nent, _key_off, \
+ _indir_off) \
+ (struct fun_admin_rss_create_req) { \
+ .subop = (_subop), .flags = cpu_to_be16(_flags), \
+ .id = cpu_to_be32(_id), .viid = cpu_to_be32(_viid), \
+ .alg = _alg, .keylen = _keylen, .indir_nent = _indir_nent, \
+ .key_off = cpu_to_be16(_key_off), \
+ .indir_off = cpu_to_be16(_indir_off), \
+ }
+
+struct fun_admin_vi_req {
+ struct fun_admin_req_common common;
+
+ union vi_req_subop {
+ struct fun_admin_vi_create_req {
+ __u8 subop;
+ __u8 rsvd0;
+ __be16 flags;
+ __be32 id;
+
+ __be32 rsvd1;
+ __be32 portid; /* port flow id */
+ } create;
+ } u;
+};
+
+#define FUN_ADMIN_VI_CREATE_REQ_INIT(_subop, _flags, _id, _portid) \
+ (struct fun_admin_vi_create_req) { \
+ .subop = (_subop), .flags = cpu_to_be16(_flags), \
+ .id = cpu_to_be32(_id), .portid = cpu_to_be32(_portid), \
+ }
+
+struct fun_admin_eth_req {
+ struct fun_admin_req_common common;
+
+ union eth_req_subop {
+ struct fun_admin_eth_create_req {
+ __u8 subop;
+ __u8 rsvd0;
+ __be16 flags;
+ __be32 id;
+
+ __be32 rsvd1;
+ __be32 portid; /* port flow id */
+ } create;
+ } u;
+};
+
+#define FUN_ADMIN_ETH_CREATE_REQ_INIT(_subop, _flags, _id, _portid) \
+ (struct fun_admin_eth_create_req) { \
+ .subop = (_subop), .flags = cpu_to_be16(_flags), \
+ .id = cpu_to_be32(_id), .portid = cpu_to_be32(_portid), \
+ }
+
+enum {
+ FUN_ADMIN_SWU_UPGRADE_FLAG_INIT = 0x10,
+ FUN_ADMIN_SWU_UPGRADE_FLAG_COMPLETE = 0x20,
+ FUN_ADMIN_SWU_UPGRADE_FLAG_DOWNGRADE = 0x40,
+ FUN_ADMIN_SWU_UPGRADE_FLAG_ACTIVE_IMAGE = 0x80,
+ FUN_ADMIN_SWU_UPGRADE_FLAG_ASYNC = 0x1,
+};
+
+enum fun_admin_swu_subop {
+ FUN_ADMIN_SWU_SUBOP_GET_VERSION = 0x20,
+ FUN_ADMIN_SWU_SUBOP_UPGRADE = 0x21,
+ FUN_ADMIN_SWU_SUBOP_UPGRADE_DATA = 0x22,
+ FUN_ADMIN_SWU_SUBOP_GET_ALL_VERSIONS = 0x23,
+};
+
+struct fun_admin_swu_req {
+ struct fun_admin_req_common common;
+
+ union swu_req_subop {
+ struct fun_admin_swu_create_req {
+ __u8 subop;
+ __u8 rsvd0;
+ __be16 flags;
+ __be32 id;
+ } create;
+ struct fun_admin_swu_upgrade_req {
+ __u8 subop;
+ __u8 rsvd0;
+ __be16 flags;
+ __be32 id;
+
+ __be32 fourcc;
+ __be32 rsvd1;
+
+ __be64 image_size; /* upgrade image length */
+ } upgrade;
+ struct fun_admin_swu_upgrade_data_req {
+ __u8 subop;
+ __u8 rsvd0;
+ __be16 flags;
+ __be32 id;
+
+ __be32 offset; /* offset of data in this command */
+ __be32 size; /* total size of data in this command */
+ } upgrade_data;
+ } u;
+
+ struct fun_subop_sgl sgl[]; /* in, out buffers through sgl */
+};
+
+#define FUN_ADMIN_SWU_CREATE_REQ_INIT(_subop, _flags, _id) \
+ (struct fun_admin_swu_create_req) { \
+ .subop = (_subop), .flags = cpu_to_be16(_flags), \
+ .id = cpu_to_be32(_id), \
+ }
+
+#define FUN_ADMIN_SWU_UPGRADE_REQ_INIT(_subop, _flags, _id, _fourcc, \
+ _image_size) \
+ (struct fun_admin_swu_upgrade_req) { \
+ .subop = (_subop), .flags = cpu_to_be16(_flags), \
+ .id = cpu_to_be32(_id), .fourcc = cpu_to_be32(_fourcc), \
+ .image_size = cpu_to_be64(_image_size), \
+ }
+
+#define FUN_ADMIN_SWU_UPGRADE_DATA_REQ_INIT(_subop, _flags, _id, _offset, \
+ _size) \
+ (struct fun_admin_swu_upgrade_data_req) { \
+ .subop = (_subop), .flags = cpu_to_be16(_flags), \
+ .id = cpu_to_be32(_id), .offset = cpu_to_be32(_offset), \
+ .size = cpu_to_be32(_size), \
+ }
+
+struct fun_admin_swu_rsp {
+ struct fun_admin_rsp_common common;
+
+ union swu_rsp_subop {
+ struct fun_admin_swu_create_rsp {
+ __u8 subop;
+ __u8 rsvd0;
+ __be16 flags;
+ __be32 id;
+ } create;
+ struct fun_admin_swu_upgrade_rsp {
+ __u8 subop;
+ __u8 rsvd0[3];
+ __be32 id;
+
+ __be32 fourcc;
+ __be32 status;
+
+ __be32 progress;
+ __be32 unused;
+ } upgrade;
+ struct fun_admin_swu_upgrade_data_rsp {
+ __u8 subop;
+ __u8 rsvd0;
+ __be16 flags;
+ __be32 id;
+
+ __be32 offset;
+ __be32 size;
+ } upgrade_data;
+ } u;
+};
+
+enum fun_ktls_version {
+ FUN_KTLS_TLSV2 = 0x20,
+ FUN_KTLS_TLSV3 = 0x30,
+};
+
+enum fun_ktls_cipher {
+ FUN_KTLS_CIPHER_AES_GCM_128 = 0x33,
+ FUN_KTLS_CIPHER_AES_GCM_256 = 0x34,
+ FUN_KTLS_CIPHER_AES_CCM_128 = 0x35,
+ FUN_KTLS_CIPHER_CHACHA20_POLY1305 = 0x36,
+};
+
+enum fun_ktls_modify_flags {
+ FUN_KTLS_MODIFY_REMOVE = 0x1,
+};
+
+struct fun_admin_ktls_create_req {
+ struct fun_admin_req_common common;
+
+ __u8 subop;
+ __u8 rsvd0;
+ __be16 flags;
+ __be32 id;
+};
+
+#define FUN_ADMIN_KTLS_CREATE_REQ_INIT(_subop, _flags, _id) \
+ (struct fun_admin_ktls_create_req) { \
+ .subop = (_subop), .flags = cpu_to_be16(_flags), \
+ .id = cpu_to_be32(_id), \
+ }
+
+struct fun_admin_ktls_create_rsp {
+ struct fun_admin_rsp_common common;
+
+ __u8 subop;
+ __u8 rsvd0[3];
+ __be32 id;
+};
+
+struct fun_admin_ktls_modify_req {
+ struct fun_admin_req_common common;
+
+ __u8 subop;
+ __u8 rsvd0;
+ __be16 flags;
+ __be32 id;
+
+ __be64 tlsid;
+
+ __be32 tcp_seq;
+ __u8 version;
+ __u8 cipher;
+ __u8 rsvd1[2];
+
+ __u8 record_seq[8];
+
+ __u8 key[32];
+
+ __u8 iv[16];
+
+ __u8 salt[8];
+};
+
+#define FUN_ADMIN_KTLS_MODIFY_REQ_INIT(_subop, _flags, _id, _tlsid, _tcp_seq, \
+ _version, _cipher) \
+ (struct fun_admin_ktls_modify_req) { \
+ .subop = (_subop), .flags = cpu_to_be16(_flags), \
+ .id = cpu_to_be32(_id), .tlsid = cpu_to_be64(_tlsid), \
+ .tcp_seq = cpu_to_be32(_tcp_seq), .version = _version, \
+ .cipher = _cipher, \
+ }
+
+struct fun_admin_ktls_modify_rsp {
+ struct fun_admin_rsp_common common;
+
+ __u8 subop;
+ __u8 rsvd0[3];
+ __be32 id;
+
+ __be64 tlsid;
+};
+
+struct fun_req_common {
+ __u8 op;
+ __u8 len8;
+ __be16 flags;
+ __u8 suboff8;
+ __u8 rsvd0;
+ __be16 cid;
+};
+
+struct fun_rsp_common {
+ __u8 op;
+ __u8 len8;
+ __be16 flags;
+ __u8 suboff8;
+ __u8 ret;
+ __be16 cid;
+};
+
+struct fun_cqe_info {
+ __be16 sqhd;
+ __be16 sqid;
+ __be16 cid;
+ __be16 sf_p;
+};
+
+enum fun_eprq_def {
+ FUN_EPRQ_PKT_ALIGN = 0x80,
+};
+
+struct fun_eprq_rqbuf {
+ __be64 bufaddr;
+};
+
+#define FUN_EPRQ_RQBUF_INIT(_bufaddr) \
+ (struct fun_eprq_rqbuf) { \
+ .bufaddr = cpu_to_be64(_bufaddr), \
+ }
+
+enum fun_eth_op {
+ FUN_ETH_OP_TX = 0x1,
+ FUN_ETH_OP_RX = 0x2,
+};
+
+enum {
+ FUN_ETH_OFFLOAD_EN = 0x8000,
+ FUN_ETH_OUTER_EN = 0x4000,
+ FUN_ETH_INNER_LSO = 0x2000,
+ FUN_ETH_INNER_TSO = 0x1000,
+ FUN_ETH_OUTER_IPV6 = 0x800,
+ FUN_ETH_OUTER_UDP = 0x400,
+ FUN_ETH_INNER_IPV6 = 0x200,
+ FUN_ETH_INNER_UDP = 0x100,
+ FUN_ETH_UPDATE_OUTER_L3_LEN = 0x80,
+ FUN_ETH_UPDATE_OUTER_L3_CKSUM = 0x40,
+ FUN_ETH_UPDATE_OUTER_L4_LEN = 0x20,
+ FUN_ETH_UPDATE_OUTER_L4_CKSUM = 0x10,
+ FUN_ETH_UPDATE_INNER_L3_LEN = 0x8,
+ FUN_ETH_UPDATE_INNER_L3_CKSUM = 0x4,
+ FUN_ETH_UPDATE_INNER_L4_LEN = 0x2,
+ FUN_ETH_UPDATE_INNER_L4_CKSUM = 0x1,
+};
+
+struct fun_eth_offload {
+ __be16 flags; /* combination of above flags */
+ __be16 mss; /* TSO max seg size */
+ __be16 tcp_doff_flags; /* TCP data offset + flags 16b word */
+ __be16 vlan;
+
+ __be16 inner_l3_off; /* Inner L3 header offset */
+ __be16 inner_l4_off; /* Inner L4 header offset */
+ __be16 outer_l3_off; /* Outer L3 header offset */
+ __be16 outer_l4_off; /* Outer L4 header offset */
+};
+
+/* Fill in every member of a TX offload descriptor. @tcp_doff_flags and @vlan
+ * are already big-endian and are stored unchanged; all other values are given
+ * in CPU byte order and converted to big-endian.
+ */
+static inline void fun_eth_offload_init(struct fun_eth_offload *s, u16 flags,
+ u16 mss, __be16 tcp_doff_flags,
+ __be16 vlan, u16 inner_l3_off,
+ u16 inner_l4_off, u16 outer_l3_off,
+ u16 outer_l4_off)
+{
+ *s = (struct fun_eth_offload) {
+ .flags = cpu_to_be16(flags),
+ .mss = cpu_to_be16(mss),
+ .tcp_doff_flags = tcp_doff_flags,
+ .vlan = vlan,
+ .inner_l3_off = cpu_to_be16(inner_l3_off),
+ .inner_l4_off = cpu_to_be16(inner_l4_off),
+ .outer_l3_off = cpu_to_be16(outer_l3_off),
+ .outer_l4_off = cpu_to_be16(outer_l4_off),
+ };
+}
+
+struct fun_eth_tls {
+ __be64 tlsid;
+};
+
+enum {
+ FUN_ETH_TX_TLS = 0x8000,
+};
+
+struct fun_eth_tx_req {
+ __u8 op;
+ __u8 len8;
+ __be16 flags;
+ __u8 suboff8;
+ __u8 repr_idn;
+ __be16 encap_proto;
+
+ struct fun_eth_offload offload;
+
+ struct fun_dataop_hdr dataop;
+};
+
+/* RX classification vector: a 16-bit big-endian word of packed fields.
+ * Each field below is described by a shift (_S) and a mask (_M) applied
+ * after shifting:
+ *   IL4_PROT   bits 14:13
+ *   IL3_PROT   bits 12:11
+ *   OL4_PROT   bits 10:8
+ *   ENCAP_TYPE bits 7:6
+ *   OL3_PROT   bits 5:4
+ *   VLAN_TYPE  bit  3
+ *   L2_TYPE    bit  2
+ * No field is defined here for bits 15 and 1:0.
+ */
+struct fun_eth_rx_cv {
+ __be16 il4_prot_to_l2_type;
+};
+
+#define FUN_ETH_RX_CV_IL4_PROT_S 13U
+#define FUN_ETH_RX_CV_IL4_PROT_M 0x3
+
+#define FUN_ETH_RX_CV_IL3_PROT_S 11U
+#define FUN_ETH_RX_CV_IL3_PROT_M 0x3
+
+#define FUN_ETH_RX_CV_OL4_PROT_S 8U
+#define FUN_ETH_RX_CV_OL4_PROT_M 0x7
+
+#define FUN_ETH_RX_CV_ENCAP_TYPE_S 6U
+#define FUN_ETH_RX_CV_ENCAP_TYPE_M 0x3
+
+#define FUN_ETH_RX_CV_OL3_PROT_S 4U
+#define FUN_ETH_RX_CV_OL3_PROT_M 0x3
+
+#define FUN_ETH_RX_CV_VLAN_TYPE_S 3U
+#define FUN_ETH_RX_CV_VLAN_TYPE_M 0x1
+
+#define FUN_ETH_RX_CV_L2_TYPE_S 2U
+#define FUN_ETH_RX_CV_L2_TYPE_M 0x1
+
+/* Values found in RX classification vector fields. Several enumerators share
+ * a value (IP, TCP and VXLAN are 0x2; IP6, UDP and MPLS are 0x3), so a value
+ * can only be interpreted together with the field it was read from.
+ * NOTE(review): presumably IP/IP6 apply to the L3 fields, TCP/UDP to the L4
+ * fields and VXLAN/MPLS to the encap type field -- confirm against users.
+ */
+enum fun_rx_cv {
+ FUN_RX_CV_NONE = 0x0,
+ FUN_RX_CV_IP = 0x2,
+ FUN_RX_CV_IP6 = 0x3,
+ FUN_RX_CV_TCP = 0x2,
+ FUN_RX_CV_UDP = 0x3,
+ FUN_RX_CV_VXLAN = 0x2,
+ FUN_RX_CV_MPLS = 0x3,
+};
+
+struct fun_eth_cqe {
+ __u8 op;
+ __u8 len8;
+ __u8 nsgl;
+ __u8 repr_idn;
+ __be32 pkt_len;
+
+ __be64 timestamp;
+
+ __be16 pkt_cv;
+ __be16 rsvd0;
+ __be32 hash;
+
+ __be16 encap_proto;
+ __be16 vlan;
+ __be32 rsvd1;
+
+ __be32 buf_offset;
+ __be16 headroom;
+ __be16 csum;
+};
+
+enum fun_admin_adi_attr {
+ FUN_ADMIN_ADI_ATTR_MACADDR = 0x1,
+ FUN_ADMIN_ADI_ATTR_VLAN = 0x2,
+ FUN_ADMIN_ADI_ATTR_RATE = 0x3,
+};
+
+struct fun_adi_param {
+ union adi_param {
+ struct fun_adi_mac {
+ __be64 addr;
+ } mac;
+ struct fun_adi_vlan {
+ __be32 rsvd;
+ __be16 eth_type;
+ __be16 tci;
+ } vlan;
+ struct fun_adi_rate {
+ __be32 rsvd;
+ __be32 tx_mbps;
+ } rate;
+ } u;
+};
+
+#define FUN_ADI_MAC_INIT(_addr) \
+ (struct fun_adi_mac) { \
+ .addr = cpu_to_be64(_addr), \
+ }
+
+#define FUN_ADI_VLAN_INIT(_eth_type, _tci) \
+ (struct fun_adi_vlan) { \
+ .eth_type = cpu_to_be16(_eth_type), .tci = cpu_to_be16(_tci), \
+ }
+
+#define FUN_ADI_RATE_INIT(_tx_mbps) \
+ (struct fun_adi_rate) { \
+ .tx_mbps = cpu_to_be32(_tx_mbps), \
+ }
+
+struct fun_admin_adi_req {
+ struct fun_admin_req_common common;
+
+ union adi_req_subop {
+ struct fun_admin_adi_write_req {
+ __u8 subop;
+ __u8 attribute;
+ __be16 rsvd;
+ __be32 id;
+
+ struct fun_adi_param param;
+ } write;
+ } u;
+};
+
+#define FUN_ADMIN_ADI_WRITE_REQ_INIT(_subop, _attribute, _id) \
+ (struct fun_admin_adi_write_req) { \
+ .subop = (_subop), .attribute = (_attribute), \
+ .id = cpu_to_be32(_id), \
+ }
+
+#endif /* __FUN_HCI_H */
diff --git a/drivers/net/ethernet/fungible/funcore/fun_queue.c b/drivers/net/ethernet/fungible/funcore/fun_queue.c
new file mode 100644
index 000000000..8ab9f6843
--- /dev/null
+++ b/drivers/net/ethernet/fungible/funcore/fun_queue.c
@@ -0,0 +1,601 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause)
+
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/log2.h>
+#include <linux/mm.h>
+#include <linux/netdevice.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+
+#include "fun_dev.h"
+#include "fun_queue.h"
+
+/* Allocate the memory backing a queue:
+ *  - a DMA-coherent HW descriptor ring of @depth entries of @hw_desc_sz bytes,
+ *  - if @wb is set, a 64-bit HW write-back word placed right after the ring
+ *    in the same DMA allocation,
+ *  - if @sw_desc_sz is non-zero, a zeroed SW state ring of @depth entries of
+ *    @sw_desc_sz bytes.
+ *
+ * @numa_node selects the node to allocate on; NUMA_NO_NODE means the device's
+ * own node. On success the HW ring's virtual address is returned, its DMA
+ * address is stored in *@dma_addr, and *@sw_va / *@wb_va are set for the parts
+ * that were requested. Returns NULL if either allocation fails, in which case
+ * nothing is left allocated.
+ */
+void *fun_alloc_ring_mem(struct device *dma_dev, size_t depth,
+			 size_t hw_desc_sz, size_t sw_desc_sz, bool wb,
+			 int numa_node, dma_addr_t *dma_addr, void **sw_va,
+			 volatile __be64 **wb_va)
+{
+	const int orig_node = dev_to_node(dma_dev);
+	const size_t ring_bytes = hw_desc_sz * depth;
+	const size_t dma_sz = wb ? ring_bytes + sizeof(u64) : ring_bytes;
+	void *hw_va;
+
+	if (numa_node == NUMA_NO_NODE)
+		numa_node = orig_node;
+
+	/* The device's node is switched only for the duration of the coherent
+	 * allocation and restored right after.
+	 */
+	set_dev_node(dma_dev, numa_node);
+	hw_va = dma_alloc_coherent(dma_dev, dma_sz, dma_addr, GFP_KERNEL);
+	set_dev_node(dma_dev, orig_node);
+	if (!hw_va)
+		return NULL;
+
+	if (sw_desc_sz) {
+		void *sw_ring = kvzalloc_node(sw_desc_sz * depth, GFP_KERNEL,
+					      numa_node);
+
+		*sw_va = sw_ring;
+		if (!sw_ring) {
+			dma_free_coherent(dma_dev, dma_sz, hw_va, *dma_addr);
+			return NULL;
+		}
+	}
+
+	/* The write-back word occupies the last 8 bytes of the DMA area. */
+	if (wb)
+		*wb_va = hw_va + ring_bytes;
+	return hw_va;
+}
+EXPORT_SYMBOL_GPL(fun_alloc_ring_mem);
+
+/* Release ring memory obtained from fun_alloc_ring_mem(). @depth, @hw_desc_sz
+ * and @wb must match the values used at allocation so that the size of the
+ * DMA area is recomputed identically. A NULL @hw_va skips the DMA free; the
+ * SW ring @sw_va is always handed to kvfree().
+ */
+void fun_free_ring_mem(struct device *dma_dev, size_t depth, size_t hw_desc_sz,
+		       bool wb, void *hw_va, dma_addr_t dma_addr, void *sw_va)
+{
+	if (hw_va) {
+		size_t dma_sz = depth * hw_desc_sz + (wb ? sizeof(u64) : 0);
+
+		dma_free_coherent(dma_dev, dma_sz, hw_va, dma_addr);
+	}
+
+	kvfree(sw_va);
+}
+EXPORT_SYMBOL_GPL(fun_free_ring_mem);
+
+/* Prepare and issue an admin command to create an SQ on the device with the
+ * provided parameters. If the queue ID is auto-allocated by the device it is
+ * returned in *sqidp.
+ *
+ * Returns -EINVAL if sq_depth exceeds fdev->q_depth, otherwise the result of
+ * the admin command (0 on success). On success *dbp receives the doorbell
+ * address of the queue ID reported by the device; *sqidp is written only when
+ * FUN_ADMIN_RES_CREATE_FLAG_ALLOCATOR is set in flags.
+ */
+int fun_sq_create(struct fun_dev *fdev, u16 flags, u32 sqid, u32 cqid,
+ u8 sqe_size_log2, u32 sq_depth, dma_addr_t dma_addr,
+ u8 coal_nentries, u8 coal_usec, u32 irq_num,
+ u32 scan_start_id, u32 scan_end_id,
+ u32 rq_buf_size_log2, u32 *sqidp, u32 __iomem **dbp)
+{
+ /* The response is written over the request; both share one buffer. */
+ union {
+ struct fun_admin_epsq_req req;
+ struct fun_admin_generic_create_rsp rsp;
+ } cmd;
+ dma_addr_t wb_addr;
+ u32 hw_qid;
+ int rc;
+
+ if (sq_depth > fdev->q_depth)
+ return -EINVAL;
+ /* For RQs the entry size is fixed; the caller's value is ignored. */
+ if (flags & FUN_ADMIN_EPSQ_CREATE_FLAG_RQ)
+ sqe_size_log2 = ilog2(sizeof(struct fun_eprq_rqbuf));
+
+ /* The write-back address is the first byte past the descriptor ring. */
+ wb_addr = dma_addr + (sq_depth << sqe_size_log2);
+
+ cmd.req.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_EPSQ,
+ sizeof(cmd.req));
+ /* Note the depth is passed to the device as sq_depth - 1. */
+ cmd.req.u.create =
+ FUN_ADMIN_EPSQ_CREATE_REQ_INIT(FUN_ADMIN_SUBOP_CREATE, flags,
+ sqid, cqid, sqe_size_log2,
+ sq_depth - 1, dma_addr, 0,
+ coal_nentries, coal_usec,
+ irq_num, scan_start_id,
+ scan_end_id, 0,
+ rq_buf_size_log2,
+ ilog2(sizeof(u64)), wb_addr);
+
+ rc = fun_submit_admin_sync_cmd(fdev, &cmd.req.common,
+ &cmd.rsp, sizeof(cmd.rsp), 0);
+ if (rc)
+ return rc;
+
+ hw_qid = be32_to_cpu(cmd.rsp.id);
+ *dbp = fun_sq_db_addr(fdev, hw_qid);
+ if (flags & FUN_ADMIN_RES_CREATE_FLAG_ALLOCATOR)
+ *sqidp = hw_qid;
+ return rc;
+}
+EXPORT_SYMBOL_GPL(fun_sq_create);
+
+/* Prepare and issue an admin command to create a CQ on the device with the
+ * provided parameters. If the queue ID is auto-allocated by the device it is
+ * returned in *cqidp.
+ *
+ * Returns -EINVAL if cq_depth exceeds fdev->q_depth, otherwise the result of
+ * the admin command (0 on success). On success *dbp receives the doorbell
+ * address of the queue ID reported by the device; *cqidp is written only when
+ * FUN_ADMIN_RES_CREATE_FLAG_ALLOCATOR is set in flags.
+ */
+int fun_cq_create(struct fun_dev *fdev, u16 flags, u32 cqid, u32 rqid,
+ u8 cqe_size_log2, u32 cq_depth, dma_addr_t dma_addr,
+ u16 headroom, u16 tailroom, u8 coal_nentries, u8 coal_usec,
+ u32 irq_num, u32 scan_start_id, u32 scan_end_id, u32 *cqidp,
+ u32 __iomem **dbp)
+{
+ /* The response is written over the request; both share one buffer. */
+ union {
+ struct fun_admin_epcq_req req;
+ struct fun_admin_generic_create_rsp rsp;
+ } cmd;
+ u32 hw_qid;
+ int rc;
+
+ if (cq_depth > fdev->q_depth)
+ return -EINVAL;
+
+ cmd.req.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_EPCQ,
+ sizeof(cmd.req));
+ /* The depth is passed as cq_depth - 1. The initializer takes tailroom
+ * before headroom (the reverse of this function's parameter order) and
+ * headroom is passed halved.
+ * NOTE(review): presumably the device expresses headroom in 2-byte
+ * units -- confirm.
+ */
+ cmd.req.u.create =
+ FUN_ADMIN_EPCQ_CREATE_REQ_INIT(FUN_ADMIN_SUBOP_CREATE, flags,
+ cqid, rqid, cqe_size_log2,
+ cq_depth - 1, dma_addr, tailroom,
+ headroom / 2, 0, coal_nentries,
+ coal_usec, irq_num,
+ scan_start_id, scan_end_id, 0);
+
+ rc = fun_submit_admin_sync_cmd(fdev, &cmd.req.common,
+ &cmd.rsp, sizeof(cmd.rsp), 0);
+ if (rc)
+ return rc;
+
+ hw_qid = be32_to_cpu(cmd.rsp.id);
+ *dbp = fun_cq_db_addr(fdev, hw_qid);
+ if (flags & FUN_ADMIN_RES_CREATE_FLAG_ALLOCATOR)
+ *cqidp = hw_qid;
+ return rc;
+}
+EXPORT_SYMBOL_GPL(fun_cq_create);
+
+/* True if the funq's SQ was configured with a head write-back address. */
+static bool fun_sq_is_head_wb(const struct fun_queue *funq)
+{
+	u16 wb_flag = funq->sq_flags &
+		      FUN_ADMIN_EPSQ_CREATE_FLAG_HEAD_WB_ADDRESS;
+
+	return wb_flag != 0;
+}
+
+/* Release every page still attached to the RQ: undo its DMA mapping, drop the
+ * page reference, and clear the slot so that it is skipped on a later pass.
+ */
+static void fun_clean_rq(struct fun_queue *funq)
+{
+	struct device *dma_dev = funq->fdev->dev;
+	unsigned int idx;
+
+	for (idx = 0; idx < funq->rq_depth; idx++) {
+		struct fun_rq_info *slot = &funq->rq_info[idx];
+
+		if (!slot->page)
+			continue;
+
+		dma_unmap_page(dma_dev, slot->dma, PAGE_SIZE, DMA_FROM_DEVICE);
+		put_page(slot->page);
+		slot->page = NULL;
+	}
+}
+
+/* Populate every RQ slot with one freshly allocated page, mapped for
+ * device-to-memory DMA, and write the matching HW descriptor. Pages are
+ * allocated on the device's node. On success rq_tail is set to the last ring
+ * index and 0 is returned.
+ *
+ * Returns -ENOMEM on the first allocation or mapping failure. Slots filled
+ * before the failure are left populated; the failing slot is left with a NULL
+ * page.
+ */
+static int fun_fill_rq(struct fun_queue *funq)
+{
+	struct device *dma_dev = funq->fdev->dev;
+	int nid = dev_to_node(dma_dev);
+	int i;
+
+	for (i = 0; i < funq->rq_depth; i++) {
+		struct fun_rq_info *slot = &funq->rq_info[i];
+
+		slot->page = alloc_pages_node(nid, GFP_KERNEL, 0);
+		if (unlikely(!slot->page))
+			return -ENOMEM;
+
+		slot->dma = dma_map_page(dma_dev, slot->page, 0, PAGE_SIZE,
+					 DMA_FROM_DEVICE);
+		if (unlikely(dma_mapping_error(dma_dev, slot->dma))) {
+			put_page(slot->page);
+			slot->page = NULL;
+			return -ENOMEM;
+		}
+
+		funq->rqes[i] = FUN_EPRQ_RQBUF_INIT(slot->dma);
+	}
+
+	funq->rq_tail = funq->rq_depth - 1;
+	return 0;
+}
+
+/* Record that the data being consumed starts at buf_offset within an RQ
+ * buffer. An offset that is not greater than the previously recorded one is
+ * taken to mean that a new buffer has been started: the current buffer is
+ * synced back to the device, counted as needing to be re-posted, and
+ * rq_buf_idx advances (wrapping at rq_depth). The recorded offset is updated
+ * in all cases.
+ */
+static void fun_rq_update_pos(struct fun_queue *funq, int buf_offset)
+{
+ if (buf_offset <= funq->rq_buf_offset) {
+ struct fun_rq_info *rqinfo = &funq->rq_info[funq->rq_buf_idx];
+ struct device *dev = funq->fdev->dev;
+
+ /* Done with this buffer: return the whole page to the device. */
+ dma_sync_single_for_device(dev, rqinfo->dma, PAGE_SIZE,
+ DMA_FROM_DEVICE);
+ funq->num_rqe_to_fill++;
+ if (++funq->rq_buf_idx == funq->rq_depth)
+ funq->rq_buf_idx = 0;
+ }
+ funq->rq_buf_offset = buf_offset;
+}
+
+/* Given a command response with data scattered across >= 1 RQ buffers return
+ * a pointer to a contiguous buffer containing all the data. If the data is in
+ * one RQ buffer the start address within that buffer is returned, otherwise a
+ * new buffer is allocated and the data is gathered into it.
+ *
+ * *need_free is set to true when the returned buffer was allocated here and
+ * must be kfree()d by the caller, and to false in the single-buffer case. If
+ * the gather allocation fails NULL is returned and *need_free is left
+ * untouched, but the RQ position is still advanced past all fragments.
+ */
+static void *fun_data_from_rq(struct fun_queue *funq,
+ const struct fun_rsp_common *rsp, bool *need_free)
+{
+ u32 bufoff, total_len, remaining, fragsize, dataoff;
+ struct device *dma_dev = funq->fdev->dev;
+ const struct fun_dataop_rqbuf *databuf;
+ const struct fun_dataop_hdr *dataop;
+ const struct fun_rq_info *rqinfo;
+ void *data;
+
+ /* suboff8 times 8 is the byte offset of the data-op header in rsp. */
+ dataop = (void *)rsp + rsp->suboff8 * 8;
+ total_len = be32_to_cpu(dataop->total_len);
+
+ if (likely(dataop->nsgl == 1)) {
+ databuf = (struct fun_dataop_rqbuf *)dataop->imm;
+ bufoff = be32_to_cpu(databuf->bufoff);
+ /* May move rq_buf_idx to the next buffer, so look up rqinfo after. */
+ fun_rq_update_pos(funq, bufoff);
+ rqinfo = &funq->rq_info[funq->rq_buf_idx];
+ dma_sync_single_for_cpu(dma_dev, rqinfo->dma + bufoff,
+ total_len, DMA_FROM_DEVICE);
+ *need_free = false;
+ return page_address(rqinfo->page) + bufoff;
+ }
+
+ /* For scattered completions gather the fragments into one buffer. */
+
+ data = kmalloc(total_len, GFP_ATOMIC);
+ /* NULL is OK here. In case of failure we still need to consume the data
+ * for proper buffer accounting but indicate an error in the response.
+ */
+ if (likely(data))
+ *need_free = true;
+
+ dataoff = 0;
+ /* Each fragment starts at offset 0 of its buffer and is at most
+ * PAGE_SIZE bytes long.
+ */
+ for (remaining = total_len; remaining; remaining -= fragsize) {
+ fun_rq_update_pos(funq, 0);
+ fragsize = min_t(unsigned int, PAGE_SIZE, remaining);
+ if (data) {
+ rqinfo = &funq->rq_info[funq->rq_buf_idx];
+ dma_sync_single_for_cpu(dma_dev, rqinfo->dma, fragsize,
+ DMA_FROM_DEVICE);
+ memcpy(data + dataoff, page_address(rqinfo->page),
+ fragsize);
+ dataoff += fragsize;
+ }
+ }
+ return data;
+}
+
+/* Process up to max new CQEs of a funq's CQ, invoking the funq's CQ callback
+ * (if one is set) for each. A max of 0 means "up to cq_depth - 1". Returns the
+ * number of CQEs processed. This function advances cq_head/cq_phase but does
+ * not write any doorbell.
+ */
+unsigned int __fun_process_cq(struct fun_queue *funq, unsigned int max)
+{
+ const struct fun_cqe_info *info;
+ struct fun_rsp_common *rsp;
+ unsigned int new_cqes;
+ u16 sf_p, flags;
+ bool need_free;
+ void *cqe;
+
+ if (!max)
+ max = funq->cq_depth - 1;
+
+ for (new_cqes = 0; new_cqes < max; new_cqes++) {
+ cqe = funq->cqes + (funq->cq_head << funq->cqe_size_log2);
+ info = funq_cqe_info(funq, cqe);
+ sf_p = be16_to_cpu(info->sf_p);
+
+ /* Bit 0 of sf_p is the phase tag; a mismatch means the entry at
+ * cq_head has not been written for the current pass yet.
+ */
+ if ((sf_p & 1) != funq->cq_phase)
+ break;
+
+ /* ensure the phase tag is read before other CQE fields */
+ dma_rmb();
+
+ /* The expected phase flips each time the head wraps around. */
+ if (++funq->cq_head == funq->cq_depth) {
+ funq->cq_head = 0;
+ funq->cq_phase = !funq->cq_phase;
+ }
+
+ rsp = cqe;
+ flags = be16_to_cpu(rsp->flags);
+
+ need_free = false;
+ if (unlikely(flags & FUN_REQ_COMMON_FLAG_CQE_IN_RQBUF)) {
+ rsp = fun_data_from_rq(funq, rsp, &need_free);
+ if (!rsp) {
+ /* Gathering failed: hand the callback the CQE itself,
+ * with len8 forced to 1 and an ENOMEM status unless
+ * the CQE already carries a non-zero status.
+ */
+ rsp = cqe;
+ rsp->len8 = 1;
+ if (rsp->ret == 0)
+ rsp->ret = ENOMEM;
+ }
+ }
+
+ if (funq->cq_cb)
+ funq->cq_cb(funq, funq->cb_data, rsp, info);
+ if (need_free)
+ kfree(rsp);
+ }
+
+ dev_dbg(funq->fdev->dev, "CQ %u, new CQEs %u/%u, head %u, phase %u\n",
+ funq->cqid, new_cqes, max, funq->cq_head, funq->cq_phase);
+ return new_cqes;
+}
+
+/* Process up to max new CQEs (see __fun_process_cq()) and then update the
+ * device: re-post any RQ buffers consumed by advancing the RQ tail and
+ * ringing the RQ doorbell, and write the new CQ head together with
+ * FUN_DB_IRQ_ARM_F to the CQ doorbell. Returns the number of CQEs processed.
+ */
+unsigned int fun_process_cq(struct fun_queue *funq, unsigned int max)
+{
+	unsigned int ncqes = __fun_process_cq(funq, max);
+	u16 consumed = funq->num_rqe_to_fill;
+
+	if (consumed) {
+		funq->rq_tail = (funq->rq_tail + consumed) % funq->rq_depth;
+		funq->num_rqe_to_fill = 0;
+		writel(funq->rq_tail, funq->rq_db);
+	}
+
+	writel(funq->cq_head | FUN_DB_IRQ_ARM_F, funq->cq_db);
+	return ncqes;
+}
+
+/* Allocate the SQ descriptor ring of a funq, including the head write-back
+ * word when the SQ uses one. No SW state ring is requested. Returns 0 or
+ * -ENOMEM.
+ */
+static int fun_alloc_sqes(struct fun_queue *funq)
+{
+	void *ring;
+
+	ring = fun_alloc_ring_mem(funq->fdev->dev, funq->sq_depth,
+				  1 << funq->sqe_size_log2, 0,
+				  fun_sq_is_head_wb(funq), NUMA_NO_NODE,
+				  &funq->sq_dma_addr, NULL, &funq->sq_head);
+	funq->sq_cmds = ring;
+	if (!ring)
+		return -ENOMEM;
+	return 0;
+}
+
+/* Allocate the CQ descriptor ring of a funq. Neither a write-back word nor a
+ * SW state ring is requested. Returns 0 or -ENOMEM.
+ */
+static int fun_alloc_cqes(struct fun_queue *funq)
+{
+	void *ring;
+
+	ring = fun_alloc_ring_mem(funq->fdev->dev, funq->cq_depth,
+				  1 << funq->cqe_size_log2, 0, false,
+				  NUMA_NO_NODE, &funq->cq_dma_addr, NULL,
+				  NULL);
+	funq->cqes = ring;
+	if (!ring)
+		return -ENOMEM;
+	return 0;
+}
+
+/* Allocate the RQ descriptor ring of a funq together with the SW ring of
+ * struct fun_rq_info that tracks the page behind each descriptor. Returns 0 or
+ * -ENOMEM.
+ */
+static int fun_alloc_rqes(struct fun_queue *funq)
+{
+	void *ring;
+
+	ring = fun_alloc_ring_mem(funq->fdev->dev, funq->rq_depth,
+				  sizeof(*funq->rqes), sizeof(*funq->rq_info),
+				  false, NUMA_NO_NODE, &funq->rq_dma_addr,
+				  (void **)&funq->rq_info, NULL);
+	funq->rqes = ring;
+	if (!ring)
+		return -ENOMEM;
+	return 0;
+}
+
+/* Free a funq and everything it owns: the CQ and SQ rings, and, if an RQ ring
+ * exists, its pages, HW ring and SW info ring. Rings that were never allocated
+ * (NULL) are skipped, so this is usable on a partially constructed funq.
+ */
+void fun_free_queue(struct fun_queue *funq)
+{
+	struct device *dma_dev = funq->fdev->dev;
+	size_t cqe_sz = 1 << funq->cqe_size_log2;
+	size_t sqe_sz = 1 << funq->sqe_size_log2;
+
+	fun_free_ring_mem(dma_dev, funq->cq_depth, cqe_sz, false, funq->cqes,
+			  funq->cq_dma_addr, NULL);
+	fun_free_ring_mem(dma_dev, funq->sq_depth, sqe_sz,
+			  fun_sq_is_head_wb(funq), funq->sq_cmds,
+			  funq->sq_dma_addr, NULL);
+
+	if (funq->rqes) {
+		/* Pages must be released before the info ring tracking them. */
+		fun_clean_rq(funq);
+		fun_free_ring_mem(dma_dev, funq->rq_depth, sizeof(*funq->rqes),
+				  false, funq->rqes, funq->rq_dma_addr,
+				  funq->rq_info);
+	}
+
+	kfree(funq);
+}
+
+/* Allocate and initialize a funq's structures.
+ *
+ * Allocates the funq, copies the parameters from req, picks the initial
+ * CQ/SQ/RQ IDs from qid, and allocates the CQ and SQ rings plus, when
+ * req->rq_depth is non-zero, the RQ ring and its pages. Nothing is created on
+ * the device here. Returns the new funq, or NULL on allocation failure, in
+ * which case everything allocated so far has been freed.
+ */
+struct fun_queue *fun_alloc_queue(struct fun_dev *fdev, int qid,
+ const struct fun_queue_alloc_req *req)
+{
+ struct fun_queue *funq = kzalloc(sizeof(*funq), GFP_KERNEL);
+
+ if (!funq)
+ return NULL;
+
+ funq->fdev = fdev;
+ spin_lock_init(&funq->sq_lock);
+
+ funq->qid = qid;
+
+ /* Initial CQ/SQ/RQ ids */
+ if (req->rq_depth) {
+ funq->cqid = 2 * qid;
+ if (funq->qid) {
+ /* I/O Q: use rqid = cqid, sqid = +1 */
+ funq->rqid = funq->cqid;
+ funq->sqid = funq->rqid + 1;
+ } else {
+ /* Admin Q: sqid is always 0, use ID 1 for RQ */
+ funq->sqid = 0;
+ funq->rqid = 1;
+ }
+ } else {
+ /* No RQ: rqid is not assigned and keeps its zeroed value. */
+ funq->cqid = qid;
+ funq->sqid = qid;
+ }
+
+ funq->cq_flags = req->cq_flags;
+ funq->sq_flags = req->sq_flags;
+
+ funq->cqe_size_log2 = req->cqe_size_log2;
+ funq->sqe_size_log2 = req->sqe_size_log2;
+
+ funq->cq_depth = req->cq_depth;
+ funq->sq_depth = req->sq_depth;
+
+ funq->cq_intcoal_nentries = req->cq_intcoal_nentries;
+ funq->cq_intcoal_usec = req->cq_intcoal_usec;
+
+ funq->sq_intcoal_nentries = req->sq_intcoal_nentries;
+ funq->sq_intcoal_usec = req->sq_intcoal_usec;
+
+ if (fun_alloc_cqes(funq))
+ goto free_funq;
+
+ /* The CQ ring starts out zeroed, so the first pass expects phase 1. */
+ funq->cq_phase = 1;
+
+ if (fun_alloc_sqes(funq))
+ goto free_funq;
+
+ if (req->rq_depth) {
+ funq->rq_flags = req->rq_flags | FUN_ADMIN_EPSQ_CREATE_FLAG_RQ;
+ funq->rq_depth = req->rq_depth;
+ /* -1 is below every real offset, so the first data placed in
+ * the RQ does not advance past buffer 0.
+ */
+ funq->rq_buf_offset = -1;
+
+ if (fun_alloc_rqes(funq) || fun_fill_rq(funq))
+ goto free_funq;
+ }
+
+ /* No interrupt vector yet; fun_request_irq() rejects negative values. */
+ funq->cq_vector = -1;
+ /* The fun_cqe_info occupies the last bytes of each CQE. */
+ funq->cqe_info_offset = (1 << funq->cqe_size_log2) - sizeof(struct fun_cqe_info);
+
+ /* SQ/CQ 0 are implicitly created, assign their doorbells now.
+ * Other queues are assigned doorbells at their explicit creation.
+ */
+ if (funq->sqid == 0)
+ funq->sq_db = fun_sq_db_addr(fdev, 0);
+ if (funq->cqid == 0)
+ funq->cq_db = fun_cq_db_addr(fdev, 0);
+
+ return funq;
+
+free_funq:
+ /* fun_free_queue() copes with rings that were never allocated. */
+ fun_free_queue(funq);
+ return NULL;
+}
+
+/* Create a funq's CQ on the device. The CQ is tied to the funq's RQ only when
+ * the CQ flags request it; otherwise an invalid RQ ID is passed. On success
+ * funq->cq_db is set, and funq->cqid is updated if the device allocated the
+ * ID. Returns 0 or the error from fun_cq_create().
+ */
+static int fun_create_cq(struct fun_queue *funq)
+{
+	unsigned int rqid;
+	int err;
+
+	if (funq->cq_flags & FUN_ADMIN_EPCQ_CREATE_FLAG_RQ)
+		rqid = funq->rqid;
+	else
+		rqid = FUN_HCI_ID_INVALID;
+
+	err = fun_cq_create(funq->fdev, funq->cq_flags, funq->cqid, rqid,
+			    funq->cqe_size_log2, funq->cq_depth,
+			    funq->cq_dma_addr, 0, 0, funq->cq_intcoal_nentries,
+			    funq->cq_intcoal_usec, funq->cq_vector, 0, 0,
+			    &funq->cqid, &funq->cq_db);
+	if (err)
+		return err;
+
+	dev_dbg(funq->fdev->dev, "created CQ %u\n", funq->cqid);
+	return 0;
+}
+
+/* Create a funq's SQ on the device, bound to the funq's CQ. On success
+ * funq->sq_db is set, and funq->sqid is updated if the device allocated the
+ * ID. Returns 0 or the error from fun_sq_create().
+ */
+static int fun_create_sq(struct fun_queue *funq)
+{
+	int err;
+
+	err = fun_sq_create(funq->fdev, funq->sq_flags, funq->sqid, funq->cqid,
+			    funq->sqe_size_log2, funq->sq_depth,
+			    funq->sq_dma_addr, funq->sq_intcoal_nentries,
+			    funq->sq_intcoal_usec, funq->cq_vector, 0, 0, 0,
+			    &funq->sqid, &funq->sq_db);
+	if (err)
+		return err;
+
+	dev_dbg(funq->fdev->dev, "created SQ %u\n", funq->sqid);
+	return 0;
+}
+
+/* Create a funq's RQ on the device, bound to the funq's CQ. RQs are created
+ * through the SQ creation command using the funq's rq_flags; the buffer size
+ * is one page (PAGE_SHIFT) and no interrupt coalescing is configured. On
+ * success funq->rq_db is set, and funq->rqid is updated if the device
+ * allocated the ID. Returns 0 or the error from fun_sq_create().
+ */
+int fun_create_rq(struct fun_queue *funq)
+{
+	int err;
+
+	err = fun_sq_create(funq->fdev, funq->rq_flags, funq->rqid, funq->cqid,
+			    0, funq->rq_depth, funq->rq_dma_addr, 0, 0,
+			    funq->cq_vector, 0, 0, PAGE_SHIFT, &funq->rqid,
+			    &funq->rq_db);
+	if (err)
+		return err;
+
+	dev_dbg(funq->fdev->dev, "created RQ %u\n", funq->rqid);
+	return 0;
+}
+
+/* Translate the funq's CQ vector index into the Linux IRQ number of the
+ * underlying PCI device.
+ */
+static unsigned int funq_irq(struct fun_queue *funq)
+{
+	struct pci_dev *pdev = to_pci_dev(funq->fdev->dev);
+
+	return pci_irq_vector(pdev, funq->cq_vector);
+}
+
+/* Request the interrupt of a funq's CQ vector. The IRQ is named
+ * "<devname>-adminq" for queue 0 and "<devname>-q[<qid>]" otherwise. handler
+ * and data are remembered in the funq so that fun_free_irq() can release the
+ * interrupt later.
+ *
+ * Returns -EINVAL if no vector has been assigned (cq_vector < 0), otherwise
+ * the result of request_irq(). On failure the recorded handler is cleared.
+ */
+int fun_request_irq(struct fun_queue *funq, const char *devname,
+		    irq_handler_t handler, void *data)
+{
+	int err;
+
+	if (funq->cq_vector < 0)
+		return -EINVAL;
+
+	if (funq->qid)
+		snprintf(funq->irqname, sizeof(funq->irqname), "%s-q[%d]",
+			 devname, funq->qid);
+	else
+		snprintf(funq->irqname, sizeof(funq->irqname), "%s-adminq",
+			 devname);
+
+	funq->irq_handler = handler;
+	funq->irq_data = data;
+
+	err = request_irq(funq_irq(funq), handler, 0, funq->irqname, data);
+	if (err)
+		funq->irq_handler = NULL;
+
+	return err;
+}
+
+/* Create all component queues of a funq on the device: the CQ first, then the
+ * RQ if the funq has one (rq_depth != 0), and finally the SQ.
+ *
+ * Returns 0 on success. On failure the queues created so far are destroyed in
+ * reverse order of creation and the error of the failed step is returned.
+ */
+int fun_create_queue(struct fun_queue *funq)
+{
+	int rc;
+
+	rc = fun_create_cq(funq);
+	if (rc)
+		return rc;
+
+	if (funq->rq_depth) {
+		rc = fun_create_rq(funq);
+		if (rc)
+			goto release_cq;
+	}
+
+	rc = fun_create_sq(funq);
+	if (rc)
+		goto release_rq;
+
+	return 0;
+
+release_rq:
+	/* Only tear down an RQ that was actually created. A funq without an
+	 * RQ never has rqid assigned by fun_alloc_queue(), so it is still 0,
+	 * and an unconditional destroy would be issued for SQ ID 0 instead.
+	 */
+	if (funq->rq_depth)
+		fun_destroy_sq(funq->fdev, funq->rqid);
+release_cq:
+	fun_destroy_cq(funq->fdev, funq->cqid);
+	return rc;
+}
+
+/* Release the interrupt obtained by fun_request_irq(), if any, and forget the
+ * recorded handler and its data. Does nothing when no handler is recorded, so
+ * it is safe to call repeatedly.
+ */
+void fun_free_irq(struct fun_queue *funq)
+{
+	if (!funq->irq_handler)
+		return;
+
+	free_irq(funq_irq(funq), funq->irq_data);
+	funq->irq_handler = NULL;
+	funq->irq_data = NULL;
+}
diff --git a/drivers/net/ethernet/fungible/funcore/fun_queue.h b/drivers/net/ethernet/fungible/funcore/fun_queue.h
new file mode 100644
index 000000000..7fb53d0ae
--- /dev/null
+++ b/drivers/net/ethernet/fungible/funcore/fun_queue.h
@@ -0,0 +1,175 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */
+
+#ifndef _FUN_QEUEUE_H
+#define _FUN_QEUEUE_H
+
+#include <linux/interrupt.h>
+#include <linux/io.h>
+
+struct device;
+struct fun_dev;
+struct fun_queue;
+struct fun_cqe_info;
+struct fun_rsp_common;
+
+/* Per-CQE callback of a funq. It receives the funq, the opaque pointer
+ * registered with fun_set_cq_callback(), the message of the completion, and
+ * the fun_cqe_info of the CQE it arrived in.
+ */
+typedef void (*cq_callback_t)(struct fun_queue *funq, void *data, void *msg,
+ const struct fun_cqe_info *info);
+
+/* SW state of one RQ buffer: the page backing it and the page's DMA address.
+ * A NULL page marks a slot without a buffer.
+ */
+struct fun_rq_info {
+ dma_addr_t dma;
+ struct page *page;
+};
+
+/* A queue group consisting of an SQ, a CQ, and an optional RQ. */
+struct fun_queue {
+ struct fun_dev *fdev;
+ spinlock_t sq_lock;
+
+ /* DMA addresses of the HW descriptor rings */
+ dma_addr_t cq_dma_addr;
+ dma_addr_t sq_dma_addr;
+ dma_addr_t rq_dma_addr;
+
+ /* doorbell registers */
+ u32 __iomem *cq_db;
+ u32 __iomem *sq_db;
+ u32 __iomem *rq_db;
+
+ /* ring virtual addresses; rq_info is the SW ring paired with rqes */
+ void *cqes;
+ void *sq_cmds;
+ struct fun_eprq_rqbuf *rqes;
+ struct fun_rq_info *rq_info;
+
+ u32 cqid;
+ u32 sqid;
+ u32 rqid;
+
+ /* ring sizes in entries; rq_depth is 0 when there is no RQ */
+ u32 cq_depth;
+ u32 sq_depth;
+ u32 rq_depth;
+
+ /* cq_head: index of the next CQE to examine
+ * sq_tail, rq_tail: values last posted to the respective doorbell
+ */
+ u16 cq_head;
+ u16 sq_tail;
+ u16 rq_tail;
+
+ u8 cqe_size_log2;
+ u8 sqe_size_log2;
+
+ /* byte offset of the fun_cqe_info within a CQE */
+ u16 cqe_info_offset;
+
+ /* RQ consumption state: buffer currently being consumed, the last data
+ * offset seen in it (-1 initially), and the number of buffers consumed
+ * since the RQ doorbell was last written
+ */
+ u16 rq_buf_idx;
+ int rq_buf_offset;
+ u16 num_rqe_to_fill;
+
+ u8 cq_intcoal_usec;
+ u8 cq_intcoal_nentries;
+ u8 sq_intcoal_usec;
+ u8 sq_intcoal_nentries;
+
+ u16 cq_flags;
+ u16 sq_flags;
+ u16 rq_flags;
+
+ /* SQ head writeback */
+ u16 sq_comp;
+
+ volatile __be64 *sq_head;
+
+ cq_callback_t cq_cb;
+ void *cb_data;
+
+ /* irq_handler is non-NULL only while an IRQ is requested; cq_vector is
+ * negative until a vector has been assigned; cq_phase is the phase tag
+ * expected in the next new CQE
+ */
+ irq_handler_t irq_handler;
+ void *irq_data;
+ s16 cq_vector;
+ u8 cq_phase;
+
+ /* I/O q index */
+ u16 qid;
+
+ char irqname[24];
+};
+
+/* Return the address of the SQ entry at index pos. */
+static inline void *fun_sqe_at(const struct fun_queue *funq, unsigned int pos)
+{
+	unsigned int byte_off = pos << funq->sqe_size_log2;
+
+	return funq->sq_cmds + byte_off;
+}
+
+/* Advance the SQ tail to the entry following tail, wrapping at sq_depth,
+ * record it in the funq and write it to the SQ doorbell.
+ */
+static inline void funq_sq_post_tail(struct fun_queue *funq, u16 tail)
+{
+	u16 next = tail + 1;
+
+	if (next == funq->sq_depth)
+		next = 0;
+
+	funq->sq_tail = next;
+	writel(next, funq->sq_db);
+}
+
+/* Return the fun_cqe_info located cqe_info_offset bytes into the given CQE. */
+static inline struct fun_cqe_info *funq_cqe_info(const struct fun_queue *funq,
+						 void *cqe)
+{
+	void *info = cqe + funq->cqe_info_offset;
+
+	return info;
+}
+
+/* Write the funq's current RQ tail to the RQ doorbell. */
+static inline void funq_rq_post(struct fun_queue *funq)
+{
+	u16 tail = funq->rq_tail;
+
+	writel(tail, funq->rq_db);
+}
+
+/* Parameters for fun_alloc_queue(). The values are copied into the new
+ * fun_queue. An rq_depth of 0 requests a funq without an RQ.
+ */
+struct fun_queue_alloc_req {
+ u8 cqe_size_log2;
+ u8 sqe_size_log2;
+
+ u16 cq_flags;
+ u16 sq_flags;
+ u16 rq_flags;
+
+ u32 cq_depth;
+ u32 sq_depth;
+ u32 rq_depth;
+
+ u8 cq_intcoal_usec;
+ u8 cq_intcoal_nentries;
+ u8 sq_intcoal_usec;
+ u8 sq_intcoal_nentries;
+};
+
+int fun_sq_create(struct fun_dev *fdev, u16 flags, u32 sqid, u32 cqid,
+ u8 sqe_size_log2, u32 sq_depth, dma_addr_t dma_addr,
+ u8 coal_nentries, u8 coal_usec, u32 irq_num,
+ u32 scan_start_id, u32 scan_end_id,
+ u32 rq_buf_size_log2, u32 *sqidp, u32 __iomem **dbp);
+int fun_cq_create(struct fun_dev *fdev, u16 flags, u32 cqid, u32 rqid,
+ u8 cqe_size_log2, u32 cq_depth, dma_addr_t dma_addr,
+ u16 headroom, u16 tailroom, u8 coal_nentries, u8 coal_usec,
+ u32 irq_num, u32 scan_start_id, u32 scan_end_id,
+ u32 *cqidp, u32 __iomem **dbp);
+void *fun_alloc_ring_mem(struct device *dma_dev, size_t depth,
+ size_t hw_desc_sz, size_t sw_desc_size, bool wb,
+ int numa_node, dma_addr_t *dma_addr, void **sw_va,
+ volatile __be64 **wb_va);
+void fun_free_ring_mem(struct device *dma_dev, size_t depth, size_t hw_desc_sz,
+ bool wb, void *hw_va, dma_addr_t dma_addr, void *sw_va);
+
+/* Destroy the SQ (or RQ, which is created as an EPSQ) with the given ID. */
+#define fun_destroy_sq(fdev, sqid) \
+ fun_res_destroy((fdev), FUN_ADMIN_OP_EPSQ, 0, (sqid))
+/* Destroy the CQ with the given ID. */
+#define fun_destroy_cq(fdev, cqid) \
+ fun_res_destroy((fdev), FUN_ADMIN_OP_EPCQ, 0, (cqid))
+
+struct fun_queue *fun_alloc_queue(struct fun_dev *fdev, int qid,
+ const struct fun_queue_alloc_req *req);
+void fun_free_queue(struct fun_queue *funq);
+
+/* Register the function invoked for each processed CQE of the funq, together
+ * with the opaque pointer handed back to it.
+ */
+static inline void fun_set_cq_callback(struct fun_queue *funq, cq_callback_t cb,
+				       void *cb_data)
+{
+	funq->cb_data = cb_data;
+	funq->cq_cb = cb;
+}
+
+int fun_create_rq(struct fun_queue *funq);
+int fun_create_queue(struct fun_queue *funq);
+
+void fun_free_irq(struct fun_queue *funq);
+int fun_request_irq(struct fun_queue *funq, const char *devname,
+ irq_handler_t handler, void *data);
+
+unsigned int __fun_process_cq(struct fun_queue *funq, unsigned int max);
+unsigned int fun_process_cq(struct fun_queue *funq, unsigned int max);
+
+#endif /* _FUN_QEUEUE_H */
diff --git a/drivers/net/ethernet/fungible/funeth/Kconfig b/drivers/net/ethernet/fungible/funeth/Kconfig
new file mode 100644
index 000000000..c72ad9386
--- /dev/null
+++ b/drivers/net/ethernet/fungible/funeth/Kconfig
@@ -0,0 +1,17 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Fungible Ethernet driver configuration
+#
+
+config FUN_ETH
+ tristate "Fungible Ethernet device driver"
+ depends on PCI && PCI_MSI
+ # The kTLS part of the driver is built whenever TLS_DEVICE is enabled.
+ # In that case the driver is limited to the state of TLS (it cannot be
+ # built-in while TLS is a module). With TLS_DEVICE disabled TLS imposes
+ # no constraint.
+ depends on TLS && TLS_DEVICE || TLS_DEVICE=n
+ select NET_DEVLINK
+ select FUN_CORE
+ help
+ This driver supports the Ethernet functionality of Fungible adapters.
+ It works with both physical and virtual functions.
+
+ To compile this driver as a module, choose M here. The module
+ will be called funeth.
diff --git a/drivers/net/ethernet/fungible/funeth/Makefile b/drivers/net/ethernet/fungible/funeth/Makefile
new file mode 100644
index 000000000..646d69595
--- /dev/null
+++ b/drivers/net/ethernet/fungible/funeth/Makefile
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause)
+
+ccflags-y += -I$(srctree)/$(src)/../funcore -I$(srctree)/$(src)
+
+obj-$(CONFIG_FUN_ETH) += funeth.o
+
+funeth-y := funeth_main.o funeth_rx.o funeth_tx.o funeth_devlink.o \
+ funeth_ethtool.o
+
+funeth-$(CONFIG_TLS_DEVICE) += funeth_ktls.o
diff --git a/drivers/net/ethernet/fungible/funeth/fun_port.h b/drivers/net/ethernet/fungible/funeth/fun_port.h
new file mode 100644
index 000000000..0f9da44e3
--- /dev/null
+++ b/drivers/net/ethernet/fungible/funeth/fun_port.h
@@ -0,0 +1,97 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */
+
+#ifndef _FUN_PORT_H
+#define _FUN_PORT_H
+
+/* Indices of the per-port MAC RX statistics counters. The values are
+ * consecutive from 0; PORT_MAC_RX_STATS_MAX is one past the last counter and
+ * thus the number of counters.
+ * NOTE(review): presumably these index the port stats area filled by the
+ * device -- confirm.
+ */
+enum port_mac_rx_stats {
+ PORT_MAC_RX_etherStatsOctets = 0x0,
+ PORT_MAC_RX_OctetsReceivedOK = 0x1,
+ PORT_MAC_RX_aAlignmentErrors = 0x2,
+ PORT_MAC_RX_aPAUSEMACCtrlFramesReceived = 0x3,
+ PORT_MAC_RX_aFrameTooLongErrors = 0x4,
+ PORT_MAC_RX_aInRangeLengthErrors = 0x5,
+ PORT_MAC_RX_aFramesReceivedOK = 0x6,
+ PORT_MAC_RX_aFrameCheckSequenceErrors = 0x7,
+ PORT_MAC_RX_VLANReceivedOK = 0x8,
+ PORT_MAC_RX_ifInErrors = 0x9,
+ PORT_MAC_RX_ifInUcastPkts = 0xa,
+ PORT_MAC_RX_ifInMulticastPkts = 0xb,
+ PORT_MAC_RX_ifInBroadcastPkts = 0xc,
+ PORT_MAC_RX_etherStatsDropEvents = 0xd,
+ PORT_MAC_RX_etherStatsPkts = 0xe,
+ PORT_MAC_RX_etherStatsUndersizePkts = 0xf,
+ PORT_MAC_RX_etherStatsPkts64Octets = 0x10,
+ PORT_MAC_RX_etherStatsPkts65to127Octets = 0x11,
+ PORT_MAC_RX_etherStatsPkts128to255Octets = 0x12,
+ PORT_MAC_RX_etherStatsPkts256to511Octets = 0x13,
+ PORT_MAC_RX_etherStatsPkts512to1023Octets = 0x14,
+ PORT_MAC_RX_etherStatsPkts1024to1518Octets = 0x15,
+ PORT_MAC_RX_etherStatsPkts1519toMaxOctets = 0x16,
+ PORT_MAC_RX_etherStatsOversizePkts = 0x17,
+ PORT_MAC_RX_etherStatsJabbers = 0x18,
+ PORT_MAC_RX_etherStatsFragments = 0x19,
+ PORT_MAC_RX_CBFCPAUSEFramesReceived_0 = 0x1a,
+ PORT_MAC_RX_CBFCPAUSEFramesReceived_1 = 0x1b,
+ PORT_MAC_RX_CBFCPAUSEFramesReceived_2 = 0x1c,
+ PORT_MAC_RX_CBFCPAUSEFramesReceived_3 = 0x1d,
+ PORT_MAC_RX_CBFCPAUSEFramesReceived_4 = 0x1e,
+ PORT_MAC_RX_CBFCPAUSEFramesReceived_5 = 0x1f,
+ PORT_MAC_RX_CBFCPAUSEFramesReceived_6 = 0x20,
+ PORT_MAC_RX_CBFCPAUSEFramesReceived_7 = 0x21,
+ PORT_MAC_RX_CBFCPAUSEFramesReceived_8 = 0x22,
+ PORT_MAC_RX_CBFCPAUSEFramesReceived_9 = 0x23,
+ PORT_MAC_RX_CBFCPAUSEFramesReceived_10 = 0x24,
+ PORT_MAC_RX_CBFCPAUSEFramesReceived_11 = 0x25,
+ PORT_MAC_RX_CBFCPAUSEFramesReceived_12 = 0x26,
+ PORT_MAC_RX_CBFCPAUSEFramesReceived_13 = 0x27,
+ PORT_MAC_RX_CBFCPAUSEFramesReceived_14 = 0x28,
+ PORT_MAC_RX_CBFCPAUSEFramesReceived_15 = 0x29,
+ PORT_MAC_RX_MACControlFramesReceived = 0x2a,
+ PORT_MAC_RX_STATS_MAX = 0x2b,
+};
+
+/* Indices of the per-port MAC TX statistics counters. The values are
+ * consecutive from 0; PORT_MAC_TX_STATS_MAX is one past the last counter and
+ * thus the number of counters.
+ * NOTE(review): presumably these index the port stats area filled by the
+ * device -- confirm.
+ */
+enum port_mac_tx_stats {
+ PORT_MAC_TX_etherStatsOctets = 0x0,
+ PORT_MAC_TX_OctetsTransmittedOK = 0x1,
+ PORT_MAC_TX_aPAUSEMACCtrlFramesTransmitted = 0x2,
+ PORT_MAC_TX_aFramesTransmittedOK = 0x3,
+ PORT_MAC_TX_VLANTransmittedOK = 0x4,
+ PORT_MAC_TX_ifOutErrors = 0x5,
+ PORT_MAC_TX_ifOutUcastPkts = 0x6,
+ PORT_MAC_TX_ifOutMulticastPkts = 0x7,
+ PORT_MAC_TX_ifOutBroadcastPkts = 0x8,
+ PORT_MAC_TX_etherStatsPkts64Octets = 0x9,
+ PORT_MAC_TX_etherStatsPkts65to127Octets = 0xa,
+ PORT_MAC_TX_etherStatsPkts128to255Octets = 0xb,
+ PORT_MAC_TX_etherStatsPkts256to511Octets = 0xc,
+ PORT_MAC_TX_etherStatsPkts512to1023Octets = 0xd,
+ PORT_MAC_TX_etherStatsPkts1024to1518Octets = 0xe,
+ PORT_MAC_TX_etherStatsPkts1519toMaxOctets = 0xf,
+ PORT_MAC_TX_CBFCPAUSEFramesTransmitted_0 = 0x10,
+ PORT_MAC_TX_CBFCPAUSEFramesTransmitted_1 = 0x11,
+ PORT_MAC_TX_CBFCPAUSEFramesTransmitted_2 = 0x12,
+ PORT_MAC_TX_CBFCPAUSEFramesTransmitted_3 = 0x13,
+ PORT_MAC_TX_CBFCPAUSEFramesTransmitted_4 = 0x14,
+ PORT_MAC_TX_CBFCPAUSEFramesTransmitted_5 = 0x15,
+ PORT_MAC_TX_CBFCPAUSEFramesTransmitted_6 = 0x16,
+ PORT_MAC_TX_CBFCPAUSEFramesTransmitted_7 = 0x17,
+ PORT_MAC_TX_CBFCPAUSEFramesTransmitted_8 = 0x18,
+ PORT_MAC_TX_CBFCPAUSEFramesTransmitted_9 = 0x19,
+ PORT_MAC_TX_CBFCPAUSEFramesTransmitted_10 = 0x1a,
+ PORT_MAC_TX_CBFCPAUSEFramesTransmitted_11 = 0x1b,
+ PORT_MAC_TX_CBFCPAUSEFramesTransmitted_12 = 0x1c,
+ PORT_MAC_TX_CBFCPAUSEFramesTransmitted_13 = 0x1d,
+ PORT_MAC_TX_CBFCPAUSEFramesTransmitted_14 = 0x1e,
+ PORT_MAC_TX_CBFCPAUSEFramesTransmitted_15 = 0x1f,
+ PORT_MAC_TX_MACControlFramesTransmitted = 0x20,
+ PORT_MAC_TX_etherStatsPkts = 0x21,
+ PORT_MAC_TX_STATS_MAX = 0x22,
+};
+
+/* Indices of the per-port FEC statistics counters; PORT_MAC_FEC_STATS_MAX is
+ * the number of counters.
+ */
+enum port_mac_fec_stats {
+ PORT_MAC_FEC_Correctable = 0x0,
+ PORT_MAC_FEC_Uncorrectable = 0x1,
+ PORT_MAC_FEC_STATS_MAX = 0x2,
+};
+
+#endif /* _FUN_PORT_H */
diff --git a/drivers/net/ethernet/fungible/funeth/funeth.h b/drivers/net/ethernet/fungible/funeth/funeth.h
new file mode 100644
index 000000000..1250e10d2
--- /dev/null
+++ b/drivers/net/ethernet/fungible/funeth/funeth.h
@@ -0,0 +1,171 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */
+
+#ifndef _FUNETH_H
+#define _FUNETH_H
+
+#include <uapi/linux/if_ether.h>
+#include <uapi/linux/net_tstamp.h>
+#include <linux/mutex.h>
+#include <linux/seqlock.h>
+#include <linux/xarray.h>
+#include <net/devlink.h>
+#include "fun_dev.h"
+
+#define ADMIN_SQE_SIZE SZ_128
+#define ADMIN_CQE_SIZE SZ_64
+#define ADMIN_RSP_MAX_LEN (ADMIN_CQE_SIZE - sizeof(struct fun_cqe_info))
+
+#define FUN_MAX_MTU 9024
+
+#define SQ_DEPTH 512U
+#define CQ_DEPTH 1024U
+#define RQ_DEPTH (512U / (PAGE_SIZE / 4096))
+
+#define CQ_INTCOAL_USEC 10
+#define CQ_INTCOAL_NPKT 16
+#define SQ_INTCOAL_USEC 10
+#define SQ_INTCOAL_NPKT 16
+
+#define INVALID_LPORT 0xffff
+
+#define FUN_PORT_CAP_PAUSE_MASK (FUN_PORT_CAP_TX_PAUSE | FUN_PORT_CAP_RX_PAUSE)
+
+/* Configuration of one virtual port; struct fun_ethdev points to these
+ * through its vport_info member.
+ * NOTE(review): presumably max_rate is in Mbps, matching the tx_mbps ADI
+ * parameter -- confirm.
+ */
+struct fun_vport_info {
+ u8 mac[ETH_ALEN];
+ u16 vlan;
+ __be16 vlan_proto;
+ u8 qos;
+ u8 spoofchk:1;
+ u8 trusted:1;
+ unsigned int max_rate;
+};
+
+/* "subclass" of fun_dev for Ethernet functions. The fun_dev is embedded so
+ * that to_fun_ethdev() can recover this structure from a pointer to it.
+ */
+struct fun_ethdev {
+ struct fun_dev fdev;
+
+ /* the function's network ports */
+ struct net_device **netdevs;
+ unsigned int num_ports;
+
+ /* configuration for the function's virtual ports */
+ unsigned int num_vports;
+ struct fun_vport_info *vport_info;
+
+ struct mutex state_mutex; /* nests inside RTNL if both taken */
+
+ unsigned int nsqs_per_port;
+};
+
+/* Recover the enclosing fun_ethdev from a pointer to its embedded fun_dev. */
+static inline struct fun_ethdev *to_fun_ethdev(struct fun_dev *p)
+{
+	struct fun_ethdev *ed = container_of(p, struct fun_ethdev, fdev);
+
+	return ed;
+}
+
+/* Description of a set of RX, TX and XDP queues: the queue pointer arrays,
+ * the number of queues of each kind, a start index for each kind, the ring
+ * depths, and a state value.
+ * NOTE(review): presumably the *_start members are the index of the first
+ * queue to operate on and state is the target queue state -- confirm against
+ * fun_replace_queues() and its helpers.
+ */
+struct fun_qset {
+ struct funeth_rxq **rxqs;
+ struct funeth_txq **txqs;
+ struct funeth_txq **xdpqs;
+ unsigned int nrxqs;
+ unsigned int ntxqs;
+ unsigned int nxdpqs;
+ unsigned int rxq_start;
+ unsigned int txq_start;
+ unsigned int xdpq_start;
+ unsigned int cq_depth;
+ unsigned int rq_depth;
+ unsigned int sq_depth;
+ int state;
+};
+
+/* Per netdevice driver state, i.e., netdev_priv. */
+struct funeth_priv {
+ struct fun_dev *fdev;
+ struct pci_dev *pdev;
+ struct net_device *netdev;
+
+ /* queue pointer arrays; the RX and XDP arrays hold RCU-managed pointers */
+ struct funeth_rxq * __rcu *rxqs;
+ struct funeth_txq **txqs;
+ struct funeth_txq * __rcu *xdpqs;
+
+ struct xarray irqs;
+ unsigned int num_tx_irqs;
+ unsigned int num_rx_irqs;
+ unsigned int rx_irq_ofst;
+
+ unsigned int lane_attrs;
+ u16 lport;
+
+ /* link settings */
+ /* NOTE(review): link_seq presumably lets readers obtain a consistent
+ * snapshot of the link settings in this group -- confirm.
+ */
+ u64 port_caps;
+ u64 advertising;
+ u64 lp_advertising;
+ unsigned int link_speed;
+ u8 xcvr_type;
+ u8 active_fc;
+ u8 active_fec;
+ u8 link_down_reason;
+ seqcount_t link_seq;
+
+ u32 msg_enable;
+
+ unsigned int num_xdpqs;
+
+ /* ethtool, etc. config parameters */
+ unsigned int sq_depth;
+ unsigned int rq_depth;
+ unsigned int cq_depth;
+ unsigned int cq_irq_db;
+ u8 tx_coal_usec;
+ u8 tx_coal_count;
+ u8 rx_coal_usec;
+ u8 rx_coal_count;
+
+ struct hwtstamp_config hwtstamp_cfg;
+
+ /* cumulative queue stats from earlier queue instances */
+ u64 tx_packets;
+ u64 tx_bytes;
+ u64 tx_dropped;
+ u64 rx_packets;
+ u64 rx_bytes;
+ u64 rx_dropped;
+
+ /* RSS */
+ unsigned int rss_hw_id;
+ enum fun_eth_hash_alg hash_algo;
+ u8 rss_key[FUN_ETH_RSS_MAX_KEY_SIZE];
+ unsigned int indir_table_nentries;
+ u32 indir_table[FUN_ETH_RSS_MAX_INDIR_ENT];
+ dma_addr_t rss_dma_addr;
+ void *rss_cfg;
+
+ /* DMA area for port stats */
+ dma_addr_t stats_dma_addr;
+ __be64 *stats;
+
+ struct bpf_prog *xdp_prog;
+
+ struct devlink_port dl_port;
+
+ /* kTLS state */
+ unsigned int ktls_id;
+ atomic64_t tx_tls_add;
+ atomic64_t tx_tls_del;
+ atomic64_t tx_tls_resync;
+};
+
+void fun_set_ethtool_ops(struct net_device *netdev);
+int fun_port_write_cmd(struct funeth_priv *fp, int key, u64 data);
+int fun_port_read_cmd(struct funeth_priv *fp, int key, u64 *data);
+int fun_create_and_bind_tx(struct funeth_priv *fp, u32 sqid);
+int fun_replace_queues(struct net_device *dev, struct fun_qset *newqs,
+ struct netlink_ext_ack *extack);
+int fun_change_num_queues(struct net_device *dev, unsigned int ntx,
+ unsigned int nrx);
+void fun_set_ring_count(struct net_device *netdev, unsigned int ntx,
+ unsigned int nrx);
+int fun_config_rss(struct net_device *dev, int algo, const u8 *key,
+ const u32 *qtable, u8 op);
+
+#endif /* _FUNETH_H */
diff --git a/drivers/net/ethernet/fungible/funeth/funeth_devlink.c b/drivers/net/ethernet/fungible/funeth/funeth_devlink.c
new file mode 100644
index 000000000..d50c22294
--- /dev/null
+++ b/drivers/net/ethernet/fungible/funeth/funeth_devlink.c
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause)
+
+#include "funeth.h"
+#include "funeth_devlink.h"
+
+/* devlink info_get handler: reports only the driver name (the module name).
+ * The devlink instance and extack are unused.
+ */
+static int fun_dl_info_get(struct devlink *dl, struct devlink_info_req *req,
+			   struct netlink_ext_ack *extack)
+{
+	int err = devlink_info_driver_name_put(req, KBUILD_MODNAME);
+
+	return err;
+}
+
+/* devlink operations of the driver; only info_get is implemented. */
+static const struct devlink_ops fun_dl_ops = {
+ .info_get = fun_dl_info_get,
+};
+
+/* Allocate a devlink instance for dev using the driver's devlink ops. The
+ * instance's private area is sized to hold a struct fun_ethdev. Returns the
+ * result of devlink_alloc().
+ */
+struct devlink *fun_devlink_alloc(struct device *dev)
+{
+	const size_t priv_sz = sizeof(struct fun_ethdev);
+
+	return devlink_alloc(&fun_dl_ops, priv_sz, dev);
+}
+
+/* Free a devlink instance obtained from fun_devlink_alloc(). */
+void fun_devlink_free(struct devlink *devlink)
+{
+ devlink_free(devlink);
+}
+
+/* Register the devlink instance with the devlink core. */
+void fun_devlink_register(struct devlink *devlink)
+{
+ devlink_register(devlink);
+}
+
+/* Unregister the devlink instance from the devlink core. */
+void fun_devlink_unregister(struct devlink *devlink)
+{
+ devlink_unregister(devlink);
+}
diff --git a/drivers/net/ethernet/fungible/funeth/funeth_devlink.h b/drivers/net/ethernet/fungible/funeth/funeth_devlink.h
new file mode 100644
index 000000000..e40464d57
--- /dev/null
+++ b/drivers/net/ethernet/fungible/funeth/funeth_devlink.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */
+
+#ifndef __FUNETH_DEVLINK_H
+#define __FUNETH_DEVLINK_H
+
+#include <net/devlink.h>
+
+struct devlink *fun_devlink_alloc(struct device *dev);
+void fun_devlink_free(struct devlink *devlink);
+void fun_devlink_register(struct devlink *devlink);
+void fun_devlink_unregister(struct devlink *devlink);
+
+#endif /* __FUNETH_DEVLINK_H */
diff --git a/drivers/net/ethernet/fungible/funeth/funeth_ethtool.c b/drivers/net/ethernet/fungible/funeth/funeth_ethtool.c
new file mode 100644
index 000000000..31aa185f4
--- /dev/null
+++ b/drivers/net/ethernet/fungible/funeth/funeth_ethtool.c
@@ -0,0 +1,1198 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause)
+
+#include <linux/ethtool.h>
+#include <linux/linkmode.h>
+#include <linux/netdevice.h>
+#include <linux/nvme.h>
+#include <linux/io.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/pci.h>
+#include <linux/rtnetlink.h>
+#include "funeth.h"
+#include "fun_port.h"
+#include "funeth_txrx.h"
+
+/* Min queue depth. The smallest power-of-2 supporting jumbo frames with 4K
+ * pages is 8. Require it for all types of queues though some could work with
+ * fewer entries. fun_set_ringparam() rejects Rx/Tx ring sizes below this.
+ */
+#define FUNETH_MIN_QDEPTH 8
+
+/* ethtool names of the MAC Tx counters. The order must match the TX_STAT()
+ * sequence in get_mac_stats(). The table is copied verbatim by
+ * fun_get_strings(), hence the fixed ETH_GSTRING_LEN entry size.
+ */
+static const char mac_tx_stat_names[][ETH_GSTRING_LEN] = {
+ "mac_tx_octets_total",
+ "mac_tx_frames_total",
+ "mac_tx_vlan_frames_ok",
+ "mac_tx_unicast_frames",
+ "mac_tx_multicast_frames",
+ "mac_tx_broadcast_frames",
+ "mac_tx_errors",
+ "mac_tx_CBFCPAUSE0",
+ "mac_tx_CBFCPAUSE1",
+ "mac_tx_CBFCPAUSE2",
+ "mac_tx_CBFCPAUSE3",
+ "mac_tx_CBFCPAUSE4",
+ "mac_tx_CBFCPAUSE5",
+ "mac_tx_CBFCPAUSE6",
+ "mac_tx_CBFCPAUSE7",
+ "mac_tx_CBFCPAUSE8",
+ "mac_tx_CBFCPAUSE9",
+ "mac_tx_CBFCPAUSE10",
+ "mac_tx_CBFCPAUSE11",
+ "mac_tx_CBFCPAUSE12",
+ "mac_tx_CBFCPAUSE13",
+ "mac_tx_CBFCPAUSE14",
+ "mac_tx_CBFCPAUSE15",
+};
+
+/* ethtool names of the MAC Rx counters. The order must match the RX_STAT()
+ * sequence in get_mac_stats(). The table is copied verbatim by
+ * fun_get_strings(), hence the fixed ETH_GSTRING_LEN entry size.
+ */
+static const char mac_rx_stat_names[][ETH_GSTRING_LEN] = {
+ "mac_rx_octets_total",
+ "mac_rx_frames_total",
+ "mac_rx_VLAN_frames_ok",
+ "mac_rx_unicast_frames",
+ "mac_rx_multicast_frames",
+ "mac_rx_broadcast_frames",
+ "mac_rx_drop_events",
+ "mac_rx_errors",
+ "mac_rx_alignment_errors",
+ "mac_rx_CBFCPAUSE0",
+ "mac_rx_CBFCPAUSE1",
+ "mac_rx_CBFCPAUSE2",
+ "mac_rx_CBFCPAUSE3",
+ "mac_rx_CBFCPAUSE4",
+ "mac_rx_CBFCPAUSE5",
+ "mac_rx_CBFCPAUSE6",
+ "mac_rx_CBFCPAUSE7",
+ "mac_rx_CBFCPAUSE8",
+ "mac_rx_CBFCPAUSE9",
+ "mac_rx_CBFCPAUSE10",
+ "mac_rx_CBFCPAUSE11",
+ "mac_rx_CBFCPAUSE12",
+ "mac_rx_CBFCPAUSE13",
+ "mac_rx_CBFCPAUSE14",
+ "mac_rx_CBFCPAUSE15",
+};
+
+/* Names of the per-Tx-queue counters, in the order fun_get_ethtool_stats()
+ * emits them. fun_get_strings() reports each name once per queue with a
+ * "[queue]" suffix and once more without suffix for the totals.
+ */
+static const char * const txq_stat_names[] = {
+ "tx_pkts",
+ "tx_bytes",
+ "tx_cso",
+ "tx_tso",
+ "tx_encapsulated_tso",
+ "tx_uso",
+ "tx_more",
+ "tx_queue_stops",
+ "tx_queue_restarts",
+ "tx_mapping_errors",
+ "tx_tls_encrypted_packets",
+ "tx_tls_encrypted_bytes",
+ "tx_tls_ooo",
+ "tx_tls_drop_no_sync_data",
+};
+
+/* Names of the per-XDP-Tx-queue counters, in the order
+ * fun_get_ethtool_stats() emits them (per queue, then totals).
+ */
+static const char * const xdpq_stat_names[] = {
+ "tx_xdp_pkts",
+ "tx_xdp_bytes",
+ "tx_xdp_full",
+ "tx_xdp_mapping_errors",
+};
+
+/* Names of the per-Rx-queue counters, in the order fun_get_ethtool_stats()
+ * emits them (per queue, then totals). "rx_drops" is reported there as the
+ * sum of the rx_mem_drops and xdp_err queue counters.
+ */
+static const char * const rxq_stat_names[] = {
+ "rx_pkts",
+ "rx_bytes",
+ "rx_cso",
+ "gro_pkts",
+ "gro_merged",
+ "rx_xdp_tx",
+ "rx_xdp_redir",
+ "rx_xdp_drops",
+ "rx_buffers",
+ "rx_page_allocs",
+ "rx_drops",
+ "rx_budget_exhausted",
+ "rx_mapping_errors",
+};
+
+/* Names of the device-wide kTLS counters; fun_get_ethtool_stats() fills them
+ * from the tx_tls_add, tx_tls_del and tx_tls_resync atomics, in this order.
+ */
+static const char * const tls_stat_names[] = {
+ "tx_tls_ctx",
+ "tx_tls_del",
+ "tx_tls_resync",
+};
+
+/* Translate the FUN_PORT_CAP_* bitmask @modes into ethtool link-mode bits.
+ *
+ * Bits are only ever set in @ethtool_modes_map, never cleared, so callers
+ * pass a zeroed map. A single port capability expands to every ethtool media
+ * variant listed for it below. Of the pause capabilities only RX_PAUSE is
+ * translated here (to Pause); Asym_Pause is added by the callers.
+ */
+static void fun_link_modes_to_ethtool(u64 modes,
+ unsigned long *ethtool_modes_map)
+{
+#define ADD_LINK_MODE(mode) \
+ __set_bit(ETHTOOL_LINK_MODE_ ## mode ## _BIT, ethtool_modes_map)
+
+ if (modes & FUN_PORT_CAP_AUTONEG)
+ ADD_LINK_MODE(Autoneg);
+ if (modes & FUN_PORT_CAP_1000_X)
+ ADD_LINK_MODE(1000baseX_Full);
+ if (modes & FUN_PORT_CAP_10G_R) {
+ ADD_LINK_MODE(10000baseCR_Full);
+ ADD_LINK_MODE(10000baseSR_Full);
+ ADD_LINK_MODE(10000baseLR_Full);
+ ADD_LINK_MODE(10000baseER_Full);
+ }
+ if (modes & FUN_PORT_CAP_25G_R) {
+ ADD_LINK_MODE(25000baseCR_Full);
+ ADD_LINK_MODE(25000baseSR_Full);
+ }
+ if (modes & FUN_PORT_CAP_40G_R4) {
+ ADD_LINK_MODE(40000baseCR4_Full);
+ ADD_LINK_MODE(40000baseSR4_Full);
+ ADD_LINK_MODE(40000baseLR4_Full);
+ }
+ if (modes & FUN_PORT_CAP_50G_R2) {
+ ADD_LINK_MODE(50000baseCR2_Full);
+ ADD_LINK_MODE(50000baseSR2_Full);
+ }
+ if (modes & FUN_PORT_CAP_50G_R) {
+ ADD_LINK_MODE(50000baseCR_Full);
+ ADD_LINK_MODE(50000baseSR_Full);
+ ADD_LINK_MODE(50000baseLR_ER_FR_Full);
+ }
+ if (modes & FUN_PORT_CAP_100G_R4) {
+ ADD_LINK_MODE(100000baseCR4_Full);
+ ADD_LINK_MODE(100000baseSR4_Full);
+ ADD_LINK_MODE(100000baseLR4_ER4_Full);
+ }
+ if (modes & FUN_PORT_CAP_100G_R2) {
+ ADD_LINK_MODE(100000baseCR2_Full);
+ ADD_LINK_MODE(100000baseSR2_Full);
+ ADD_LINK_MODE(100000baseLR2_ER2_FR2_Full);
+ }
+ if (modes & FUN_PORT_CAP_FEC_NONE)
+ ADD_LINK_MODE(FEC_NONE);
+ if (modes & FUN_PORT_CAP_FEC_FC)
+ ADD_LINK_MODE(FEC_BASER);
+ if (modes & FUN_PORT_CAP_FEC_RS)
+ ADD_LINK_MODE(FEC_RS);
+ if (modes & FUN_PORT_CAP_RX_PAUSE)
+ ADD_LINK_MODE(Pause);
+
+#undef ADD_LINK_MODE
+}
+
+/* Add Asym_Pause to the advertised link modes of @ks when the FUN_PORT_CAP_*
+ * mask @advertising enables pause in exactly one of the two directions.
+ */
+static void set_asym_pause(u64 advertising, struct ethtool_link_ksettings *ks)
+{
+	const bool rx_on = advertising & FUN_PORT_CAP_RX_PAUSE;
+	const bool tx_on = advertising & FUN_PORT_CAP_TX_PAUSE;
+
+	/* symmetric (both on or both off): nothing to add */
+	if (rx_on == tx_on)
+		return;
+
+	ethtool_link_ksettings_add_link_mode(ks, advertising, Asym_Pause);
+}
+
+/* Map a transceiver type to an ethtool PORT_* value. A zero @xcvr yields
+ * PORT_NONE; otherwise the low three bits select the medium and anything
+ * that is neither BASE-T nor copper is reported as fibre.
+ */
+static unsigned int fun_port_type(unsigned int xcvr)
+{
+	unsigned int medium;
+
+	if (xcvr == 0)
+		return PORT_NONE;
+
+	medium = xcvr & 7;
+	if (medium == FUN_XCVR_BASET)
+		return PORT_TP;
+	if (medium == FUN_XCVR_CU)
+		return PORT_DA;
+	return PORT_FIBRE;
+}
+
+/* ethtool .get_link_ksettings handler.
+ *
+ * Fills @ks with the port's supported, advertised and (when the link is up)
+ * link-partner modes, plus speed, duplex, autoneg state and port type.
+ * Always returns 0.
+ */
+static int fun_get_link_ksettings(struct net_device *netdev,
+ struct ethtool_link_ksettings *ks)
+{
+ const struct funeth_priv *fp = netdev_priv(netdev);
+ unsigned int seq, speed, xcvr;
+ u64 lp_advertising;
+ bool link_up;
+
+ /* fun_link_modes_to_ethtool() only sets bits, start from empty masks */
+ ethtool_link_ksettings_zero_link_mode(ks, supported);
+ ethtool_link_ksettings_zero_link_mode(ks, advertising);
+ ethtool_link_ksettings_zero_link_mode(ks, lp_advertising);
+
+ /* Link settings change asynchronously, take a consistent snapshot */
+ do {
+ seq = read_seqcount_begin(&fp->link_seq);
+ link_up = netif_carrier_ok(netdev);
+ speed = fp->link_speed;
+ xcvr = fp->xcvr_type;
+ lp_advertising = fp->lp_advertising;
+ } while (read_seqcount_retry(&fp->link_seq, seq));
+
+ /* speed, duplex and partner modes are reported only with link up */
+ if (link_up) {
+ ks->base.speed = speed;
+ ks->base.duplex = DUPLEX_FULL;
+ fun_link_modes_to_ethtool(lp_advertising,
+ ks->link_modes.lp_advertising);
+ } else {
+ ks->base.speed = SPEED_UNKNOWN;
+ ks->base.duplex = DUPLEX_UNKNOWN;
+ }
+
+ ks->base.autoneg = (fp->advertising & FUN_PORT_CAP_AUTONEG) ?
+ AUTONEG_ENABLE : AUTONEG_DISABLE;
+ ks->base.port = fun_port_type(xcvr);
+
+ /* Asym_Pause is supported if the port can pause in either direction */
+ fun_link_modes_to_ethtool(fp->port_caps, ks->link_modes.supported);
+ if (fp->port_caps & (FUN_PORT_CAP_RX_PAUSE | FUN_PORT_CAP_TX_PAUSE))
+ ethtool_link_ksettings_add_link_mode(ks, supported, Asym_Pause);
+
+ fun_link_modes_to_ethtool(fp->advertising, ks->link_modes.advertising);
+ set_asym_pause(fp->advertising, ks);
+ return 0;
+}
+
+/* Convert the advertised ethtool link modes in @ks to FUN_PORT_CAP_* speed
+ * capabilities. A capability is selected if any of its ethtool media variants
+ * is advertised. Only speed modes are translated: the autoneg, FEC and pause
+ * bits of the advertising mask are not looked at here.
+ */
+static u64 fun_advert_modes(const struct ethtool_link_ksettings *ks)
+{
+ u64 modes = 0;
+
+#define HAS_MODE(mode) \
+ ethtool_link_ksettings_test_link_mode(ks, advertising, mode)
+
+ if (HAS_MODE(1000baseX_Full))
+ modes |= FUN_PORT_CAP_1000_X;
+ if (HAS_MODE(10000baseCR_Full) || HAS_MODE(10000baseSR_Full) ||
+ HAS_MODE(10000baseLR_Full) || HAS_MODE(10000baseER_Full))
+ modes |= FUN_PORT_CAP_10G_R;
+ if (HAS_MODE(25000baseCR_Full) || HAS_MODE(25000baseSR_Full))
+ modes |= FUN_PORT_CAP_25G_R;
+ if (HAS_MODE(40000baseCR4_Full) || HAS_MODE(40000baseSR4_Full) ||
+ HAS_MODE(40000baseLR4_Full))
+ modes |= FUN_PORT_CAP_40G_R4;
+ if (HAS_MODE(50000baseCR2_Full) || HAS_MODE(50000baseSR2_Full))
+ modes |= FUN_PORT_CAP_50G_R2;
+ if (HAS_MODE(50000baseCR_Full) || HAS_MODE(50000baseSR_Full) ||
+ HAS_MODE(50000baseLR_ER_FR_Full))
+ modes |= FUN_PORT_CAP_50G_R;
+ if (HAS_MODE(100000baseCR4_Full) || HAS_MODE(100000baseSR4_Full) ||
+ HAS_MODE(100000baseLR4_ER4_Full))
+ modes |= FUN_PORT_CAP_100G_R4;
+ if (HAS_MODE(100000baseCR2_Full) || HAS_MODE(100000baseSR2_Full) ||
+ HAS_MODE(100000baseLR2_ER2_FR2_Full))
+ modes |= FUN_PORT_CAP_100G_R2;
+
+ return modes;
+#undef HAS_MODE
+}
+
+/* Return the FUN_PORT_CAP_* modes that run at the ethtool @speed, or 0 if the
+ * speed is not one the driver knows. 50G and 100G each map to two lane
+ * configurations.
+ */
+static u64 fun_speed_to_link_mode(unsigned int speed)
+{
+	if (speed == SPEED_1000)
+		return FUN_PORT_CAP_1000_X;
+	if (speed == SPEED_10000)
+		return FUN_PORT_CAP_10G_R;
+	if (speed == SPEED_25000)
+		return FUN_PORT_CAP_25G_R;
+	if (speed == SPEED_40000)
+		return FUN_PORT_CAP_40G_R4;
+	if (speed == SPEED_50000)
+		return FUN_PORT_CAP_50G_R | FUN_PORT_CAP_50G_R2;
+	if (speed == SPEED_100000)
+		return FUN_PORT_CAP_100G_R4 | FUN_PORT_CAP_100G_R2;
+	return 0;
+}
+
+/* Make @new_advert the port's advertised capabilities. Nothing is sent to the
+ * device if the value is unchanged; the cached copy in @fp is updated only
+ * after the port command succeeds. Returns 0 or the command's error.
+ */
+static int fun_change_advert(struct funeth_priv *fp, u64 new_advert)
+{
+	int rc = 0;
+
+	if (fp->advertising != new_advert) {
+		rc = fun_port_write_cmd(fp, FUN_ADMIN_PORT_KEY_ADVERT,
+					new_advert);
+		if (rc == 0)
+			fp->advertising = new_advert;
+	}
+	return rc;
+}
+
+/* All FEC-related port capability bits. */
+#define FUN_PORT_CAP_FEC_MASK \
+	(FUN_PORT_CAP_FEC_NONE | FUN_PORT_CAP_FEC_FC | FUN_PORT_CAP_FEC_RS)
+
+/* ethtool .set_link_ksettings handler.
+ *
+ * With autoneg enabled the requested advertising mask must be non-empty and a
+ * subset of what the port supports; its speed modes become the new
+ * advertisement. With autoneg disabled the requested speed is translated to
+ * the matching port modes, restricted to those the port supports. In both
+ * cases the currently configured pause and FEC bits are carried over.
+ *
+ * Returns 0 on success, -EOPNOTSUPP for virtual ports, -EINVAL for
+ * unsupported requests, or the error of the port command.
+ */
+static int fun_set_link_ksettings(struct net_device *netdev,
+				  const struct ethtool_link_ksettings *ks)
+{
+	__ETHTOOL_DECLARE_LINK_MODE_MASK(supported) = {};
+	struct funeth_priv *fp = netdev_priv(netdev);
+	u64 new_advert;
+
+	/* eswitch ports don't support mode changes */
+	if (fp->port_caps & FUN_PORT_CAP_VPORT)
+		return -EOPNOTSUPP;
+
+	if (ks->base.duplex == DUPLEX_HALF)
+		return -EINVAL;
+	if (ks->base.autoneg == AUTONEG_ENABLE &&
+	    !(fp->port_caps & FUN_PORT_CAP_AUTONEG))
+		return -EINVAL;
+
+	if (ks->base.autoneg == AUTONEG_ENABLE) {
+		if (linkmode_empty(ks->link_modes.advertising))
+			return -EINVAL;
+
+		fun_link_modes_to_ethtool(fp->port_caps, supported);
+		/* Keep the validation mask identical to what
+		 * fun_get_link_ksettings() reports as supported, which
+		 * includes Asym_Pause for pause-capable ports. Otherwise a
+		 * get/modify/set round trip is rejected whenever asymmetric
+		 * pause is currently advertised.
+		 */
+		if (fp->port_caps &
+		    (FUN_PORT_CAP_RX_PAUSE | FUN_PORT_CAP_TX_PAUSE))
+			linkmode_set_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT,
+					 supported);
+		if (!linkmode_subset(ks->link_modes.advertising, supported))
+			return -EINVAL;
+
+		new_advert = fun_advert_modes(ks) | FUN_PORT_CAP_AUTONEG;
+	} else {
+		new_advert = fun_speed_to_link_mode(ks->base.speed);
+		new_advert &= fp->port_caps;
+		if (!new_advert)
+			return -EINVAL;
+	}
+	/* pause and FEC are changed through their own ethtool operations */
+	new_advert |= fp->advertising &
+		      (FUN_PORT_CAP_PAUSE_MASK | FUN_PORT_CAP_FEC_MASK);
+
+	return fun_change_advert(fp, new_advert);
+}
+
+/* ethtool .get_pauseparam handler: reports the active flow-control state in
+ * each direction and whether autonegotiation is being advertised.
+ */
+static void fun_get_pauseparam(struct net_device *netdev,
+			       struct ethtool_pauseparam *pause)
+{
+	const struct funeth_priv *priv = netdev_priv(netdev);
+	u8 fc = priv->active_fc;
+
+	pause->autoneg = (priv->advertising & FUN_PORT_CAP_AUTONEG) ? 1 : 0;
+	pause->tx_pause = (fc & FUN_PORT_CAP_TX_PAUSE) ? 1 : 0;
+	pause->rx_pause = (fc & FUN_PORT_CAP_RX_PAUSE) ? 1 : 0;
+}
+
+/* ethtool .set_pauseparam handler.
+ *
+ * The requested pause autoneg setting must agree with the port's current
+ * autoneg state, and each requested direction must be supported by the port.
+ * The pause bits of the advertisement are then replaced with the request.
+ *
+ * Returns 0, -EOPNOTSUPP (virtual port, or forced pause while AN is on),
+ * -EINVAL (other unsupported request) or the error of the port command.
+ */
+static int fun_set_pauseparam(struct net_device *netdev,
+			      struct ethtool_pauseparam *pause)
+{
+	struct funeth_priv *priv = netdev_priv(netdev);
+	const bool an_on = priv->advertising & FUN_PORT_CAP_AUTONEG;
+	u64 advert;
+
+	if (priv->port_caps & FUN_PORT_CAP_VPORT)
+		return -EOPNOTSUPP;
+	/* Forcing PAUSE settings with AN enabled is unsupported. */
+	if (an_on && !pause->autoneg)
+		return -EOPNOTSUPP;
+	if (!an_on && pause->autoneg)
+		return -EINVAL;
+	if ((pause->tx_pause && !(priv->port_caps & FUN_PORT_CAP_TX_PAUSE)) ||
+	    (pause->rx_pause && !(priv->port_caps & FUN_PORT_CAP_RX_PAUSE)))
+		return -EINVAL;
+
+	advert = priv->advertising & ~FUN_PORT_CAP_PAUSE_MASK;
+	if (pause->rx_pause)
+		advert |= FUN_PORT_CAP_RX_PAUSE;
+	if (pause->tx_pause)
+		advert |= FUN_PORT_CAP_TX_PAUSE;
+
+	return fun_change_advert(priv, advert);
+}
+
+/* ethtool .nway_reset handler. Only valid while autonegotiation is being
+ * advertised; issues an ADVERT port command carrying FUN_PORT_CAP_AUTONEG.
+ */
+static int fun_restart_an(struct net_device *netdev)
+{
+	struct funeth_priv *priv = netdev_priv(netdev);
+
+	if (priv->advertising & FUN_PORT_CAP_AUTONEG)
+		return fun_port_write_cmd(priv, FUN_ADMIN_PORT_KEY_ADVERT,
+					  FUN_PORT_CAP_AUTONEG);
+
+	return -EOPNOTSUPP;
+}
+
+/* ethtool .set_phys_id handler: switches the port's LED beacon on or off.
+ * Virtual ports and any state other than ACTIVE/INACTIVE are rejected with
+ * -EOPNOTSUPP.
+ */
+static int fun_set_phys_id(struct net_device *netdev,
+			   enum ethtool_phys_id_state state)
+{
+	struct funeth_priv *priv = netdev_priv(netdev);
+	unsigned int led;
+
+	if (priv->port_caps & FUN_PORT_CAP_VPORT)
+		return -EOPNOTSUPP;
+
+	switch (state) {
+	case ETHTOOL_ID_ACTIVE:
+		led = FUN_PORT_LED_BEACON_ON;
+		break;
+	case ETHTOOL_ID_INACTIVE:
+		led = FUN_PORT_LED_BEACON_OFF;
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return fun_port_write_cmd(priv, FUN_ADMIN_PORT_KEY_LED, led);
+}
+
+/* ethtool .get_drvinfo handler: reports the module name as the driver and the
+ * PCI device name as the bus info.
+ */
+static void fun_get_drvinfo(struct net_device *netdev,
+			    struct ethtool_drvinfo *info)
+{
+	const struct funeth_priv *priv = netdev_priv(netdev);
+	const char *bus = pci_name(priv->pdev);
+
+	strscpy(info->driver, KBUILD_MODNAME, sizeof(info->driver));
+	strscpy(info->bus_info, bus, sizeof(info->bus_info));
+}
+
+/* ethtool .get_msglevel handler: returns the stored message-enable mask. */
+static u32 fun_get_msglevel(struct net_device *netdev)
+{
+	const struct funeth_priv *priv = netdev_priv(netdev);
+	u32 level = priv->msg_enable;
+
+	return level;
+}
+
+/* ethtool .set_msglevel handler: stores @value as the message-enable mask. */
+static void fun_set_msglevel(struct net_device *netdev, u32 value)
+{
+	struct funeth_priv *priv = netdev_priv(netdev);
+
+	priv->msg_enable = value;
+}
+
+/* ethtool .get_regs_len handler. The dump must hold everything up to and
+ * including the 64-bit value fun_get_regs() stores at offset NVME_REG_ACQ.
+ */
+static int fun_get_regs_len(struct net_device *dev)
+{
+ return NVME_REG_ACQ + sizeof(u64);
+}
+
+/* ethtool .get_regs handler. Reads a fixed set of registers from the device
+ * BAR and stores each one in @buf at the same byte offset (NVME_REG_*) it has
+ * in the BAR. Bytes of @buf between those registers are not written here.
+ * The dump format version is 0.
+ */
+static void fun_get_regs(struct net_device *dev, struct ethtool_regs *regs,
+ void *buf)
+{
+ const struct funeth_priv *fp = netdev_priv(dev);
+ void __iomem *bar = fp->fdev->bar;
+
+ regs->version = 0;
+ *(u64 *)(buf + NVME_REG_CAP) = readq(bar + NVME_REG_CAP);
+ *(u32 *)(buf + NVME_REG_VS) = readl(bar + NVME_REG_VS);
+ *(u32 *)(buf + NVME_REG_INTMS) = readl(bar + NVME_REG_INTMS);
+ *(u32 *)(buf + NVME_REG_INTMC) = readl(bar + NVME_REG_INTMC);
+ *(u32 *)(buf + NVME_REG_CC) = readl(bar + NVME_REG_CC);
+ *(u32 *)(buf + NVME_REG_CSTS) = readl(bar + NVME_REG_CSTS);
+ *(u32 *)(buf + NVME_REG_AQA) = readl(bar + NVME_REG_AQA);
+ *(u64 *)(buf + NVME_REG_ASQ) = readq(bar + NVME_REG_ASQ);
+ *(u64 *)(buf + NVME_REG_ACQ) = readq(bar + NVME_REG_ACQ);
+}
+
+/* ethtool .get_coalesce handler: reports the stored Rx/Tx interrupt
+ * coalescing timers and frame counts. Adaptive Rx coalescing is reported as
+ * enabled when no CQ interrupt doorbell value is set. Always returns 0.
+ */
+static int fun_get_coalesce(struct net_device *netdev,
+			    struct ethtool_coalesce *coal,
+			    struct kernel_ethtool_coalesce *kcoal,
+			    struct netlink_ext_ack *ext_ack)
+{
+	const struct funeth_priv *priv = netdev_priv(netdev);
+
+	/* Tx */
+	coal->tx_coalesce_usecs = priv->tx_coal_usec;
+	coal->tx_max_coalesced_frames = priv->tx_coal_count;
+
+	/* Rx */
+	coal->rx_coalesce_usecs = priv->rx_coal_usec;
+	coal->rx_max_coalesced_frames = priv->rx_coal_count;
+	coal->use_adaptive_rx_coalesce = !priv->cq_irq_db;
+
+	return 0;
+}
+
+/* ethtool .set_coalesce handler.
+ *
+ * Validates the requested Rx/Tx timers and frame counts, stores them, and
+ * publishes the resulting interrupt doorbell values to the queues that
+ * currently exist. If there are no queues only the stored values change.
+ * Returns 0 or -EINVAL.
+ */
+static int fun_set_coalesce(struct net_device *netdev,
+ struct ethtool_coalesce *coal,
+ struct kernel_ethtool_coalesce *kcoal,
+ struct netlink_ext_ack *ext_ack)
+{
+ struct funeth_priv *fp = netdev_priv(netdev);
+ struct funeth_rxq **rxqs;
+ unsigned int i, db_val;
+
+ /* Each value must fit its doorbell field and, per direction, timer
+ * and frame count may not both be 0.
+ */
+ if (coal->rx_coalesce_usecs > FUN_DB_INTCOAL_USEC_M ||
+ coal->rx_max_coalesced_frames > FUN_DB_INTCOAL_ENTRIES_M ||
+ (coal->rx_coalesce_usecs | coal->rx_max_coalesced_frames) == 0 ||
+ coal->tx_coalesce_usecs > FUN_DB_INTCOAL_USEC_M ||
+ coal->tx_max_coalesced_frames > FUN_DB_INTCOAL_ENTRIES_M ||
+ (coal->tx_coalesce_usecs | coal->tx_max_coalesced_frames) == 0)
+ return -EINVAL;
+
+ /* a timer is required if there's any coalescing */
+ if ((coal->rx_max_coalesced_frames > 1 && !coal->rx_coalesce_usecs) ||
+ (coal->tx_max_coalesced_frames > 1 && !coal->tx_coalesce_usecs))
+ return -EINVAL;
+
+ fp->rx_coal_usec = coal->rx_coalesce_usecs;
+ fp->rx_coal_count = coal->rx_max_coalesced_frames;
+ fp->tx_coal_usec = coal->tx_coalesce_usecs;
+ fp->tx_coal_count = coal->tx_max_coalesced_frames;
+
+ db_val = FUN_IRQ_CQ_DB(fp->rx_coal_usec, fp->rx_coal_count);
+ WRITE_ONCE(fp->cq_irq_db, db_val);
+
+ /* no Rx queue array: nothing further to update */
+ rxqs = rtnl_dereference(fp->rxqs);
+ if (!rxqs)
+ return 0;
+
+ for (i = 0; i < netdev->real_num_rx_queues; i++)
+ WRITE_ONCE(rxqs[i]->irq_db_val, db_val);
+
+ db_val = FUN_IRQ_SQ_DB(fp->tx_coal_usec, fp->tx_coal_count);
+ for (i = 0; i < netdev->real_num_tx_queues; i++)
+ WRITE_ONCE(fp->txqs[i]->irq_db_val, db_val);
+
+ return 0;
+}
+
+/* ethtool .get_channels handler: the maxima are the queue counts the netdev
+ * was allocated with, the current counts are its real queue counts.
+ */
+static void fun_get_channels(struct net_device *netdev,
+			     struct ethtool_channels *chan)
+{
+	/* limits */
+	chan->max_tx = netdev->num_tx_queues;
+	chan->max_rx = netdev->num_rx_queues;
+
+	/* in use */
+	chan->tx_count = netdev->real_num_tx_queues;
+	chan->rx_count = netdev->real_num_rx_queues;
+}
+
+/* ethtool .set_channels handler. Both counts must be non-zero. A running
+ * device is reconfigured through fun_change_num_queues(); for a stopped
+ * device only the ring counts are recorded. Returns 0 or an error code.
+ */
+static int fun_set_channels(struct net_device *netdev,
+			    struct ethtool_channels *chan)
+{
+	const unsigned int ntx = chan->tx_count;
+	const unsigned int nrx = chan->rx_count;
+
+	if (ntx == 0 || nrx == 0)
+		return -EINVAL;
+
+	/* nothing to do if the counts are unchanged */
+	if (ntx == netdev->real_num_tx_queues &&
+	    nrx == netdev->real_num_rx_queues)
+		return 0;
+
+	if (!netif_running(netdev)) {
+		fun_set_ring_count(netdev, ntx, nrx);
+		return 0;
+	}
+
+	return fun_change_num_queues(netdev, ntx, nrx);
+}
+
+/* ethtool .get_ringparam handler: reports current and maximum Rx/Tx ring
+ * sizes, the Rx buffer length (one page) and the CQE size.
+ */
+static void fun_get_ringparam(struct net_device *netdev,
+			      struct ethtool_ringparam *ring,
+			      struct kernel_ethtool_ringparam *kring,
+			      struct netlink_ext_ack *extack)
+{
+	const struct funeth_priv *priv = netdev_priv(netdev);
+	const unsigned int depth_limit = priv->fdev->q_depth;
+
+	ring->tx_pending = priv->sq_depth;
+	ring->rx_pending = priv->rq_depth;
+
+	/* CQs are sized at twice the RQ depth, so an RQ can be at most half
+	 * as deep as the deepest queue the device allows.
+	 */
+	ring->tx_max_pending = depth_limit;
+	ring->rx_max_pending = depth_limit / 2;
+
+	kring->cqe_size = FUNETH_CQE_SIZE;
+	kring->rx_buf_len = PAGE_SIZE;
+}
+
+/* ethtool .set_ringparam handler.
+ *
+ * Rx and Tx ring sizes must be powers of 2 no smaller than
+ * FUNETH_MIN_QDEPTH; mini and jumbo rings are not supported. On a running
+ * device the queues are replaced first and the new depths are recorded only
+ * if that succeeds. Returns 0, -EINVAL or the queue replacement error.
+ */
+static int fun_set_ringparam(struct net_device *netdev,
+			     struct ethtool_ringparam *ring,
+			     struct kernel_ethtool_ringparam *kring,
+			     struct netlink_ext_ack *extack)
+{
+	struct funeth_priv *priv = netdev_priv(netdev);
+	const u32 rx_depth = ring->rx_pending;
+	const u32 tx_depth = ring->tx_pending;
+
+	if (ring->rx_mini_pending || ring->rx_jumbo_pending)
+		return -EINVAL;
+
+	/* queue depths must be powers-of-2 and not below the minimum */
+	if (!is_power_of_2(rx_depth) || !is_power_of_2(tx_depth) ||
+	    rx_depth < FUNETH_MIN_QDEPTH || tx_depth < FUNETH_MIN_QDEPTH)
+		return -EINVAL;
+
+	if (priv->sq_depth == tx_depth && priv->rq_depth == rx_depth)
+		return 0;
+
+	if (netif_running(netdev)) {
+		struct fun_qset req = {
+			.cq_depth = 2 * rx_depth,
+			.rq_depth = rx_depth,
+			.sq_depth = tx_depth
+		};
+		int err = fun_replace_queues(netdev, &req, extack);
+
+		if (err)
+			return err;
+	}
+
+	priv->sq_depth = tx_depth;
+	priv->rq_depth = rx_depth;
+	priv->cq_depth = 2 * priv->rq_depth;
+	return 0;
+}
+
+/* ethtool .get_sset_count handler. For ETH_SS_STATS the count covers one set
+ * of counters per Tx, XDP Tx and Rx queue plus one totals set for each queue
+ * type, the TLS counters, and the MAC counters if the port provides
+ * statistics. Every other string set has 0 entries.
+ */
+static int fun_get_sset_count(struct net_device *dev, int sset)
+{
+	const struct funeth_priv *priv = netdev_priv(dev);
+	int count;
+
+	if (sset != ETH_SS_STATS)
+		return 0;
+
+	count = (dev->real_num_tx_queues + 1) * ARRAY_SIZE(txq_stat_names) +
+		(dev->real_num_rx_queues + 1) * ARRAY_SIZE(rxq_stat_names) +
+		(priv->num_xdpqs + 1) * ARRAY_SIZE(xdpq_stat_names) +
+		ARRAY_SIZE(tls_stat_names);
+	if (priv->port_caps & FUN_PORT_CAP_STATS)
+		count += ARRAY_SIZE(mac_tx_stat_names) +
+			 ARRAY_SIZE(mac_rx_stat_names);
+
+	return count;
+}
+
+/* ethtool .get_strings handler.
+ *
+ * For ETH_SS_STATS writes the statistic names into @data in the order
+ * fun_get_ethtool_stats() emits the values: MAC Tx and Rx counters (if the
+ * port provides statistics), then for Tx, XDP Tx and Rx queues the per-queue
+ * names suffixed with "[queue]" followed by the unsuffixed totals names, and
+ * finally the TLS counters. Other string sets are left untouched.
+ *
+ * Table entries are always emitted through a literal "%s" format rather than
+ * being used as the format string themselves, so a '%' in a name can never
+ * be interpreted as a conversion.
+ */
+static void fun_get_strings(struct net_device *netdev, u32 sset, u8 *data)
+{
+	const struct funeth_priv *fp = netdev_priv(netdev);
+	unsigned int i, j;
+	u8 *p = data;
+
+	switch (sset) {
+	case ETH_SS_STATS:
+		/* the MAC tables are already ETH_GSTRING_LEN-sized entries */
+		if (fp->port_caps & FUN_PORT_CAP_STATS) {
+			memcpy(p, mac_tx_stat_names, sizeof(mac_tx_stat_names));
+			p += sizeof(mac_tx_stat_names);
+			memcpy(p, mac_rx_stat_names, sizeof(mac_rx_stat_names));
+			p += sizeof(mac_rx_stat_names);
+		}
+
+		for (i = 0; i < netdev->real_num_tx_queues; i++) {
+			for (j = 0; j < ARRAY_SIZE(txq_stat_names); j++)
+				ethtool_sprintf(&p, "%s[%u]", txq_stat_names[j],
+						i);
+		}
+		for (j = 0; j < ARRAY_SIZE(txq_stat_names); j++)
+			ethtool_sprintf(&p, "%s", txq_stat_names[j]);
+
+		for (i = 0; i < fp->num_xdpqs; i++) {
+			for (j = 0; j < ARRAY_SIZE(xdpq_stat_names); j++)
+				ethtool_sprintf(&p, "%s[%u]",
+						xdpq_stat_names[j], i);
+		}
+		for (j = 0; j < ARRAY_SIZE(xdpq_stat_names); j++)
+			ethtool_sprintf(&p, "%s", xdpq_stat_names[j]);
+
+		for (i = 0; i < netdev->real_num_rx_queues; i++) {
+			for (j = 0; j < ARRAY_SIZE(rxq_stat_names); j++)
+				ethtool_sprintf(&p, "%s[%u]", rxq_stat_names[j],
+						i);
+		}
+		for (j = 0; j < ARRAY_SIZE(rxq_stat_names); j++)
+			ethtool_sprintf(&p, "%s", rxq_stat_names[j]);
+
+		for (j = 0; j < ARRAY_SIZE(tls_stat_names); j++)
+			ethtool_sprintf(&p, "%s", tls_stat_names[j]);
+		break;
+	default:
+		break;
+	}
+}
+
+/* Copy the MAC counters reported through ethtool -S from fp->stats to @data,
+ * converting each from big-endian: first the Tx counters, then the Rx ones,
+ * in the order of mac_tx_stat_names[] and mac_rx_stat_names[].
+ * Returns the position in @data following the last value written.
+ */
+static u64 *get_mac_stats(const struct funeth_priv *fp, u64 *data)
+{
+/* Tx counters are indexed after all PORT_MAC_RX_STATS_MAX Rx entries */
+#define TX_STAT(s) \
+ *data++ = be64_to_cpu(fp->stats[PORT_MAC_RX_STATS_MAX + PORT_MAC_TX_##s])
+
+ TX_STAT(etherStatsOctets);
+ TX_STAT(etherStatsPkts);
+ TX_STAT(VLANTransmittedOK);
+ TX_STAT(ifOutUcastPkts);
+ TX_STAT(ifOutMulticastPkts);
+ TX_STAT(ifOutBroadcastPkts);
+ TX_STAT(ifOutErrors);
+ TX_STAT(CBFCPAUSEFramesTransmitted_0);
+ TX_STAT(CBFCPAUSEFramesTransmitted_1);
+ TX_STAT(CBFCPAUSEFramesTransmitted_2);
+ TX_STAT(CBFCPAUSEFramesTransmitted_3);
+ TX_STAT(CBFCPAUSEFramesTransmitted_4);
+ TX_STAT(CBFCPAUSEFramesTransmitted_5);
+ TX_STAT(CBFCPAUSEFramesTransmitted_6);
+ TX_STAT(CBFCPAUSEFramesTransmitted_7);
+ TX_STAT(CBFCPAUSEFramesTransmitted_8);
+ TX_STAT(CBFCPAUSEFramesTransmitted_9);
+ TX_STAT(CBFCPAUSEFramesTransmitted_10);
+ TX_STAT(CBFCPAUSEFramesTransmitted_11);
+ TX_STAT(CBFCPAUSEFramesTransmitted_12);
+ TX_STAT(CBFCPAUSEFramesTransmitted_13);
+ TX_STAT(CBFCPAUSEFramesTransmitted_14);
+ TX_STAT(CBFCPAUSEFramesTransmitted_15);
+
+#define RX_STAT(s) *data++ = be64_to_cpu(fp->stats[PORT_MAC_RX_##s])
+
+ RX_STAT(etherStatsOctets);
+ RX_STAT(etherStatsPkts);
+ RX_STAT(VLANReceivedOK);
+ RX_STAT(ifInUcastPkts);
+ RX_STAT(ifInMulticastPkts);
+ RX_STAT(ifInBroadcastPkts);
+ RX_STAT(etherStatsDropEvents);
+ RX_STAT(ifInErrors);
+ RX_STAT(aAlignmentErrors);
+ RX_STAT(CBFCPAUSEFramesReceived_0);
+ RX_STAT(CBFCPAUSEFramesReceived_1);
+ RX_STAT(CBFCPAUSEFramesReceived_2);
+ RX_STAT(CBFCPAUSEFramesReceived_3);
+ RX_STAT(CBFCPAUSEFramesReceived_4);
+ RX_STAT(CBFCPAUSEFramesReceived_5);
+ RX_STAT(CBFCPAUSEFramesReceived_6);
+ RX_STAT(CBFCPAUSEFramesReceived_7);
+ RX_STAT(CBFCPAUSEFramesReceived_8);
+ RX_STAT(CBFCPAUSEFramesReceived_9);
+ RX_STAT(CBFCPAUSEFramesReceived_10);
+ RX_STAT(CBFCPAUSEFramesReceived_11);
+ RX_STAT(CBFCPAUSEFramesReceived_12);
+ RX_STAT(CBFCPAUSEFramesReceived_13);
+ RX_STAT(CBFCPAUSEFramesReceived_14);
+ RX_STAT(CBFCPAUSEFramesReceived_15);
+
+ return data;
+
+#undef TX_STAT
+#undef RX_STAT
+}
+
+/* ethtool .get_ethtool_stats handler.
+ *
+ * Writes the values for the names produced by fun_get_strings(), in the same
+ * order: MAC counters (if the port provides statistics), then for Tx, XDP Tx
+ * and Rx queues one set of counters per queue followed by a set of totals,
+ * and finally the TLS counters. If there is no Rx queue array nothing beyond
+ * the MAC counters is written.
+ *
+ * The totals are accumulated with "+=" and never initialised here, so @data
+ * is presumably zeroed by the caller -- TODO confirm.
+ */
+static void fun_get_ethtool_stats(struct net_device *netdev,
+ struct ethtool_stats *stats, u64 *data)
+{
+ const struct funeth_priv *fp = netdev_priv(netdev);
+ struct funeth_txq_stats txs;
+ struct funeth_rxq_stats rxs;
+ struct funeth_txq **xdpqs;
+ struct funeth_rxq **rxqs;
+ unsigned int i, start;
+ u64 *totals, *tot;
+
+ if (fp->port_caps & FUN_PORT_CAP_STATS)
+ data = get_mac_stats(fp, data);
+
+ rxqs = rtnl_dereference(fp->rxqs);
+ if (!rxqs)
+ return;
+
+/* Store one per-queue value and add it to the matching totals slot,
+ * advancing both the per-queue (data) and totals (tot) cursors.
+ */
+#define ADD_STAT(cnt) do { \
+ *data = (cnt); *tot++ += *data++; \
+} while (0)
+
+ /* Tx queues */
+ /* the totals row follows the rows of all the queues */
+ totals = data + netdev->real_num_tx_queues * ARRAY_SIZE(txq_stat_names);
+
+ for (i = 0; i < netdev->real_num_tx_queues; i++) {
+ tot = totals;
+
+ FUN_QSTAT_READ(fp->txqs[i], start, txs);
+
+ ADD_STAT(txs.tx_pkts);
+ ADD_STAT(txs.tx_bytes);
+ ADD_STAT(txs.tx_cso);
+ ADD_STAT(txs.tx_tso);
+ ADD_STAT(txs.tx_encap_tso);
+ ADD_STAT(txs.tx_uso);
+ ADD_STAT(txs.tx_more);
+ ADD_STAT(txs.tx_nstops);
+ ADD_STAT(txs.tx_nrestarts);
+ ADD_STAT(txs.tx_map_err);
+ ADD_STAT(txs.tx_tls_pkts);
+ ADD_STAT(txs.tx_tls_bytes);
+ ADD_STAT(txs.tx_tls_fallback);
+ ADD_STAT(txs.tx_tls_drops);
+ }
+ /* data now points at the totals row, skip over it */
+ data += ARRAY_SIZE(txq_stat_names);
+
+ /* XDP Tx queues */
+ xdpqs = rtnl_dereference(fp->xdpqs);
+ totals = data + fp->num_xdpqs * ARRAY_SIZE(xdpq_stat_names);
+
+ for (i = 0; i < fp->num_xdpqs; i++) {
+ tot = totals;
+
+ FUN_QSTAT_READ(xdpqs[i], start, txs);
+
+ ADD_STAT(txs.tx_pkts);
+ ADD_STAT(txs.tx_bytes);
+ ADD_STAT(txs.tx_xdp_full);
+ ADD_STAT(txs.tx_map_err);
+ }
+ data += ARRAY_SIZE(xdpq_stat_names);
+
+ /* Rx queues */
+ totals = data + netdev->real_num_rx_queues * ARRAY_SIZE(rxq_stat_names);
+
+ for (i = 0; i < netdev->real_num_rx_queues; i++) {
+ tot = totals;
+
+ FUN_QSTAT_READ(rxqs[i], start, rxs);
+
+ ADD_STAT(rxs.rx_pkts);
+ ADD_STAT(rxs.rx_bytes);
+ ADD_STAT(rxs.rx_cso);
+ ADD_STAT(rxs.gro_pkts);
+ ADD_STAT(rxs.gro_merged);
+ ADD_STAT(rxs.xdp_tx);
+ ADD_STAT(rxs.xdp_redir);
+ ADD_STAT(rxs.xdp_drops);
+ ADD_STAT(rxs.rx_bufs);
+ ADD_STAT(rxs.rx_page_alloc);
+ /* reported under the single name "rx_drops" */
+ ADD_STAT(rxs.rx_mem_drops + rxs.xdp_err);
+ ADD_STAT(rxs.rx_budget);
+ ADD_STAT(rxs.rx_map_err);
+ }
+ data += ARRAY_SIZE(rxq_stat_names);
+#undef ADD_STAT
+
+ /* device-wide TLS counters, in tls_stat_names[] order */
+ *data++ = atomic64_read(&fp->tx_tls_add);
+ *data++ = atomic64_read(&fp->tx_tls_del);
+ *data++ = atomic64_read(&fp->tx_tls_resync);
+}
+
+/* Accessors for the big-endian port statistics array fp->stats, used by the
+ * standard ethtool statistics handlers below. The array holds the Rx MAC
+ * counters first, followed by the Tx MAC counters and then the FEC counters.
+ */
+#define RX_STAT(fp, s) be64_to_cpu((fp)->stats[PORT_MAC_RX_##s])
+#define TX_STAT(fp, s) \
+ be64_to_cpu((fp)->stats[PORT_MAC_RX_STATS_MAX + PORT_MAC_TX_##s])
+#define FEC_STAT(fp, s) \
+ be64_to_cpu((fp)->stats[PORT_MAC_RX_STATS_MAX + \
+ PORT_MAC_TX_STATS_MAX + PORT_MAC_FEC_##s])
+
+/* ethtool .get_pause_stats handler. @stats is left untouched if the port
+ * does not provide statistics.
+ */
+static void fun_get_pause_stats(struct net_device *netdev,
+				struct ethtool_pause_stats *stats)
+{
+	const struct funeth_priv *priv = netdev_priv(netdev);
+
+	if (priv->port_caps & FUN_PORT_CAP_STATS) {
+		stats->rx_pause_frames =
+			RX_STAT(priv, aPAUSEMACCtrlFramesReceived);
+		stats->tx_pause_frames =
+			TX_STAT(priv, aPAUSEMACCtrlFramesTransmitted);
+	}
+}
+
+/* ethtool .get_eth_mac_stats handler. @stats is left untouched if the port
+ * does not provide statistics.
+ */
+static void fun_get_802_3_stats(struct net_device *netdev,
+				struct ethtool_eth_mac_stats *stats)
+{
+	const struct funeth_priv *priv = netdev_priv(netdev);
+
+	if (!(priv->port_caps & FUN_PORT_CAP_STATS))
+		return;
+
+	/* Tx */
+	stats->FramesTransmittedOK = TX_STAT(priv, aFramesTransmittedOK);
+	stats->OctetsTransmittedOK = TX_STAT(priv, OctetsTransmittedOK);
+
+	/* Rx */
+	stats->FramesReceivedOK = RX_STAT(priv, aFramesReceivedOK);
+	stats->OctetsReceivedOK = RX_STAT(priv, OctetsReceivedOK);
+	stats->FrameCheckSequenceErrors =
+		RX_STAT(priv, aFrameCheckSequenceErrors);
+	stats->InRangeLengthErrors = RX_STAT(priv, aInRangeLengthErrors);
+	stats->FrameTooLongErrors = RX_STAT(priv, aFrameTooLongErrors);
+}
+
+/* ethtool .get_eth_ctrl_stats handler. @stats is left untouched if the port
+ * does not provide statistics.
+ */
+static void fun_get_802_3_ctrl_stats(struct net_device *netdev,
+				     struct ethtool_eth_ctrl_stats *stats)
+{
+	const struct funeth_priv *priv = netdev_priv(netdev);
+
+	if (priv->port_caps & FUN_PORT_CAP_STATS) {
+		stats->MACControlFramesReceived =
+			RX_STAT(priv, MACControlFramesReceived);
+		stats->MACControlFramesTransmitted =
+			TX_STAT(priv, MACControlFramesTransmitted);
+	}
+}
+
+/* ethtool .get_rmon_stats handler. Reports the RMON error counters and the
+ * Rx and Tx packet-size histograms, and points *@ranges at the table
+ * describing the histogram buckets. Neither @stats nor *@ranges is touched if
+ * the port does not provide statistics.
+ */
+static void fun_get_rmon_stats(struct net_device *netdev,
+			       struct ethtool_rmon_stats *stats,
+			       const struct ethtool_rmon_hist_range **ranges)
+{
+	/* bucket i of both histograms counts packets in rmon_ranges[i] */
+	static const struct ethtool_rmon_hist_range rmon_ranges[] = {
+		{   64,    64 },
+		{   65,   127 },
+		{  128,   255 },
+		{  256,   511 },
+		{  512,  1023 },
+		{ 1024,  1518 },
+		{ 1519, 32767 },
+		{}
+	};
+	const struct funeth_priv *priv = netdev_priv(netdev);
+
+	if (!(priv->port_caps & FUN_PORT_CAP_STATS))
+		return;
+
+	*ranges = rmon_ranges;
+
+	stats->undersize_pkts = RX_STAT(priv, etherStatsUndersizePkts);
+	stats->oversize_pkts = RX_STAT(priv, etherStatsOversizePkts);
+	stats->fragments = RX_STAT(priv, etherStatsFragments);
+	stats->jabbers = RX_STAT(priv, etherStatsJabbers);
+
+	/* Rx histogram */
+	stats->hist[0] = RX_STAT(priv, etherStatsPkts64Octets);
+	stats->hist[1] = RX_STAT(priv, etherStatsPkts65to127Octets);
+	stats->hist[2] = RX_STAT(priv, etherStatsPkts128to255Octets);
+	stats->hist[3] = RX_STAT(priv, etherStatsPkts256to511Octets);
+	stats->hist[4] = RX_STAT(priv, etherStatsPkts512to1023Octets);
+	stats->hist[5] = RX_STAT(priv, etherStatsPkts1024to1518Octets);
+	stats->hist[6] = RX_STAT(priv, etherStatsPkts1519toMaxOctets);
+
+	/* Tx histogram */
+	stats->hist_tx[0] = TX_STAT(priv, etherStatsPkts64Octets);
+	stats->hist_tx[1] = TX_STAT(priv, etherStatsPkts65to127Octets);
+	stats->hist_tx[2] = TX_STAT(priv, etherStatsPkts128to255Octets);
+	stats->hist_tx[3] = TX_STAT(priv, etherStatsPkts256to511Octets);
+	stats->hist_tx[4] = TX_STAT(priv, etherStatsPkts512to1023Octets);
+	stats->hist_tx[5] = TX_STAT(priv, etherStatsPkts1024to1518Octets);
+	stats->hist_tx[6] = TX_STAT(priv, etherStatsPkts1519toMaxOctets);
+}
+
+/* ethtool .get_fec_stats handler: reports the port-wide totals of corrected
+ * and uncorrectable FEC blocks. @stats is left untouched if the port does not
+ * provide statistics.
+ */
+static void fun_get_fec_stats(struct net_device *netdev,
+			      struct ethtool_fec_stats *stats)
+{
+	const struct funeth_priv *priv = netdev_priv(netdev);
+
+	if (priv->port_caps & FUN_PORT_CAP_STATS) {
+		stats->corrected_blocks.total = FEC_STAT(priv, Correctable);
+		stats->uncorrectable_blocks.total =
+			FEC_STAT(priv, Uncorrectable);
+	}
+}
+
+#undef RX_STAT
+#undef TX_STAT
+#undef FEC_STAT
+
+/* ethtool .get_rxnfc handler. Only ETHTOOL_GRXRINGS is supported, which
+ * reports the number of Rx queues in use; anything else gets -EOPNOTSUPP.
+ */
+static int fun_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd,
+			 u32 *rule_locs)
+{
+	if (cmd->cmd != ETHTOOL_GRXRINGS)
+		return -EOPNOTSUPP;
+
+	cmd->data = netdev->real_num_rx_queues;
+	return 0;
+}
+
+/* ethtool .set_rxnfc handler. Does nothing and reports success for every
+ * command.
+ * NOTE(review): requests are silently accepted rather than rejected with
+ * -EOPNOTSUPP -- confirm this is intentional.
+ */
+static int fun_set_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *info)
+{
+ return 0;
+}
+
+/* ethtool .get_rxfh_indir_size handler: number of entries in the RSS
+ * indirection table.
+ */
+static u32 fun_get_rxfh_indir_size(struct net_device *netdev)
+{
+	const struct funeth_priv *priv = netdev_priv(netdev);
+	u32 nentries = priv->indir_table_nentries;
+
+	return nentries;
+}
+
+/* ethtool .get_rxfh_key_size handler: size in bytes of the RSS hash key. */
+static u32 fun_get_rxfh_key_size(struct net_device *netdev)
+{
+	const struct funeth_priv *priv = netdev_priv(netdev);
+	u32 key_len = sizeof(priv->rss_key);
+
+	return key_len;
+}
+
+/* ethtool .get_rxfh handler. Copies out whichever of the indirection table,
+ * hash key and hash function the caller asked for (non-NULL pointers).
+ * Returns -EOPNOTSUPP if the device has no RSS configuration, else 0.
+ */
+static int fun_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
+			u8 *hfunc)
+{
+	const struct funeth_priv *priv = netdev_priv(netdev);
+
+	if (!priv->rss_cfg)
+		return -EOPNOTSUPP;
+
+	if (hfunc) {
+		if (priv->hash_algo == FUN_ETH_RSS_ALG_TOEPLITZ)
+			*hfunc = ETH_RSS_HASH_TOP;
+		else
+			*hfunc = ETH_RSS_HASH_CRC32;
+	}
+
+	if (key)
+		memcpy(key, priv->rss_key, sizeof(priv->rss_key));
+
+	if (indir)
+		memcpy(indir, priv->indir_table,
+		       priv->indir_table_nentries * sizeof(u32));
+
+	return 0;
+}
+
+/* ethtool .set_rxfh handler. A NULL @indir or @key and a @hfunc of
+ * ETH_RSS_HASH_NO_CHANGE keep the respective current setting.
+ *
+ * Returns -EOPNOTSUPP if the device has no RSS configuration, -EINVAL for an
+ * unsupported hash function, the error of fun_config_rss(), or 0.
+ */
+static int fun_set_rxfh(struct net_device *netdev, const u32 *indir,
+			const u8 *key, const u8 hfunc)
+{
+	struct funeth_priv *priv = netdev_priv(netdev);
+	enum fun_eth_hash_alg new_algo;
+
+	if (!priv->rss_cfg)
+		return -EOPNOTSUPP;
+
+	switch (hfunc) {
+	case ETH_RSS_HASH_NO_CHANGE:
+		new_algo = priv->hash_algo;
+		break;
+	case ETH_RSS_HASH_CRC32:
+		new_algo = FUN_ETH_RSS_ALG_CRC32;
+		break;
+	case ETH_RSS_HASH_TOP:
+		new_algo = FUN_ETH_RSS_ALG_TOEPLITZ;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* A running port is reconfigured right away and the new settings are
+	 * kept only if that works. For a port that is down the settings are
+	 * just recorded, to be applied the next time it is brought up.
+	 */
+	if (netif_running(netdev)) {
+		int err = fun_config_rss(netdev, new_algo,
+					 key ? key : priv->rss_key,
+					 indir ? indir : priv->indir_table,
+					 FUN_ADMIN_SUBOP_MODIFY);
+
+		if (err)
+			return err;
+	}
+
+	priv->hash_algo = new_algo;
+	if (indir)
+		memcpy(priv->indir_table, indir,
+		       priv->indir_table_nentries * sizeof(u32));
+	if (key)
+		memcpy(priv->rss_key, key, sizeof(priv->rss_key));
+	return 0;
+}
+
+/* ethtool .get_ts_info handler. Advertises software Tx/Rx timestamping and
+ * hardware Rx timestamping (filters NONE and ALL), no hardware Tx
+ * timestamping, and no PTP hardware clock (phc_index -1). Always returns 0.
+ */
+static int fun_get_ts_info(struct net_device *netdev,
+			   struct ethtool_ts_info *info)
+{
+	info->phc_index = -1;
+	info->so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE |
+				SOF_TIMESTAMPING_RX_HARDWARE |
+				SOF_TIMESTAMPING_TX_SOFTWARE |
+				SOF_TIMESTAMPING_SOFTWARE |
+				SOF_TIMESTAMPING_RAW_HARDWARE;
+	info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) | BIT(HWTSTAMP_FILTER_ALL);
+	info->tx_types = BIT(HWTSTAMP_TX_OFF);
+	return 0;
+}
+
+/* Convert a FUN_PORT_FEC_* value to the corresponding ETHTOOL_FEC_* bits.
+ * FUN_PORT_FEC_NA is matched as an exact value, the other modes as flags.
+ */
+static unsigned int to_ethtool_fec(unsigned int fun_fec)
+{
+	unsigned int bits;
+
+	bits = (fun_fec == FUN_PORT_FEC_NA) ? ETHTOOL_FEC_NONE : 0;
+	bits |= (fun_fec & FUN_PORT_FEC_OFF) ? ETHTOOL_FEC_OFF : 0;
+	bits |= (fun_fec & FUN_PORT_FEC_RS) ? ETHTOOL_FEC_RS : 0;
+	bits |= (fun_fec & FUN_PORT_FEC_FC) ? ETHTOOL_FEC_BASER : 0;
+	bits |= (fun_fec & FUN_PORT_FEC_AUTO) ? ETHTOOL_FEC_AUTO : 0;
+
+	return bits;
+}
+
+/* ethtool .get_fecparam handler. Reads the port's FEC word: its low byte
+ * yields the active FEC mode and the bits above it the configured modes.
+ * Returns 0 or the error of the port read; @fec is not written on error.
+ */
+static int fun_get_fecparam(struct net_device *netdev,
+			    struct ethtool_fecparam *fec)
+{
+	struct funeth_priv *priv = netdev_priv(netdev);
+	u64 raw;
+	int err;
+
+	err = fun_port_read_cmd(priv, FUN_ADMIN_PORT_KEY_FEC, &raw);
+	if (!err) {
+		fec->active_fec = to_ethtool_fec(raw & 0xff);
+		fec->fec = to_ethtool_fec(raw >> 8);
+	}
+	return err;
+}
+
+/* ethtool .set_fecparam handler. Exactly one of AUTO, OFF, BASER or RS must
+ * be requested, and for the last three the port must have the matching FEC
+ * capability. Returns -EINVAL for anything else, otherwise the result of the
+ * port command.
+ */
+static int fun_set_fecparam(struct net_device *netdev,
+			    struct ethtool_fecparam *fec)
+{
+	struct funeth_priv *priv = netdev_priv(netdev);
+	u64 mode;
+
+	if (fec->fec == ETHTOOL_FEC_AUTO) {
+		mode = FUN_PORT_FEC_AUTO;
+	} else if (fec->fec == ETHTOOL_FEC_OFF) {
+		if (!(priv->port_caps & FUN_PORT_CAP_FEC_NONE))
+			return -EINVAL;
+		mode = FUN_PORT_FEC_OFF;
+	} else if (fec->fec == ETHTOOL_FEC_BASER) {
+		if (!(priv->port_caps & FUN_PORT_CAP_FEC_FC))
+			return -EINVAL;
+		mode = FUN_PORT_FEC_FC;
+	} else if (fec->fec == ETHTOOL_FEC_RS) {
+		if (!(priv->port_caps & FUN_PORT_CAP_FEC_RS))
+			return -EINVAL;
+		mode = FUN_PORT_FEC_RS;
+	} else {
+		return -EINVAL;
+	}
+
+	return fun_port_write_cmd(priv, FUN_ADMIN_PORT_KEY_FEC, mode);
+}
+
+/* ethtool .get_module_eeprom_by_page handler.
+ *
+ * Issues a synchronous transceiver-read admin command for the bank, page,
+ * offset, length and I2C address given in @req and copies the returned bytes
+ * to req->data. Returns the number of bytes read (req->length),
+ * -EOPNOTSUPP for virtual ports, or the error of the admin command.
+ */
+static int fun_get_port_module_page(struct net_device *netdev,
+ const struct ethtool_module_eeprom *req,
+ struct netlink_ext_ack *extack)
+{
+ /* the request and its response share the same storage */
+ union {
+ struct fun_admin_port_req req;
+ struct fun_admin_port_xcvr_read_rsp rsp;
+ } cmd;
+ struct funeth_priv *fp = netdev_priv(netdev);
+ int rc;
+
+ if (fp->port_caps & FUN_PORT_CAP_VPORT) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Specified port is virtual, only physical ports have modules");
+ return -EOPNOTSUPP;
+ }
+
+ cmd.req.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_PORT,
+ sizeof(cmd.req));
+ cmd.req.u.xcvr_read =
+ FUN_ADMIN_PORT_XCVR_READ_REQ_INIT(0, netdev->dev_port,
+ req->bank, req->page,
+ req->offset, req->length,
+ req->i2c_address);
+ rc = fun_submit_admin_sync_cmd(fp->fdev, &cmd.req.common, &cmd.rsp,
+ sizeof(cmd.rsp), 0);
+ if (rc)
+ return rc;
+
+ /* NOTE(review): req->length is not checked here against the size of
+ * cmd.rsp.data; presumably the ethtool core bounds it -- confirm.
+ */
+ memcpy(req->data, cmd.rsp.data, req->length);
+ return req->length;
+}
+
+/* ethtool operations of funeth netdevs. Only the usecs and max-frames
+ * coalescing parameters are declared as supported.
+ */
+static const struct ethtool_ops fun_ethtool_ops = {
+ .supported_coalesce_params = ETHTOOL_COALESCE_USECS |
+ ETHTOOL_COALESCE_MAX_FRAMES,
+ .get_link_ksettings = fun_get_link_ksettings,
+ .set_link_ksettings = fun_set_link_ksettings,
+ .set_phys_id = fun_set_phys_id,
+ .get_drvinfo = fun_get_drvinfo,
+ .get_msglevel = fun_get_msglevel,
+ .set_msglevel = fun_set_msglevel,
+ .get_regs_len = fun_get_regs_len,
+ .get_regs = fun_get_regs,
+ .get_link = ethtool_op_get_link,
+ .get_coalesce = fun_get_coalesce,
+ .set_coalesce = fun_set_coalesce,
+ .get_ts_info = fun_get_ts_info,
+ .get_ringparam = fun_get_ringparam,
+ .set_ringparam = fun_set_ringparam,
+ .get_sset_count = fun_get_sset_count,
+ .get_strings = fun_get_strings,
+ .get_ethtool_stats = fun_get_ethtool_stats,
+ .get_rxnfc = fun_get_rxnfc,
+ .set_rxnfc = fun_set_rxnfc,
+ .get_rxfh_indir_size = fun_get_rxfh_indir_size,
+ .get_rxfh_key_size = fun_get_rxfh_key_size,
+ .get_rxfh = fun_get_rxfh,
+ .set_rxfh = fun_set_rxfh,
+ .get_channels = fun_get_channels,
+ .set_channels = fun_set_channels,
+ .get_fecparam = fun_get_fecparam,
+ .set_fecparam = fun_set_fecparam,
+ .get_pauseparam = fun_get_pauseparam,
+ .set_pauseparam = fun_set_pauseparam,
+ .nway_reset = fun_restart_an,
+ .get_pause_stats = fun_get_pause_stats,
+ .get_fec_stats = fun_get_fec_stats,
+ .get_eth_mac_stats = fun_get_802_3_stats,
+ .get_eth_ctrl_stats = fun_get_802_3_ctrl_stats,
+ .get_rmon_stats = fun_get_rmon_stats,
+ .get_module_eeprom_by_page = fun_get_port_module_page,
+};
+
+/* Install the driver's ethtool operations on @netdev. */
+void fun_set_ethtool_ops(struct net_device *netdev)
+{
+ netdev->ethtool_ops = &fun_ethtool_ops;
+}
diff --git a/drivers/net/ethernet/fungible/funeth/funeth_ktls.c b/drivers/net/ethernet/fungible/funeth/funeth_ktls.c
new file mode 100644
index 000000000..f871def70
--- /dev/null
+++ b/drivers/net/ethernet/fungible/funeth/funeth_ktls.c
@@ -0,0 +1,155 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause)
+
+#include "funeth.h"
+#include "funeth_ktls.h"
+
+/* Submit a synchronous KTLS CREATE admin command carrying @id.
+ * Returns the result of the admin command submission.
+ */
+static int fun_admin_ktls_create(struct funeth_priv *fp, unsigned int id)
+{
+	struct fun_admin_ktls_create_req create_req = {
+		.id = cpu_to_be32(id),
+		.subop = FUN_ADMIN_SUBOP_CREATE,
+		.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_KTLS,
+						     sizeof(create_req)),
+	};
+	int rc;
+
+	/* no response payload is requested */
+	rc = fun_submit_admin_sync_cmd(fp->fdev, &create_req.common, NULL, 0,
+				       0);
+	return rc;
+}
+
+/* tls_dev_add handler: hand a Tx TLS connection's crypto state to the device.
+ *
+ * Only the Tx direction, TLS 1.2, and AES-GCM-128 are accepted; anything else
+ * yields -EOPNOTSUPP. On success the tlsid from the device response and the
+ * starting TCP sequence number are stored in the socket's driver context.
+ */
+static int fun_ktls_add(struct net_device *netdev, struct sock *sk,
+			enum tls_offload_ctx_dir direction,
+			struct tls_crypto_info *crypto_info,
+			u32 start_offload_tcp_sn)
+{
+	struct funeth_priv *fp = netdev_priv(netdev);
+	struct fun_admin_ktls_modify_req req = {
+		.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_KTLS,
+						     sizeof(req)),
+		.subop = FUN_ADMIN_SUBOP_MODIFY,
+		.id = cpu_to_be32(fp->ktls_id),
+		.tcp_seq = cpu_to_be32(start_offload_tcp_sn),
+	};
+	struct tls12_crypto_info_aes_gcm_128 *gcm;
+	struct fun_admin_ktls_modify_rsp rsp;
+	struct fun_ktls_tx_ctx *ctx;
+	int err;
+
+	if (direction != TLS_OFFLOAD_CTX_DIR_TX)
+		return -EOPNOTSUPP;
+	if (crypto_info->version != TLS_1_2_VERSION)
+		return -EOPNOTSUPP;
+	if (crypto_info->cipher_type != TLS_CIPHER_AES_GCM_128)
+		return -EOPNOTSUPP;
+
+	gcm = (void *)crypto_info;
+	req.version = FUN_KTLS_TLSV2;
+	req.cipher = FUN_KTLS_CIPHER_AES_GCM_128;
+	memcpy(req.key, gcm->key, sizeof(gcm->key));
+	memcpy(req.iv, gcm->iv, sizeof(gcm->iv));
+	memcpy(req.salt, gcm->salt, sizeof(gcm->salt));
+	memcpy(req.record_seq, gcm->rec_seq, sizeof(gcm->rec_seq));
+
+	err = fun_submit_admin_sync_cmd(fp->fdev, &req.common, &rsp,
+					sizeof(rsp), 0);
+	/* the request holds key material; wipe it whatever the outcome */
+	memzero_explicit(&req, sizeof(req));
+	if (err)
+		return err;
+
+	ctx = tls_driver_ctx(sk, direction);
+	ctx->tlsid = rsp.tlsid;
+	ctx->next_seq = start_offload_tcp_sn;
+	atomic64_inc(&fp->tx_tls_add);
+	return 0;
+}
+
+/* tls_dev_del handler: ask the device to remove a Tx TLS connection.
+ *
+ * Non-Tx directions are ignored. The request is a MODIFY with the
+ * FUN_KTLS_MODIFY_REMOVE flag whose length stops at the tcp_seq field. The
+ * command result is not checked (the callback returns void), and the delete
+ * counter is incremented regardless.
+ */
+static void fun_ktls_del(struct net_device *netdev,
+			 struct tls_context *tls_ctx,
+			 enum tls_offload_ctx_dir direction)
+{
+	struct funeth_priv *fp = netdev_priv(netdev);
+	/* Zero the request so that any bytes not explicitly set below (e.g.
+	 * reserved fields or padding, if the structure has them) are not
+	 * passed to the device as uninitialized stack contents.
+	 */
+	struct fun_admin_ktls_modify_req req = {};
+	struct fun_ktls_tx_ctx *tx_ctx;
+
+	if (direction != TLS_OFFLOAD_CTX_DIR_TX)
+		return;
+
+	tx_ctx = __tls_driver_ctx(tls_ctx, direction);
+
+	req.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_KTLS,
+			offsetof(struct fun_admin_ktls_modify_req, tcp_seq));
+	req.subop = FUN_ADMIN_SUBOP_MODIFY;
+	req.flags = cpu_to_be16(FUN_KTLS_MODIFY_REMOVE);
+	req.id = cpu_to_be32(fp->ktls_id);
+	req.tlsid = tx_ctx->tlsid;
+
+	fun_submit_admin_sync_cmd(fp->fdev, &req.common, NULL, 0, 0);
+	atomic64_inc(&fp->tx_tls_del);
+}
+
+/* tls_dev_resync handler: give the device a new TCP sequence number and TLS
+ * record sequence number for a Tx connection.
+ *
+ * Returns -EOPNOTSUPP for non-Tx directions, otherwise the result of the
+ * admin command. The request length stops at the key field, so no key
+ * material is sent. The resync counter is incremented before the command is
+ * submitted; the cached next_seq is updated only when the command succeeds.
+ */
+static int fun_ktls_resync(struct net_device *netdev, struct sock *sk, u32 seq,
+			   u8 *rcd_sn, enum tls_offload_ctx_dir direction)
+{
+	struct funeth_priv *fp = netdev_priv(netdev);
+	/* Zero the request so that any bytes not explicitly set below (e.g.
+	 * reserved fields or padding, if the structure has them) are not
+	 * passed to the device as uninitialized stack contents. This also
+	 * leaves flags, version, and cipher at 0.
+	 */
+	struct fun_admin_ktls_modify_req req = {};
+	struct fun_ktls_tx_ctx *tx_ctx;
+	int rc;
+
+	if (direction != TLS_OFFLOAD_CTX_DIR_TX)
+		return -EOPNOTSUPP;
+
+	tx_ctx = tls_driver_ctx(sk, direction);
+
+	req.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_KTLS,
+			offsetof(struct fun_admin_ktls_modify_req, key));
+	req.subop = FUN_ADMIN_SUBOP_MODIFY;
+	req.id = cpu_to_be32(fp->ktls_id);
+	req.tlsid = tx_ctx->tlsid;
+	req.tcp_seq = cpu_to_be32(seq);
+	memcpy(req.record_seq, rcd_sn, sizeof(req.record_seq));
+
+	atomic64_inc(&fp->tx_tls_resync);
+	rc = fun_submit_admin_sync_cmd(fp->fdev, &req.common, NULL, 0, 0);
+	if (!rc)
+		tx_ctx->next_seq = seq;
+	return rc;
+}
+
+/* TLS device offload callbacks installed by fun_ktls_init(). */
+static const struct tlsdev_ops fun_ktls_ops = {
+	.tls_dev_resync = fun_ktls_resync,
+	.tls_dev_del = fun_ktls_del,
+	.tls_dev_add = fun_ktls_add,
+};
+
+/* Create the device kTLS resource for @netdev, using its dev_port as the id,
+ * and on success install the TLS offload ops and advertise
+ * NETIF_F_HW_TLS_TX. Returns 0 or the error from the create command.
+ */
+int fun_ktls_init(struct net_device *netdev)
+{
+	struct funeth_priv *fp = netdev_priv(netdev);
+	int err = fun_admin_ktls_create(fp, netdev->dev_port);
+
+	if (!err) {
+		fp->ktls_id = netdev->dev_port;
+		netdev->tlsdev_ops = &fun_ktls_ops;
+		netdev->hw_features |= NETIF_F_HW_TLS_TX;
+		netdev->features |= NETIF_F_HW_TLS_TX;
+	}
+	return err;
+}
+
+/* Destroy the device kTLS resource if one is recorded in @fp and mark the
+ * id invalid so that a repeated call does nothing.
+ */
+void fun_ktls_cleanup(struct funeth_priv *fp)
+{
+	if (fp->ktls_id != FUN_HCI_ID_INVALID) {
+		fun_res_destroy(fp->fdev, FUN_ADMIN_OP_KTLS, 0, fp->ktls_id);
+		fp->ktls_id = FUN_HCI_ID_INVALID;
+	}
+}
diff --git a/drivers/net/ethernet/fungible/funeth/funeth_ktls.h b/drivers/net/ethernet/fungible/funeth/funeth_ktls.h
new file mode 100644
index 000000000..9d6f2141a
--- /dev/null
+++ b/drivers/net/ethernet/fungible/funeth/funeth_ktls.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */
+
+#ifndef _FUN_KTLS_H
+#define _FUN_KTLS_H
+
+#include <net/tls.h>
+
+struct funeth_priv;
+
+/* Per-connection Tx TLS offload state kept in the TLS driver context. */
+struct fun_ktls_tx_ctx {
+ /* TLS id from the device's add response, kept in wire (big-endian) form */
+ __be64 tlsid;
+ /* TCP sequence number last handed to the device by add or resync */
+ u32 next_seq;
+};
+
+#if IS_ENABLED(CONFIG_TLS_DEVICE)
+int fun_ktls_init(struct net_device *netdev);
+void fun_ktls_cleanup(struct funeth_priv *fp);
+
+#else
+
+/* Stubs used when TLS device offload is not built. fun_ktls_init() keeps the
+ * int return type of the real function so that callers compile identically
+ * in both configurations; with nothing to set up it reports success.
+ */
+static inline int fun_ktls_init(struct net_device *netdev)
+{
+	return 0;
+}
+
+static inline void fun_ktls_cleanup(struct funeth_priv *fp)
+{
+}
+#endif
+
+#endif /* _FUN_KTLS_H */
diff --git a/drivers/net/ethernet/fungible/funeth/funeth_main.c b/drivers/net/ethernet/fungible/funeth/funeth_main.c
new file mode 100644
index 000000000..095f51c4d
--- /dev/null
+++ b/drivers/net/ethernet/fungible/funeth/funeth_main.c
@@ -0,0 +1,2086 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause)
+
+#include <linux/bpf.h>
+#include <linux/crash_dump.h>
+#include <linux/etherdevice.h>
+#include <linux/ethtool.h>
+#include <linux/filter.h>
+#include <linux/idr.h>
+#include <linux/if_vlan.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/pci.h>
+#include <linux/rtnetlink.h>
+#include <linux/inetdevice.h>
+
+#include "funeth.h"
+#include "funeth_devlink.h"
+#include "funeth_ktls.h"
+#include "fun_port.h"
+#include "fun_queue.h"
+#include "funeth_txrx.h"
+
+/* Admin queue depths; presumably entry counts for the admin SQ/CQ/RQ --
+ * their use is outside this excerpt, TODO confirm.
+ */
+#define ADMIN_SQ_DEPTH 32
+#define ADMIN_CQ_DEPTH 64
+#define ADMIN_RQ_DEPTH 16
+
+/* Default number of Tx/Rx queues. */
+#define FUN_DFLT_QUEUES 16U
+
+/* Driver-specific FUN_SERV_* values, numbered from FUN_SERV_FIRST_AVAIL. */
+enum {
+ FUN_SERV_RES_CHANGE = FUN_SERV_FIRST_AVAIL,
+ FUN_SERV_DEL_PORTS,
+};
+
+/* PCI IDs listed by this driver; the all-zero entry terminates the table. */
+static const struct pci_device_id funeth_id_table[] = {
+ { PCI_VDEVICE(FUNGIBLE, 0x0101) },
+ { PCI_VDEVICE(FUNGIBLE, 0x0181) },
+ { 0, }
+};
+
+/* Send one port WRITE admin command carrying @n key/value pairs taken from
+ * @keys and @data. Returns -EINVAL if a command of that size does not fit in
+ * the command buffer or exceeds ADMIN_RSP_MAX_LEN, else the result of the
+ * admin command submission.
+ */
+static int fun_port_write_cmds(struct funeth_priv *fp, unsigned int n,
+			       const int *keys, const u64 *data)
+{
+	union {
+		struct fun_admin_port_req req;
+		struct fun_admin_port_rsp rsp;
+		u8 v[ADMIN_SQE_SIZE];
+	} cmd;
+	unsigned int len, k;
+
+	len = offsetof(struct fun_admin_port_req, u.write.write48) +
+	      n * sizeof(struct fun_admin_write48_req);
+	if (len > sizeof(cmd))
+		return -EINVAL;
+	if (len > ADMIN_RSP_MAX_LEN)
+		return -EINVAL;
+
+	cmd.req.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_PORT, len);
+	cmd.req.u.write =
+		FUN_ADMIN_PORT_WRITE_REQ_INIT(FUN_ADMIN_SUBOP_WRITE, 0,
+					      fp->netdev->dev_port);
+
+	k = 0;
+	while (k < n) {
+		cmd.req.u.write.write48[k] =
+			FUN_ADMIN_WRITE48_REQ_INIT(keys[k], data[k]);
+		k++;
+	}
+
+	/* the response is written over the request in the same buffer */
+	return fun_submit_admin_sync_cmd(fp->fdev, &cmd.req.common,
+					 &cmd.rsp, len, 0);
+}
+
+/* Write a single port attribute: one @key with value @data. */
+int fun_port_write_cmd(struct funeth_priv *fp, int key, u64 data)
+{
+	int rc = fun_port_write_cmds(fp, 1, &key, &data);
+
+	return rc;
+}
+
+/* Issue a port read admin command with @n key/value pairs.
+ *
+ * @keys supplies the @n keys to read and @data receives the corresponding
+ * values. Returns 0 on success, -EINVAL if a command for @n keys does not fit
+ * in the command buffer or exceeds ADMIN_RSP_MAX_LEN, or the non-zero result
+ * of the admin command submission.
+ */
+static int fun_port_read_cmds(struct funeth_priv *fp, unsigned int n,
+			      const int *keys, u64 *data)
+{
+	const struct fun_admin_read48_rsp *r48rsp;
+	unsigned int cmd_size, i;
+	int rc;
+	union {
+		struct fun_admin_port_req req;
+		struct fun_admin_port_rsp rsp;
+		u8 v[ADMIN_SQE_SIZE];
+	} cmd;
+
+	cmd_size = offsetof(struct fun_admin_port_req, u.read.read48) +
+		   n * sizeof(struct fun_admin_read48_req);
+	if (cmd_size > sizeof(cmd) || cmd_size > ADMIN_RSP_MAX_LEN)
+		return -EINVAL;
+
+	cmd.req.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_PORT,
+						    cmd_size);
+	cmd.req.u.read =
+		FUN_ADMIN_PORT_READ_REQ_INIT(FUN_ADMIN_SUBOP_READ, 0,
+					     fp->netdev->dev_port);
+	for (i = 0; i < n; i++)
+		cmd.req.u.read.read48[i] = FUN_ADMIN_READ48_REQ_INIT(keys[i]);
+
+	/* the response is written over the request in the same buffer */
+	rc = fun_submit_admin_sync_cmd(fp->fdev, &cmd.req.common,
+				       &cmd.rsp, cmd_size, 0);
+	if (rc)
+		return rc;
+
+	for (r48rsp = cmd.rsp.u.read.read48, i = 0; i < n; i++, r48rsp++) {
+		data[i] = FUN_ADMIN_READ48_RSP_DATA_G(r48rsp->key_to_data);
+		/* terminate the message with a newline so it is emitted as a
+		 * complete log line
+		 */
+		dev_dbg(fp->fdev->dev,
+			"port_read_rsp lport=%u (key_to_data=0x%llx) key=%d data:%lld retval:%lld\n",
+			fp->lport, r48rsp->key_to_data, keys[i], data[i],
+			FUN_ADMIN_READ48_RSP_RET_G(r48rsp->key_to_data));
+	}
+	return 0;
+}
+
+/* Read a single port attribute: the value for @key is stored in *@data. */
+int fun_port_read_cmd(struct funeth_priv *fp, int key, u64 *data)
+{
+	int rc = fun_port_read_cmds(fp, 1, &key, data);
+
+	return rc;
+}
+
+/* Log the current link state. When the carrier is up the message includes
+ * the speed (in Mb/s, or Gb/s from SPEED_1000 upward) plus the active pause
+ * and FEC settings.
+ */
+static void fun_report_link(struct net_device *netdev)
+{
+	const struct funeth_priv *fp;
+	const char *pause_str = "";
+	const char *fec_str = "";
+	char unit = 'M';
+	int speed;
+
+	if (!netif_carrier_ok(netdev)) {
+		netdev_info(netdev, "Link down\n");
+		return;
+	}
+
+	fp = netdev_priv(netdev);
+	speed = fp->link_speed;
+	if (fp->link_speed >= SPEED_1000) {
+		unit = 'G';
+		speed /= 1000;
+	}
+
+	/* RS takes precedence over BASE-R when both bits are set */
+	if (fp->active_fec & FUN_PORT_FEC_RS)
+		fec_str = ", RS-FEC";
+	else if (fp->active_fec & FUN_PORT_FEC_FC)
+		fec_str = ", BASER-FEC";
+
+	if ((fp->active_fc & FUN_PORT_CAP_PAUSE_MASK) == FUN_PORT_CAP_PAUSE_MASK)
+		pause_str = ", Tx/Rx PAUSE";
+	else if (fp->active_fc & FUN_PORT_CAP_RX_PAUSE)
+		pause_str = ", Rx PAUSE";
+	else if (fp->active_fc & FUN_PORT_CAP_TX_PAUSE)
+		pause_str = ", Tx PAUSE";
+
+	netdev_info(netdev, "Link up at %d %cb/s full-duplex%s%s\n",
+		    speed, unit, pause_str, fec_str);
+}
+
+/* Submit an ADI WRITE admin command that sets attribute @attr of ADI @adi_id
+ * to *@param. Returns the result of the admin command submission.
+ */
+static int fun_adi_write(struct fun_dev *fdev, enum fun_admin_adi_attr attr,
+			 unsigned int adi_id, const struct fun_adi_param *param)
+{
+	struct fun_admin_adi_req adi_req = {
+		.u.write.param = *param,
+		.u.write.id = cpu_to_be32(adi_id),
+		.u.write.attribute = attr,
+		.u.write.subop = FUN_ADMIN_SUBOP_WRITE,
+		.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_ADI,
+						     sizeof(adi_req)),
+	};
+	int rc;
+
+	rc = fun_submit_admin_sync_cmd(fdev, &adi_req.common, NULL, 0, 0);
+	return rc;
+}
+
+/* Set up RSS for a port. @op selects between creating a new RSS context and
+ * reconfiguring the existing one; @algo, @key, and @qtable give the hashing
+ * algorithm, hash key, and indirection table (as Rx queue indices).
+ *
+ * Any @op other than create requires an existing RSS context, otherwise
+ * -EINVAL is returned. On a successful create the id from the response is
+ * recorded in the private struct.
+ *
+ * This initiates packet delivery to the Rx queues set in the indirection
+ * table.
+ */
+int fun_config_rss(struct net_device *dev, int algo, const u8 *key,
+		   const u32 *qtable, u8 op)
+{
+	struct funeth_priv *fp = netdev_priv(dev);
+	unsigned int table_len = fp->indir_table_nentries;
+	unsigned int len = FUN_ETH_RSS_MAX_KEY_SIZE + sizeof(u32) * table_len;
+	struct funeth_rxq **rxqs = rtnl_dereference(fp->rxqs);
+	union {
+		struct {
+			struct fun_admin_rss_req req;
+			struct fun_dataop_gl gl;
+		};
+		struct fun_admin_generic_create_rsp rsp;
+	} cmd;
+	__be32 *indir_tab;
+	unsigned int i;
+	u16 flags = 0;
+	int rc;
+
+	if (op == FUN_ADMIN_SUBOP_CREATE)
+		flags = FUN_ADMIN_RES_CREATE_FLAG_ALLOCATOR;
+	else if (fp->rss_hw_id == FUN_HCI_ID_INVALID)
+		return -EINVAL;
+
+	cmd.req.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_RSS,
+						    sizeof(cmd));
+	cmd.req.u.create =
+		FUN_ADMIN_RSS_CREATE_REQ_INIT(op, flags, fp->rss_hw_id,
+					      dev->dev_port, algo,
+					      FUN_ETH_RSS_MAX_KEY_SIZE,
+					      table_len, 0,
+					      FUN_ETH_RSS_MAX_KEY_SIZE);
+	cmd.req.u.create.dataop = FUN_DATAOP_HDR_INIT(1, 0, 1, 0, len);
+	fun_dataop_gl_init(&cmd.gl, 0, 0, len, fp->rss_dma_addr);
+
+	/* The RSS DMA area holds the key followed by the indirection table,
+	 * the latter as big-endian HW CQ ids of the selected Rx queues.
+	 */
+	memcpy(fp->rss_cfg, key, FUN_ETH_RSS_MAX_KEY_SIZE);
+	indir_tab = fp->rss_cfg + FUN_ETH_RSS_MAX_KEY_SIZE;
+	for (i = 0; i < table_len; i++)
+		indir_tab[i] = cpu_to_be32(rxqs[qtable[i]]->hw_cqid);
+
+	rc = fun_submit_admin_sync_cmd(fp->fdev, &cmd.req.common,
+				       &cmd.rsp, sizeof(cmd.rsp), 0);
+	if (rc)
+		return rc;
+
+	if (op == FUN_ADMIN_SUBOP_CREATE)
+		fp->rss_hw_id = be32_to_cpu(cmd.rsp.id);
+	return 0;
+}
+
+/* Destroy the HW RSS context associated with the given port, if there is
+ * one. This also stops all packet delivery to our Rx queues.
+ */
+static void fun_destroy_rss(struct funeth_priv *fp)
+{
+	if (fp->rss_hw_id == FUN_HCI_ID_INVALID)
+		return;
+
+	fun_res_destroy(fp->fdev, FUN_ADMIN_OP_RSS, 0, fp->rss_hw_id);
+	fp->rss_hw_id = FUN_HCI_ID_INVALID;
+}
+
+/* IRQ affinity notifier callback: remember the new affinity @mask in the
+ * fun_irq that embeds @notify.
+ */
+static void fun_irq_aff_notify(struct irq_affinity_notify *notify,
+			       const cpumask_t *mask)
+{
+	struct fun_irq *irq;
+
+	irq = container_of(notify, struct fun_irq, aff_notify);
+	cpumask_copy(&irq->affinity_mask, mask);
+}
+
+/* Release callback for the affinity notifier; intentionally does nothing. */
+static void fun_irq_aff_release(struct kref __always_unused *ref)
+{
+}
+
+/* Allocate an IRQ structure, assign an MSI-X index and initial affinity to it,
+ * and add it to the IRQ XArray.
+ *
+ * @idx selects the CPU (via cpumask_local_spread() relative to @node) and,
+ * offset by @xa_idx_offset, the XArray slot. The structure is allocated on
+ * the memory node of the chosen CPU.
+ *
+ * Returns the new IRQ structure or an ERR_PTR(); never NULL.
+ */
+static struct fun_irq *fun_alloc_qirq(struct funeth_priv *fp, unsigned int idx,
+				      int node, unsigned int xa_idx_offset)
+{
+	struct fun_irq *irq;
+	int cpu, res;
+
+	cpu = cpumask_local_spread(idx, node);
+	node = cpu_to_mem(cpu);
+
+	irq = kzalloc_node(sizeof(*irq), GFP_KERNEL, node);
+	if (!irq)
+		return ERR_PTR(-ENOMEM);
+
+	res = fun_reserve_irqs(fp->fdev, 1, &irq->irq_idx);
+	if (res != 1) {
+		/* A non-negative result other than 1 (e.g. 0 when nothing
+		 * could be reserved) is not an errno. ERR_PTR() of it would
+		 * not be seen as an error by IS_ERR() in the callers, which
+		 * would then dereference it. Report it as -ENOSPC.
+		 */
+		if (res >= 0)
+			res = -ENOSPC;
+		goto free_irq;
+	}
+
+	res = xa_insert(&fp->irqs, idx + xa_idx_offset, irq, GFP_KERNEL);
+	if (res)
+		goto release_irq;
+
+	irq->irq = pci_irq_vector(fp->pdev, irq->irq_idx);
+	cpumask_set_cpu(cpu, &irq->affinity_mask);
+	irq->aff_notify.notify = fun_irq_aff_notify;
+	irq->aff_notify.release = fun_irq_aff_release;
+	irq->state = FUN_IRQ_INIT;
+	return irq;
+
+release_irq:
+	fun_release_irqs(fp->fdev, 1, &irq->irq_idx);
+free_irq:
+	kfree(irq);
+	return ERR_PTR(res);
+}
+
+/* Undo fun_alloc_qirq() plus the NAPI registration: delete the NAPI context,
+ * give the IRQ index back, and free the structure. Removing the entry from
+ * the IRQ XArray is left to the caller.
+ */
+static void fun_free_qirq(struct funeth_priv *fp, struct fun_irq *irq)
+{
+	struct fun_dev *fdev = fp->fdev;
+
+	netif_napi_del(&irq->napi);
+	fun_release_irqs(fdev, 1, &irq->irq_idx);
+	kfree(irq);
+}
+
+/* Free every reserved queue IRQ that has neither a Tx nor an Rx queue
+ * attached, and adjust the Tx/Rx IRQ counts accordingly. Entries below
+ * rx_irq_ofst in the XArray count as Tx IRQs, the rest as Rx IRQs.
+ */
+static void fun_prune_queue_irqs(struct net_device *dev)
+{
+	struct funeth_priv *fp = netdev_priv(dev);
+	unsigned int nfreed = 0;
+	struct fun_irq *irq;
+	unsigned long slot;
+
+	xa_for_each(&fp->irqs, slot, irq) {
+		bool in_use = irq->txq || irq->rxq;
+
+		if (in_use)
+			continue;
+
+		xa_erase(&fp->irqs, slot);
+		fun_free_qirq(fp, irq);
+		if (slot >= fp->rx_irq_ofst)
+			fp->num_rx_irqs--;
+		else
+			fp->num_tx_irqs--;
+		nfreed++;
+	}
+	netif_info(fp, intr, dev, "Released %u queue IRQs\n", nfreed);
+}
+
+/* Reserve IRQs, one per queue, to accommodate the requested queue numbers
+ * @ntx and @nrx. Only the IRQs missing beyond those already held are added,
+ * each with its NAPI context registered. IRQs added before a failure are
+ * kept; unused IRQs are released only when pruning is separately requested.
+ */
+static int fun_alloc_queue_irqs(struct net_device *dev, unsigned int ntx,
+				unsigned int nrx)
+{
+	struct funeth_priv *fp = netdev_priv(dev);
+	int node = dev_to_node(&fp->pdev->dev);
+	struct fun_irq *irq;
+
+	while (fp->num_tx_irqs < ntx) {
+		irq = fun_alloc_qirq(fp, fp->num_tx_irqs, node, 0);
+		if (IS_ERR(irq))
+			return PTR_ERR(irq);
+
+		fp->num_tx_irqs++;
+		netif_napi_add_tx(dev, &irq->napi, fun_txq_napi_poll);
+	}
+
+	while (fp->num_rx_irqs < nrx) {
+		irq = fun_alloc_qirq(fp, fp->num_rx_irqs, node,
+				     fp->rx_irq_ofst);
+		if (IS_ERR(irq))
+			return PTR_ERR(irq);
+
+		fp->num_rx_irqs++;
+		netif_napi_add(dev, &irq->napi, fun_rxq_napi_poll);
+	}
+
+	netif_info(fp, intr, dev, "Reserved %u/%u IRQs for Tx/Rx queues\n",
+		   ntx, nrx);
+	return 0;
+}
+
+/* Take Tx queues [@start, @nqs) to @state via funeth_txq_free(), storing
+ * its return value back in the array. Stops at the first NULL entry.
+ */
+static void free_txqs(struct funeth_txq **txqs, unsigned int nqs,
+		      unsigned int start, int state)
+{
+	unsigned int q;
+
+	for (q = start; q < nqs; q++) {
+		if (!txqs[q])
+			break;
+		txqs[q] = funeth_txq_free(txqs[q], state);
+	}
+}
+
+/* Create Tx queues [@start, @nqs) in @state, each with the IRQ stored at
+ * the same index in the IRQ XArray. If any creation fails, the queues in that
+ * range are destroyed again and the error is returned.
+ */
+static int alloc_txqs(struct net_device *dev, struct funeth_txq **txqs,
+		      unsigned int nqs, unsigned int depth, unsigned int start,
+		      int state)
+{
+	struct funeth_priv *fp = netdev_priv(dev);
+	unsigned int q;
+	int rc = 0;
+
+	for (q = start; q < nqs; q++) {
+		struct fun_irq *irq = xa_load(&fp->irqs, q);
+
+		rc = funeth_txq_create(dev, q, depth, irq, state, &txqs[q]);
+		if (rc)
+			break;
+	}
+
+	if (rc)
+		free_txqs(txqs, nqs, start, FUN_QSTATE_DESTROYED);
+	return rc;
+}
+
+/* Take Rx queues [@start, @nqs) to @state via funeth_rxq_free(), storing
+ * its return value back in the array. Stops at the first NULL entry.
+ */
+static void free_rxqs(struct funeth_rxq **rxqs, unsigned int nqs,
+		      unsigned int start, int state)
+{
+	unsigned int q;
+
+	for (q = start; q < nqs; q++) {
+		if (!rxqs[q])
+			break;
+		rxqs[q] = funeth_rxq_free(rxqs[q], state);
+	}
+}
+
+/* Create Rx queues [@start, @nqs) in @state, each with the IRQ stored at
+ * its index plus rx_irq_ofst in the IRQ XArray. If any creation fails, the
+ * queues in that range are destroyed again and the error is returned.
+ */
+static int alloc_rxqs(struct net_device *dev, struct funeth_rxq **rxqs,
+		      unsigned int nqs, unsigned int ncqe, unsigned int nrqe,
+		      unsigned int start, int state)
+{
+	struct funeth_priv *fp = netdev_priv(dev);
+	unsigned int q;
+	int rc = 0;
+
+	for (q = start; q < nqs; q++) {
+		struct fun_irq *irq = xa_load(&fp->irqs, q + fp->rx_irq_ofst);
+
+		rc = funeth_rxq_create(dev, q, ncqe, nrqe, irq, state,
+				       &rxqs[q]);
+		if (rc)
+			break;
+	}
+
+	if (rc)
+		free_rxqs(rxqs, nqs, start, FUN_QSTATE_DESTROYED);
+	return rc;
+}
+
+/* Take XDP Tx queues [@start, @nqs) to @state, stopping at the first NULL
+ * entry. When @state is FUN_QSTATE_DESTROYED the pointer array itself is
+ * freed as well.
+ */
+static void free_xdpqs(struct funeth_txq **xdpqs, unsigned int nqs,
+		       unsigned int start, int state)
+{
+	unsigned int q;
+
+	for (q = start; q < nqs; q++) {
+		if (!xdpqs[q])
+			break;
+		xdpqs[q] = funeth_txq_free(xdpqs[q], state);
+	}
+
+	if (state != FUN_QSTATE_DESTROYED)
+		return;
+	kfree(xdpqs);
+}
+
+/* Allocate an array of @nqs XDP Tx queue pointers and create queues
+ * [@start, @nqs) in @state, without IRQs. Returns the array or an ERR_PTR();
+ * on failure everything allocated here is released.
+ */
+static struct funeth_txq **alloc_xdpqs(struct net_device *dev, unsigned int nqs,
+				       unsigned int depth, unsigned int start,
+				       int state)
+{
+	struct funeth_txq **qvec;
+	unsigned int q;
+	int rc;
+
+	qvec = kcalloc(nqs, sizeof(*qvec), GFP_KERNEL);
+	if (!qvec)
+		return ERR_PTR(-ENOMEM);
+
+	for (q = start; q < nqs; q++) {
+		rc = funeth_txq_create(dev, q, depth, NULL, state, &qvec[q]);
+		if (!rc)
+			continue;
+
+		/* this also frees qvec */
+		free_xdpqs(qvec, nqs, start, FUN_QSTATE_DESTROYED);
+		return ERR_PTR(rc);
+	}
+	return qvec;
+}
+
+/* Release the queues described by @qset, taking them to qset->state.
+ *
+ * If @qset names no Rx queues the currently installed queues and counts are
+ * used instead. If the queues being released are the installed ones they are
+ * first unpublished from the private struct. On return qset->rxqs and
+ * qset->xdpqs name the queues that were operated on.
+ */
+static void fun_free_rings(struct net_device *netdev, struct fun_qset *qset)
+{
+ struct funeth_priv *fp = netdev_priv(netdev);
+ struct funeth_txq **xdpqs = qset->xdpqs;
+ struct funeth_rxq **rxqs = qset->rxqs;
+
+ /* qset may not specify any queues to operate on. In that case the
+ * currently installed queues are implied.
+ */
+ if (!rxqs) {
+ rxqs = rtnl_dereference(fp->rxqs);
+ xdpqs = rtnl_dereference(fp->xdpqs);
+ qset->txqs = fp->txqs;
+ qset->nrxqs = netdev->real_num_rx_queues;
+ qset->ntxqs = netdev->real_num_tx_queues;
+ qset->nxdpqs = fp->num_xdpqs;
+ }
+ /* nothing installed either: there is nothing to free */
+ if (!rxqs)
+ return;
+
+ /* Freeing the installed queues: clear the published pointers and wait
+ * in synchronize_net() before the queues are released below.
+ */
+ if (rxqs == rtnl_dereference(fp->rxqs)) {
+ rcu_assign_pointer(fp->rxqs, NULL);
+ rcu_assign_pointer(fp->xdpqs, NULL);
+ synchronize_net();
+ fp->txqs = NULL;
+ }
+
+ free_rxqs(rxqs, qset->nrxqs, qset->rxq_start, qset->state);
+ free_txqs(qset->txqs, qset->ntxqs, qset->txq_start, qset->state);
+ free_xdpqs(xdpqs, qset->nxdpqs, qset->xdpq_start, qset->state);
+ /* Only the rxqs array is freed here: fun_alloc_rings() places the Tx
+ * queue pointers in the same allocation, and free_xdpqs() frees the
+ * XDP array itself in the DESTROYED state.
+ */
+ if (qset->state == FUN_QSTATE_DESTROYED)
+ kfree(rxqs);
+
+ /* Tell the caller which queues were operated on. */
+ qset->rxqs = rxqs;
+ qset->xdpqs = xdpqs;
+}
+
+/* Allocate the queues described by @qset in state qset->state.
+ *
+ * Reserves any missing queue IRQs, then creates the XDP, Tx, and Rx queues in
+ * that order. On success the resulting arrays are stored in @qset. On failure
+ * the queues and arrays allocated here are released; IRQs reserved by
+ * fun_alloc_queue_irqs() are not released by this function.
+ */
+static int fun_alloc_rings(struct net_device *netdev, struct fun_qset *qset)
+{
+ struct funeth_txq **xdpqs = NULL, **txqs;
+ struct funeth_rxq **rxqs;
+ int err;
+
+ err = fun_alloc_queue_irqs(netdev, qset->ntxqs, qset->nrxqs);
+ if (err)
+ return err;
+
+ /* one allocation holds the Rx queue pointers followed by the Tx ones */
+ rxqs = kcalloc(qset->ntxqs + qset->nrxqs, sizeof(*rxqs), GFP_KERNEL);
+ if (!rxqs)
+ return -ENOMEM;
+
+ if (qset->nxdpqs) {
+ xdpqs = alloc_xdpqs(netdev, qset->nxdpqs, qset->sq_depth,
+ qset->xdpq_start, qset->state);
+ if (IS_ERR(xdpqs)) {
+ err = PTR_ERR(xdpqs);
+ goto free_qvec;
+ }
+ }
+
+ /* the Tx pointers live right after the nrxqs Rx pointers */
+ txqs = (struct funeth_txq **)&rxqs[qset->nrxqs];
+ err = alloc_txqs(netdev, txqs, qset->ntxqs, qset->sq_depth,
+ qset->txq_start, qset->state);
+ if (err)
+ goto free_xdpqs;
+
+ err = alloc_rxqs(netdev, rxqs, qset->nrxqs, qset->cq_depth,
+ qset->rq_depth, qset->rxq_start, qset->state);
+ if (err)
+ goto free_txqs;
+
+ qset->rxqs = rxqs;
+ qset->txqs = txqs;
+ qset->xdpqs = xdpqs;
+ return 0;
+
+ /* unwind in reverse order of allocation */
+free_txqs:
+ free_txqs(txqs, qset->ntxqs, qset->txq_start, FUN_QSTATE_DESTROYED);
+free_xdpqs:
+ /* also reached with xdpqs == NULL when no XDP queues were requested */
+ free_xdpqs(xdpqs, qset->nxdpqs, qset->xdpq_start, FUN_QSTATE_DESTROYED);
+free_qvec:
+ kfree(rxqs);
+ return err;
+}
+
+/* Move the queues in @qset to their next state, which currently means
+ * creating them on the device: first Rx, then Tx, then XDP Tx queues. On the
+ * first failure the queues in @qset are released through fun_free_rings()
+ * and the error is returned.
+ */
+static int fun_advance_ring_state(struct net_device *dev, struct fun_qset *qset)
+{
+	struct funeth_priv *fp = netdev_priv(dev);
+	int i, err = 0;
+
+	for (i = 0; !err && i < qset->nrxqs; i++)
+		err = fun_rxq_create_dev(qset->rxqs[i],
+					 xa_load(&fp->irqs,
+						 i + fp->rx_irq_ofst));
+
+	for (i = 0; !err && i < qset->ntxqs; i++)
+		err = fun_txq_create_dev(qset->txqs[i], xa_load(&fp->irqs, i));
+
+	/* XDP Tx queues have no IRQ */
+	for (i = 0; !err && i < qset->nxdpqs; i++)
+		err = fun_txq_create_dev(qset->xdpqs[i], NULL);
+
+	if (err)
+		fun_free_rings(dev, qset);
+	return err;
+}
+
+/* Create the device port for @netdev unless an lport is already recorded.
+ * On success the lport from the response is stored in the private struct.
+ */
+static int fun_port_create(struct net_device *netdev)
+{
+	struct funeth_priv *fp = netdev_priv(netdev);
+	union {
+		struct fun_admin_port_req req;
+		struct fun_admin_port_rsp rsp;
+	} cmd;
+	int err;
+
+	/* already created */
+	if (fp->lport != INVALID_LPORT)
+		return 0;
+
+	cmd.req.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_PORT,
+						    sizeof(cmd.req));
+	cmd.req.u.create =
+		FUN_ADMIN_PORT_CREATE_REQ_INIT(FUN_ADMIN_SUBOP_CREATE, 0,
+					       netdev->dev_port);
+
+	err = fun_submit_admin_sync_cmd(fp->fdev, &cmd.req.common, &cmd.rsp,
+					sizeof(cmd.rsp), 0);
+	if (err)
+		return err;
+
+	fp->lport = be16_to_cpu(cmd.rsp.u.create.lport);
+	return 0;
+}
+
+/* Destroy the device port for @netdev if one was created. The recorded lport
+ * is invalidated before the destroy command is issued, so it stays invalid
+ * even if that command fails.
+ */
+static int fun_port_destroy(struct net_device *netdev)
+{
+	struct funeth_priv *fp = netdev_priv(netdev);
+	int rc = 0;
+
+	if (fp->lport != INVALID_LPORT) {
+		fp->lport = INVALID_LPORT;
+		rc = fun_res_destroy(fp->fdev, FUN_ADMIN_OP_PORT, 0,
+				     netdev->dev_port);
+	}
+	return rc;
+}
+
+/* Create an ETH resource for this port, letting the device allocate its id.
+ * Returns the id from the response on success, else the submission result.
+ */
+static int fun_eth_create(struct funeth_priv *fp)
+{
+	union {
+		struct fun_admin_eth_req req;
+		struct fun_admin_generic_create_rsp rsp;
+	} cmd;
+	int err;
+
+	cmd.req.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_ETH,
+						    sizeof(cmd.req));
+	cmd.req.u.create = FUN_ADMIN_ETH_CREATE_REQ_INIT(
+				FUN_ADMIN_SUBOP_CREATE,
+				FUN_ADMIN_RES_CREATE_FLAG_ALLOCATOR,
+				0, fp->netdev->dev_port);
+
+	err = fun_submit_admin_sync_cmd(fp->fdev, &cmd.req.common, &cmd.rsp,
+					sizeof(cmd.rsp), 0);
+	if (err)
+		return err;
+
+	return be32_to_cpu(cmd.rsp.id);
+}
+
+/* Create the VI resource for this port; the port number is passed for both
+ * of the trailing create arguments. Returns the submission result.
+ */
+static int fun_vi_create(struct funeth_priv *fp)
+{
+	struct fun_admin_vi_req vi_req = {
+		.u.create = FUN_ADMIN_VI_CREATE_REQ_INIT(FUN_ADMIN_SUBOP_CREATE,
+							 0,
+							 fp->netdev->dev_port,
+							 fp->netdev->dev_port),
+		.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_VI,
+						     sizeof(vi_req)),
+	};
+	int rc;
+
+	rc = fun_submit_admin_sync_cmd(fp->fdev, &vi_req.common, NULL, 0, 0);
+	return rc;
+}
+
+/* Create an ETH flow and bind the SQ @sqid to it. If the bind fails the
+ * newly created ETH resource is destroyed again.
+ * Returns the ETH id (>= 0) on success or a negative error.
+ */
+int fun_create_and_bind_tx(struct funeth_priv *fp, u32 sqid)
+{
+	int ethid, err;
+
+	ethid = fun_eth_create(fp);
+	if (ethid < 0)
+		return ethid;
+
+	err = fun_bind(fp->fdev, FUN_ADMIN_BIND_TYPE_EPSQ, sqid,
+		       FUN_ADMIN_BIND_TYPE_ETH, ethid);
+	if (!err)
+		return ethid;
+
+	fun_res_destroy(fp->fdev, FUN_ADMIN_OP_ETH, 0, ethid);
+	return err;
+}
+
+/* Interrupt handler shared by Tx and Rx queue IRQs: schedule the IRQ's NAPI
+ * context. For Rx IRQs it also prefetches the queue's next CQE info and
+ * counts the interrupt.
+ */
+static irqreturn_t fun_queue_irq_handler(int irq, void *data)
+{
+	struct fun_irq *qirq = data;
+	struct funeth_rxq *rxq = qirq->rxq;
+
+	if (rxq) {
+		prefetch(rxq->next_cqe_info);
+		rxq->irq_cnt++;
+	}
+	napi_schedule_irqoff(&qirq->napi);
+	return IRQ_HANDLED;
+}
+
+/* Request and enable the IRQs of all queue IRQ entries that have a queue
+ * attached and are still in the FUN_IRQ_INIT state.
+ *
+ * Works in two passes: the first requests each IRQ (INIT -> REQUESTED), the
+ * second sets affinity, enables NAPI, and marks them ENABLED. If a request
+ * fails, the IRQs requested by this call are freed again and the error is
+ * returned.
+ */
+static int fun_enable_irqs(struct net_device *dev)
+{
+ struct funeth_priv *fp = netdev_priv(dev);
+ unsigned long idx, last;
+ unsigned int qidx;
+ struct fun_irq *p;
+ const char *qtype;
+ int err;
+
+ /* pass 1: request the IRQs */
+ xa_for_each(&fp->irqs, idx, p) {
+ if (p->txq) {
+ qtype = "tx";
+ qidx = p->txq->qidx;
+ } else if (p->rxq) {
+ qtype = "rx";
+ qidx = p->rxq->qidx;
+ } else {
+ /* no queue uses this IRQ */
+ continue;
+ }
+
+ /* already requested or enabled */
+ if (p->state != FUN_IRQ_INIT)
+ continue;
+
+ /* IRQ name: <netdev name>-<tx|rx>-<queue index> */
+ snprintf(p->name, sizeof(p->name) - 1, "%s-%s-%u", dev->name,
+ qtype, qidx);
+ err = request_irq(p->irq, fun_queue_irq_handler, 0, p->name, p);
+ if (err) {
+ netdev_err(dev, "Failed to allocate IRQ %u, err %d\n",
+ p->irq, err);
+ goto unroll;
+ }
+ p->state = FUN_IRQ_REQUESTED;
+ }
+
+ /* pass 2: enable everything that was just requested */
+ xa_for_each(&fp->irqs, idx, p) {
+ if (p->state != FUN_IRQ_REQUESTED)
+ continue;
+ irq_set_affinity_notifier(p->irq, &p->aff_notify);
+ irq_set_affinity_and_hint(p->irq, &p->affinity_mask);
+ napi_enable(&p->napi);
+ p->state = FUN_IRQ_ENABLED;
+ }
+
+ return 0;
+
+unroll:
+ /* Free the IRQs requested above, i.e. entries before the failing index
+ * that are in the REQUESTED state; entries in other states are left
+ * alone.
+ */
+ last = idx - 1;
+ xa_for_each_range(&fp->irqs, idx, p, 0, last)
+ if (p->state == FUN_IRQ_REQUESTED) {
+ free_irq(p->irq, p);
+ p->state = FUN_IRQ_INIT;
+ }
+
+ return err;
+}
+
+/* Reverse what fun_enable_irqs() did for one IRQ: disable its NAPI context,
+ * remove the affinity notifier and hint, free the IRQ, and put the entry
+ * back in the FUN_IRQ_INIT state.
+ */
+static void fun_disable_one_irq(struct fun_irq *irq)
+{
+	unsigned int vec = irq->irq;
+
+	napi_disable(&irq->napi);
+	irq_set_affinity_notifier(vec, NULL);
+	irq_update_affinity_hint(vec, NULL);
+	free_irq(vec, irq);
+	irq->state = FUN_IRQ_INIT;
+}
+
+/* Disable every queue IRQ that is currently in the FUN_IRQ_ENABLED state. */
+static void fun_disable_irqs(struct net_device *dev)
+{
+	struct funeth_priv *fp = netdev_priv(dev);
+	unsigned long slot;
+	struct fun_irq *irq;
+
+	xa_for_each(&fp->irqs, slot, irq) {
+		if (irq->state != FUN_IRQ_ENABLED)
+			continue;
+		fun_disable_one_irq(irq);
+	}
+}
+
+/* Tear down the data path and release the queues as directed by @qset.
+ * Does nothing when no queues are installed. The device-side teardown is
+ * skipped when the installed queues were never created on the device.
+ */
+static void fun_down(struct net_device *dev, struct fun_qset *qset)
+{
+	struct funeth_priv *fp = netdev_priv(dev);
+
+	/* Without queues the data path is already down, even though
+	 * netif_running(dev) may be true.
+	 */
+	if (!rcu_access_pointer(fp->rxqs))
+		return;
+
+	/* Queues that aren't on the device need no device teardown. */
+	if (fp->txqs[0]->init_state < FUN_QSTATE_INIT_FULL)
+		goto free_rings;
+
+	netif_info(fp, ifdown, dev, "Tearing down data path on device\n");
+	fun_port_write_cmd(fp, FUN_ADMIN_PORT_KEY_DISABLE, 0);
+
+	netif_carrier_off(dev);
+	netif_tx_disable(dev);
+
+	fun_destroy_rss(fp);
+	fun_res_destroy(fp->fdev, FUN_ADMIN_OP_VI, 0, dev->dev_port);
+	fun_disable_irqs(dev);
+
+free_rings:
+	fun_free_rings(dev, qset);
+}
+
+/* Bring up the data path using the queues in @qset.
+ *
+ * Creates the queues on the device if needed, creates the VI, installs the
+ * queues, enables their IRQs, directs Rx traffic to them (RSS or a single
+ * bound CQ), and finally enables the port. On failure the steps completed so
+ * far are undone, including a fun_free_rings() on @qset, and the error is
+ * returned.
+ */
+static int fun_up(struct net_device *dev, struct fun_qset *qset)
+{
+ static const int port_keys[] = {
+ FUN_ADMIN_PORT_KEY_STATS_DMA_LOW,
+ FUN_ADMIN_PORT_KEY_STATS_DMA_HIGH,
+ FUN_ADMIN_PORT_KEY_ENABLE
+ };
+
+ struct funeth_priv *fp = netdev_priv(dev);
+ /* values for port_keys[], in the same order */
+ u64 vals[] = {
+ lower_32_bits(fp->stats_dma_addr),
+ upper_32_bits(fp->stats_dma_addr),
+ FUN_PORT_FLAG_ENABLE_NOTIFY
+ };
+ int err;
+
+ netif_info(fp, ifup, dev, "Setting up data path on device\n");
+
+ /* fun_advance_ring_state() releases the queues itself on failure */
+ if (qset->rxqs[0]->init_state < FUN_QSTATE_INIT_FULL) {
+ err = fun_advance_ring_state(dev, qset);
+ if (err)
+ return err;
+ }
+
+ err = fun_vi_create(fp);
+ if (err)
+ goto free_queues;
+
+ /* install the queues in the private struct */
+ fp->txqs = qset->txqs;
+ rcu_assign_pointer(fp->rxqs, qset->rxqs);
+ rcu_assign_pointer(fp->xdpqs, qset->xdpqs);
+
+ err = fun_enable_irqs(dev);
+ if (err)
+ goto destroy_vi;
+
+ if (fp->rss_cfg) {
+ err = fun_config_rss(dev, fp->hash_algo, fp->rss_key,
+ fp->indir_table, FUN_ADMIN_SUBOP_CREATE);
+ } else {
+ /* The non-RSS case has only 1 queue. */
+ err = fun_bind(fp->fdev, FUN_ADMIN_BIND_TYPE_VI, dev->dev_port,
+ FUN_ADMIN_BIND_TYPE_EPCQ,
+ qset->rxqs[0]->hw_cqid);
+ }
+ if (err)
+ goto disable_irqs;
+
+ /* set the stats DMA address and enable the port in one command */
+ err = fun_port_write_cmds(fp, 3, port_keys, vals);
+ if (err)
+ goto free_rss;
+
+ netif_tx_start_all_queues(dev);
+ return 0;
+
+ /* unwind in reverse order of setup */
+free_rss:
+ fun_destroy_rss(fp);
+disable_irqs:
+ fun_disable_irqs(dev);
+destroy_vi:
+ fun_res_destroy(fp->fdev, FUN_ADMIN_OP_VI, 0, dev->dev_port);
+free_queues:
+ fun_free_rings(dev, qset);
+ return err;
+}
+
+/* Open the netdev: allocate queues for the current queue counts and depths,
+ * then bring up the data path. If bring-up fails the queues are released in
+ * the DESTROYED state before the error is returned.
+ */
+static int funeth_open(struct net_device *netdev)
+{
+	struct funeth_priv *fp = netdev_priv(netdev);
+	struct fun_qset qset = {
+		.state = FUN_QSTATE_INIT_FULL,
+		.nrxqs = netdev->real_num_rx_queues,
+		.ntxqs = netdev->real_num_tx_queues,
+		.nxdpqs = fp->num_xdpqs,
+		.cq_depth = fp->cq_depth,
+		.rq_depth = fp->rq_depth,
+		.sq_depth = fp->sq_depth,
+	};
+	int err;
+
+	err = fun_alloc_rings(netdev, &qset);
+	if (err)
+		return err;
+
+	err = fun_up(netdev, &qset);
+	if (!err)
+		return 0;
+
+	qset.state = FUN_QSTATE_DESTROYED;
+	fun_free_rings(netdev, &qset);
+	return err;
+}
+
+/* Close the netdev: take down the data path and destroy the installed
+ * queues. Always returns 0.
+ */
+static int funeth_close(struct net_device *netdev)
+{
+	/* no queues named: fun_free_rings() falls back to the installed ones */
+	struct fun_qset teardown = { .state = FUN_QSTATE_DESTROYED };
+
+	fun_down(netdev, &teardown);
+	return 0;
+}
+
+/* Fill @stats with packet, byte, and drop counts. The totals kept in the
+ * private struct are the starting point; if queues are installed, the
+ * per-queue counters of the Tx, Rx, and XDP Tx queues are added on top.
+ */
+static void fun_get_stats64(struct net_device *netdev,
+			    struct rtnl_link_stats64 *stats)
+{
+	struct funeth_priv *fp = netdev_priv(netdev);
+	struct funeth_txq **xdpqs;
+	struct funeth_rxq **rxqs;
+	unsigned int i, start;
+
+	stats->rx_packets = fp->rx_packets;
+	stats->rx_bytes = fp->rx_bytes;
+	stats->rx_dropped = fp->rx_dropped;
+
+	stats->tx_packets = fp->tx_packets;
+	stats->tx_bytes = fp->tx_bytes;
+	stats->tx_dropped = fp->tx_dropped;
+
+	rcu_read_lock();
+	rxqs = rcu_dereference(fp->rxqs);
+	if (rxqs) {
+		for (i = 0; i < netdev->real_num_tx_queues; i++) {
+			struct funeth_txq_stats txs;
+
+			FUN_QSTAT_READ(fp->txqs[i], start, txs);
+			stats->tx_packets += txs.tx_pkts;
+			stats->tx_bytes += txs.tx_bytes;
+			stats->tx_dropped += txs.tx_map_err;
+		}
+
+		for (i = 0; i < netdev->real_num_rx_queues; i++) {
+			struct funeth_rxq_stats rxs;
+
+			FUN_QSTAT_READ(rxqs[i], start, rxs);
+			stats->rx_packets += rxs.rx_pkts;
+			stats->rx_bytes += rxs.rx_bytes;
+			stats->rx_dropped += rxs.rx_map_err + rxs.rx_mem_drops;
+		}
+
+		/* XDP Tx queues contribute packets and bytes only */
+		xdpqs = rcu_dereference(fp->xdpqs);
+		if (xdpqs) {
+			for (i = 0; i < fp->num_xdpqs; i++) {
+				struct funeth_txq_stats txs;
+
+				FUN_QSTAT_READ(xdpqs[i], start, txs);
+				stats->tx_packets += txs.tx_pkts;
+				stats->tx_bytes += txs.tx_bytes;
+			}
+		}
+	}
+	rcu_read_unlock();
+}
+
+/* Write the new MTU to the port and, only if the device accepts it, record
+ * it in the netdev.
+ */
+static int fun_change_mtu(struct net_device *netdev, int new_mtu)
+{
+	struct funeth_priv *fp = netdev_priv(netdev);
+	int err;
+
+	err = fun_port_write_cmd(fp, FUN_ADMIN_PORT_KEY_MTU, new_mtu);
+	if (err)
+		return err;
+
+	netdev->mtu = new_mtu;
+	return 0;
+}
+
+/* Change the port's MAC address. Invalid addresses are rejected with
+ * -EADDRNOTAVAIL and an unchanged address is a no-op. The netdev address is
+ * updated only after the device accepts the new one.
+ */
+static int fun_set_macaddr(struct net_device *netdev, void *addr)
+{
+	struct funeth_priv *fp = netdev_priv(netdev);
+	struct sockaddr *saddr = addr;
+	int err;
+
+	if (!is_valid_ether_addr(saddr->sa_data))
+		return -EADDRNOTAVAIL;
+
+	if (ether_addr_equal(netdev->dev_addr, saddr->sa_data))
+		return 0;
+
+	err = fun_port_write_cmd(fp, FUN_ADMIN_PORT_KEY_MACADDR,
+				 ether_addr_to_u64(saddr->sa_data));
+	if (err)
+		return err;
+
+	eth_hw_addr_set(netdev, saddr->sa_data);
+	return 0;
+}
+
+/* Read the port's MAC address, capabilities, advertised modes, and MTU from
+ * the device and store them in the netdev/private struct. For ports without
+ * FUN_PORT_CAP_VPORT the lane attributes are read as well.
+ *
+ * An all-zero MAC from the device is replaced by a random address, which is
+ * then written back to the device; any other invalid MAC fails with -EINVAL.
+ */
+static int fun_get_port_attributes(struct net_device *netdev)
+{
+ static const int keys[] = {
+ FUN_ADMIN_PORT_KEY_MACADDR, FUN_ADMIN_PORT_KEY_CAPABILITIES,
+ FUN_ADMIN_PORT_KEY_ADVERT, FUN_ADMIN_PORT_KEY_MTU
+ };
+ static const int phys_keys[] = {
+ FUN_ADMIN_PORT_KEY_LANE_ATTRS,
+ };
+
+ struct funeth_priv *fp = netdev_priv(netdev);
+ /* sized for keys[]; reused below for the shorter phys_keys[] read */
+ u64 data[ARRAY_SIZE(keys)];
+ u8 mac[ETH_ALEN];
+ int i, rc;
+
+ rc = fun_port_read_cmds(fp, ARRAY_SIZE(keys), keys, data);
+ if (rc)
+ return rc;
+
+ /* data[i] holds the value read for keys[i] */
+ for (i = 0; i < ARRAY_SIZE(keys); i++) {
+ switch (keys[i]) {
+ case FUN_ADMIN_PORT_KEY_MACADDR:
+ u64_to_ether_addr(data[i], mac);
+ if (is_zero_ether_addr(mac)) {
+ eth_hw_addr_random(netdev);
+ } else if (is_valid_ether_addr(mac)) {
+ eth_hw_addr_set(netdev, mac);
+ } else {
+ netdev_err(netdev,
+ "device provided a bad MAC address %pM\n",
+ mac);
+ return -EINVAL;
+ }
+ break;
+
+ case FUN_ADMIN_PORT_KEY_CAPABILITIES:
+ fp->port_caps = data[i];
+ break;
+
+ case FUN_ADMIN_PORT_KEY_ADVERT:
+ fp->advertising = data[i];
+ break;
+
+ case FUN_ADMIN_PORT_KEY_MTU:
+ netdev->mtu = data[i];
+ break;
+ }
+ }
+
+ /* lane attributes are read only when the VPORT capability is absent */
+ if (!(fp->port_caps & FUN_PORT_CAP_VPORT)) {
+ rc = fun_port_read_cmds(fp, ARRAY_SIZE(phys_keys), phys_keys,
+ data);
+ if (rc)
+ return rc;
+
+ fp->lane_attrs = data[0];
+ }
+
+ /* tell the device about a randomly assigned address */
+ if (netdev->addr_assign_type == NET_ADDR_RANDOM)
+ return fun_port_write_cmd(fp, FUN_ADMIN_PORT_KEY_MACADDR,
+ ether_addr_to_u64(netdev->dev_addr));
+ return 0;
+}
+
+/* SIOCGHWTSTAMP: copy the stored HW timestamping configuration to user
+ * space. Returns -EFAULT if the copy fails.
+ */
+static int fun_hwtstamp_get(struct net_device *dev, struct ifreq *ifr)
+{
+	const struct funeth_priv *fp = netdev_priv(dev);
+
+	if (copy_to_user(ifr->ifr_data, &fp->hwtstamp_cfg,
+			 sizeof(fp->hwtstamp_cfg)))
+		return -EFAULT;
+	return 0;
+}
+
+/* SIOCSHWTSTAMP: store a new HW timestamping configuration.
+ *
+ * Tx timestamping is always forced off. Every recognized Rx filter other than
+ * NONE is widened to HWTSTAMP_FILTER_ALL; unrecognized filters fail with
+ * -ERANGE. The configuration actually stored is copied back to user space.
+ */
+static int fun_hwtstamp_set(struct net_device *dev, struct ifreq *ifr)
+{
+	struct funeth_priv *fp = netdev_priv(dev);
+	struct hwtstamp_config cfg;
+
+	if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg)))
+		return -EFAULT;
+
+	/* no TX HW timestamps */
+	cfg.tx_type = HWTSTAMP_TX_OFF;
+
+	if (cfg.rx_filter != HWTSTAMP_FILTER_NONE) {
+		switch (cfg.rx_filter) {
+		case HWTSTAMP_FILTER_ALL:
+		case HWTSTAMP_FILTER_SOME:
+		case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
+		case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
+		case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
+		case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
+		case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
+		case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
+		case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
+		case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
+		case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
+		case HWTSTAMP_FILTER_PTP_V2_EVENT:
+		case HWTSTAMP_FILTER_PTP_V2_SYNC:
+		case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+		case HWTSTAMP_FILTER_NTP_ALL:
+			cfg.rx_filter = HWTSTAMP_FILTER_ALL;
+			break;
+		default:
+			return -ERANGE;
+		}
+	}
+
+	fp->hwtstamp_cfg = cfg;
+	if (copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)))
+		return -EFAULT;
+	return 0;
+}
+
+/* Ethernet ioctl entry point. Only the HW timestamping get/set commands are
+ * handled; everything else yields -EOPNOTSUPP.
+ */
+static int fun_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+{
+	if (cmd == SIOCSHWTSTAMP)
+		return fun_hwtstamp_set(dev, ifr);
+
+	if (cmd == SIOCGHWTSTAMP)
+		return fun_hwtstamp_get(dev, ifr);
+
+	return -EOPNOTSUPP;
+}
+
+/* Prepare the queues for XDP: allocate one XDP Tx queue per online CPU and
+ * attach @prog to every active Rx queue. On failure the Rx queues already
+ * updated are reverted and the XDP Tx queues are freed.
+ */
+static int fun_enter_xdp(struct net_device *dev, struct bpf_prog *prog)
+{
+	struct funeth_priv *priv = netdev_priv(dev);
+	unsigned int qidx, nxdp = num_online_cpus();
+	struct funeth_txq **new_xdpqs;
+	struct funeth_rxq **rxqs;
+	int rc;
+
+	new_xdpqs = alloc_xdpqs(dev, nxdp, priv->sq_depth, 0,
+				FUN_QSTATE_INIT_FULL);
+	if (IS_ERR(new_xdpqs))
+		return PTR_ERR(new_xdpqs);
+
+	rxqs = rtnl_dereference(priv->rxqs);
+	for (qidx = 0; qidx < dev->real_num_rx_queues; qidx++) {
+		rc = fun_rxq_set_bpf(rxqs[qidx], prog);
+		if (!rc)
+			continue;
+
+		/* undo the Rx queues that were already switched */
+		while (qidx--)
+			fun_rxq_set_bpf(rxqs[qidx], NULL);
+
+		free_xdpqs(new_xdpqs, nxdp, 0, FUN_QSTATE_DESTROYED);
+		return rc;
+	}
+
+	priv->num_xdpqs = nxdp;
+	rcu_assign_pointer(priv->xdpqs, new_xdpqs);
+	return 0;
+}
+
+/* Set the queues for non-XDP operation: unpublish and free the XDP Tx queues,
+ * then detach the BPF program from every active Rx queue.
+ */
+static void fun_end_xdp(struct net_device *dev)
+{
+	struct funeth_priv *priv = netdev_priv(dev);
+	struct funeth_txq **old_xdpqs = rtnl_dereference(priv->xdpqs);
+	struct funeth_rxq **rxqs;
+	unsigned int qidx = 0;
+
+	rcu_assign_pointer(priv->xdpqs, NULL);
+	synchronize_net();
+	/* at this point both Rx and Tx XDP processing has ended */
+
+	free_xdpqs(old_xdpqs, priv->num_xdpqs, 0, FUN_QSTATE_DESTROYED);
+	priv->num_xdpqs = 0;
+
+	rxqs = rtnl_dereference(priv->rxqs);
+	while (qidx < dev->real_num_rx_queues)
+		fun_rxq_set_bpf(rxqs[qidx++], NULL);
+}
+
+/* Largest MTU allowed while an XDP program is attached: one page less the XDP
+ * headroom, a VLAN Ethernet header and the Rx tailroom.
+ */
+#define XDP_MAX_MTU \
+ (PAGE_SIZE - FUN_XDP_HEADROOM - VLAN_ETH_HLEN - FUN_RX_TAILROOM)
+
+/* Handle XDP_SETUP_PROG: install, replace or remove the device's XDP program.
+ * xdp->prog is the new program, or NULL to remove the current one.
+ *
+ * Returns 0 on success, -EINVAL if the current MTU exceeds XDP_MAX_MTU, or the
+ * error from fun_enter_xdp().
+ */
+static int fun_xdp_setup(struct net_device *dev, struct netdev_bpf *xdp)
+{
+ struct bpf_prog *old_prog, *prog = xdp->prog;
+ struct funeth_priv *fp = netdev_priv(dev);
+ int i, err;
+
+ /* XDP uses at most one buffer */
+ if (prog && dev->mtu > XDP_MAX_MTU) {
+ netdev_err(dev, "device MTU %u too large for XDP\n", dev->mtu);
+ NL_SET_ERR_MSG_MOD(xdp->extack,
+ "Device MTU too large for XDP");
+ return -EINVAL;
+ }
+
+ if (!netif_running(dev)) {
+ /* device is down: only record how many XDP queues are wanted */
+ fp->num_xdpqs = prog ? num_online_cpus() : 0;
+ } else if (prog && !fp->xdp_prog) {
+ /* first program on a running device */
+ err = fun_enter_xdp(dev, prog);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(xdp->extack,
+ "Failed to set queues for XDP.");
+ return err;
+ }
+ } else if (!prog && fp->xdp_prog) {
+ /* last program removed from a running device */
+ fun_end_xdp(dev);
+ } else {
+ /* remaining cases: just store the new pointer in each Rx queue */
+ struct funeth_rxq **rxqs = rtnl_dereference(fp->rxqs);
+
+ for (i = 0; i < dev->real_num_rx_queues; i++)
+ WRITE_ONCE(rxqs[i]->xdp_prog, prog);
+ }
+
+ /* the MTU ceiling depends on whether a program is attached */
+ dev->max_mtu = prog ? XDP_MAX_MTU : FUN_MAX_MTU;
+ /* swap in the new program and drop the reference to the old one */
+ old_prog = xchg(&fp->xdp_prog, prog);
+ if (old_prog)
+ bpf_prog_put(old_prog);
+
+ return 0;
+}
+
+/* ndo_bpf handler. XDP_SETUP_PROG is the only supported command; any other
+ * command is rejected with -EINVAL.
+ */
+static int fun_xdp(struct net_device *dev, struct netdev_bpf *xdp)
+{
+	if (xdp->command != XDP_SETUP_PROG)
+		return -EINVAL;
+
+	return fun_xdp_setup(dev, xdp);
+}
+
+/* Return the devlink port embedded in the private data of @netdev. */
+static struct devlink_port *fun_get_devlink_port(struct net_device *netdev)
+{
+	struct funeth_priv *priv = netdev_priv(netdev);
+	struct devlink_port *port = &priv->dl_port;
+
+	return port;
+}
+
+/* Allocate a zeroed table of @n vport info entries for @ed.
+ *
+ * Returns 0 on success, -EINVAL if a table is already in place (num_vports is
+ * non-zero), or -ENOMEM if the allocation fails.
+ */
+static int fun_init_vports(struct fun_ethdev *ed, unsigned int n)
+{
+	if (ed->num_vports)
+		return -EINVAL;
+
+	ed->vport_info = kvcalloc(n, sizeof(*ed->vport_info), GFP_KERNEL);
+	if (ed->vport_info) {
+		ed->num_vports = n;
+		return 0;
+	}
+
+	return -ENOMEM;
+}
+
+/* Free the vport info table of @ed and reset the bookkeeping so that a new
+ * table can be allocated later.
+ */
+static void fun_free_vports(struct fun_ethdev *ed)
+{
+	kvfree(ed->vport_info);
+
+	ed->num_vports = 0;
+	ed->vport_info = NULL;
+}
+
+/* Look up the info entry for vport index @vport. Returns NULL if there is no
+ * vport table or the index is out of range.
+ */
+static struct fun_vport_info *fun_get_vport(struct fun_ethdev *ed,
+					    unsigned int vport)
+{
+	if (ed->vport_info && vport < ed->num_vports)
+		return &ed->vport_info[vport];
+
+	return NULL;
+}
+
+/* ndo_set_vf_mac handler: write the MAC address for VF @vf to the device and,
+ * on success, remember it in the VF's vport info.
+ *
+ * Returns 0 on success, -EINVAL for a multicast address or an unknown VF, or
+ * the error from fun_adi_write().
+ */
+static int fun_set_vf_mac(struct net_device *dev, int vf, u8 *mac)
+{
+	struct funeth_priv *priv = netdev_priv(dev);
+	struct fun_dev *fdev = priv->fdev;
+	struct fun_ethdev *ed = to_fun_ethdev(fdev);
+	struct fun_adi_param param = {};
+	struct fun_vport_info *vport;
+	int err = -EINVAL;
+
+	if (is_multicast_ether_addr(mac))
+		return -EINVAL;
+
+	mutex_lock(&ed->state_mutex);
+	vport = fun_get_vport(ed, vf);
+	if (vport) {
+		param.u.mac = FUN_ADI_MAC_INIT(ether_addr_to_u64(mac));
+		/* the device is addressed with vf + 1 */
+		err = fun_adi_write(fdev, FUN_ADMIN_ADI_ATTR_MACADDR, vf + 1,
+				    &param);
+		if (!err)
+			ether_addr_copy(vport->mac, mac);
+	}
+	mutex_unlock(&ed->state_mutex);
+
+	return err;
+}
+
+/* ndo_set_vf_vlan handler: write the VLAN id, priority and protocol for VF
+ * @vf to the device and, on success, remember them in the VF's vport info.
+ *
+ * Returns 0 on success, -EINVAL for out-of-range arguments, an unsupported
+ * protocol or an unknown VF, or the error from fun_adi_write().
+ */
+static int fun_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos,
+			   __be16 vlan_proto)
+{
+	struct funeth_priv *priv = netdev_priv(dev);
+	struct fun_dev *fdev = priv->fdev;
+	struct fun_ethdev *ed = to_fun_ethdev(fdev);
+	struct fun_adi_param param = {};
+	struct fun_vport_info *vport;
+	int err = -EINVAL;
+
+	if (vlan > 4095 || qos > 7)
+		return -EINVAL;
+
+	/* a zero protocol is accepted, otherwise only 802.1Q and 802.1ad */
+	if (vlan_proto && vlan_proto != htons(ETH_P_8021Q) &&
+	    vlan_proto != htons(ETH_P_8021AD))
+		return -EINVAL;
+
+	mutex_lock(&ed->state_mutex);
+	vport = fun_get_vport(ed, vf);
+	if (vport) {
+		param.u.vlan = FUN_ADI_VLAN_INIT(be16_to_cpu(vlan_proto),
+						 ((u16)qos << VLAN_PRIO_SHIFT) | vlan);
+		err = fun_adi_write(fdev, FUN_ADMIN_ADI_ATTR_VLAN, vf + 1,
+				    &param);
+		if (!err) {
+			vport->vlan = vlan;
+			vport->qos = qos;
+			vport->vlan_proto = vlan_proto;
+		}
+	}
+	mutex_unlock(&ed->state_mutex);
+
+	return err;
+}
+
+/* ndo_set_vf_rate handler: write the maximum Tx rate for VF @vf to the device
+ * and, on success, remember it in the VF's vport info. A non-zero minimum
+ * rate is rejected.
+ *
+ * Returns 0 on success, -EINVAL for a non-zero @min_tx_rate or an unknown VF,
+ * or the error from fun_adi_write().
+ */
+static int fun_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate,
+			   int max_tx_rate)
+{
+	struct funeth_priv *priv = netdev_priv(dev);
+	struct fun_dev *fdev = priv->fdev;
+	struct fun_ethdev *ed = to_fun_ethdev(fdev);
+	struct fun_adi_param param = {};
+	struct fun_vport_info *vport;
+	int err = -EINVAL;
+
+	if (min_tx_rate)
+		return -EINVAL;
+
+	mutex_lock(&ed->state_mutex);
+	vport = fun_get_vport(ed, vf);
+	if (vport) {
+		param.u.rate = FUN_ADI_RATE_INIT(max_tx_rate);
+		err = fun_adi_write(fdev, FUN_ADMIN_ADI_ATTR_RATE, vf + 1,
+				    &param);
+		if (!err)
+			vport->max_rate = max_tx_rate;
+	}
+	mutex_unlock(&ed->state_mutex);
+
+	return err;
+}
+
+/* ndo_get_vf_config handler: fill @ivi from the stored vport info of VF @vf.
+ * @ivi is left untouched if the VF is unknown.
+ *
+ * Returns 0 on success or -EINVAL if there is no such VF.
+ */
+static int fun_get_vf_config(struct net_device *dev, int vf,
+			     struct ifla_vf_info *ivi)
+{
+	struct funeth_priv *priv = netdev_priv(dev);
+	struct fun_ethdev *ed = to_fun_ethdev(priv->fdev);
+	const struct fun_vport_info *vport;
+	int err = -EINVAL;
+
+	mutex_lock(&ed->state_mutex);
+	vport = fun_get_vport(ed, vf);
+	if (vport) {
+		memset(ivi, 0, sizeof(*ivi));
+		ivi->vf = vf;
+		ether_addr_copy(ivi->mac, vport->mac);
+		ivi->vlan = vport->vlan;
+		ivi->qos = vport->qos;
+		ivi->vlan_proto = vport->vlan_proto;
+		ivi->max_tx_rate = vport->max_rate;
+		ivi->spoofchk = vport->spoofchk;
+		err = 0;
+	}
+	mutex_unlock(&ed->state_mutex);
+
+	return err;
+}
+
+/* ndo_uninit handler: prune the device's queue IRQs and destroy the IRQ
+ * xarray.
+ */
+static void fun_uninit(struct net_device *dev)
+{
+	struct funeth_priv *priv = netdev_priv(dev);
+
+	fun_prune_queue_irqs(dev);
+	xa_destroy(&priv->irqs);
+}
+
+/* net_device operations installed on every funeth port by
+ * fun_create_netdev(). Address validation uses the generic
+ * eth_validate_addr(); all other callbacks are driver functions.
+ */
+static const struct net_device_ops fun_netdev_ops = {
+ .ndo_open = funeth_open,
+ .ndo_stop = funeth_close,
+ .ndo_start_xmit = fun_start_xmit,
+ .ndo_get_stats64 = fun_get_stats64,
+ .ndo_change_mtu = fun_change_mtu,
+ .ndo_set_mac_address = fun_set_macaddr,
+ .ndo_validate_addr = eth_validate_addr,
+ .ndo_eth_ioctl = fun_ioctl,
+ .ndo_uninit = fun_uninit,
+ .ndo_bpf = fun_xdp,
+ .ndo_xdp_xmit = fun_xdp_xmit_frames,
+ .ndo_set_vf_mac = fun_set_vf_mac,
+ .ndo_set_vf_vlan = fun_set_vf_vlan,
+ .ndo_set_vf_rate = fun_set_vf_rate,
+ .ndo_get_vf_config = fun_get_vf_config,
+ .ndo_get_devlink_port = fun_get_devlink_port,
+};
+
+/* Feature groups used when building a port's feature masks:
+ * GSO_ENCAP_FLAGS - GSO for tunnelled traffic, offered when the port reports
+ * FUN_PORT_CAP_ENCAP_OFFLOADS
+ * TSO_FLAGS - TCP/UDP segmentation, offered when the port reports
+ * FUN_PORT_CAP_OFFLOADS
+ * VLAN_FEAT - mask applied to the device features to derive vlan_features
+ */
+#define GSO_ENCAP_FLAGS (NETIF_F_GSO_GRE | NETIF_F_GSO_IPXIP4 | \
+ NETIF_F_GSO_IPXIP6 | NETIF_F_GSO_UDP_TUNNEL | \
+ NETIF_F_GSO_UDP_TUNNEL_CSUM)
+#define TSO_FLAGS (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN | \
+ NETIF_F_GSO_UDP_L4)
+#define VLAN_FEAT (NETIF_F_SG | NETIF_F_HW_CSUM | TSO_FLAGS | \
+ GSO_ENCAP_FLAGS | NETIF_F_HIGHDMA)
+
+/* Fill the current RSS indirection table entries with the ethtool default
+ * distribution over @nrx Rx queues.
+ */
+static void fun_dflt_rss_indir(struct funeth_priv *fp, unsigned int nrx)
+{
+	unsigned int slot = 0;
+
+	while (slot < fp->indir_table_nentries) {
+		fp->indir_table[slot] = ethtool_rxfh_indir_default(slot, nrx);
+		slot++;
+	}
+}
+
+/* Reset the RSS indirection table to an equal distribution across @nrx Rx
+ * queues. Called at init time and whenever the number of Rx queues changes
+ * subsequently. Note that this may also resize the indirection table. Nothing
+ * is done when the device has no RSS configuration area.
+ */
+static void fun_reset_rss_indir(struct net_device *dev, unsigned int nrx)
+{
+	struct funeth_priv *priv = netdev_priv(dev);
+
+	if (priv->rss_cfg) {
+		/* Use the largest table size that gives every CQ the same
+		 * number of occurrences.
+		 */
+		priv->indir_table_nentries =
+			rounddown(FUN_ETH_RSS_MAX_INDIR_ENT, nrx);
+		fun_dflt_rss_indir(priv, nrx);
+	}
+}
+
+/* Update the RSS LUT to contain only queues in [0, nrx). Normally this will
+ * update the LUT to an equal distribution among nrx queues. If @only_if_needed
+ * is set the LUT is left unchanged if it already does not reference any queues
+ * >= nrx.
+ *
+ * Returns 0 on success. If the device rejects the new LUT the previous table
+ * and size are restored and the error is returned.
+ */
+static int fun_rss_set_qnum(struct net_device *dev, unsigned int nrx,
+			    bool only_if_needed)
+{
+	struct funeth_priv *priv = netdev_priv(dev);
+	u32 saved_lut[FUN_ETH_RSS_MAX_INDIR_ENT];
+	unsigned int slot, saved_sz;
+	int rc;
+
+	if (!priv->rss_cfg)
+		return 0;
+
+	if (only_if_needed) {
+		bool stale = false;
+
+		/* stop at the first entry that points at a removed queue */
+		for (slot = 0; slot < priv->indir_table_nentries && !stale;
+		     slot++)
+			stale = priv->indir_table[slot] >= nrx;
+
+		if (!stale)
+			return 0;
+	}
+
+	/* keep a copy so the table can be rolled back on failure */
+	memcpy(saved_lut, priv->indir_table, sizeof(saved_lut));
+	saved_sz = priv->indir_table_nentries;
+	fun_reset_rss_indir(dev, nrx);
+
+	rc = fun_config_rss(dev, priv->hash_algo, priv->rss_key,
+			    priv->indir_table, FUN_ADMIN_SUBOP_MODIFY);
+	if (rc) {
+		memcpy(priv->indir_table, saved_lut, sizeof(saved_lut));
+		priv->indir_table_nentries = saved_sz;
+	}
+
+	return rc;
+}
+
+/* Allocate the DMA area for the RSS configuration commands to the device, and
+ * initialize the hash, hash key, indirection table size and its entries to
+ * their defaults. The indirection table defaults to equal distribution across
+ * the Rx queues. Ports without FUN_PORT_CAP_OFFLOADS get no RSS area.
+ *
+ * Returns 0 on success or -ENOMEM if the DMA allocation fails.
+ */
+static int fun_init_rss(struct net_device *dev)
+{
+	struct funeth_priv *priv = netdev_priv(dev);
+	size_t cfg_len = sizeof(priv->rss_key) + sizeof(priv->indir_table);
+
+	priv->rss_hw_id = FUN_HCI_ID_INVALID;
+	if (!(priv->port_caps & FUN_PORT_CAP_OFFLOADS))
+		return 0;
+
+	priv->rss_cfg = dma_alloc_coherent(&priv->pdev->dev, cfg_len,
+					   &priv->rss_dma_addr, GFP_KERNEL);
+	if (!priv->rss_cfg)
+		return -ENOMEM;
+
+	priv->hash_algo = FUN_ETH_RSS_ALG_TOEPLITZ;
+	netdev_rss_key_fill(priv->rss_key, sizeof(priv->rss_key));
+	fun_reset_rss_indir(dev, dev->real_num_rx_queues);
+
+	return 0;
+}
+
+/* Release the RSS configuration DMA area, if one was allocated. */
+static void fun_free_rss(struct funeth_priv *fp)
+{
+	size_t cfg_len = sizeof(fp->rss_key) + sizeof(fp->indir_table);
+
+	if (!fp->rss_cfg)
+		return;
+
+	dma_free_coherent(&fp->pdev->dev, cfg_len, fp->rss_cfg,
+			  fp->rss_dma_addr);
+	fp->rss_cfg = NULL;
+}
+
+/* Set the number of active Tx and Rx queues of @netdev. The RSS indirection
+ * table is reset only when the Rx queue count actually changes.
+ */
+void fun_set_ring_count(struct net_device *netdev, unsigned int ntx,
+			unsigned int nrx)
+{
+	netif_set_real_num_tx_queues(netdev, ntx);
+
+	if (nrx == netdev->real_num_rx_queues)
+		return;
+
+	netif_set_real_num_rx_queues(netdev, nrx);
+	fun_reset_rss_indir(netdev, nrx);
+}
+
+/* Allocate the DMA area holding the port's MAC Rx, Tx and FEC statistics, one
+ * u64 per counter. Ports without FUN_PORT_CAP_STATS get no area.
+ *
+ * Returns 0 on success or -ENOMEM if the DMA allocation fails.
+ */
+static int fun_init_stats_area(struct funeth_priv *fp)
+{
+	size_t area_len;
+
+	if (!(fp->port_caps & FUN_PORT_CAP_STATS))
+		return 0;
+
+	area_len = (PORT_MAC_RX_STATS_MAX + PORT_MAC_TX_STATS_MAX +
+		    PORT_MAC_FEC_STATS_MAX) * sizeof(u64);
+
+	fp->stats = dma_alloc_coherent(&fp->pdev->dev, area_len,
+				       &fp->stats_dma_addr, GFP_KERNEL);
+
+	return fp->stats ? 0 : -ENOMEM;
+}
+
+/* Release the statistics DMA area allocated by fun_init_stats_area(), if any.
+ *
+ * dma_free_coherent() must be given the same size that was used for the
+ * allocation, so the size covers the MAC Rx, Tx and FEC counters.
+ */
+static void fun_free_stats_area(struct funeth_priv *fp)
+{
+	unsigned int nstats;
+
+	if (!fp->stats)
+		return;
+
+	nstats = PORT_MAC_RX_STATS_MAX + PORT_MAC_TX_STATS_MAX +
+		 PORT_MAC_FEC_STATS_MAX;
+	dma_free_coherent(&fp->pdev->dev, nstats * sizeof(u64),
+			  fp->stats, fp->stats_dma_addr);
+	fp->stats = NULL;
+}
+
+/* Set the devlink attributes of the port behind @netdev and register it with
+ * devlink. Ports with FUN_PORT_CAP_VPORT are registered as virtual ports
+ * indexed by lport; the others as physical ports indexed by dev_port, with
+ * lane count and split information taken from lane_attrs.
+ *
+ * Returns the result of devlink_port_register().
+ */
+static int fun_dl_port_register(struct net_device *netdev)
+{
+	struct funeth_priv *priv = netdev_priv(netdev);
+	struct devlink *dl = priv_to_devlink(priv->fdev);
+	struct devlink_port_attrs attrs = {};
+	unsigned int port_index;
+
+	if (!(priv->port_caps & FUN_PORT_CAP_VPORT)) {
+		port_index = netdev->dev_port;
+		attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL;
+		attrs.lanes = priv->lane_attrs & 7;
+		if (!(priv->lane_attrs & FUN_PORT_LANE_SPLIT)) {
+			attrs.phys.port_number = priv->lport;
+		} else {
+			/* low 2 bits of lport select the sub-port */
+			attrs.split = 1;
+			attrs.phys.port_number = priv->lport & ~3;
+			attrs.phys.split_subport_number = priv->lport & 3;
+		}
+	} else {
+		attrs.flavour = DEVLINK_PORT_FLAVOUR_VIRTUAL;
+		port_index = priv->lport;
+	}
+
+	devlink_port_attrs_set(&priv->dl_port, &attrs);
+
+	return devlink_port_register(dl, &priv->dl_port, port_index);
+}
+
+/* Determine the max Tx/Rx queues for a port and store them in @ntx and @nrx.
+ * Multi-port devices and kdump kernels get a single queue of each kind.
+ *
+ * Returns 0 on success or the negative error from fun_get_res_count(), in
+ * which case @ntx and @nrx are not written.
+ */
+static int fun_max_qs(struct fun_ethdev *ed, unsigned int *ntx,
+		      unsigned int *nrx)
+{
+	unsigned int nq;
+	int neth;
+
+	if (ed->num_ports > 1 || is_kdump_kernel()) {
+		*nrx = 1;
+		*ntx = 1;
+		return 0;
+	}
+
+	neth = fun_get_res_count(&ed->fdev, FUN_ADMIN_OP_ETH);
+	if (neth < 0)
+		return neth;
+
+	/* We determine the max number of queues based on the CPU
+	 * cores, device interrupts and queues, RSS size, and device Tx flows.
+	 *
+	 * - At least 1 Rx and 1 Tx queues.
+	 * - At most 1 Rx/Tx queue per core.
+	 * - Each Rx/Tx queue needs 1 SQ.
+	 */
+	nq = min(ed->nsqs_per_port - 1, num_online_cpus());
+
+	/* Tx is further capped by the ETH resource count, Rx by the LUT size */
+	*ntx = nq > neth ? neth : nq;
+	*nrx = nq > FUN_ETH_RSS_MAX_INDIR_ENT ? FUN_ETH_RSS_MAX_INDIR_ENT : nq;
+
+	return 0;
+}
+
+/* Choose the initial number of active Tx and Rx queues for @dev: at most
+ * FUN_DFLT_QUEUES of each kind, with the total bounded by @nsqs. The kind with
+ * fewer queues is capped at half of @nsqs and the other gets at most what
+ * remains.
+ */
+static void fun_queue_defaults(struct net_device *dev, unsigned int nsqs)
+{
+	unsigned int tx_cnt = min(dev->num_tx_queues, FUN_DFLT_QUEUES);
+	unsigned int rx_cnt = min(dev->num_rx_queues, FUN_DFLT_QUEUES);
+
+	if (tx_cnt > rx_cnt) {
+		rx_cnt = min(rx_cnt, nsqs / 2);
+		tx_cnt = min(tx_cnt, nsqs - rx_cnt);
+	} else {
+		tx_cnt = min(tx_cnt, nsqs / 2);
+		rx_cnt = min(rx_cnt, nsqs - tx_cnt);
+	}
+
+	netif_set_real_num_tx_queues(dev, tx_cnt);
+	netif_set_real_num_rx_queues(dev, rx_cnt);
+}
+
+/* Replace the existing Rx/Tx/XDP queues with equal number of queues with
+ * different settings, e.g. depth. This is a disruptive replacement that
+ * temporarily shuts down the data path and should be limited to changes that
+ * can't be applied to live queues. The old queues are always discarded.
+ *
+ * The caller supplies the new settings in @newqs; the queue counts and initial
+ * state in it are filled in here. Returns 0 on success or a negative errno,
+ * with a message set in @extack.
+ */
+int fun_replace_queues(struct net_device *dev, struct fun_qset *newqs,
+ struct netlink_ext_ack *extack)
+{
+ struct fun_qset oldqs = { .state = FUN_QSTATE_DESTROYED };
+ struct funeth_priv *fp = netdev_priv(dev);
+ int err;
+
+ /* the new set uses the current queue counts */
+ newqs->nrxqs = dev->real_num_rx_queues;
+ newqs->ntxqs = dev->real_num_tx_queues;
+ newqs->nxdpqs = fp->num_xdpqs;
+ newqs->state = FUN_QSTATE_INIT_SW;
+ /* allocate before touching the live queues so failure here is harmless */
+ err = fun_alloc_rings(dev, newqs);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Unable to allocate memory for new queues, keeping current settings");
+ return err;
+ }
+
+ /* NOTE(review): presumably takes the data path down and disposes of the
+ * current queues as requested by oldqs.state -- confirm in fun_down()
+ */
+ fun_down(dev, &oldqs);
+
+ err = fun_up(dev, newqs);
+ if (!err)
+ return 0;
+
+ /* The new queues couldn't be installed. We do not retry the old queues
+ * as they are the same to the device as the new queues and would
+ * similarly fail.
+ */
+ newqs->state = FUN_QSTATE_DESTROYED;
+ fun_free_rings(dev, newqs);
+ NL_SET_ERR_MSG_MOD(extack, "Unable to restore the data path with the new queues.");
+ return err;
+}
+
+/* Change the number of Rx/Tx queues of a device while it is up. This is done
+ * by incrementally adding/removing queues to meet the new requirements while
+ * handling ongoing traffic.
+ *
+ * Returns 0 on success or a negative errno, in which case the new rings are
+ * freed again.
+ */
+int fun_change_num_queues(struct net_device *dev, unsigned int ntx,
+ unsigned int nrx)
+{
+ /* queues with index below keep_* exist both before and after the change */
+ unsigned int keep_tx = min(dev->real_num_tx_queues, ntx);
+ unsigned int keep_rx = min(dev->real_num_rx_queues, nrx);
+ struct funeth_priv *fp = netdev_priv(dev);
+ /* NOTE(review): the *_start fields presumably make the ring helpers act
+ * only on queues from that index up -- confirm in fun_alloc_rings() and
+ * fun_free_rings()
+ */
+ struct fun_qset oldqs = {
+ .rxqs = rtnl_dereference(fp->rxqs),
+ .txqs = fp->txqs,
+ .nrxqs = dev->real_num_rx_queues,
+ .ntxqs = dev->real_num_tx_queues,
+ .rxq_start = keep_rx,
+ .txq_start = keep_tx,
+ .state = FUN_QSTATE_DESTROYED
+ };
+ struct fun_qset newqs = {
+ .nrxqs = nrx,
+ .ntxqs = ntx,
+ .rxq_start = keep_rx,
+ .txq_start = keep_tx,
+ .cq_depth = fp->cq_depth,
+ .rq_depth = fp->rq_depth,
+ .sq_depth = fp->sq_depth,
+ .state = FUN_QSTATE_INIT_FULL
+ };
+ int i, err;
+
+ err = fun_alloc_rings(dev, &newqs);
+ if (err)
+ goto free_irqs;
+
+ err = fun_enable_irqs(dev); /* of any newly added queues */
+ if (err)
+ goto free_rings;
+
+ /* copy the queues we are keeping to the new set */
+ memcpy(newqs.rxqs, oldqs.rxqs, keep_rx * sizeof(*oldqs.rxqs));
+ memcpy(newqs.txqs, fp->txqs, keep_tx * sizeof(*fp->txqs));
+
+ if (nrx < dev->real_num_rx_queues) {
+ /* shrinking Rx: first make sure the RSS LUT no longer references
+ * the queues going away, then disable their interrupts
+ */
+ err = fun_rss_set_qnum(dev, nrx, true);
+ if (err)
+ goto disable_tx_irqs;
+
+ for (i = nrx; i < dev->real_num_rx_queues; i++)
+ fun_disable_one_irq(container_of(oldqs.rxqs[i]->napi,
+ struct fun_irq, napi));
+
+ netif_set_real_num_rx_queues(dev, nrx);
+ }
+
+ /* shrinking Tx: lower the count before the queue array is switched */
+ if (ntx < dev->real_num_tx_queues)
+ netif_set_real_num_tx_queues(dev, ntx);
+
+ /* switch to the new queue arrays, then synchronize_net() before the
+ * counts are raised and the old queues are released
+ */
+ rcu_assign_pointer(fp->rxqs, newqs.rxqs);
+ fp->txqs = newqs.txqs;
+ synchronize_net();
+
+ /* growing: raise the counts only after the new arrays are in place */
+ if (ntx > dev->real_num_tx_queues)
+ netif_set_real_num_tx_queues(dev, ntx);
+
+ if (nrx > dev->real_num_rx_queues) {
+ netif_set_real_num_rx_queues(dev, nrx);
+ /* the result of spreading the LUT over the new queues is ignored */
+ fun_rss_set_qnum(dev, nrx, false);
+ }
+
+ /* disable interrupts of any excess Tx queues */
+ for (i = keep_tx; i < oldqs.ntxqs; i++)
+ fun_disable_one_irq(oldqs.txqs[i]->irq);
+
+ fun_free_rings(dev, &oldqs);
+ fun_prune_queue_irqs(dev);
+ return 0;
+
+disable_tx_irqs:
+ /* only newly added Tx queues can have had IRQs enabled on this path */
+ for (i = oldqs.ntxqs; i < ntx; i++)
+ fun_disable_one_irq(newqs.txqs[i]->irq);
+free_rings:
+ newqs.state = FUN_QSTATE_DESTROYED;
+ fun_free_rings(dev, &newqs);
+free_irqs:
+ fun_prune_queue_irqs(dev);
+ return err;
+}
+
+/* Create, configure and register the net_device for port @portid of @ed.
+ *
+ * Returns 0 on success. On failure everything set up so far is undone in
+ * reverse order, an error is logged and a negative errno is returned.
+ */
+static int fun_create_netdev(struct fun_ethdev *ed, unsigned int portid)
+{
+ struct fun_dev *fdev = &ed->fdev;
+ struct net_device *netdev;
+ struct funeth_priv *fp;
+ unsigned int ntx, nrx;
+ int rc;
+
+ rc = fun_max_qs(ed, &ntx, &nrx);
+ if (rc)
+ return rc;
+
+ /* the netdev is sized for the maximum queue counts */
+ netdev = alloc_etherdev_mqs(sizeof(*fp), ntx, nrx);
+ if (!netdev) {
+ rc = -ENOMEM;
+ goto done;
+ }
+
+ netdev->dev_port = portid;
+ /* start with the default number of active queues */
+ fun_queue_defaults(netdev, ed->nsqs_per_port);
+
+ fp = netdev_priv(netdev);
+ fp->fdev = fdev;
+ fp->pdev = to_pci_dev(fdev->dev);
+ fp->netdev = netdev;
+ xa_init(&fp->irqs);
+ fp->rx_irq_ofst = ntx;
+ seqcount_init(&fp->link_seq);
+
+ fp->lport = INVALID_LPORT;
+ rc = fun_port_create(netdev);
+ if (rc)
+ goto free_netdev;
+
+ /* bind port to admin CQ for async events */
+ rc = fun_bind(fdev, FUN_ADMIN_BIND_TYPE_PORT, portid,
+ FUN_ADMIN_BIND_TYPE_EPCQ, 0);
+ if (rc)
+ goto destroy_port;
+
+ rc = fun_get_port_attributes(netdev);
+ if (rc)
+ goto destroy_port;
+
+ rc = fun_init_rss(netdev);
+ if (rc)
+ goto destroy_port;
+
+ rc = fun_init_stats_area(fp);
+ if (rc)
+ goto free_rss;
+
+ SET_NETDEV_DEV(netdev, fdev->dev);
+ netdev->netdev_ops = &fun_netdev_ops;
+
+ /* offered features depend on the capabilities reported by the port */
+ netdev->hw_features = NETIF_F_SG | NETIF_F_RXHASH | NETIF_F_RXCSUM;
+ if (fp->port_caps & FUN_PORT_CAP_OFFLOADS)
+ netdev->hw_features |= NETIF_F_HW_CSUM | TSO_FLAGS;
+ if (fp->port_caps & FUN_PORT_CAP_ENCAP_OFFLOADS)
+ netdev->hw_features |= GSO_ENCAP_FLAGS;
+
+ netdev->features |= netdev->hw_features | NETIF_F_HIGHDMA;
+ netdev->vlan_features = netdev->features & VLAN_FEAT;
+ netdev->mpls_features = netdev->vlan_features;
+ netdev->hw_enc_features = netdev->hw_features;
+
+ netdev->min_mtu = ETH_MIN_MTU;
+ netdev->max_mtu = FUN_MAX_MTU;
+
+ fun_set_ethtool_ops(netdev);
+
+ /* configurable parameters */
+ fp->sq_depth = min(SQ_DEPTH, fdev->q_depth);
+ fp->cq_depth = min(CQ_DEPTH, fdev->q_depth);
+ fp->rq_depth = min_t(unsigned int, RQ_DEPTH, fdev->q_depth);
+ fp->rx_coal_usec = CQ_INTCOAL_USEC;
+ fp->rx_coal_count = CQ_INTCOAL_NPKT;
+ fp->tx_coal_usec = SQ_INTCOAL_USEC;
+ fp->tx_coal_count = SQ_INTCOAL_NPKT;
+ fp->cq_irq_db = FUN_IRQ_CQ_DB(fp->rx_coal_usec, fp->rx_coal_count);
+
+ rc = fun_dl_port_register(netdev);
+ if (rc)
+ goto free_stats;
+
+ fp->ktls_id = FUN_HCI_ID_INVALID;
+ fun_ktls_init(netdev); /* optional, failure OK */
+
+ netif_carrier_off(netdev);
+ /* the port table entry is set before registration and cleared again
+ * below if registration fails
+ */
+ ed->netdevs[portid] = netdev;
+ rc = register_netdev(netdev);
+ if (rc)
+ goto unreg_devlink;
+
+ devlink_port_type_eth_set(&fp->dl_port, netdev);
+
+ return 0;
+
+unreg_devlink:
+ ed->netdevs[portid] = NULL;
+ fun_ktls_cleanup(fp);
+ devlink_port_unregister(&fp->dl_port);
+free_stats:
+ fun_free_stats_area(fp);
+free_rss:
+ fun_free_rss(fp);
+destroy_port:
+ fun_port_destroy(netdev);
+free_netdev:
+ free_netdev(netdev);
+done:
+ dev_err(fdev->dev, "couldn't allocate port %u, error %d", portid, rc);
+ return rc;
+}
+
+/* Unregister @netdev and release everything fun_create_netdev() set up for
+ * it: devlink port, kTLS state, stats and RSS areas, the device port and
+ * finally the net_device itself.
+ */
+static void fun_destroy_netdev(struct net_device *netdev)
+{
+	struct funeth_priv *priv = netdev_priv(netdev);
+	struct devlink_port *dl_port = &priv->dl_port;
+
+	devlink_port_type_clear(dl_port);
+	unregister_netdev(netdev);
+	devlink_port_unregister(dl_port);
+
+	fun_ktls_cleanup(priv);
+	fun_free_stats_area(priv);
+	fun_free_rss(priv);
+
+	fun_port_destroy(netdev);
+	free_netdev(netdev);
+}
+
+/* Create the netdevs for @nports ports, sharing the available SQs equally
+ * among them. If any port fails, the ports created so far are destroyed and
+ * the port table is released.
+ *
+ * Returns 0 on success, -EINVAL if there are too few SQs per port, -ENOMEM if
+ * the port table cannot be allocated, or the error from fun_create_netdev().
+ */
+static int fun_create_ports(struct fun_ethdev *ed, unsigned int nports)
+{
+	struct fun_dev *fdev = &ed->fdev;
+	int i, err;
+
+	/* The admin queue takes 1 IRQ and 2 SQs. */
+	ed->nsqs_per_port = min(fdev->num_irqs - 1,
+				fdev->kern_end_qid - 2) / nports;
+	if (ed->nsqs_per_port < 2) {
+		dev_err(fdev->dev, "Too few SQs for %u ports", nports);
+		return -EINVAL;
+	}
+
+	ed->netdevs = kcalloc(nports, sizeof(*ed->netdevs), GFP_KERNEL);
+	if (!ed->netdevs)
+		return -ENOMEM;
+
+	ed->num_ports = nports;
+	for (i = 0; i < nports; i++) {
+		err = fun_create_netdev(ed, i);
+		if (!err)
+			continue;
+
+		/* tear down the ports created before the failing one */
+		while (i)
+			fun_destroy_netdev(ed->netdevs[--i]);
+		kfree(ed->netdevs);
+		ed->netdevs = NULL;
+		ed->num_ports = 0;
+		return err;
+	}
+
+	return 0;
+}
+
+/* Destroy the netdevs of all ports of @ed and release the port table. */
+static void fun_destroy_ports(struct fun_ethdev *ed)
+{
+	unsigned int port = 0;
+
+	while (port < ed->num_ports) {
+		fun_destroy_netdev(ed->netdevs[port]);
+		port++;
+	}
+
+	kfree(ed->netdevs);
+	ed->num_ports = 0;
+	ed->netdevs = NULL;
+}
+
+/* Apply a port notification from the device to the matching netdev: record
+ * the reported link parameters under the link seqcount, update the carrier
+ * state and report the link.
+ *
+ * Notifications for out-of-range ports, or for ports that currently have no
+ * netdev, are ignored.
+ */
+static void fun_update_link_state(const struct fun_ethdev *ed,
+				  const struct fun_admin_port_notif *notif)
+{
+	unsigned int port_idx = be16_to_cpu(notif->id);
+	struct net_device *netdev;
+	struct funeth_priv *fp;
+
+	if (port_idx >= ed->num_ports)
+		return;
+
+	/* num_ports is set before the per-port netdevs are created and a slot
+	 * is cleared again if netdev registration fails, so an in-range index
+	 * can still refer to an empty slot.
+	 */
+	netdev = ed->netdevs[port_idx];
+	if (!netdev)
+		return;
+
+	fp = netdev_priv(netdev);
+
+	write_seqcount_begin(&fp->link_seq);
+	fp->link_speed = be32_to_cpu(notif->speed) * 10; /* 10 Mbps->Mbps */
+	fp->active_fc = notif->flow_ctrl;
+	fp->active_fec = notif->fec;
+	fp->xcvr_type = notif->xcvr_type;
+	fp->link_down_reason = notif->link_down_reason;
+	fp->lp_advertising = be64_to_cpu(notif->lp_advertising);
+
+	/* a MAC_DOWN flag in either the link state or the missed events drops
+	 * the carrier; a MAC_UP link state then raises it again
+	 */
+	if ((notif->link_state | notif->missed_events) & FUN_PORT_FLAG_MAC_DOWN)
+		netif_carrier_off(netdev);
+	if (notif->link_state & FUN_PORT_FLAG_MAC_UP)
+		netif_carrier_on(netdev);
+
+	write_seqcount_end(&fp->link_seq);
+	fun_report_link(netdev);
+}
+
+/* handler for async events delivered through the admin CQ
+ *
+ * Port notifications update the link state. Port resource-count events flag
+ * work for the service task: a resource change when the reported count is
+ * non-zero, port deletion otherwise. Anything else is only logged.
+ */
+static void fun_event_cb(struct fun_dev *fdev, void *entry)
+{
+	const struct fun_admin_rsp_common *hdr = entry;
+	const struct fun_admin_res_count_rsp *cnt_rsp;
+	const struct fun_admin_port_notif *notif;
+	u8 op = hdr->op;
+
+	if (op != FUN_ADMIN_OP_PORT) {
+		dev_info(fdev->dev, "adminq event unexpected op %u", op);
+		return;
+	}
+
+	notif = entry;
+	if (notif->subop == FUN_ADMIN_SUBOP_NOTIFY) {
+		fun_update_link_state(to_fun_ethdev(fdev), notif);
+		return;
+	}
+
+	if (notif->subop != FUN_ADMIN_SUBOP_RES_COUNT) {
+		dev_info(fdev->dev, "adminq event unexpected op %u subop %u",
+			 op, notif->subop);
+		return;
+	}
+
+	cnt_rsp = entry;
+	set_bit(cnt_rsp->count.data ? FUN_SERV_RES_CHANGE : FUN_SERV_DEL_PORTS,
+		&fdev->service_flags);
+	fun_serv_sched(fdev);
+}
+
+/* handler for pending work managed by the service task
+ *
+ * Destroys all ports if port deletion was flagged. If a resource change was
+ * flagged, re-reads the port count and, when it differs from the current one,
+ * destroys the existing ports and creates the new set.
+ */
+static void fun_service_cb(struct fun_dev *fdev)
+{
+	struct fun_ethdev *ed = to_fun_ethdev(fdev);
+	int nports;
+
+	if (test_and_clear_bit(FUN_SERV_DEL_PORTS, &fdev->service_flags))
+		fun_destroy_ports(ed);
+
+	if (test_and_clear_bit(FUN_SERV_RES_CHANGE, &fdev->service_flags)) {
+		nports = fun_get_res_count(fdev, FUN_ADMIN_OP_PORT);
+		if (nports < 0 || nports == ed->num_ports)
+			return;
+
+		if (ed->num_ports)
+			fun_destroy_ports(ed);
+		if (nports)
+			fun_create_ports(ed, nports);
+	}
+}
+
+/* PCI sriov_configure handler. A non-zero @nvfs enables SR-IOV with that many
+ * VFs and allocates the vport table; zero frees the table and disables
+ * SR-IOV, unless VFs are still assigned.
+ *
+ * Returns @nvfs when enabling succeeds, 0 when disabling succeeds, -EPERM if
+ * VFs are still assigned, or the error from pci_enable_sriov() or
+ * fun_init_vports().
+ */
+static int funeth_sriov_configure(struct pci_dev *pdev, int nvfs)
+{
+	struct fun_dev *fdev = pci_get_drvdata(pdev);
+	struct fun_ethdev *ed = to_fun_ethdev(fdev);
+	int err;
+
+	if (nvfs) {
+		err = pci_enable_sriov(pdev, nvfs);
+		if (err)
+			return err;
+
+		mutex_lock(&ed->state_mutex);
+		err = fun_init_vports(ed, nvfs);
+		mutex_unlock(&ed->state_mutex);
+		if (!err)
+			return nvfs;
+
+		/* no vport table: back out of SR-IOV again */
+		pci_disable_sriov(pdev);
+		return err;
+	}
+
+	if (pci_vfs_assigned(pdev)) {
+		dev_warn(&pdev->dev,
+			 "Cannot disable SR-IOV while VFs are assigned\n");
+		return -EPERM;
+	}
+
+	mutex_lock(&ed->state_mutex);
+	fun_free_vports(ed);
+	mutex_unlock(&ed->state_mutex);
+	pci_disable_sriov(pdev);
+
+	return 0;
+}
+
+/* PCI probe handler: allocate the devlink instance whose private area holds
+ * the fun_ethdev, enable the device with the admin queue parameters below,
+ * create a netdev for each port the device reports, and register devlink.
+ *
+ * Returns 0 on success or a negative errno after undoing the steps taken.
+ */
+static int funeth_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+ /* admin queue parameters and the driver's event/service callbacks */
+ struct fun_dev_params aqreq = {
+ .cqe_size_log2 = ilog2(ADMIN_CQE_SIZE),
+ .sqe_size_log2 = ilog2(ADMIN_SQE_SIZE),
+ .cq_depth = ADMIN_CQ_DEPTH,
+ .sq_depth = ADMIN_SQ_DEPTH,
+ .rq_depth = ADMIN_RQ_DEPTH,
+ .min_msix = 2, /* 1 Rx + 1 Tx */
+ .event_cb = fun_event_cb,
+ .serv_cb = fun_service_cb,
+ };
+ struct devlink *devlink;
+ struct fun_ethdev *ed;
+ struct fun_dev *fdev;
+ int rc;
+
+ devlink = fun_devlink_alloc(&pdev->dev);
+ if (!devlink) {
+ dev_err(&pdev->dev, "devlink alloc failed\n");
+ return -ENOMEM;
+ }
+
+ ed = devlink_priv(devlink);
+ mutex_init(&ed->state_mutex);
+
+ fdev = &ed->fdev;
+ rc = fun_dev_enable(fdev, pdev, &aqreq, KBUILD_MODNAME);
+ if (rc)
+ goto free_devlink;
+
+ /* a port count of 0 is not an error; no ports are created then */
+ rc = fun_get_res_count(fdev, FUN_ADMIN_OP_PORT);
+ if (rc > 0)
+ rc = fun_create_ports(ed, rc);
+ if (rc < 0)
+ goto disable_dev;
+
+ fun_serv_restart(fdev);
+ fun_devlink_register(devlink);
+ return 0;
+
+disable_dev:
+ fun_dev_disable(fdev);
+free_devlink:
+ mutex_destroy(&ed->state_mutex);
+ fun_devlink_free(devlink);
+ return rc;
+}
+
+/* PCI remove handler (also installed as the shutdown handler): unregister
+ * devlink, disable SR-IOV when it is configured in, stop the service task,
+ * destroy the ports, disable the device and free the devlink instance.
+ */
+static void funeth_remove(struct pci_dev *pdev)
+{
+	struct fun_dev *fdev = pci_get_drvdata(pdev);
+	struct fun_ethdev *ed = to_fun_ethdev(fdev);
+	struct devlink *dl = priv_to_devlink(ed);
+
+	fun_devlink_unregister(dl);
+
+#ifdef CONFIG_PCI_IOV
+	funeth_sriov_configure(pdev, 0);
+#endif
+
+	fun_serv_stop(fdev);
+	fun_destroy_ports(ed);
+	fun_dev_disable(fdev);
+	mutex_destroy(&ed->state_mutex);
+
+	fun_devlink_free(dl);
+}
+
+/* PCI driver definition. Note that shutdown reuses the remove handler. */
+static struct pci_driver funeth_driver = {
+ .name = KBUILD_MODNAME,
+ .id_table = funeth_id_table,
+ .probe = funeth_probe,
+ .remove = funeth_remove,
+ .shutdown = funeth_remove,
+ .sriov_configure = funeth_sriov_configure,
+};
+
+module_pci_driver(funeth_driver);
+
+MODULE_AUTHOR("Dimitris Michailidis <dmichail@fungible.com>");
+MODULE_DESCRIPTION("Fungible Ethernet Network Driver");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DEVICE_TABLE(pci, funeth_id_table);
diff --git a/drivers/net/ethernet/fungible/funeth/funeth_rx.c b/drivers/net/ethernet/fungible/funeth/funeth_rx.c
new file mode 100644
index 000000000..29a6c2ede
--- /dev/null
+++ b/drivers/net/ethernet/fungible/funeth/funeth_rx.c
@@ -0,0 +1,829 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause)
+
+#include <linux/bpf_trace.h>
+#include <linux/dma-mapping.h>
+#include <linux/etherdevice.h>
+#include <linux/filter.h>
+#include <linux/irq.h>
+#include <linux/pci.h>
+#include <linux/skbuff.h>
+#include "funeth_txrx.h"
+#include "funeth.h"
+#include "fun_queue.h"
+
+#define CREATE_TRACE_POINTS
+#include "funeth_trace.h"
+
+/* Given the device's max supported MTU and pages of at least 4KB a packet can
+ * be scattered into at most 4 buffers.
+ */
+#define RX_MAX_FRAGS 4
+
+/* Per packet headroom in non-XDP mode. Present only for 1-frag packets. */
+#define FUN_RX_HEADROOM (NET_SKB_PAD + NET_IP_ALIGN)
+
+/* We try to reuse pages for our buffers. To avoid frequent page ref writes we
+ * take EXTRA_PAGE_REFS references at once and then hand them out one per packet
+ * occupying the buffer.
+ */
+#define EXTRA_PAGE_REFS 1000000
+#define MIN_PAGE_REFS 1000
+
+enum {
+ FUN_XDP_FLUSH_REDIR = 1,
+ FUN_XDP_FLUSH_TX = 2,
+};
+
+/* See if a page is running low on refs we are holding and if so take more.
+ * When @buf->pg_refs is below MIN_PAGE_REFS, EXTRA_PAGE_REFS is added both to
+ * our local count and to the page's reference count so the two stay in step.
+ */
+static void refresh_refs(struct funeth_rxbuf *buf)
+{
+ if (unlikely(buf->pg_refs < MIN_PAGE_REFS)) {
+ buf->pg_refs += EXTRA_PAGE_REFS;
+ page_ref_add(buf->page, EXTRA_PAGE_REFS);
+ }
+}
+
+/* Offer a buffer to the Rx buffer cache. The cache will hold the buffer if its
+ * page is worth retaining and there's room for it. Otherwise the page is
+ * unmapped and our references released.
+ *
+ * As coded, a page is retained only if @buf->node equals numa_mem_id()
+ * (funeth_alloc_page() stores -1 as the node of pfmemalloc pages), and there
+ * is room while at most c->mask entries are outstanding between cons_cnt and
+ * prod_cnt. The buffer is copied into the cache by value.
+ */
+static void cache_offer(struct funeth_rxq *q, const struct funeth_rxbuf *buf)
+{
+ struct funeth_rx_cache *c = &q->cache;
+
+ if (c->prod_cnt - c->cons_cnt <= c->mask && buf->node == numa_mem_id()) {
+ c->bufs[c->prod_cnt & c->mask] = *buf;
+ c->prod_cnt++;
+ } else {
+ dma_unmap_page_attrs(q->dma_dev, buf->dma_addr, PAGE_SIZE,
+ DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
+ __page_frag_cache_drain(buf->page, buf->pg_refs);
+ }
+}
+
+/* Get a page from the Rx buffer cache. We only consider the next available
+ * page and return it if we own all its references.
+ *
+ * We own all references when page_ref_count() equals the cached pg_refs. In
+ * that case the entry is synced for the device, copied into @rb, topped up via
+ * refresh_refs(), and consumed from the cache.
+ *
+ * Return true if @rb was filled from the cache, false otherwise.
+ */
+static bool cache_get(struct funeth_rxq *q, struct funeth_rxbuf *rb)
+{
+ struct funeth_rx_cache *c = &q->cache;
+ struct funeth_rxbuf *buf;
+
+ if (c->prod_cnt == c->cons_cnt)
+ return false; /* empty cache */
+
+ buf = &c->bufs[c->cons_cnt & c->mask];
+ if (page_ref_count(buf->page) == buf->pg_refs) {
+ dma_sync_single_for_device(q->dma_dev, buf->dma_addr,
+ PAGE_SIZE, DMA_FROM_DEVICE);
+ *rb = *buf;
+ /* clear the slot so fun_rxq_free_cache() skips it */
+ buf->page = NULL;
+ refresh_refs(rb);
+ c->cons_cnt++;
+ return true;
+ }
+
+ /* Page can't be reused. If the cache is full drop this page. */
+ if (c->prod_cnt - c->cons_cnt > c->mask) {
+ dma_unmap_page_attrs(q->dma_dev, buf->dma_addr, PAGE_SIZE,
+ DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
+ __page_frag_cache_drain(buf->page, buf->pg_refs);
+ buf->page = NULL;
+ c->cons_cnt++;
+ }
+ return false;
+}
+
+/* Allocate and DMA-map a page for receive.
+ *
+ * The Rx buffer cache is tried first; only if it has no reusable page is a new
+ * order-0 page allocated on @node with @gfp (plus __GFP_NOWARN) and mapped
+ * DMA_FROM_DEVICE for PAGE_SIZE bytes.
+ *
+ * A fresh page starts with pg_refs of 1 and refresh_refs() immediately takes
+ * EXTRA_PAGE_REFS more. @rb->node records the page's NUMA node, or -1 for a
+ * pfmemalloc page.
+ *
+ * Return 0 on success, -ENOMEM if the allocation or the DMA mapping fails
+ * (the latter also bumps the rx_map_err counter).
+ */
+static int funeth_alloc_page(struct funeth_rxq *q, struct funeth_rxbuf *rb,
+ int node, gfp_t gfp)
+{
+ struct page *p;
+
+ if (cache_get(q, rb))
+ return 0;
+
+ p = __alloc_pages_node(node, gfp | __GFP_NOWARN, 0);
+ if (unlikely(!p))
+ return -ENOMEM;
+
+ rb->dma_addr = dma_map_page(q->dma_dev, p, 0, PAGE_SIZE,
+ DMA_FROM_DEVICE);
+ if (unlikely(dma_mapping_error(q->dma_dev, rb->dma_addr))) {
+ FUN_QSTAT_INC(q, rx_map_err);
+ __free_page(p);
+ return -ENOMEM;
+ }
+
+ FUN_QSTAT_INC(q, rx_page_alloc);
+
+ rb->page = p;
+ rb->pg_refs = 1;
+ refresh_refs(rb);
+ rb->node = page_is_pfmemalloc(p) ? -1 : page_to_nid(p);
+ return 0;
+}
+
+/* Unmap an Rx buffer's page and drop the pg_refs references we hold on it.
+ * A buffer whose page pointer is NULL is left alone, and the pointer is
+ * cleared afterwards, so calling this twice on the same buffer is harmless.
+ */
+static void funeth_free_page(struct funeth_rxq *q, struct funeth_rxbuf *rb)
+{
+ if (rb->page) {
+ dma_unmap_page(q->dma_dev, rb->dma_addr, PAGE_SIZE,
+ DMA_FROM_DEVICE);
+ __page_frag_cache_drain(rb->page, rb->pg_refs);
+ rb->page = NULL;
+ }
+}
+
+/* Run the XDP program assigned to an Rx queue.
+ * Return %NULL if the buffer is consumed, or the virtual address of the packet
+ * to turn into an skb.
+ *
+ * @frags describes the single buffer holding the packet and @buf_va is its
+ * virtual address. @ref_ok is the result of fun_gather_pkt(): when it is 0
+ * the page may not be handed off, so XDP_TX and XDP_REDIRECT verdicts return
+ * the packet address without transmitting; the caller then drops the packet
+ * on its own !ref_ok check. @xdp_q is the Tx queue used for XDP_TX.
+ *
+ * For drops and errors the page reference granted for the packet is returned
+ * by incrementing q->cur_buf->pg_refs.
+ */
+static void *fun_run_xdp(struct funeth_rxq *q, skb_frag_t *frags, void *buf_va,
+ int ref_ok, struct funeth_txq *xdp_q)
+{
+ struct bpf_prog *xdp_prog;
+ struct xdp_frame *xdpf;
+ struct xdp_buff xdp;
+ u32 act;
+
+ /* VA includes the headroom, frag size includes headroom + tailroom */
+ xdp_init_buff(&xdp, ALIGN(skb_frag_size(frags), FUN_EPRQ_PKT_ALIGN),
+ &q->xdp_rxq);
+ xdp_prepare_buff(&xdp, buf_va, FUN_XDP_HEADROOM, skb_frag_size(frags) -
+ (FUN_RX_TAILROOM + FUN_XDP_HEADROOM), false);
+
+ xdp_prog = READ_ONCE(q->xdp_prog);
+ act = bpf_prog_run_xdp(xdp_prog, &xdp);
+
+ switch (act) {
+ case XDP_PASS:
+ /* remove headroom, which may not be FUN_XDP_HEADROOM now */
+ skb_frag_size_set(frags, xdp.data_end - xdp.data);
+ skb_frag_off_add(frags, xdp.data - xdp.data_hard_start);
+ goto pass;
+ case XDP_TX:
+ if (unlikely(!ref_ok))
+ goto pass;
+
+ xdpf = xdp_convert_buff_to_frame(&xdp);
+ if (!xdpf || !fun_xdp_tx(xdp_q, xdpf))
+ goto xdp_error;
+ FUN_QSTAT_INC(q, xdp_tx);
+ /* the Tx doorbell is written later by fun_process_cqes() */
+ q->xdp_flush |= FUN_XDP_FLUSH_TX;
+ break;
+ case XDP_REDIRECT:
+ if (unlikely(!ref_ok))
+ goto pass;
+ if (unlikely(xdp_do_redirect(q->netdev, &xdp, xdp_prog)))
+ goto xdp_error;
+ FUN_QSTAT_INC(q, xdp_redir);
+ /* xdp_do_flush() is called later by fun_process_cqes() */
+ q->xdp_flush |= FUN_XDP_FLUSH_REDIR;
+ break;
+ default:
+ bpf_warn_invalid_xdp_action(q->netdev, xdp_prog, act);
+ fallthrough;
+ case XDP_ABORTED:
+ trace_xdp_exception(q->netdev, xdp_prog, act);
+xdp_error:
+ q->cur_buf->pg_refs++; /* return frags' page reference */
+ FUN_QSTAT_INC(q, xdp_err);
+ break;
+ case XDP_DROP:
+ q->cur_buf->pg_refs++;
+ FUN_QSTAT_INC(q, xdp_drops);
+ break;
+ }
+ return NULL;
+
+pass:
+ return xdp.data;
+}
+
+/* A CQE contains a fixed completion structure along with optional metadata and
+ * even packet data. Given the start address of a CQE return the start of the
+ * contained fixed structure, which lies at the end.
+ *
+ * The structure is located FUNETH_CQE_INFO_OFFSET bytes into the CQE.
+ */
+static const void *cqe_to_info(const void *cqe)
+{
+ return cqe + FUNETH_CQE_INFO_OFFSET;
+}
+
+/* The inverse of cqe_to_info(): given the address of a CQE's fixed completion
+ * structure return the start address of the CQE containing it.
+ */
+static const void *info_to_cqe(const void *cqe_info)
+{
+ return cqe_info - FUNETH_CQE_INFO_OFFSET;
+}
+
+/* Return the type of hash provided by the device based on the L3 and L4
+ * protocols it parsed for the packet.
+ *
+ * @pkt_parse is the CQE's pkt_cv field converted to CPU byte order. A 3-bit
+ * key indexes htype_map[]: bits 2:1 come from the L4 protocol field and bit 0
+ * from the L3 protocol field. Keys with bit 0 clear map to
+ * PKT_HASH_TYPE_NONE; key 3 maps to PKT_HASH_TYPE_L4 and the other odd keys
+ * to PKT_HASH_TYPE_L3.
+ */
+static enum pkt_hash_types cqe_to_pkt_hash_type(u16 pkt_parse)
+{
+ static const enum pkt_hash_types htype_map[] = {
+ PKT_HASH_TYPE_NONE, PKT_HASH_TYPE_L3,
+ PKT_HASH_TYPE_NONE, PKT_HASH_TYPE_L4,
+ PKT_HASH_TYPE_NONE, PKT_HASH_TYPE_L3,
+ PKT_HASH_TYPE_NONE, PKT_HASH_TYPE_L3
+ };
+ u16 key;
+
+ /* Build the key from the TCP/UDP and IP/IPv6 bits */
+ key = ((pkt_parse >> FUN_ETH_RX_CV_OL4_PROT_S) & 6) |
+ ((pkt_parse >> (FUN_ETH_RX_CV_OL3_PROT_S + 1)) & 1);
+
+ return htype_map[key];
+}
+
+/* Each received packet can be scattered across several Rx buffers or can
+ * share a buffer with previously received packets depending on the buffer
+ * and packet sizes and the room available in the most recently used buffer.
+ *
+ * The rules are:
+ * - If the buffer at the head of an RQ has not been used it gets (part of) the
+ * next incoming packet.
+ * - Otherwise, if the packet fully fits in the buffer's remaining space the
+ * packet is written there.
+ * - Otherwise, the packet goes into the next Rx buffer.
+ *
+ * This function returns the Rx buffer for a packet or fragment thereof of the
+ * given length. If it isn't @buf it either recycles or frees that buffer
+ * before advancing the queue to the next buffer.
+ *
+ * If called repeatedly with the remaining length of a packet it will walk
+ * through all the buffers containing the packet.
+ *
+ * Side effects when moving on: q->buf_offset is reset to 0 and q->rq_cons is
+ * incremented; if the spare page is consumed the slot's RQE is rewritten with
+ * the new DMA address and q->spare_buf.page is cleared.
+ */
+static struct funeth_rxbuf *
+get_buf(struct funeth_rxq *q, struct funeth_rxbuf *buf, unsigned int len)
+{
+ if (q->buf_offset + len <= PAGE_SIZE || !q->buf_offset)
+ return buf; /* @buf holds (part of) the packet */
+
+ /* The packet occupies part of the next buffer. Move there after
+ * replenishing the current buffer slot either with the spare page or
+ * by reusing the slot's existing page. Note that if a spare page isn't
+ * available and the current packet occupies @buf it is a multi-frag
+ * packet that will be dropped leaving @buf available for reuse.
+ */
+ if ((page_ref_count(buf->page) == buf->pg_refs &&
+ buf->node == numa_mem_id()) || !q->spare_buf.page) {
+ /* keep the slot's page: we own all its refs or have no spare */
+ dma_sync_single_for_device(q->dma_dev, buf->dma_addr,
+ PAGE_SIZE, DMA_FROM_DEVICE);
+ refresh_refs(buf);
+ } else {
+ /* swap in the spare page and offer the old one to the cache */
+ cache_offer(q, buf);
+ *buf = q->spare_buf;
+ q->spare_buf.page = NULL;
+ q->rqes[q->rq_cons & q->rq_mask] =
+ FUN_EPRQ_RQBUF_INIT(buf->dma_addr);
+ }
+ q->buf_offset = 0;
+ q->rq_cons++;
+ return &q->bufs[q->rq_cons & q->rq_mask];
+}
+
+/* Gather the page fragments making up the first Rx packet on @q. Its total
+ * length @tot_len includes optional head- and tail-rooms.
+ *
+ * Return 0 if the device retains ownership of at least some of the pages.
+ * In this case the caller may only copy the packet.
+ *
+ * A non-zero return value gives the caller permission to use references to the
+ * pages, e.g., attach them to skbs. Additionally, if the value is <0 at least
+ * one of the pages is PF_MEMALLOC.
+ *
+ * Regardless of outcome the caller is granted a reference to each of the pages.
+ *
+ * @frags receives one entry per buffer used; the function itself does not
+ * bound the number of entries written. On return q->cur_buf is the last buffer
+ * used and q->buf_offset is the FUN_EPRQ_PKT_ALIGN-aligned offset just past
+ * the packet.
+ */
+static int fun_gather_pkt(struct funeth_rxq *q, unsigned int tot_len,
+ skb_frag_t *frags)
+{
+ struct funeth_rxbuf *buf = q->cur_buf;
+ unsigned int frag_len;
+ int ref_ok = 1;
+
+ for (;;) {
+ buf = get_buf(q, buf, tot_len);
+
+ /* We always keep the RQ full of buffers so before we can give
+ * one of our pages to the stack we require that we can obtain
+ * a replacement page. If we can't the packet will either be
+ * copied or dropped so we can retain ownership of the page and
+ * reuse it.
+ */
+ if (!q->spare_buf.page &&
+ funeth_alloc_page(q, &q->spare_buf, numa_mem_id(),
+ GFP_ATOMIC | __GFP_MEMALLOC))
+ ref_ok = 0;
+
+ frag_len = min_t(unsigned int, tot_len,
+ PAGE_SIZE - q->buf_offset);
+ dma_sync_single_for_cpu(q->dma_dev,
+ buf->dma_addr + q->buf_offset,
+ frag_len, DMA_FROM_DEVICE);
+ /* hand one of our page references to the caller */
+ buf->pg_refs--;
+ /* node is -1 for pfmemalloc pages, which turns ref_ok negative */
+ if (ref_ok)
+ ref_ok |= buf->node;
+
+ __skb_frag_set_page(frags, buf->page);
+ skb_frag_off_set(frags, q->buf_offset);
+ skb_frag_size_set(frags++, frag_len);
+
+ tot_len -= frag_len;
+ if (!tot_len)
+ break;
+
+ /* mark the buffer as full so get_buf() moves to the next one */
+ q->buf_offset = PAGE_SIZE;
+ }
+ q->buf_offset = ALIGN(q->buf_offset + frag_len, FUN_EPRQ_PKT_ALIGN);
+ q->cur_buf = buf;
+ return ref_ok;
+}
+
+/* Return true if the device's Rx timestamp filter is HWTSTAMP_FILTER_ALL. */
+static bool rx_hwtstamp_enabled(const struct net_device *dev)
+{
+ const struct funeth_priv *d = netdev_priv(dev);
+
+ return d->hwtstamp_cfg.rx_filter == HWTSTAMP_FILTER_ALL;
+}
+
+/* Advance the CQ pointers and phase tag to the next CQE.
+ * When the head is at the last entry (cq_mask) it wraps to 0, the expected
+ * phase is flipped, and next_cqe_info is reset to the first CQE's info
+ * structure. Otherwise the head and next_cqe_info simply step forward by one
+ * CQE. The new info structure is prefetched either way.
+ */
+static void advance_cq(struct funeth_rxq *q)
+{
+ if (unlikely(q->cq_head == q->cq_mask)) {
+ q->cq_head = 0;
+ q->phase ^= 1;
+ q->next_cqe_info = cqe_to_info(q->cqes);
+ } else {
+ q->cq_head++;
+ q->next_cqe_info += FUNETH_CQE_SIZE;
+ }
+ prefetch(q->next_cqe_info);
+}
+
+/* Process the packet represented by the head CQE of @q. Gather the packet's
+ * fragments, run it through the optional XDP program, and if needed construct
+ * an skb and pass it to the stack.
+ *
+ * @xdp_q is the XDP Tx queue to use, or NULL when there is none. XDP is run
+ * only when @xdp_q is set and the CQE reports FUN_XDP_HEADROOM of headroom.
+ *
+ * Packets with non-zero headroom are turned into a linear skb with
+ * napi_build_skb(); packets without headroom (including those passed by XDP)
+ * use the napi_get_frags() skb with the gathered page fragments attached.
+ *
+ * If page references may not be used or an skb cannot be obtained the packet
+ * is dropped and counted in rx_mem_drops.
+ */
+static void fun_handle_cqe_pkt(struct funeth_rxq *q, struct funeth_txq *xdp_q)
+{
+ const struct fun_eth_cqe *rxreq = info_to_cqe(q->next_cqe_info);
+ unsigned int i, tot_len, pkt_len = be32_to_cpu(rxreq->pkt_len);
+ struct net_device *ndev = q->netdev;
+ skb_frag_t frags[RX_MAX_FRAGS];
+ struct skb_shared_info *si;
+ unsigned int headroom;
+ gro_result_t gro_res;
+ struct sk_buff *skb;
+ int ref_ok;
+ void *va;
+ u16 cv;
+
+ u64_stats_update_begin(&q->syncp);
+ q->stats.rx_pkts++;
+ q->stats.rx_bytes += pkt_len;
+ u64_stats_update_end(&q->syncp);
+
+ /* rxreq keeps pointing at the current CQE; only the queue moves on */
+ advance_cq(q);
+
+ /* account for head- and tail-room, present only for 1-buffer packets */
+ tot_len = pkt_len;
+ headroom = be16_to_cpu(rxreq->headroom);
+ if (likely(headroom))
+ tot_len += FUN_RX_TAILROOM + headroom;
+
+ ref_ok = fun_gather_pkt(q, tot_len, frags);
+ va = skb_frag_address(frags);
+ if (xdp_q && headroom == FUN_XDP_HEADROOM) {
+ va = fun_run_xdp(q, frags, va, ref_ok, xdp_q);
+ if (!va)
+ return;
+ headroom = 0; /* XDP_PASS trims it */
+ }
+ if (unlikely(!ref_ok))
+ goto no_mem;
+
+ if (likely(headroom)) {
+ /* headroom is either FUN_RX_HEADROOM or FUN_XDP_HEADROOM */
+ prefetch(va + headroom);
+ skb = napi_build_skb(va, ALIGN(tot_len, FUN_EPRQ_PKT_ALIGN));
+ if (unlikely(!skb))
+ goto no_mem;
+
+ skb_reserve(skb, headroom);
+ __skb_put(skb, pkt_len);
+ skb->protocol = eth_type_trans(skb, ndev);
+ } else {
+ prefetch(va);
+ skb = napi_get_frags(q->napi);
+ if (unlikely(!skb))
+ goto no_mem;
+
+ /* a negative ref_ok means a PF_MEMALLOC page was used */
+ if (ref_ok < 0)
+ skb->pfmemalloc = 1;
+
+ /* NOTE(review): nr_frags comes from the CQE's nsgl, not from the
+ * number of entries fun_gather_pkt() wrote - presumably the two
+ * always agree and never exceed RX_MAX_FRAGS; confirm.
+ */
+ si = skb_shinfo(skb);
+ si->nr_frags = rxreq->nsgl;
+ for (i = 0; i < si->nr_frags; i++)
+ si->frags[i] = frags[i];
+
+ /* NOTE(review): after XDP_PASS the frag size was recomputed from
+ * xdp.data/xdp.data_end while the lengths below still use the
+ * CQE's pkt_len; these match only if the XDP program did not
+ * resize the packet - confirm.
+ */
+ skb->len = pkt_len;
+ skb->data_len = pkt_len;
+ skb->truesize += round_up(pkt_len, FUN_EPRQ_PKT_ALIGN);
+ }
+
+ skb_record_rx_queue(skb, q->qidx);
+ cv = be16_to_cpu(rxreq->pkt_cv);
+ if (likely((q->netdev->features & NETIF_F_RXHASH) && rxreq->hash))
+ skb_set_hash(skb, be32_to_cpu(rxreq->hash),
+ cqe_to_pkt_hash_type(cv));
+ if (likely((q->netdev->features & NETIF_F_RXCSUM) && rxreq->csum)) {
+ FUN_QSTAT_INC(q, rx_cso);
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ skb->csum_level = be16_to_cpu(rxreq->csum) - 1;
+ }
+ if (unlikely(rx_hwtstamp_enabled(q->netdev)))
+ skb_hwtstamps(skb)->hwtstamp = be64_to_cpu(rxreq->timestamp);
+
+ trace_funeth_rx(q, rxreq->nsgl, pkt_len, skb->hash, cv);
+
+ /* data_len is non-zero only for the napi_get_frags() skb */
+ gro_res = skb->data_len ? napi_gro_frags(q->napi) :
+ napi_gro_receive(q->napi, skb);
+ if (gro_res == GRO_MERGED || gro_res == GRO_MERGED_FREE)
+ FUN_QSTAT_INC(q, gro_merged);
+ else if (gro_res == GRO_HELD)
+ FUN_QSTAT_INC(q, gro_pkts);
+ return;
+
+no_mem:
+ FUN_QSTAT_INC(q, rx_mem_drops);
+
+ /* Release the references we've been granted for the frag pages.
+ * We return the ref of the last frag and free the rest.
+ */
+ q->cur_buf->pg_refs++;
+ /* NOTE(review): assumes nsgl >= 1; i is unsigned so nsgl == 0 would
+ * make the bound wrap - confirm the device never reports 0.
+ */
+ for (i = 0; i < rxreq->nsgl - 1; i++)
+ __free_page(skb_frag_page(frags + i));
+}
+
+/* Return 0 if the phase tag of the CQE at the CQ's head matches expectations
+ * indicating the CQE is new.
+ *
+ * The phase tag is the least significant bit of the big-endian sf_p field of
+ * @ci; @phase is the value currently expected by the queue.
+ */
+static u16 cqe_phase_mismatch(const struct fun_cqe_info *ci, u16 phase)
+{
+ u16 sf_p = be16_to_cpu(ci->sf_p);
+
+ return (sf_p & 1) ^ phase;
+}
+
+/* Walk through a CQ identifying and processing fresh CQEs up to the given
+ * budget. Return the remaining budget.
+ *
+ * If the device has XDP Tx queues (fp->xdpqs is non-NULL) the one indexed by
+ * the current CPU is used for XDP_TX. Any XDP flush work accumulated in
+ * q->xdp_flush by fun_run_xdp() is performed once after the loop.
+ */
+static int fun_process_cqes(struct funeth_rxq *q, int budget)
+{
+ struct funeth_priv *fp = netdev_priv(q->netdev);
+ struct funeth_txq **xdpqs, *xdp_q = NULL;
+
+ xdpqs = rcu_dereference_bh(fp->xdpqs);
+ if (xdpqs)
+ xdp_q = xdpqs[smp_processor_id()];
+
+ while (budget && !cqe_phase_mismatch(q->next_cqe_info, q->phase)) {
+ /* access other descriptor fields after the phase check */
+ dma_rmb();
+
+ fun_handle_cqe_pkt(q, xdp_q);
+ budget--;
+ }
+
+ if (unlikely(q->xdp_flush)) {
+ /* one doorbell write covers all XDP_TX frames queued above */
+ if (q->xdp_flush & FUN_XDP_FLUSH_TX)
+ fun_txq_wr_db(xdp_q);
+ if (q->xdp_flush & FUN_XDP_FLUSH_REDIR)
+ xdp_do_flush();
+ q->xdp_flush = 0;
+ }
+
+ return budget;
+}
+
+/* NAPI handler for Rx queues. Calls the CQE processing loop and writes RQ/CQ
+ * doorbells as needed.
+ *
+ * The CQ doorbell is always written with the current CQ head. If less than
+ * the full budget was used and napi_complete_done() succeeds, q->irq_db_val
+ * is OR'ed into the value (presumably this re-arms the queue's interrupt -
+ * confirm against the hardware interface). The RQ doorbell is written only
+ * once at least rq_db_thres buffers have been consumed since the last write.
+ *
+ * Return the number of packets processed.
+ */
+int fun_rxq_napi_poll(struct napi_struct *napi, int budget)
+{
+ struct fun_irq *irq = container_of(napi, struct fun_irq, napi);
+ struct funeth_rxq *q = irq->rxq;
+ int work_done = budget - fun_process_cqes(q, budget);
+ u32 cq_db_val = q->cq_head;
+
+ if (unlikely(work_done >= budget))
+ FUN_QSTAT_INC(q, rx_budget);
+ else if (napi_complete_done(napi, work_done))
+ cq_db_val |= q->irq_db_val;
+
+ /* check whether to post new Rx buffers */
+ if (q->rq_cons - q->rq_cons_db >= q->rq_db_thres) {
+ u64_stats_update_begin(&q->syncp);
+ q->stats.rx_bufs += q->rq_cons - q->rq_cons_db;
+ u64_stats_update_end(&q->syncp);
+ q->rq_cons_db = q->rq_cons;
+ writel((q->rq_cons - 1) & q->rq_mask, q->rq_db);
+ }
+
+ writel(cq_db_val, q->cq_db);
+ return work_done;
+}
+
+/* Free the Rx buffers of an Rx queue.
+ * Releases the page of every RQ slot and of the spare buffer (slots without a
+ * page are skipped by funeth_free_page()) and clears q->cur_buf.
+ */
+static void fun_rxq_free_bufs(struct funeth_rxq *q)
+{
+ struct funeth_rxbuf *b = q->bufs;
+ unsigned int i;
+
+ for (i = 0; i <= q->rq_mask; i++, b++)
+ funeth_free_page(q, b);
+
+ funeth_free_page(q, &q->spare_buf);
+ q->cur_buf = NULL;
+}
+
+/* Initially provision an Rx queue with Rx buffers.
+ * Allocates one page per RQ slot on @node, writes the matching RQE, and
+ * points q->cur_buf at the first buffer.
+ *
+ * Return 0 on success. On allocation failure all buffers are released and
+ * -ENOMEM is returned.
+ */
+static int fun_rxq_alloc_bufs(struct funeth_rxq *q, int node)
+{
+ struct funeth_rxbuf *b = q->bufs;
+ unsigned int i;
+
+ for (i = 0; i <= q->rq_mask; i++, b++) {
+ if (funeth_alloc_page(q, b, node, GFP_KERNEL)) {
+ /* NOTE(review): relies on not-yet-filled slots having a
+ * NULL page - presumably q->bufs is zero-initialized by
+ * fun_alloc_ring_mem(); confirm.
+ */
+ fun_rxq_free_bufs(q);
+ return -ENOMEM;
+ }
+ q->rqes[i] = FUN_EPRQ_RQBUF_INIT(b->dma_addr);
+ }
+ q->cur_buf = q->bufs;
+ return 0;
+}
+
+/* Initialize a used-buffer cache of the given depth.
+ * The mask is set to @depth - 1, so @depth is expected to be a power of 2.
+ * The zeroed buffer array is allocated on NUMA node @node.
+ *
+ * Return 0 on success, -ENOMEM if the array cannot be allocated.
+ */
+static int fun_rxq_init_cache(struct funeth_rx_cache *c, unsigned int depth,
+ int node)
+{
+ c->mask = depth - 1;
+ c->bufs = kvzalloc_node(depth * sizeof(*c->bufs), GFP_KERNEL, node);
+ return c->bufs ? 0 : -ENOMEM;
+}
+
+/* Deallocate an Rx queue's used-buffer cache and its contents.
+ * Every slot is visited; slots whose page pointer is NULL (never used, or
+ * cleared by cache_get()) are skipped by funeth_free_page().
+ */
+static void fun_rxq_free_cache(struct funeth_rxq *q)
+{
+ struct funeth_rxbuf *b = q->cache.bufs;
+ unsigned int i;
+
+ for (i = 0; i <= q->cache.mask; i++, b++)
+ funeth_free_page(q, b);
+
+ kvfree(q->cache.bufs);
+ q->cache.bufs = NULL;
+}
+
+/* Attach XDP program @prog to an Rx queue, or detach the current one if @prog
+ * is NULL.
+ *
+ * The queue uses FUN_XDP_HEADROOM with a program and FUN_RX_HEADROOM without.
+ * If the required headroom differs from the queue's current one a synchronous
+ * admin command modifies the device CQ first; the program pointer is
+ * published only after that succeeds.
+ *
+ * Return 0 on success or the error from the admin command.
+ */
+int fun_rxq_set_bpf(struct funeth_rxq *q, struct bpf_prog *prog)
+{
+ struct funeth_priv *fp = netdev_priv(q->netdev);
+ struct fun_admin_epcq_req cmd;
+ u16 headroom;
+ int err;
+
+ headroom = prog ? FUN_XDP_HEADROOM : FUN_RX_HEADROOM;
+ if (headroom != q->headroom) {
+ cmd.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_EPCQ,
+ sizeof(cmd));
+ cmd.u.modify =
+ FUN_ADMIN_EPCQ_MODIFY_REQ_INIT(FUN_ADMIN_SUBOP_MODIFY,
+ 0, q->hw_cqid, headroom);
+ err = fun_submit_admin_sync_cmd(fp->fdev, &cmd.common, NULL, 0,
+ 0);
+ if (err)
+ return err;
+ q->headroom = headroom;
+ }
+
+ /* paired with the READ_ONCE() in fun_run_xdp() */
+ WRITE_ONCE(q->xdp_prog, prog);
+ return 0;
+}
+
+/* Create an Rx queue, allocating the host memory it needs.
+ *
+ * @ncqe and @nrqe are the CQ and RQ depths; masks are derived as depth - 1 so
+ * they are expected to be powers of 2. All memory is allocated on the NUMA
+ * node of @irq. The buffer cache is given the same depth as the RQ and the RQ
+ * doorbell threshold is a quarter of the RQ depth.
+ *
+ * Return the new queue in state FUN_QSTATE_INIT_SW, or an ERR_PTR() on
+ * failure, in which case everything allocated here has been released.
+ */
+static struct funeth_rxq *fun_rxq_create_sw(struct net_device *dev,
+ unsigned int qidx,
+ unsigned int ncqe,
+ unsigned int nrqe,
+ struct fun_irq *irq)
+{
+ struct funeth_priv *fp = netdev_priv(dev);
+ struct funeth_rxq *q;
+ int err = -ENOMEM;
+ int numa_node;
+
+ numa_node = fun_irq_node(irq);
+ q = kzalloc_node(sizeof(*q), GFP_KERNEL, numa_node);
+ if (!q)
+ goto err;
+
+ q->qidx = qidx;
+ q->netdev = dev;
+ q->cq_mask = ncqe - 1;
+ q->rq_mask = nrqe - 1;
+ q->numa_node = numa_node;
+ q->rq_db_thres = nrqe / 4;
+ u64_stats_init(&q->syncp);
+ q->dma_dev = &fp->pdev->dev;
+
+ /* the RQE ring and the q->bufs software array are allocated together */
+ q->rqes = fun_alloc_ring_mem(q->dma_dev, nrqe, sizeof(*q->rqes),
+ sizeof(*q->bufs), false, numa_node,
+ &q->rq_dma_addr, (void **)&q->bufs, NULL);
+ if (!q->rqes)
+ goto free_q;
+
+ q->cqes = fun_alloc_ring_mem(q->dma_dev, ncqe, FUNETH_CQE_SIZE, 0,
+ false, numa_node, &q->cq_dma_addr, NULL,
+ NULL);
+ if (!q->cqes)
+ goto free_rqes;
+
+ err = fun_rxq_init_cache(&q->cache, nrqe, numa_node);
+ if (err)
+ goto free_cqes;
+
+ /* on failure fun_rxq_alloc_bufs() has already freed its pages */
+ err = fun_rxq_alloc_bufs(q, numa_node);
+ if (err)
+ goto free_cache;
+
+ q->stats.rx_bufs = q->rq_mask;
+ q->init_state = FUN_QSTATE_INIT_SW;
+ return q;
+
+free_cache:
+ fun_rxq_free_cache(q);
+free_cqes:
+ dma_free_coherent(q->dma_dev, ncqe * FUNETH_CQE_SIZE, q->cqes,
+ q->cq_dma_addr);
+free_rqes:
+ fun_free_ring_mem(q->dma_dev, nrqe, sizeof(*q->rqes), false, q->rqes,
+ q->rq_dma_addr, q->bufs);
+free_q:
+ kfree(q);
+err:
+ netdev_err(dev, "Unable to allocate memory for Rx queue %u\n", qidx);
+ return ERR_PTR(err);
+}
+
+/* Free the host memory of an Rx queue: the buffer cache, the Rx buffers, the
+ * RQ and CQ rings, and the queue structure itself. The queue's packet, byte,
+ * and drop counters are added to the device totals before it is freed.
+ */
+static void fun_rxq_free_sw(struct funeth_rxq *q)
+{
+ struct funeth_priv *fp = netdev_priv(q->netdev);
+
+ fun_rxq_free_cache(q);
+ fun_rxq_free_bufs(q);
+ fun_free_ring_mem(q->dma_dev, q->rq_mask + 1, sizeof(*q->rqes), false,
+ q->rqes, q->rq_dma_addr, q->bufs);
+ dma_free_coherent(q->dma_dev, (q->cq_mask + 1) * FUNETH_CQE_SIZE,
+ q->cqes, q->cq_dma_addr);
+
+ /* Before freeing the queue transfer key counters to the device. */
+ fp->rx_packets += q->stats.rx_pkts;
+ fp->rx_bytes += q->stats.rx_bytes;
+ fp->rx_dropped += q->stats.rx_map_err + q->stats.rx_mem_drops;
+
+ kfree(q);
+}
+
+/* Create an Rx queue's resources on the device.
+ *
+ * Registers the queue's XDP Rx info, resets the software ring state, inherits
+ * the device's current XDP program and the matching headroom, and then
+ * creates the device RQ followed by the CQ bound to @irq. On success the
+ * queue is linked to @irq, the RQ doorbell is written, and the queue moves to
+ * FUN_QSTATE_INIT_FULL.
+ *
+ * Return 0 on success or a negative error, in which case the steps completed
+ * so far are undone and an error is logged.
+ */
+int fun_rxq_create_dev(struct funeth_rxq *q, struct fun_irq *irq)
+{
+ struct funeth_priv *fp = netdev_priv(q->netdev);
+ unsigned int ncqe = q->cq_mask + 1;
+ unsigned int nrqe = q->rq_mask + 1;
+ int err;
+
+ err = xdp_rxq_info_reg(&q->xdp_rxq, q->netdev, q->qidx,
+ irq->napi.napi_id);
+ if (err)
+ goto out;
+
+ err = xdp_rxq_info_reg_mem_model(&q->xdp_rxq, MEM_TYPE_PAGE_SHARED,
+ NULL);
+ if (err)
+ goto xdp_unreg;
+
+ /* start from a clean ring state; the first expected CQE phase is 1 */
+ q->phase = 1;
+ q->irq_cnt = 0;
+ q->cq_head = 0;
+ q->rq_cons = 0;
+ q->rq_cons_db = 0;
+ q->buf_offset = 0;
+ q->napi = &irq->napi;
+ q->irq_db_val = fp->cq_irq_db;
+ q->next_cqe_info = cqe_to_info(q->cqes);
+
+ q->xdp_prog = fp->xdp_prog;
+ q->headroom = fp->xdp_prog ? FUN_XDP_HEADROOM : FUN_RX_HEADROOM;
+
+ err = fun_sq_create(fp->fdev, FUN_ADMIN_RES_CREATE_FLAG_ALLOCATOR |
+ FUN_ADMIN_EPSQ_CREATE_FLAG_RQ, 0,
+ FUN_HCI_ID_INVALID, 0, nrqe, q->rq_dma_addr, 0, 0,
+ 0, 0, fp->fdev->kern_end_qid, PAGE_SHIFT,
+ &q->hw_sqid, &q->rq_db);
+ if (err)
+ goto xdp_unreg;
+
+ err = fun_cq_create(fp->fdev, FUN_ADMIN_RES_CREATE_FLAG_ALLOCATOR |
+ FUN_ADMIN_EPCQ_CREATE_FLAG_RQ, 0,
+ q->hw_sqid, ilog2(FUNETH_CQE_SIZE), ncqe,
+ q->cq_dma_addr, q->headroom, FUN_RX_TAILROOM, 0, 0,
+ irq->irq_idx, 0, fp->fdev->kern_end_qid,
+ &q->hw_cqid, &q->cq_db);
+ if (err)
+ goto free_rq;
+
+ irq->rxq = q;
+ /* presumably hands the initially allocated buffers to the device */
+ writel(q->rq_mask, q->rq_db);
+ q->init_state = FUN_QSTATE_INIT_FULL;
+
+ netif_info(fp, ifup, q->netdev,
+ "Rx queue %u, depth %u/%u, HW qid %u/%u, IRQ idx %u, node %d, headroom %u\n",
+ q->qidx, ncqe, nrqe, q->hw_cqid, q->hw_sqid, irq->irq_idx,
+ q->numa_node, q->headroom);
+ return 0;
+
+free_rq:
+ fun_destroy_sq(fp->fdev, q->hw_sqid);
+xdp_unreg:
+ xdp_rxq_info_unreg(&q->xdp_rxq);
+out:
+ netdev_err(q->netdev,
+ "Failed to create Rx queue %u on device, error %d\n",
+ q->qidx, err);
+ return err;
+}
+
+/* Release an Rx queue's device resources and return it to
+ * FUN_QSTATE_INIT_SW. Does nothing if the queue is not fully initialized.
+ * The queue is detached from its IRQ, its XDP Rx info is unregistered, and
+ * the device RQ and CQ are destroyed.
+ */
+static void fun_rxq_free_dev(struct funeth_rxq *q)
+{
+ struct funeth_priv *fp = netdev_priv(q->netdev);
+ struct fun_irq *irq;
+
+ if (q->init_state < FUN_QSTATE_INIT_FULL)
+ return;
+
+ irq = container_of(q->napi, struct fun_irq, napi);
+ netif_info(fp, ifdown, q->netdev,
+ "Freeing Rx queue %u (id %u/%u), IRQ %u\n",
+ q->qidx, q->hw_cqid, q->hw_sqid, irq->irq_idx);
+
+ irq->rxq = NULL;
+ xdp_rxq_info_unreg(&q->xdp_rxq);
+ fun_destroy_sq(fp->fdev, q->hw_sqid);
+ fun_destroy_cq(fp->fdev, q->hw_cqid);
+ q->init_state = FUN_QSTATE_INIT_SW;
+}
+
+/* Create or advance an Rx queue, allocating all the host and device resources
+ * needed to reach the target state.
+ *
+ * If *@qp is NULL a new queue is created in software first; otherwise the
+ * existing queue is advanced. Nothing more is done if the queue is already at
+ * or beyond @state; otherwise its device resources are created.
+ *
+ * Return 0 and store the queue in *@qp on success. On failure a negative
+ * error is returned; a queue created by this call is freed while a
+ * pre-existing queue is left as it was, and *@qp is not modified.
+ */
+int funeth_rxq_create(struct net_device *dev, unsigned int qidx,
+ unsigned int ncqe, unsigned int nrqe, struct fun_irq *irq,
+ int state, struct funeth_rxq **qp)
+{
+ struct funeth_rxq *q = *qp;
+ int err;
+
+ if (!q) {
+ q = fun_rxq_create_sw(dev, qidx, ncqe, nrqe, irq);
+ if (IS_ERR(q))
+ return PTR_ERR(q);
+ }
+
+ if (q->init_state >= state)
+ goto out;
+
+ err = fun_rxq_create_dev(q, irq);
+ if (err) {
+ /* only free what this call allocated */
+ if (!*qp)
+ fun_rxq_free_sw(q);
+ return err;
+ }
+
+out:
+ *qp = q;
+ return 0;
+}
+
+/* Free Rx queue resources until it reaches the target state.
+ * Device resources are released for any target below FUN_QSTATE_INIT_FULL;
+ * host memory is released only for FUN_QSTATE_DESTROYED.
+ *
+ * Return @q, or NULL if the queue has been destroyed.
+ */
+struct funeth_rxq *funeth_rxq_free(struct funeth_rxq *q, int state)
+{
+ if (state < FUN_QSTATE_INIT_FULL)
+ fun_rxq_free_dev(q);
+
+ if (state == FUN_QSTATE_DESTROYED) {
+ fun_rxq_free_sw(q);
+ q = NULL;
+ }
+
+ return q;
+}
diff --git a/drivers/net/ethernet/fungible/funeth/funeth_trace.h b/drivers/net/ethernet/fungible/funeth/funeth_trace.h
new file mode 100644
index 000000000..9e58dfec1
--- /dev/null
+++ b/drivers/net/ethernet/fungible/funeth/funeth_trace.h
@@ -0,0 +1,117 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM funeth
+
+#if !defined(_TRACE_FUNETH_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_FUNETH_H
+
+#include <linux/tracepoint.h>
+
+#include "funeth_txrx.h"
+
+/* Tracepoint funeth_tx: records the Tx queue index, packet length, SQE index,
+ * number of GLEs, and the netdev name of @txq.
+ */
+TRACE_EVENT(funeth_tx,
+
+ TP_PROTO(const struct funeth_txq *txq,
+ u32 len,
+ u32 sqe_idx,
+ u32 ngle),
+
+ TP_ARGS(txq, len, sqe_idx, ngle),
+
+ TP_STRUCT__entry(
+ __field(u32, qidx)
+ __field(u32, len)
+ __field(u32, sqe_idx)
+ __field(u32, ngle)
+ __string(devname, txq->netdev->name)
+ ),
+
+ TP_fast_assign(
+ __entry->qidx = txq->qidx;
+ __entry->len = len;
+ __entry->sqe_idx = sqe_idx;
+ __entry->ngle = ngle;
+ __assign_str(devname, txq->netdev->name);
+ ),
+
+ TP_printk("%s: Txq %u, SQE idx %u, len %u, num GLEs %u",
+ __get_str(devname), __entry->qidx, __entry->sqe_idx,
+ __entry->len, __entry->ngle)
+);
+
+/* Tracepoint funeth_tx_free: records the Tx queue index, SQE index, number of
+ * SQEs, the HW head value, and the netdev name of @txq.
+ */
+TRACE_EVENT(funeth_tx_free,
+
+ TP_PROTO(const struct funeth_txq *txq,
+ u32 sqe_idx,
+ u32 num_sqes,
+ u32 hw_head),
+
+ TP_ARGS(txq, sqe_idx, num_sqes, hw_head),
+
+ TP_STRUCT__entry(
+ __field(u32, qidx)
+ __field(u32, sqe_idx)
+ __field(u32, num_sqes)
+ __field(u32, hw_head)
+ __string(devname, txq->netdev->name)
+ ),
+
+ TP_fast_assign(
+ __entry->qidx = txq->qidx;
+ __entry->sqe_idx = sqe_idx;
+ __entry->num_sqes = num_sqes;
+ __entry->hw_head = hw_head;
+ __assign_str(devname, txq->netdev->name);
+ ),
+
+ TP_printk("%s: Txq %u, SQE idx %u, SQEs %u, HW head %u",
+ __get_str(devname), __entry->qidx, __entry->sqe_idx,
+ __entry->num_sqes, __entry->hw_head)
+);
+
+/* Tracepoint funeth_rx: records the Rx queue index, the queue's CQ head at
+ * the time of the call, the number of RQEs, packet length, hash, classifier
+ * vector, and the netdev name of @rxq.
+ */
+TRACE_EVENT(funeth_rx,
+
+ TP_PROTO(const struct funeth_rxq *rxq,
+ u32 num_rqes,
+ u32 pkt_len,
+ u32 hash,
+ u32 cls_vec),
+
+ TP_ARGS(rxq, num_rqes, pkt_len, hash, cls_vec),
+
+ TP_STRUCT__entry(
+ __field(u32, qidx)
+ __field(u32, cq_head)
+ __field(u32, num_rqes)
+ __field(u32, len)
+ __field(u32, hash)
+ __field(u32, cls_vec)
+ __string(devname, rxq->netdev->name)
+ ),
+
+ TP_fast_assign(
+ __entry->qidx = rxq->qidx;
+ __entry->cq_head = rxq->cq_head;
+ __entry->num_rqes = num_rqes;
+ __entry->len = pkt_len;
+ __entry->hash = hash;
+ __entry->cls_vec = cls_vec;
+ __assign_str(devname, rxq->netdev->name);
+ ),
+
+ TP_printk("%s: Rxq %u, CQ head %u, RQEs %u, len %u, hash %u, CV %#x",
+ __get_str(devname), __entry->qidx, __entry->cq_head,
+ __entry->num_rqes, __entry->len, __entry->hash,
+ __entry->cls_vec)
+);
+
+#endif /* _TRACE_FUNETH_H */
+
+/* Below must be outside protection. */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE funeth_trace
+
+#include <trace/define_trace.h>
diff --git a/drivers/net/ethernet/fungible/funeth/funeth_tx.c b/drivers/net/ethernet/fungible/funeth/funeth_tx.c
new file mode 100644
index 000000000..706d81e39
--- /dev/null
+++ b/drivers/net/ethernet/fungible/funeth/funeth_tx.c
@@ -0,0 +1,801 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause)
+
+#include <linux/dma-mapping.h>
+#include <linux/ip.h>
+#include <linux/pci.h>
+#include <linux/skbuff.h>
+#include <linux/tcp.h>
+#include <uapi/linux/udp.h>
+#include "funeth.h"
+#include "funeth_ktls.h"
+#include "funeth_txrx.h"
+#include "funeth_trace.h"
+#include "fun_queue.h"
+
+#define FUN_XDP_CLEAN_THRES 32
+#define FUN_XDP_CLEAN_BATCH 16
+
+/* DMA-map a packet and return the (length, DMA_address) pairs for its
+ * segments. If a mapping error occurs -ENOMEM is returned. The packet
+ * consists of an skb_shared_info and one additional address/length pair.
+ *
+ * @data/@data_len describe the additional pair, which is mapped first and
+ * stored in addr[0]/len[0]; fragment i of @si is stored in
+ * addr[i + 1]/len[i + 1], so both arrays need room for si->nr_frags + 1
+ * entries. @si may be NULL, in which case only the single buffer is mapped.
+ * Returns 0 on success. On failure every mapping made so far is undone
+ * before returning.
+ */
+static int fun_map_pkt(struct device *dev, const struct skb_shared_info *si,
+ void *data, unsigned int data_len,
+ dma_addr_t *addr, unsigned int *len)
+{
+ const skb_frag_t *fp, *end;
+
+ *len = data_len;
+ *addr = dma_map_single(dev, data, *len, DMA_TO_DEVICE);
+ if (dma_mapping_error(dev, *addr))
+ return -ENOMEM;
+
+ if (!si)
+ return 0;
+
+ for (fp = si->frags, end = fp + si->nr_frags; fp < end; fp++) {
+ *++len = skb_frag_size(fp);
+ *++addr = skb_frag_dma_map(dev, fp, 0, *len, DMA_TO_DEVICE);
+ if (dma_mapping_error(dev, *addr))
+ goto unwind;
+ }
+ return 0;
+
+unwind:
+ while (fp-- > si->frags) /* walk back over the frags mapped before the failure */
+ dma_unmap_page(dev, *--addr, skb_frag_size(fp), DMA_TO_DEVICE);
+
+ dma_unmap_single(dev, addr[-1], data_len, DMA_TO_DEVICE); /* addr now points at slot 1, so addr[-1] is the first buffer */
+ return -ENOMEM;
+}
+
+/* The HW write-back area of a Tx queue sits immediately after its descriptor
+ * ring, so the write-back address is also the ring's one-past-the-end
+ * address. Return it as an untyped pointer.
+ */
+static void *txq_end(const struct funeth_txq *q)
+{
+	void *ring_end = (void *)q->hw_wb;
+
+	return ring_end;
+}
+
+/* Return how many bytes lie between @p, an address inside the Tx ring of @q,
+ * and the end of that ring.
+ */
+static unsigned int txq_to_end(const struct funeth_txq *q, void *p)
+{
+	void *ring_end = txq_end(q);
+
+	return ring_end - p;
+}
+
+/* Return how many Tx descriptors a Tx request spans. The request length is
+ * kept in 8-byte units, so it is rounded up to whole descriptors.
+ */
+static unsigned int tx_req_ndesc(const struct fun_eth_tx_req *req)
+{
+	const unsigned int len8_per_desc = FUNETH_SQE_SIZE / 8;
+
+	return DIV_ROUND_UP(req->len8, len8_per_desc);
+}
+
+/* Write a gather list to the Tx descriptor at @req from @ngle address/length
+ * pairs.
+ *
+ * Also sets req->len8 to the size of the request header plus the gather list,
+ * in 8-byte units. Entries are written starting at req->dataop.imm; if the
+ * end of the descriptor ring is reached the remaining entries continue at the
+ * start of the ring. Returns the address just past the last entry written,
+ * wrapped to the start of the ring when that address is the ring end.
+ */
+static struct fun_dataop_gl *fun_write_gl(const struct funeth_txq *q,
+ struct fun_eth_tx_req *req,
+ const dma_addr_t *addrs,
+ const unsigned int *lens,
+ unsigned int ngle)
+{
+ struct fun_dataop_gl *gle;
+ unsigned int i;
+
+ req->len8 = (sizeof(*req) + ngle * sizeof(*gle)) / 8;
+
+ for (i = 0, gle = (struct fun_dataop_gl *)req->dataop.imm;
+ i < ngle && txq_to_end(q, gle); i++, gle++) /* stop early at the ring end */
+ fun_dataop_gl_init(gle, 0, 0, lens[i], addrs[i]);
+
+ if (txq_to_end(q, gle) == 0) { /* hit the ring end: wrap, even if all entries are done */
+ gle = (struct fun_dataop_gl *)q->desc;
+ for ( ; i < ngle; i++, gle++)
+ fun_dataop_gl_init(gle, 0, 0, lens[i], addrs[i]);
+ }
+
+ return gle;
+}
+
+/* Return the first 16 bits of the TCP flag word of @th in wire (big-endian)
+ * order, i.e., the part of the header holding the data offset and flags.
+ */
+static __be16 tcp_hdr_doff_flags(const struct tcphdr *th)
+{
+	__be16 *doff_flags = (__be16 *)&tcp_flag_word(th);
+
+	return *doff_flags;
+}
+
+static struct sk_buff *fun_tls_tx(struct sk_buff *skb, struct funeth_txq *q,
+ unsigned int *tls_len)
+{ /* Decide how a packet of a TLS Tx-offloaded socket is sent.
+ * - No TCP payload: the skb is returned untouched and *tls_len is not
+ * modified.
+ * - No resync pending and the TCP sequence equals the offload context's
+ * next_seq: *tls_len is set to the payload length and the skb is returned
+ * untouched.
+ * - Otherwise tx_tls_fallback is counted and the result of
+ * tls_encrypt_skb() is returned; a NULL result is counted in tx_tls_drops.
+ * Without CONFIG_TLS_DEVICE this returns NULL.
+ */
+#if IS_ENABLED(CONFIG_TLS_DEVICE)
+ const struct fun_ktls_tx_ctx *tls_ctx;
+ u32 datalen, seq;
+
+ datalen = skb->len - skb_tcp_all_headers(skb);
+ if (!datalen)
+ return skb;
+
+ if (likely(!tls_offload_tx_resync_pending(skb->sk))) {
+ seq = ntohl(tcp_hdr(skb)->seq);
+ tls_ctx = tls_driver_ctx(skb->sk, TLS_OFFLOAD_CTX_DIR_TX);
+
+ if (likely(tls_ctx->next_seq == seq)) {
+ *tls_len = datalen;
+ return skb;
+ }
+ if (seq - tls_ctx->next_seq < U32_MAX / 4) { /* seq is ahead of next_seq, modulo 2^32 */
+ tls_offload_tx_resync_request(skb->sk, seq,
+ tls_ctx->next_seq);
+ }
+ }
+
+ FUN_QSTAT_INC(q, tx_tls_fallback);
+ skb = tls_encrypt_skb(skb);
+ if (!skb)
+ FUN_QSTAT_INC(q, tx_tls_drops);
+
+ return skb;
+#else
+ return NULL;
+#endif
+}
+
+/* Write as many descriptors as needed for the supplied skb starting at the
+ * current producer location. The caller has made certain enough descriptors
+ * are available.
+ *
+ * The request consists of a fun_eth_tx_req header (including the offload
+ * settings chosen from the skb's GSO/checksum state), a gather list with one
+ * entry for the linear data plus one per page fragment, and, when @tls_len
+ * is non-zero and CONFIG_TLS_DEVICE is enabled, a trailing fun_eth_tls
+ * record. The producer counter is not advanced here; the caller does that
+ * with the returned count.
+ *
+ * Returns the number of descriptors written, 0 on error.
+ * The only error is a DMA mapping failure, which is counted in tx_map_err.
+ */
+static unsigned int write_pkt_desc(struct sk_buff *skb, struct funeth_txq *q,
+ unsigned int tls_len)
+{
+ unsigned int extra_bytes = 0, extra_pkts = 0;
+ unsigned int idx = q->prod_cnt & q->mask; /* producer's ring slot */
+ const struct skb_shared_info *shinfo;
+ unsigned int lens[MAX_SKB_FRAGS + 1];
+ dma_addr_t addrs[MAX_SKB_FRAGS + 1];
+ struct fun_eth_tx_req *req;
+ struct fun_dataop_gl *gle;
+ const struct tcphdr *th;
+ unsigned int l4_hlen;
+ unsigned int ngle;
+ u16 flags;
+
+ shinfo = skb_shinfo(skb);
+ if (unlikely(fun_map_pkt(q->dma_dev, shinfo, skb->data,
+ skb_headlen(skb), addrs, lens))) {
+ FUN_QSTAT_INC(q, tx_map_err);
+ return 0;
+ }
+
+ req = fun_tx_desc_addr(q, idx);
+ req->op = FUN_ETH_OP_TX;
+ req->len8 = 0; /* set for real by fun_write_gl() below */
+ req->flags = 0;
+ req->suboff8 = offsetof(struct fun_eth_tx_req, dataop);
+ req->repr_idn = 0;
+ req->encap_proto = 0;
+
+ if (likely(shinfo->gso_size)) {
+ if (skb->encapsulation) {
+ u16 ol4_ofst;
+
+ flags = FUN_ETH_OUTER_EN | FUN_ETH_INNER_LSO |
+ FUN_ETH_UPDATE_INNER_L4_CKSUM |
+ FUN_ETH_UPDATE_OUTER_L3_LEN;
+ if (shinfo->gso_type & (SKB_GSO_UDP_TUNNEL |
+ SKB_GSO_UDP_TUNNEL_CSUM)) {
+ flags |= FUN_ETH_UPDATE_OUTER_L4_LEN |
+ FUN_ETH_OUTER_UDP;
+ if (shinfo->gso_type & SKB_GSO_UDP_TUNNEL_CSUM)
+ flags |= FUN_ETH_UPDATE_OUTER_L4_CKSUM;
+ ol4_ofst = skb_transport_offset(skb);
+ } else {
+ ol4_ofst = skb_inner_network_offset(skb);
+ }
+
+ if (ip_hdr(skb)->version == 4)
+ flags |= FUN_ETH_UPDATE_OUTER_L3_CKSUM;
+ else
+ flags |= FUN_ETH_OUTER_IPV6;
+
+ if (skb->inner_network_header) {
+ if (inner_ip_hdr(skb)->version == 4)
+ flags |= FUN_ETH_UPDATE_INNER_L3_CKSUM |
+ FUN_ETH_UPDATE_INNER_L3_LEN;
+ else
+ flags |= FUN_ETH_INNER_IPV6 |
+ FUN_ETH_UPDATE_INNER_L3_LEN;
+ }
+ th = inner_tcp_hdr(skb);
+ l4_hlen = __tcp_hdrlen(th);
+ fun_eth_offload_init(&req->offload, flags,
+ shinfo->gso_size,
+ tcp_hdr_doff_flags(th), 0,
+ skb_inner_network_offset(skb),
+ skb_inner_transport_offset(skb),
+ skb_network_offset(skb), ol4_ofst);
+ FUN_QSTAT_INC(q, tx_encap_tso);
+ } else if (shinfo->gso_type & SKB_GSO_UDP_L4) {
+ flags = FUN_ETH_INNER_LSO | FUN_ETH_INNER_UDP |
+ FUN_ETH_UPDATE_INNER_L4_CKSUM |
+ FUN_ETH_UPDATE_INNER_L4_LEN |
+ FUN_ETH_UPDATE_INNER_L3_LEN;
+
+ if (ip_hdr(skb)->version == 4)
+ flags |= FUN_ETH_UPDATE_INNER_L3_CKSUM;
+ else
+ flags |= FUN_ETH_INNER_IPV6;
+
+ l4_hlen = sizeof(struct udphdr);
+ fun_eth_offload_init(&req->offload, flags,
+ shinfo->gso_size,
+ cpu_to_be16(l4_hlen << 10), 0, /* NOTE(review): looks like hlen / 4 in the top 4 bits, as in a TCP data offset - confirm */
+ skb_network_offset(skb),
+ skb_transport_offset(skb), 0, 0);
+ FUN_QSTAT_INC(q, tx_uso);
+ } else {
+ /* HW considers one set of headers as inner */
+ flags = FUN_ETH_INNER_LSO |
+ FUN_ETH_UPDATE_INNER_L4_CKSUM |
+ FUN_ETH_UPDATE_INNER_L3_LEN;
+ if (shinfo->gso_type & SKB_GSO_TCPV6)
+ flags |= FUN_ETH_INNER_IPV6;
+ else
+ flags |= FUN_ETH_UPDATE_INNER_L3_CKSUM;
+ th = tcp_hdr(skb);
+ l4_hlen = __tcp_hdrlen(th);
+ fun_eth_offload_init(&req->offload, flags,
+ shinfo->gso_size,
+ tcp_hdr_doff_flags(th), 0,
+ skb_network_offset(skb),
+ skb_transport_offset(skb), 0, 0);
+ FUN_QSTAT_INC(q, tx_tso);
+ }
+
+ u64_stats_update_begin(&q->syncp);
+ q->stats.tx_cso += shinfo->gso_segs; /* one per resulting segment */
+ u64_stats_update_end(&q->syncp);
+
+ extra_pkts = shinfo->gso_segs - 1; /* segments beyond the first */
+ extra_bytes = (be16_to_cpu(req->offload.inner_l4_off) +
+ l4_hlen) * extra_pkts; /* header bytes counted again for each extra segment */
+ } else if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
+ flags = FUN_ETH_UPDATE_INNER_L4_CKSUM;
+ if (skb->csum_offset == offsetof(struct udphdr, check))
+ flags |= FUN_ETH_INNER_UDP;
+ fun_eth_offload_init(&req->offload, flags, 0, 0, 0, 0,
+ skb_checksum_start_offset(skb), 0, 0);
+ FUN_QSTAT_INC(q, tx_cso);
+ } else {
+ fun_eth_offload_init(&req->offload, 0, 0, 0, 0, 0, 0, 0, 0);
+ }
+
+ ngle = shinfo->nr_frags + 1; /* linear data plus one entry per fragment */
+ req->dataop = FUN_DATAOP_HDR_INIT(ngle, 0, ngle, 0, skb->len);
+
+ gle = fun_write_gl(q, req, addrs, lens, ngle);
+
+ if (IS_ENABLED(CONFIG_TLS_DEVICE) && unlikely(tls_len)) {
+ struct fun_eth_tls *tls = (struct fun_eth_tls *)gle; /* TLS record goes right after the gather list */
+ struct fun_ktls_tx_ctx *tls_ctx;
+
+ req->len8 += FUNETH_TLS_SZ / 8;
+ req->flags = cpu_to_be16(FUN_ETH_TX_TLS);
+
+ tls_ctx = tls_driver_ctx(skb->sk, TLS_OFFLOAD_CTX_DIR_TX);
+ tls->tlsid = tls_ctx->tlsid;
+ tls_ctx->next_seq += tls_len; /* sequence expected of the next offloaded packet */
+
+ u64_stats_update_begin(&q->syncp);
+ q->stats.tx_tls_bytes += tls_len;
+ q->stats.tx_tls_pkts += 1 + extra_pkts;
+ u64_stats_update_end(&q->syncp);
+ }
+
+ u64_stats_update_begin(&q->syncp);
+ q->stats.tx_bytes += skb->len + extra_bytes;
+ q->stats.tx_pkts += 1 + extra_pkts;
+ u64_stats_update_end(&q->syncp);
+
+ q->info[idx].skb = skb; /* looked up again at this index when the packet is reclaimed */
+
+ trace_funeth_tx(q, skb->len, idx, req->dataop.ngather);
+ return tx_req_ndesc(req);
+}
+
+/* Return how many descriptors of a Tx queue are free for use.
+ * Because HW interprets head==tail as an empty ring, one descriptor always
+ * stays unused, so at most q->mask descriptors can be outstanding.
+ */
+static unsigned int fun_txq_avail(const struct funeth_txq *q)
+{
+	unsigned int in_use = q->prod_cnt - q->cons_cnt;
+
+	return q->mask - in_use;
+}
+
+/* Stop a queue if it can't handle another worst-case packet.
+ * The worst case is FUNETH_MAX_PKT_DESC descriptors. The queue is stopped
+ * first and the available space is re-checked after a full barrier: if
+ * enough space has appeared in the meantime the queue is started again,
+ * otherwise the stop is counted in tx_nstops.
+ */
+static void fun_tx_check_stop(struct funeth_txq *q)
+{
+ if (likely(fun_txq_avail(q) >= FUNETH_MAX_PKT_DESC))
+ return;
+
+ netif_tx_stop_queue(q->ndq);
+
+ /* NAPI reclaim is freeing packets in parallel with us and we may race.
+ * We have stopped the queue but check again after synchronizing with
+ * reclaim.
+ */
+ smp_mb();
+ if (likely(fun_txq_avail(q) < FUNETH_MAX_PKT_DESC))
+ FUN_QSTAT_INC(q, tx_nstops);
+ else
+ netif_tx_start_queue(q->ndq);
+}
+
+/* Tell whether a stopped queue has drained enough to be restarted. The
+ * current policy requires at least a quarter of the ring to be free.
+ */
+static bool fun_txq_may_restart(struct funeth_txq *q)
+{
+	unsigned int restart_thres = q->mask / 4;
+
+	return fun_txq_avail(q) >= restart_thres;
+}
+
+netdev_tx_t fun_start_xmit(struct sk_buff *skb, struct net_device *netdev)
+{ /* Transmit entry point for skbs: writes the packet's descriptors to the
+ * Tx queue selected by the skb's queue mapping, advances the producer
+ * counter, stops the queue if it is nearly full, and rings the doorbell
+ * unless __netdev_tx_sent_queue() says it may be deferred. Always returns
+ * NETDEV_TX_OK; packets that cannot be sent are dropped.
+ */
+ struct funeth_priv *fp = netdev_priv(netdev);
+ unsigned int qid = skb_get_queue_mapping(skb);
+ struct funeth_txq *q = fp->txqs[qid];
+ unsigned int tls_len = 0;
+ unsigned int ndesc;
+
+ if (IS_ENABLED(CONFIG_TLS_DEVICE) && skb->sk &&
+ tls_is_sk_tx_device_offloaded(skb->sk)) {
+ skb = fun_tls_tx(skb, q, &tls_len); /* may return a different skb or NULL */
+ if (unlikely(!skb))
+ goto dropped;
+ }
+
+ ndesc = write_pkt_desc(skb, q, tls_len);
+ if (unlikely(!ndesc)) { /* DMA mapping failed; nothing was queued */
+ dev_kfree_skb_any(skb);
+ goto dropped;
+ }
+
+ q->prod_cnt += ndesc;
+ fun_tx_check_stop(q);
+
+ skb_tx_timestamp(skb);
+
+ if (__netdev_tx_sent_queue(q->ndq, skb->len, netdev_xmit_more()))
+ fun_txq_wr_db(q);
+ else
+ FUN_QSTAT_INC(q, tx_more);
+
+ return NETDEV_TX_OK;
+
+dropped:
+ /* A dropped packet may be the last one in a xmit_more train,
+ * ring the doorbell just in case.
+ */
+ if (!netdev_xmit_more())
+ fun_txq_wr_db(q);
+ return NETDEV_TX_OK;
+}
+
+/* Read the HW head index of a Tx queue from its write-back location in host
+ * memory. The location holds a big-endian 64-bit value of which the low
+ * 16 bits are returned.
+ */
+static u16 txq_hw_head(const struct funeth_txq *q)
+{
+	u64 wb_val = be64_to_cpu(*q->hw_wb);
+
+	return (u16)wb_val;
+}
+
+/* Unmap the Tx packet starting at the given descriptor index and
+ * return the number of Tx descriptors it occupied.
+ *
+ * The addresses and lengths to unmap are read back from the gather list
+ * stored in the descriptor ring. The first entry is unmapped as a single
+ * mapping and the remaining ones as pages. A gather list that reaches the
+ * end of the ring continues at the start of the ring.
+ */
+static unsigned int fun_unmap_pkt(const struct funeth_txq *q, unsigned int idx)
+{
+ const struct fun_eth_tx_req *req = fun_tx_desc_addr(q, idx);
+ unsigned int ngle = req->dataop.ngather;
+ struct fun_dataop_gl *gle;
+
+ if (ngle) {
+ gle = (struct fun_dataop_gl *)req->dataop.imm;
+ dma_unmap_single(q->dma_dev, be64_to_cpu(gle->sgl_data),
+ be32_to_cpu(gle->sgl_len), DMA_TO_DEVICE);
+
+ for (gle++; --ngle && txq_to_end(q, gle); gle++) /* entries up to the ring end; ngle counts what is left */
+ dma_unmap_page(q->dma_dev, be64_to_cpu(gle->sgl_data),
+ be32_to_cpu(gle->sgl_len),
+ DMA_TO_DEVICE);
+
+ for (gle = (struct fun_dataop_gl *)q->desc; ngle; ngle--, gle++) /* leftover entries that wrapped to the ring start */
+ dma_unmap_page(q->dma_dev, be64_to_cpu(gle->sgl_data),
+ be32_to_cpu(gle->sgl_len),
+ DMA_TO_DEVICE);
+ }
+
+ return tx_req_ndesc(req);
+}
+
+/* Reclaim completed Tx descriptors and free their packets. Restart a stopped
+ * queue if we freed enough descriptors.
+ *
+ * @budget limits the number of packets reclaimed; a budget of 0 means no
+ * limit. The consumer counter is advanced once, after the loop, by the total
+ * number of descriptors reclaimed, and the packet and byte totals are
+ * reported through netdev_tx_completed_queue().
+ *
+ * Return true if we exhausted the budget while there is more work to be done.
+ */
+static bool fun_txq_reclaim(struct funeth_txq *q, int budget)
+{
+ unsigned int npkts = 0, nbytes = 0, ndesc = 0;
+ unsigned int head, limit, reclaim_idx;
+
+ /* budget may be 0, e.g., netpoll */
+ limit = budget ? budget : UINT_MAX;
+
+ for (head = txq_hw_head(q), reclaim_idx = q->cons_cnt & q->mask;
+ head != reclaim_idx && npkts < limit; head = txq_hw_head(q)) { /* head is re-read after each inner pass */
+ /* The HW head is continually updated, ensure we don't read
+ * descriptor state before the head tells us to reclaim it.
+ * On the enqueue side the doorbell is an implicit write
+ * barrier.
+ */
+ rmb();
+
+ do {
+ unsigned int pkt_desc = fun_unmap_pkt(q, reclaim_idx);
+ struct sk_buff *skb = q->info[reclaim_idx].skb;
+
+ trace_funeth_tx_free(q, reclaim_idx, pkt_desc, head);
+
+ nbytes += skb->len; /* read the length before the skb is released */
+ napi_consume_skb(skb, budget);
+ ndesc += pkt_desc;
+ reclaim_idx = (reclaim_idx + pkt_desc) & q->mask;
+ npkts++;
+ } while (reclaim_idx != head && npkts < limit);
+ }
+
+ q->cons_cnt += ndesc;
+ netdev_tx_completed_queue(q->ndq, npkts, nbytes);
+ smp_mb(); /* pairs with the one in fun_tx_check_stop() */
+
+ if (unlikely(netif_tx_queue_stopped(q->ndq) &&
+ fun_txq_may_restart(q))) {
+ netif_tx_wake_queue(q->ndq);
+ FUN_QSTAT_INC(q, tx_nrestarts);
+ }
+
+ return reclaim_idx != head; /* descriptors up to the last head read remain unreclaimed */
+}
+
+/* The NAPI handler for Tx queues.
+ * Reclaims completed packets of the Tx queue attached to the interrupt that
+ * owns @napi. Returns @budget if reclaim reports more work, which keeps the
+ * handler scheduled; otherwise completes NAPI, writes the queue's IRQ
+ * doorbell value combined with the current consumer index, and returns 0.
+ */
+int fun_txq_napi_poll(struct napi_struct *napi, int budget)
+{
+ struct fun_irq *irq = container_of(napi, struct fun_irq, napi);
+ struct funeth_txq *q = irq->txq;
+ unsigned int db_val;
+
+ if (fun_txq_reclaim(q, budget))
+ return budget; /* exhausted budget */
+
+ napi_complete(napi); /* exhausted pending work */
+ db_val = READ_ONCE(q->irq_db_val) | (q->cons_cnt & q->mask);
+ writel(db_val, q->db);
+ return 0;
+}
+
+/* Reclaim up to @budget completed Tx packets from a TX XDP queue.
+ * Each reclaimed packet is unmapped and its frame released with
+ * xdp_return_frame(). The consumer counter is advanced once at the end.
+ * Returns the number of packets reclaimed.
+ */
+static unsigned int fun_xdpq_clean(struct funeth_txq *q, unsigned int budget)
+{
+ unsigned int npkts = 0, ndesc = 0, head, reclaim_idx;
+
+ for (head = txq_hw_head(q), reclaim_idx = q->cons_cnt & q->mask;
+ head != reclaim_idx && npkts < budget; head = txq_hw_head(q)) {
+ /* The HW head is continually updated, ensure we don't read
+ * descriptor state before the head tells us to reclaim it.
+ * On the enqueue side the doorbell is an implicit write
+ * barrier.
+ */
+ rmb();
+
+ do {
+ unsigned int pkt_desc = fun_unmap_pkt(q, reclaim_idx);
+
+ xdp_return_frame(q->info[reclaim_idx].xdpf);
+
+ trace_funeth_tx_free(q, reclaim_idx, pkt_desc, head);
+
+ reclaim_idx = (reclaim_idx + pkt_desc) & q->mask;
+ ndesc += pkt_desc;
+ npkts++;
+ } while (reclaim_idx != head && npkts < budget);
+ }
+
+ q->cons_cnt += ndesc;
+ return npkts;
+}
+
+bool fun_xdp_tx(struct funeth_txq *q, struct xdp_frame *xdpf)
+{ /* Queue one XDP frame on XDP Tx queue @q. Returns true if the frame was
+ * written to the ring, false if the ring lacks space (counted in
+ * tx_xdp_full) or DMA mapping failed (counted in tx_map_err). The frame is
+ * not released on failure and the doorbell is not written here.
+ */
+ unsigned int idx, nfrags = 1, ndesc = 1, tot_len = xdpf->len;
+ const struct skb_shared_info *si = NULL;
+ unsigned int lens[MAX_SKB_FRAGS + 1];
+ dma_addr_t dma[MAX_SKB_FRAGS + 1];
+ struct fun_eth_tx_req *req;
+
+ if (fun_txq_avail(q) < FUN_XDP_CLEAN_THRES) /* running low: reclaim a batch first */
+ fun_xdpq_clean(q, FUN_XDP_CLEAN_BATCH);
+
+ if (unlikely(xdp_frame_has_frags(xdpf))) {
+ si = xdp_get_shared_info_from_frame(xdpf);
+ tot_len = xdp_get_frame_len(xdpf);
+ nfrags += si->nr_frags;
+ ndesc = DIV_ROUND_UP((sizeof(*req) + nfrags *
+ sizeof(struct fun_dataop_gl)),
+ FUNETH_SQE_SIZE);
+ }
+
+ if (unlikely(fun_txq_avail(q) < ndesc)) {
+ FUN_QSTAT_INC(q, tx_xdp_full);
+ return false;
+ }
+
+ if (unlikely(fun_map_pkt(q->dma_dev, si, xdpf->data, xdpf->len, dma,
+ lens))) {
+ FUN_QSTAT_INC(q, tx_map_err);
+ return false;
+ }
+
+ idx = q->prod_cnt & q->mask;
+ req = fun_tx_desc_addr(q, idx);
+ req->op = FUN_ETH_OP_TX;
+ req->len8 = 0; /* set for real by fun_write_gl() below */
+ req->flags = 0;
+ req->suboff8 = offsetof(struct fun_eth_tx_req, dataop);
+ req->repr_idn = 0;
+ req->encap_proto = 0;
+ fun_eth_offload_init(&req->offload, 0, 0, 0, 0, 0, 0, 0, 0); /* no offloads for XDP frames */
+ req->dataop = FUN_DATAOP_HDR_INIT(nfrags, 0, nfrags, 0, tot_len);
+
+ fun_write_gl(q, req, dma, lens, nfrags);
+
+ q->info[idx].xdpf = xdpf; /* looked up again at this index when the frame is reclaimed */
+
+ u64_stats_update_begin(&q->syncp);
+ q->stats.tx_bytes += tot_len;
+ q->stats.tx_pkts++;
+ u64_stats_update_end(&q->syncp);
+
+ trace_funeth_tx(q, tot_len, idx, nfrags);
+ q->prod_cnt += ndesc;
+
+ return true;
+}
+
+int fun_xdp_xmit_frames(struct net_device *dev, int n,
+ struct xdp_frame **frames, u32 flags)
+{ /* Queue up to @n XDP frames on the XDP Tx queue indexed by the current
+ * CPU id, stopping at the first frame that cannot be queued. Returns the
+ * number of frames queued, or -EINVAL for unknown @flags, -ENETDOWN if the
+ * device has no XDP queues, or -ENXIO if the CPU id has no matching queue.
+ * The doorbell is written only when XDP_XMIT_FLUSH is set.
+ */
+ struct funeth_priv *fp = netdev_priv(dev);
+ struct funeth_txq *q, **xdpqs;
+ int i, q_idx;
+
+ if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
+ return -EINVAL;
+
+ xdpqs = rcu_dereference_bh(fp->xdpqs);
+ if (unlikely(!xdpqs))
+ return -ENETDOWN;
+
+ q_idx = smp_processor_id();
+ if (unlikely(q_idx >= fp->num_xdpqs))
+ return -ENXIO;
+
+ for (q = xdpqs[q_idx], i = 0; i < n; i++)
+ if (!fun_xdp_tx(q, frames[i]))
+ break;
+
+ if (unlikely(flags & XDP_XMIT_FLUSH))
+ fun_txq_wr_db(q);
+ return i;
+}
+
+/* Discard every packet still queued on a Tx queue: unmap it, free its skb,
+ * and advance the consumer past its descriptors, then reset the stack's
+ * queue accounting. Call only after HW can no longer access the packets,
+ * e.g., once the queue has been disabled.
+ */
+static void fun_txq_purge(struct funeth_txq *q)
+{
+	while (q->prod_cnt != q->cons_cnt) {
+		unsigned int slot = q->cons_cnt & q->mask;
+		unsigned int ndesc = fun_unmap_pkt(q, slot);
+
+		q->cons_cnt += ndesc;
+		dev_kfree_skb_any(q->info[slot].skb);
+	}
+
+	netdev_tx_reset_queue(q->ndq);
+}
+
+/* XDP counterpart of fun_txq_purge(): unmap and return every frame still
+ * queued on an XDP Tx queue.
+ */
+static void fun_xdpq_purge(struct funeth_txq *q)
+{
+	while (q->prod_cnt != q->cons_cnt) {
+		unsigned int slot = q->cons_cnt & q->mask;
+		unsigned int ndesc = fun_unmap_pkt(q, slot);
+
+		q->cons_cnt += ndesc;
+		xdp_return_frame(q->info[slot].xdpf);
+	}
+}
+
+/* Create a Tx queue, allocating all the host resources needed.
+ * @ndesc is the ring depth; mask is set to @ndesc - 1. A non-NULL @irq
+ * selects an skb Tx queue whose memory is placed on the interrupt's node; a
+ * NULL @irq selects an XDP Tx queue placed on the node of CPU @qidx.
+ * Returns the new queue in state FUN_QSTATE_INIT_SW, or NULL (after logging
+ * an error) if memory cannot be allocated.
+ */
+static struct funeth_txq *fun_txq_create_sw(struct net_device *dev,
+ unsigned int qidx,
+ unsigned int ndesc,
+ struct fun_irq *irq)
+{
+ struct funeth_priv *fp = netdev_priv(dev);
+ struct funeth_txq *q;
+ int numa_node;
+
+ if (irq)
+ numa_node = fun_irq_node(irq); /* skb Tx queue */
+ else
+ numa_node = cpu_to_node(qidx); /* XDP Tx queue */
+
+ q = kzalloc_node(sizeof(*q), GFP_KERNEL, numa_node);
+ if (!q)
+ goto err;
+
+ q->dma_dev = &fp->pdev->dev;
+ q->desc = fun_alloc_ring_mem(q->dma_dev, ndesc, FUNETH_SQE_SIZE,
+ sizeof(*q->info), true, numa_node,
+ &q->dma_addr, (void **)&q->info,
+ &q->hw_wb); /* also yields the per-descriptor info array and the write-back address */
+ if (!q->desc)
+ goto free_q;
+
+ q->netdev = dev;
+ q->mask = ndesc - 1;
+ q->qidx = qidx;
+ q->numa_node = numa_node;
+ u64_stats_init(&q->syncp);
+ q->init_state = FUN_QSTATE_INIT_SW;
+ return q;
+
+free_q:
+ kfree(q);
+err:
+ netdev_err(dev, "Can't allocate memory for %s queue %u\n",
+ irq ? "Tx" : "XDP", qidx);
+ return NULL;
+}
+
+static void fun_txq_free_sw(struct funeth_txq *q)
+{ /* Release the host resources of a Tx queue: free its ring memory, add its
+ * packet, byte and mapping-error counters to the device-wide totals, and
+ * free the queue structure.
+ */
+ struct funeth_priv *fp = netdev_priv(q->netdev);
+
+ fun_free_ring_mem(q->dma_dev, q->mask + 1, FUNETH_SQE_SIZE, true,
+ q->desc, q->dma_addr, q->info);
+
+ fp->tx_packets += q->stats.tx_pkts;
+ fp->tx_bytes += q->stats.tx_bytes;
+ fp->tx_dropped += q->stats.tx_map_err; /* mapping errors are the drops accounted here */
+
+ kfree(q);
+}
+
+/* Allocate the device portion of a Tx queue.
+ * Resets the write-back word and the producer/consumer counters, creates the
+ * SQ on the device and binds it to a Tx ETH resource whose id is saved in
+ * q->ethid. For queues with an @irq (skb Tx queues) it also links the queue
+ * to the interrupt and to its netdev Tx queue, and writes the IRQ doorbell
+ * value. On success the queue state becomes FUN_QSTATE_INIT_FULL and 0 is
+ * returned; on failure the error is logged and returned, and an SQ that was
+ * already created is destroyed.
+ */
+int fun_txq_create_dev(struct funeth_txq *q, struct fun_irq *irq)
+{
+ struct funeth_priv *fp = netdev_priv(q->netdev);
+ unsigned int irq_idx, ndesc = q->mask + 1;
+ int err;
+
+ q->irq = irq;
+ *q->hw_wb = 0;
+ q->prod_cnt = 0;
+ q->cons_cnt = 0;
+ irq_idx = irq ? irq->irq_idx : 0; /* XDP queues have no IRQ and pass index 0 */
+
+ err = fun_sq_create(fp->fdev,
+ FUN_ADMIN_EPSQ_CREATE_FLAG_HEAD_WB_ADDRESS |
+ FUN_ADMIN_RES_CREATE_FLAG_ALLOCATOR, 0,
+ FUN_HCI_ID_INVALID, ilog2(FUNETH_SQE_SIZE), ndesc,
+ q->dma_addr, fp->tx_coal_count, fp->tx_coal_usec,
+ irq_idx, 0, fp->fdev->kern_end_qid, 0,
+ &q->hw_qid, &q->db);
+ if (err)
+ goto out;
+
+ err = fun_create_and_bind_tx(fp, q->hw_qid);
+ if (err < 0)
+ goto free_devq;
+ q->ethid = err; /* a non-negative return is the ETH id */
+
+ if (irq) {
+ irq->txq = q;
+ q->ndq = netdev_get_tx_queue(q->netdev, q->qidx);
+ q->irq_db_val = FUN_IRQ_SQ_DB(fp->tx_coal_usec,
+ fp->tx_coal_count);
+ writel(q->irq_db_val, q->db);
+ }
+
+ q->init_state = FUN_QSTATE_INIT_FULL;
+ netif_info(fp, ifup, q->netdev,
+ "%s queue %u, depth %u, HW qid %u, IRQ idx %u, eth id %u, node %d\n",
+ irq ? "Tx" : "XDP", q->qidx, ndesc, q->hw_qid, irq_idx,
+ q->ethid, q->numa_node);
+ return 0;
+
+free_devq:
+ fun_destroy_sq(fp->fdev, q->hw_qid);
+out:
+ netdev_err(q->netdev,
+ "Failed to create %s queue %u on device, error %d\n",
+ irq ? "Tx" : "XDP", q->qidx, err);
+ return err;
+}
+
+static void fun_txq_free_dev(struct funeth_txq *q)
+{ /* Release the device portion of a Tx queue: destroy its SQ and ETH
+ * resource, then purge any packets still queued. Does nothing unless the
+ * queue is fully initialized. Leaves the queue in FUN_QSTATE_INIT_SW.
+ */
+ struct funeth_priv *fp = netdev_priv(q->netdev);
+
+ if (q->init_state < FUN_QSTATE_INIT_FULL)
+ return;
+
+ netif_info(fp, ifdown, q->netdev,
+ "Freeing %s queue %u (id %u), IRQ %u, ethid %u\n",
+ q->irq ? "Tx" : "XDP", q->qidx, q->hw_qid,
+ q->irq ? q->irq->irq_idx : 0, q->ethid);
+
+ fun_destroy_sq(fp->fdev, q->hw_qid);
+ fun_res_destroy(fp->fdev, FUN_ADMIN_OP_ETH, 0, q->ethid);
+
+ if (q->irq) { /* skb Tx queue */
+ q->irq->txq = NULL;
+ fun_txq_purge(q);
+ } else { /* XDP Tx queue */
+ fun_xdpq_purge(q);
+ }
+
+ q->init_state = FUN_QSTATE_INIT_SW;
+}
+
+/* Create or advance a Tx queue, allocating all the host and device resources
+ * needed to reach the target state.
+ *
+ * If *@qp is NULL a new queue is allocated, otherwise the existing queue is
+ * advanced. Nothing more is done if the queue is already at or beyond
+ * @state. On success the queue is stored in *@qp and 0 is returned. Returns
+ * -ENOMEM if the queue cannot be allocated, or the error from device-side
+ * creation; in the latter case a queue allocated by this call is freed while
+ * a queue supplied by the caller is left as it was.
+ */
+int funeth_txq_create(struct net_device *dev, unsigned int qidx,
+ unsigned int ndesc, struct fun_irq *irq, int state,
+ struct funeth_txq **qp)
+{
+ struct funeth_txq *q = *qp;
+ int err;
+
+ if (!q)
+ q = fun_txq_create_sw(dev, qidx, ndesc, irq);
+ if (!q)
+ return -ENOMEM;
+
+ if (q->init_state >= state)
+ goto out;
+
+ err = fun_txq_create_dev(q, irq);
+ if (err) {
+ if (!*qp) /* the queue was allocated above, not supplied by the caller */
+ fun_txq_free_sw(q);
+ return err;
+ }
+
+out:
+ *qp = q;
+ return 0;
+}
+
+/* Tear a Tx queue down to the requested state. Any target below
+ * FUN_QSTATE_INIT_FULL releases the device resources; a target of
+ * FUN_QSTATE_DESTROYED also releases the host resources, in which case NULL
+ * is returned instead of the queue.
+ * The queue must be already disconnected from the stack.
+ */
+struct funeth_txq *funeth_txq_free(struct funeth_txq *q, int state)
+{
+	if (state < FUN_QSTATE_INIT_FULL)
+		fun_txq_free_dev(q);
+
+	if (state != FUN_QSTATE_DESTROYED)
+		return q;
+
+	fun_txq_free_sw(q);
+	return NULL;
+}
diff --git a/drivers/net/ethernet/fungible/funeth/funeth_txrx.h b/drivers/net/ethernet/fungible/funeth/funeth_txrx.h
new file mode 100644
index 000000000..671f51135
--- /dev/null
+++ b/drivers/net/ethernet/fungible/funeth/funeth_txrx.h
@@ -0,0 +1,265 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */
+
+#ifndef _FUNETH_TXRX_H
+#define _FUNETH_TXRX_H
+
+#include <linux/netdevice.h>
+#include <linux/u64_stats_sync.h>
+
+/* Tx descriptor size */
+#define FUNETH_SQE_SIZE 64U
+
+/* Size of device headers per Tx packet */
+#define FUNETH_FUNOS_HDR_SZ (sizeof(struct fun_eth_tx_req))
+
+/* Number of gather list entries per Tx descriptor */
+#define FUNETH_GLE_PER_DESC (FUNETH_SQE_SIZE / sizeof(struct fun_dataop_gl))
+
+/* Max gather list size in bytes for an sk_buff. */
+#define FUNETH_MAX_GL_SZ ((MAX_SKB_FRAGS + 1) * sizeof(struct fun_dataop_gl))
+
+#if IS_ENABLED(CONFIG_TLS_DEVICE)
+# define FUNETH_TLS_SZ sizeof(struct fun_eth_tls)
+#else
+# define FUNETH_TLS_SZ 0
+#endif
+
+/* Max number of Tx descriptors for an sk_buff using a gather list. */
+#define FUNETH_MAX_GL_DESC \
+ DIV_ROUND_UP((FUNETH_FUNOS_HDR_SZ + FUNETH_MAX_GL_SZ + FUNETH_TLS_SZ), \
+ FUNETH_SQE_SIZE)
+
+/* Max number of Tx descriptors for any packet. */
+#define FUNETH_MAX_PKT_DESC FUNETH_MAX_GL_DESC
+
+/* Rx CQ descriptor size. */
+#define FUNETH_CQE_SIZE 64U
+
+/* Offset of cqe_info within a CQE. */
+#define FUNETH_CQE_INFO_OFFSET (FUNETH_CQE_SIZE - sizeof(struct fun_cqe_info))
+
+/* Construct the IRQ portion of a CQ doorbell. The resulting value arms the
+ * interrupt with the supplied time delay and packet count moderation settings.
+ * The arguments are shifted into place as given, without masking.
+ */
+#define FUN_IRQ_CQ_DB(usec, pkts) \
+ (FUN_DB_IRQ_ARM_F | ((usec) << FUN_DB_INTCOAL_USEC_S) | \
+ ((pkts) << FUN_DB_INTCOAL_ENTRIES_S))
+
+/* As above for SQ doorbells. The expansion is the same as FUN_IRQ_CQ_DB. */
+#define FUN_IRQ_SQ_DB(usec, pkts) \
+ (FUN_DB_IRQ_ARM_F | \
+ ((usec) << FUN_DB_INTCOAL_USEC_S) | \
+ ((pkts) << FUN_DB_INTCOAL_ENTRIES_S))
+
+/* Per packet tailroom. Present only for 1-frag packets. */
+#define FUN_RX_TAILROOM SKB_DATA_ALIGN(sizeof(struct skb_shared_info))
+
+/* Per packet headroom for XDP. Preferred over XDP_PACKET_HEADROOM to
+ * accommodate two packets per buffer for 4K pages and 1500B MTUs.
+ */
+#define FUN_XDP_HEADROOM 192
+
+/* Initialization state of a queue. The values are ordered from least to most
+ * initialized; queue create/free code compares them with < and >=.
+ */
+enum {
+ FUN_QSTATE_DESTROYED, /* what queue? */
+ FUN_QSTATE_INIT_SW, /* exists in SW, not on the device */
+ FUN_QSTATE_INIT_FULL, /* exists both in SW and on device */
+};
+
+/* Initialization state of an interrupt. */
+enum {
+ FUN_IRQ_INIT, /* initialized and in the XArray but inactive */
+ FUN_IRQ_REQUESTED, /* request_irq() done */
+ FUN_IRQ_ENABLED, /* processing enabled */
+ FUN_IRQ_DISABLED, /* processing disabled */
+};
+
+struct bpf_prog;
+
+struct funeth_txq_stats { /* per Tx queue SW counters, updated under the queue's syncp */
+ u64 tx_pkts; /* # of Tx packets */
+ u64 tx_bytes; /* total bytes of Tx packets */
+ u64 tx_cso; /* # of packets with checksum offload; LSO packets count once per segment */
+ u64 tx_tso; /* # of non-encapsulated TSO super-packets */
+ u64 tx_encap_tso; /* # of encapsulated TSO super-packets */
+ u64 tx_uso; /* # of non-encapsulated UDP LSO super-packets */
+ u64 tx_more; /* # of DBs elided due to xmit_more */
+ u64 tx_nstops; /* # of times the queue has stopped */
+ u64 tx_nrestarts; /* # of times the queue has restarted */
+ u64 tx_map_err; /* # of packets dropped due to DMA mapping errors */
+ u64 tx_xdp_full; /* # of XDP packets that could not be enqueued */
+ u64 tx_tls_pkts; /* # of Tx TLS packets offloaded to HW */
+ u64 tx_tls_bytes; /* Tx bytes of HW-handled TLS payload */
+ u64 tx_tls_fallback; /* attempted Tx TLS offloads punted to SW */
+ u64 tx_tls_drops; /* attempted Tx TLS offloads dropped */
+};
+
+struct funeth_tx_info { /* per Tx descriptor state, filled in at a packet's first descriptor */
+ union {
+ struct sk_buff *skb; /* associated packet (sk_buff path) */
+ struct xdp_frame *xdpf; /* associated XDP frame (XDP path) */
+ };
+};
+
+struct funeth_txq {
+ /* RO cacheline of frequently accessed data */
+ u32 mask; /* queue depth - 1 */
+ u32 hw_qid; /* device ID of the queue */
+ void *desc; /* base address of descriptor ring */
+ struct funeth_tx_info *info; /* per-descriptor SW state, indexed like the ring */
+ struct device *dma_dev; /* device for DMA mappings */
+ volatile __be64 *hw_wb; /* HW write-back location, just past the end of the ring */
+ u32 __iomem *db; /* SQ doorbell register address */
+ struct netdev_queue *ndq; /* stack Tx queue; set only for queues with an IRQ */
+ dma_addr_t dma_addr; /* DMA address of descriptor ring */
+ /* producer R/W cacheline */
+ u16 qidx; /* queue index within net_device */
+ u16 ethid; /* ID returned by fun_create_and_bind_tx() */
+ u32 prod_cnt; /* producer counter */
+ struct funeth_txq_stats stats;
+ /* shared R/W cacheline, primarily accessed by consumer */
+ u32 irq_db_val; /* value written to IRQ doorbell */
+ u32 cons_cnt; /* consumer (cleanup) counter */
+ struct net_device *netdev; /* owning net_device */
+ struct fun_irq *irq; /* servicing interrupt; NULL for XDP Tx queues */
+ int numa_node; /* node the queue's memory was allocated on */
+ u8 init_state; /* queue initialization state */
+ struct u64_stats_sync syncp; /* synchronizes updates and reads of stats */
+};
+
+struct funeth_rxq_stats { /* per Rx queue SW counters */
+ u64 rx_pkts; /* # of received packets, including SW drops */
+ u64 rx_bytes; /* total size of received packets */
+ u64 rx_cso; /* # of packets with checksum offload */
+ u64 rx_bufs; /* total # of Rx buffers provided to device */
+ u64 gro_pkts; /* # of GRO superpackets */
+ u64 gro_merged; /* # of pkts merged into existing GRO superpackets */
+ u64 rx_page_alloc; /* # of page allocations for Rx buffers */
+ u64 rx_budget; /* NAPI iterations that exhausted their budget */
+ u64 rx_mem_drops; /* # of packets dropped due to memory shortage */
+ u64 rx_map_err; /* # of page DMA mapping errors */
+ u64 xdp_drops; /* XDP_DROPped packets */
+ u64 xdp_tx; /* successful XDP transmits */
+ u64 xdp_redir; /* successful XDP redirects */
+ u64 xdp_err; /* packets dropped due to XDP errors */
+};
+
+struct funeth_rxbuf { /* per Rx buffer state */
+ struct page *page; /* associated page */
+ dma_addr_t dma_addr; /* DMA address of page start */
+ int pg_refs; /* page refs held by driver */
+ int node; /* page node, or -1 if it is PF_MEMALLOC */
+};
+
+struct funeth_rx_cache { /* cache of DMA-mapped previously used buffers */
+ struct funeth_rxbuf *bufs; /* base of Rx buffer state ring */
+ unsigned int prod_cnt; /* producer counter */
+ unsigned int cons_cnt; /* consumer counter */
+ unsigned int mask; /* depth - 1 */
+};
+
+/* An Rx queue consists of a CQ and an SQ used to provide Rx buffers.
+ * Fields named cq_* describe the CQ; fields named rq_* describe the
+ * buffer-providing SQ, referred to as the RQ.
+ */
+struct funeth_rxq {
+ struct net_device *netdev;
+ struct napi_struct *napi;
+ struct device *dma_dev; /* device for DMA mappings */
+ void *cqes; /* base of CQ descriptor ring */
+ const void *next_cqe_info; /* fun_cqe_info of next CQE */
+ u32 __iomem *cq_db; /* CQ doorbell register address */
+ unsigned int cq_head; /* CQ head index */
+ unsigned int cq_mask; /* CQ depth - 1 */
+ u16 phase; /* CQ phase tag */
+ u16 qidx; /* queue index within net_device */
+ unsigned int irq_db_val; /* IRQ info for CQ doorbell */
+ struct fun_eprq_rqbuf *rqes; /* base of RQ descriptor ring */
+ struct funeth_rxbuf *bufs; /* base of Rx buffer state ring */
+ struct funeth_rxbuf *cur_buf; /* currently active buffer */
+ u32 __iomem *rq_db; /* RQ doorbell register address */
+ unsigned int rq_cons; /* RQ consumer counter */
+ unsigned int rq_mask; /* RQ depth - 1 */
+ unsigned int buf_offset; /* offset of next pkt in head buffer */
+ u8 xdp_flush; /* XDP flush types needed at NAPI end */
+ u8 init_state; /* queue initialization state */
+ u16 headroom; /* per packet headroom */
+ unsigned int rq_cons_db; /* value of rq_cons at last RQ db */
+ unsigned int rq_db_thres; /* # of new buffers needed to write RQ db */
+ struct funeth_rxbuf spare_buf; /* spare for next buffer replacement */
+ struct funeth_rx_cache cache; /* used buffer cache */
+ struct bpf_prog *xdp_prog; /* optional XDP BPF program */
+ struct funeth_rxq_stats stats;
+ dma_addr_t cq_dma_addr; /* DMA address of CQE ring */
+ dma_addr_t rq_dma_addr; /* DMA address of RQE ring */
+ u16 irq_cnt;
+ u32 hw_cqid; /* device ID of the queue's CQ */
+ u32 hw_sqid; /* device ID of the queue's SQ */
+ int numa_node;
+ struct u64_stats_sync syncp;
+ struct xdp_rxq_info xdp_rxq;
+};
+
+#define FUN_QSTAT_INC(q, counter) /* add 1 to (q)->stats.counter inside a syncp update section */ \
+ do { \
+ u64_stats_update_begin(&(q)->syncp); \
+ (q)->stats.counter++; \
+ u64_stats_update_end(&(q)->syncp); \
+ } while (0)
+
+#define FUN_QSTAT_READ(q, seq, stats_copy) /* copy all of (q)->stats into stats_copy, repeating until the fetch-retry check passes; seq is a caller-provided scratch variable */ \
+ do { \
+ seq = u64_stats_fetch_begin_irq(&(q)->syncp); \
+ stats_copy = (q)->stats; \
+ } while (u64_stats_fetch_retry_irq(&(q)->syncp, (seq)))
+
+#define FUN_INT_NAME_LEN (IFNAMSIZ + 16)
+
+struct fun_irq { /* per-interrupt state, embedding the NAPI context it services */
+ struct napi_struct napi;
+ struct funeth_txq *txq; /* Tx queue attached to this interrupt; NULL when detached */
+ struct funeth_rxq *rxq; /* presumably the Rx queue attached to this interrupt - verify against the Rx code */
+ u8 state; /* presumably one of the FUN_IRQ_* states - confirm */
+ u16 irq_idx; /* index of MSI-X interrupt */
+ int irq; /* Linux IRQ vector */
+ cpumask_t affinity_mask; /* IRQ affinity */
+ struct irq_affinity_notify aff_notify;
+ char name[FUN_INT_NAME_LEN];
+} ____cacheline_internodealigned_in_smp;
+
+/* Return the address at which Tx descriptor number @idx of queue @q begins.
+ * Descriptors are FUNETH_SQE_SIZE bytes each and laid out back to back from
+ * the ring base.
+ */
+static inline void *fun_tx_desc_addr(const struct funeth_txq *q,
+				     unsigned int idx)
+{
+	unsigned int byte_off = idx * FUNETH_SQE_SIZE;
+
+	return q->desc + byte_off;
+}
+
+/* Write the queue's current producer index (producer counter reduced to a
+ * ring index) to its doorbell register.
+ */
+static inline void fun_txq_wr_db(const struct funeth_txq *q)
+{
+	unsigned int prod_idx = q->prod_cnt & q->mask;
+
+	writel(prod_idx, q->db);
+}
+
+/* Return the memory node of the first CPU in the interrupt's affinity mask. */
+static inline int fun_irq_node(const struct fun_irq *p)
+{
+	unsigned int first_cpu = cpumask_first(&p->affinity_mask);
+
+	return cpu_to_mem(first_cpu);
+}
+
+int fun_rxq_napi_poll(struct napi_struct *napi, int budget);
+int fun_txq_napi_poll(struct napi_struct *napi, int budget);
+netdev_tx_t fun_start_xmit(struct sk_buff *skb, struct net_device *netdev);
+bool fun_xdp_tx(struct funeth_txq *q, struct xdp_frame *xdpf);
+int fun_xdp_xmit_frames(struct net_device *dev, int n,
+ struct xdp_frame **frames, u32 flags);
+
+int funeth_txq_create(struct net_device *dev, unsigned int qidx,
+ unsigned int ndesc, struct fun_irq *irq, int state,
+ struct funeth_txq **qp);
+int fun_txq_create_dev(struct funeth_txq *q, struct fun_irq *irq);
+struct funeth_txq *funeth_txq_free(struct funeth_txq *q, int state);
+int funeth_rxq_create(struct net_device *dev, unsigned int qidx,
+ unsigned int ncqe, unsigned int nrqe, struct fun_irq *irq,
+ int state, struct funeth_rxq **qp);
+int fun_rxq_create_dev(struct funeth_rxq *q, struct fun_irq *irq);
+struct funeth_rxq *funeth_rxq_free(struct funeth_rxq *q, int state);
+int fun_rxq_set_bpf(struct funeth_rxq *q, struct bpf_prog *prog);
+
+#endif /* _FUNETH_TXRX_H */