From ace9429bb58fd418f0c81d4c2835699bddf6bde6 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Thu, 11 Apr 2024 10:27:49 +0200 Subject: Adding upstream version 6.6.15. Signed-off-by: Daniel Baumann --- drivers/net/ethernet/microsoft/Kconfig | 31 + drivers/net/ethernet/microsoft/Makefile | 5 + drivers/net/ethernet/microsoft/mana/Makefile | 6 + drivers/net/ethernet/microsoft/mana/gdma_main.c | 1561 +++++++++++ drivers/net/ethernet/microsoft/mana/hw_channel.c | 870 ++++++ drivers/net/ethernet/microsoft/mana/mana_bpf.c | 226 ++ drivers/net/ethernet/microsoft/mana/mana_en.c | 2904 ++++++++++++++++++++ drivers/net/ethernet/microsoft/mana/mana_ethtool.c | 331 +++ drivers/net/ethernet/microsoft/mana/shm_channel.c | 291 ++ 9 files changed, 6225 insertions(+) create mode 100644 drivers/net/ethernet/microsoft/Kconfig create mode 100644 drivers/net/ethernet/microsoft/Makefile create mode 100644 drivers/net/ethernet/microsoft/mana/Makefile create mode 100644 drivers/net/ethernet/microsoft/mana/gdma_main.c create mode 100644 drivers/net/ethernet/microsoft/mana/hw_channel.c create mode 100644 drivers/net/ethernet/microsoft/mana/mana_bpf.c create mode 100644 drivers/net/ethernet/microsoft/mana/mana_en.c create mode 100644 drivers/net/ethernet/microsoft/mana/mana_ethtool.c create mode 100644 drivers/net/ethernet/microsoft/mana/shm_channel.c (limited to 'drivers/net/ethernet/microsoft') diff --git a/drivers/net/ethernet/microsoft/Kconfig b/drivers/net/ethernet/microsoft/Kconfig new file mode 100644 index 0000000000..01eb7445ea --- /dev/null +++ b/drivers/net/ethernet/microsoft/Kconfig @@ -0,0 +1,31 @@ +# +# Microsoft Azure network device configuration +# + +config NET_VENDOR_MICROSOFT + bool "Microsoft Network Devices" + default y + help + If you have a network (Ethernet) device belonging to this class, say Y. + + Note that the answer to this question doesn't directly affect the + kernel: saying N will just cause the configurator to skip the + question about Microsoft network devices. If you say Y, you will be + asked for your specific device in the following question. + +if NET_VENDOR_MICROSOFT + +config MICROSOFT_MANA + tristate "Microsoft Azure Network Adapter (MANA) support" + depends on PCI_MSI && X86_64 + depends on PCI_HYPERV + select AUXILIARY_BUS + select PAGE_POOL + help + This driver supports Microsoft Azure Network Adapter (MANA). + So far, the driver is only supported on X86_64. + + To compile this driver as a module, choose M here. + The module will be called mana. + +endif #NET_VENDOR_MICROSOFT diff --git a/drivers/net/ethernet/microsoft/Makefile b/drivers/net/ethernet/microsoft/Makefile new file mode 100644 index 0000000000..d2ddc21813 --- /dev/null +++ b/drivers/net/ethernet/microsoft/Makefile @@ -0,0 +1,5 @@ +# +# Makefile for the Microsoft Azure network device driver. +# + +obj-$(CONFIG_MICROSOFT_MANA) += mana/ diff --git a/drivers/net/ethernet/microsoft/mana/Makefile b/drivers/net/ethernet/microsoft/mana/Makefile new file mode 100644 index 0000000000..e16a4221f5 --- /dev/null +++ b/drivers/net/ethernet/microsoft/mana/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +# +# Makefile for the Microsoft Azure Network Adapter driver + +obj-$(CONFIG_MICROSOFT_MANA) += mana.o +mana-objs := gdma_main.o shm_channel.o hw_channel.o mana_en.o mana_ethtool.o mana_bpf.o diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c new file mode 100644 index 0000000000..6367de0c2c --- /dev/null +++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c @@ -0,0 +1,1561 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +/* Copyright (c) 2021, Microsoft Corporation. */ + +#include +#include +#include +#include + +#include + +static u32 mana_gd_r32(struct gdma_context *g, u64 offset) +{ + return readl(g->bar0_va + offset); +} + +static u64 mana_gd_r64(struct gdma_context *g, u64 offset) +{ + return readq(g->bar0_va + offset); +} + +static void mana_gd_init_pf_regs(struct pci_dev *pdev) +{ + struct gdma_context *gc = pci_get_drvdata(pdev); + void __iomem *sriov_base_va; + u64 sriov_base_off; + + gc->db_page_size = mana_gd_r32(gc, GDMA_PF_REG_DB_PAGE_SIZE) & 0xFFFF; + gc->db_page_base = gc->bar0_va + + mana_gd_r64(gc, GDMA_PF_REG_DB_PAGE_OFF); + + sriov_base_off = mana_gd_r64(gc, GDMA_SRIOV_REG_CFG_BASE_OFF); + + sriov_base_va = gc->bar0_va + sriov_base_off; + gc->shm_base = sriov_base_va + + mana_gd_r64(gc, sriov_base_off + GDMA_PF_REG_SHM_OFF); +} + +static void mana_gd_init_vf_regs(struct pci_dev *pdev) +{ + struct gdma_context *gc = pci_get_drvdata(pdev); + + gc->db_page_size = mana_gd_r32(gc, GDMA_REG_DB_PAGE_SIZE) & 0xFFFF; + + gc->db_page_base = gc->bar0_va + + mana_gd_r64(gc, GDMA_REG_DB_PAGE_OFFSET); + + gc->phys_db_page_base = gc->bar0_pa + + mana_gd_r64(gc, GDMA_REG_DB_PAGE_OFFSET); + + gc->shm_base = gc->bar0_va + mana_gd_r64(gc, GDMA_REG_SHM_OFFSET); +} + +static void mana_gd_init_registers(struct pci_dev *pdev) +{ + struct gdma_context *gc = pci_get_drvdata(pdev); + + if (gc->is_pf) + mana_gd_init_pf_regs(pdev); + else + mana_gd_init_vf_regs(pdev); +} + +static int mana_gd_query_max_resources(struct pci_dev *pdev) +{ + struct gdma_context *gc = pci_get_drvdata(pdev); + struct gdma_query_max_resources_resp resp = {}; + struct gdma_general_req req = {}; + int err; + + mana_gd_init_req_hdr(&req.hdr, GDMA_QUERY_MAX_RESOURCES, + sizeof(req), sizeof(resp)); + + err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); + if (err || resp.hdr.status) { + dev_err(gc->dev, "Failed to query resource info: %d, 0x%x\n", + err, resp.hdr.status); + return err ? err : -EPROTO; + } + + if (gc->num_msix_usable > resp.max_msix) + gc->num_msix_usable = resp.max_msix; + + if (gc->num_msix_usable <= 1) + return -ENOSPC; + + gc->max_num_queues = num_online_cpus(); + if (gc->max_num_queues > MANA_MAX_NUM_QUEUES) + gc->max_num_queues = MANA_MAX_NUM_QUEUES; + + if (gc->max_num_queues > resp.max_eq) + gc->max_num_queues = resp.max_eq; + + if (gc->max_num_queues > resp.max_cq) + gc->max_num_queues = resp.max_cq; + + if (gc->max_num_queues > resp.max_sq) + gc->max_num_queues = resp.max_sq; + + if (gc->max_num_queues > resp.max_rq) + gc->max_num_queues = resp.max_rq; + + /* The Hardware Channel (HWC) used 1 MSI-X */ + if (gc->max_num_queues > gc->num_msix_usable - 1) + gc->max_num_queues = gc->num_msix_usable - 1; + + return 0; +} + +static int mana_gd_query_hwc_timeout(struct pci_dev *pdev, u32 *timeout_val) +{ + struct gdma_context *gc = pci_get_drvdata(pdev); + struct gdma_query_hwc_timeout_resp resp = {}; + struct gdma_query_hwc_timeout_req req = {}; + int err; + + mana_gd_init_req_hdr(&req.hdr, GDMA_QUERY_HWC_TIMEOUT, + sizeof(req), sizeof(resp)); + req.timeout_ms = *timeout_val; + err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); + if (err || resp.hdr.status) + return err ? err : -EPROTO; + + *timeout_val = resp.timeout_ms; + + return 0; +} + +static int mana_gd_detect_devices(struct pci_dev *pdev) +{ + struct gdma_context *gc = pci_get_drvdata(pdev); + struct gdma_list_devices_resp resp = {}; + struct gdma_general_req req = {}; + struct gdma_dev_id dev; + u32 i, max_num_devs; + u16 dev_type; + int err; + + mana_gd_init_req_hdr(&req.hdr, GDMA_LIST_DEVICES, sizeof(req), + sizeof(resp)); + + err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); + if (err || resp.hdr.status) { + dev_err(gc->dev, "Failed to detect devices: %d, 0x%x\n", err, + resp.hdr.status); + return err ? err : -EPROTO; + } + + max_num_devs = min_t(u32, MAX_NUM_GDMA_DEVICES, resp.num_of_devs); + + for (i = 0; i < max_num_devs; i++) { + dev = resp.devs[i]; + dev_type = dev.type; + + /* HWC is already detected in mana_hwc_create_channel(). */ + if (dev_type == GDMA_DEVICE_HWC) + continue; + + if (dev_type == GDMA_DEVICE_MANA) { + gc->mana.gdma_context = gc; + gc->mana.dev_id = dev; + } + } + + return gc->mana.dev_id.type == 0 ? -ENODEV : 0; +} + +int mana_gd_send_request(struct gdma_context *gc, u32 req_len, const void *req, + u32 resp_len, void *resp) +{ + struct hw_channel_context *hwc = gc->hwc.driver_data; + + return mana_hwc_send_request(hwc, req_len, req, resp_len, resp); +} +EXPORT_SYMBOL_NS(mana_gd_send_request, NET_MANA); + +int mana_gd_alloc_memory(struct gdma_context *gc, unsigned int length, + struct gdma_mem_info *gmi) +{ + dma_addr_t dma_handle; + void *buf; + + if (length < PAGE_SIZE || !is_power_of_2(length)) + return -EINVAL; + + gmi->dev = gc->dev; + buf = dma_alloc_coherent(gmi->dev, length, &dma_handle, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + gmi->dma_handle = dma_handle; + gmi->virt_addr = buf; + gmi->length = length; + + return 0; +} + +void mana_gd_free_memory(struct gdma_mem_info *gmi) +{ + dma_free_coherent(gmi->dev, gmi->length, gmi->virt_addr, + gmi->dma_handle); +} + +static int mana_gd_create_hw_eq(struct gdma_context *gc, + struct gdma_queue *queue) +{ + struct gdma_create_queue_resp resp = {}; + struct gdma_create_queue_req req = {}; + int err; + + if (queue->type != GDMA_EQ) + return -EINVAL; + + mana_gd_init_req_hdr(&req.hdr, GDMA_CREATE_QUEUE, + sizeof(req), sizeof(resp)); + + req.hdr.dev_id = queue->gdma_dev->dev_id; + req.type = queue->type; + req.pdid = queue->gdma_dev->pdid; + req.doolbell_id = queue->gdma_dev->doorbell; + req.gdma_region = queue->mem_info.dma_region_handle; + req.queue_size = queue->queue_size; + req.log2_throttle_limit = queue->eq.log2_throttle_limit; + req.eq_pci_msix_index = queue->eq.msix_index; + + err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); + if (err || resp.hdr.status) { + dev_err(gc->dev, "Failed to create queue: %d, 0x%x\n", err, + resp.hdr.status); + return err ? err : -EPROTO; + } + + queue->id = resp.queue_index; + queue->eq.disable_needed = true; + queue->mem_info.dma_region_handle = GDMA_INVALID_DMA_REGION; + return 0; +} + +static int mana_gd_disable_queue(struct gdma_queue *queue) +{ + struct gdma_context *gc = queue->gdma_dev->gdma_context; + struct gdma_disable_queue_req req = {}; + struct gdma_general_resp resp = {}; + int err; + + WARN_ON(queue->type != GDMA_EQ); + + mana_gd_init_req_hdr(&req.hdr, GDMA_DISABLE_QUEUE, + sizeof(req), sizeof(resp)); + + req.hdr.dev_id = queue->gdma_dev->dev_id; + req.type = queue->type; + req.queue_index = queue->id; + req.alloc_res_id_on_creation = 1; + + err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); + if (err || resp.hdr.status) { + dev_err(gc->dev, "Failed to disable queue: %d, 0x%x\n", err, + resp.hdr.status); + return err ? err : -EPROTO; + } + + return 0; +} + +#define DOORBELL_OFFSET_SQ 0x0 +#define DOORBELL_OFFSET_RQ 0x400 +#define DOORBELL_OFFSET_CQ 0x800 +#define DOORBELL_OFFSET_EQ 0xFF8 + +static void mana_gd_ring_doorbell(struct gdma_context *gc, u32 db_index, + enum gdma_queue_type q_type, u32 qid, + u32 tail_ptr, u8 num_req) +{ + void __iomem *addr = gc->db_page_base + gc->db_page_size * db_index; + union gdma_doorbell_entry e = {}; + + switch (q_type) { + case GDMA_EQ: + e.eq.id = qid; + e.eq.tail_ptr = tail_ptr; + e.eq.arm = num_req; + + addr += DOORBELL_OFFSET_EQ; + break; + + case GDMA_CQ: + e.cq.id = qid; + e.cq.tail_ptr = tail_ptr; + e.cq.arm = num_req; + + addr += DOORBELL_OFFSET_CQ; + break; + + case GDMA_RQ: + e.rq.id = qid; + e.rq.tail_ptr = tail_ptr; + e.rq.wqe_cnt = num_req; + + addr += DOORBELL_OFFSET_RQ; + break; + + case GDMA_SQ: + e.sq.id = qid; + e.sq.tail_ptr = tail_ptr; + + addr += DOORBELL_OFFSET_SQ; + break; + + default: + WARN_ON(1); + return; + } + + /* Ensure all writes are done before ring doorbell */ + wmb(); + + writeq(e.as_uint64, addr); +} + +void mana_gd_wq_ring_doorbell(struct gdma_context *gc, struct gdma_queue *queue) +{ + /* Hardware Spec specifies that software client should set 0 for + * wqe_cnt for Receive Queues. This value is not used in Send Queues. + */ + mana_gd_ring_doorbell(gc, queue->gdma_dev->doorbell, queue->type, + queue->id, queue->head * GDMA_WQE_BU_SIZE, 0); +} + +void mana_gd_ring_cq(struct gdma_queue *cq, u8 arm_bit) +{ + struct gdma_context *gc = cq->gdma_dev->gdma_context; + + u32 num_cqe = cq->queue_size / GDMA_CQE_SIZE; + + u32 head = cq->head % (num_cqe << GDMA_CQE_OWNER_BITS); + + mana_gd_ring_doorbell(gc, cq->gdma_dev->doorbell, cq->type, cq->id, + head, arm_bit); +} + +static void mana_gd_process_eqe(struct gdma_queue *eq) +{ + u32 head = eq->head % (eq->queue_size / GDMA_EQE_SIZE); + struct gdma_context *gc = eq->gdma_dev->gdma_context; + struct gdma_eqe *eq_eqe_ptr = eq->queue_mem_ptr; + union gdma_eqe_info eqe_info; + enum gdma_eqe_type type; + struct gdma_event event; + struct gdma_queue *cq; + struct gdma_eqe *eqe; + u32 cq_id; + + eqe = &eq_eqe_ptr[head]; + eqe_info.as_uint32 = eqe->eqe_info; + type = eqe_info.type; + + switch (type) { + case GDMA_EQE_COMPLETION: + cq_id = eqe->details[0] & 0xFFFFFF; + if (WARN_ON_ONCE(cq_id >= gc->max_num_cqs)) + break; + + cq = gc->cq_table[cq_id]; + if (WARN_ON_ONCE(!cq || cq->type != GDMA_CQ || cq->id != cq_id)) + break; + + if (cq->cq.callback) + cq->cq.callback(cq->cq.context, cq); + + break; + + case GDMA_EQE_TEST_EVENT: + gc->test_event_eq_id = eq->id; + complete(&gc->eq_test_event); + break; + + case GDMA_EQE_HWC_INIT_EQ_ID_DB: + case GDMA_EQE_HWC_INIT_DATA: + case GDMA_EQE_HWC_INIT_DONE: + if (!eq->eq.callback) + break; + + event.type = type; + memcpy(&event.details, &eqe->details, GDMA_EVENT_DATA_SIZE); + eq->eq.callback(eq->eq.context, eq, &event); + break; + + default: + break; + } +} + +static void mana_gd_process_eq_events(void *arg) +{ + u32 owner_bits, new_bits, old_bits; + union gdma_eqe_info eqe_info; + struct gdma_eqe *eq_eqe_ptr; + struct gdma_queue *eq = arg; + struct gdma_context *gc; + struct gdma_eqe *eqe; + u32 head, num_eqe; + int i; + + gc = eq->gdma_dev->gdma_context; + + num_eqe = eq->queue_size / GDMA_EQE_SIZE; + eq_eqe_ptr = eq->queue_mem_ptr; + + /* Process up to 5 EQEs at a time, and update the HW head. */ + for (i = 0; i < 5; i++) { + eqe = &eq_eqe_ptr[eq->head % num_eqe]; + eqe_info.as_uint32 = eqe->eqe_info; + owner_bits = eqe_info.owner_bits; + + old_bits = (eq->head / num_eqe - 1) & GDMA_EQE_OWNER_MASK; + /* No more entries */ + if (owner_bits == old_bits) + break; + + new_bits = (eq->head / num_eqe) & GDMA_EQE_OWNER_MASK; + if (owner_bits != new_bits) { + dev_err(gc->dev, "EQ %d: overflow detected\n", eq->id); + break; + } + + /* Per GDMA spec, rmb is necessary after checking owner_bits, before + * reading eqe. + */ + rmb(); + + mana_gd_process_eqe(eq); + + eq->head++; + } + + head = eq->head % (num_eqe << GDMA_EQE_OWNER_BITS); + + mana_gd_ring_doorbell(gc, eq->gdma_dev->doorbell, eq->type, eq->id, + head, SET_ARM_BIT); +} + +static int mana_gd_register_irq(struct gdma_queue *queue, + const struct gdma_queue_spec *spec) +{ + struct gdma_dev *gd = queue->gdma_dev; + struct gdma_irq_context *gic; + struct gdma_context *gc; + struct gdma_resource *r; + unsigned int msi_index; + unsigned long flags; + struct device *dev; + int err = 0; + + gc = gd->gdma_context; + r = &gc->msix_resource; + dev = gc->dev; + + spin_lock_irqsave(&r->lock, flags); + + msi_index = find_first_zero_bit(r->map, r->size); + if (msi_index >= r->size || msi_index >= gc->num_msix_usable) { + err = -ENOSPC; + } else { + bitmap_set(r->map, msi_index, 1); + queue->eq.msix_index = msi_index; + } + + spin_unlock_irqrestore(&r->lock, flags); + + if (err) { + dev_err(dev, "Register IRQ err:%d, msi:%u rsize:%u, nMSI:%u", + err, msi_index, r->size, gc->num_msix_usable); + + return err; + } + + gic = &gc->irq_contexts[msi_index]; + + WARN_ON(gic->handler || gic->arg); + + gic->arg = queue; + + gic->handler = mana_gd_process_eq_events; + + return 0; +} + +static void mana_gd_deregiser_irq(struct gdma_queue *queue) +{ + struct gdma_dev *gd = queue->gdma_dev; + struct gdma_irq_context *gic; + struct gdma_context *gc; + struct gdma_resource *r; + unsigned int msix_index; + unsigned long flags; + + gc = gd->gdma_context; + r = &gc->msix_resource; + + /* At most num_online_cpus() + 1 interrupts are used. */ + msix_index = queue->eq.msix_index; + if (WARN_ON(msix_index >= gc->num_msix_usable)) + return; + + gic = &gc->irq_contexts[msix_index]; + gic->handler = NULL; + gic->arg = NULL; + + spin_lock_irqsave(&r->lock, flags); + bitmap_clear(r->map, msix_index, 1); + spin_unlock_irqrestore(&r->lock, flags); + + queue->eq.msix_index = INVALID_PCI_MSIX_INDEX; +} + +int mana_gd_test_eq(struct gdma_context *gc, struct gdma_queue *eq) +{ + struct gdma_generate_test_event_req req = {}; + struct gdma_general_resp resp = {}; + struct device *dev = gc->dev; + int err; + + mutex_lock(&gc->eq_test_event_mutex); + + init_completion(&gc->eq_test_event); + gc->test_event_eq_id = INVALID_QUEUE_ID; + + mana_gd_init_req_hdr(&req.hdr, GDMA_GENERATE_TEST_EQE, + sizeof(req), sizeof(resp)); + + req.hdr.dev_id = eq->gdma_dev->dev_id; + req.queue_index = eq->id; + + err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); + if (err) { + dev_err(dev, "test_eq failed: %d\n", err); + goto out; + } + + err = -EPROTO; + + if (resp.hdr.status) { + dev_err(dev, "test_eq failed: 0x%x\n", resp.hdr.status); + goto out; + } + + if (!wait_for_completion_timeout(&gc->eq_test_event, 30 * HZ)) { + dev_err(dev, "test_eq timed out on queue %d\n", eq->id); + goto out; + } + + if (eq->id != gc->test_event_eq_id) { + dev_err(dev, "test_eq got an event on wrong queue %d (%d)\n", + gc->test_event_eq_id, eq->id); + goto out; + } + + err = 0; +out: + mutex_unlock(&gc->eq_test_event_mutex); + return err; +} + +static void mana_gd_destroy_eq(struct gdma_context *gc, bool flush_evenets, + struct gdma_queue *queue) +{ + int err; + + if (flush_evenets) { + err = mana_gd_test_eq(gc, queue); + if (err) + dev_warn(gc->dev, "Failed to flush EQ: %d\n", err); + } + + mana_gd_deregiser_irq(queue); + + if (queue->eq.disable_needed) + mana_gd_disable_queue(queue); +} + +static int mana_gd_create_eq(struct gdma_dev *gd, + const struct gdma_queue_spec *spec, + bool create_hwq, struct gdma_queue *queue) +{ + struct gdma_context *gc = gd->gdma_context; + struct device *dev = gc->dev; + u32 log2_num_entries; + int err; + + queue->eq.msix_index = INVALID_PCI_MSIX_INDEX; + + log2_num_entries = ilog2(queue->queue_size / GDMA_EQE_SIZE); + + if (spec->eq.log2_throttle_limit > log2_num_entries) { + dev_err(dev, "EQ throttling limit (%lu) > maximum EQE (%u)\n", + spec->eq.log2_throttle_limit, log2_num_entries); + return -EINVAL; + } + + err = mana_gd_register_irq(queue, spec); + if (err) { + dev_err(dev, "Failed to register irq: %d\n", err); + return err; + } + + queue->eq.callback = spec->eq.callback; + queue->eq.context = spec->eq.context; + queue->head |= INITIALIZED_OWNER_BIT(log2_num_entries); + queue->eq.log2_throttle_limit = spec->eq.log2_throttle_limit ?: 1; + + if (create_hwq) { + err = mana_gd_create_hw_eq(gc, queue); + if (err) + goto out; + + err = mana_gd_test_eq(gc, queue); + if (err) + goto out; + } + + return 0; +out: + dev_err(dev, "Failed to create EQ: %d\n", err); + mana_gd_destroy_eq(gc, false, queue); + return err; +} + +static void mana_gd_create_cq(const struct gdma_queue_spec *spec, + struct gdma_queue *queue) +{ + u32 log2_num_entries = ilog2(spec->queue_size / GDMA_CQE_SIZE); + + queue->head |= INITIALIZED_OWNER_BIT(log2_num_entries); + queue->cq.parent = spec->cq.parent_eq; + queue->cq.context = spec->cq.context; + queue->cq.callback = spec->cq.callback; +} + +static void mana_gd_destroy_cq(struct gdma_context *gc, + struct gdma_queue *queue) +{ + u32 id = queue->id; + + if (id >= gc->max_num_cqs) + return; + + if (!gc->cq_table[id]) + return; + + gc->cq_table[id] = NULL; +} + +int mana_gd_create_hwc_queue(struct gdma_dev *gd, + const struct gdma_queue_spec *spec, + struct gdma_queue **queue_ptr) +{ + struct gdma_context *gc = gd->gdma_context; + struct gdma_mem_info *gmi; + struct gdma_queue *queue; + int err; + + queue = kzalloc(sizeof(*queue), GFP_KERNEL); + if (!queue) + return -ENOMEM; + + gmi = &queue->mem_info; + err = mana_gd_alloc_memory(gc, spec->queue_size, gmi); + if (err) + goto free_q; + + queue->head = 0; + queue->tail = 0; + queue->queue_mem_ptr = gmi->virt_addr; + queue->queue_size = spec->queue_size; + queue->monitor_avl_buf = spec->monitor_avl_buf; + queue->type = spec->type; + queue->gdma_dev = gd; + + if (spec->type == GDMA_EQ) + err = mana_gd_create_eq(gd, spec, false, queue); + else if (spec->type == GDMA_CQ) + mana_gd_create_cq(spec, queue); + + if (err) + goto out; + + *queue_ptr = queue; + return 0; +out: + mana_gd_free_memory(gmi); +free_q: + kfree(queue); + return err; +} + +int mana_gd_destroy_dma_region(struct gdma_context *gc, u64 dma_region_handle) +{ + struct gdma_destroy_dma_region_req req = {}; + struct gdma_general_resp resp = {}; + int err; + + if (dma_region_handle == GDMA_INVALID_DMA_REGION) + return 0; + + mana_gd_init_req_hdr(&req.hdr, GDMA_DESTROY_DMA_REGION, sizeof(req), + sizeof(resp)); + req.dma_region_handle = dma_region_handle; + + err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); + if (err || resp.hdr.status) { + dev_err(gc->dev, "Failed to destroy DMA region: %d, 0x%x\n", + err, resp.hdr.status); + return -EPROTO; + } + + return 0; +} +EXPORT_SYMBOL_NS(mana_gd_destroy_dma_region, NET_MANA); + +static int mana_gd_create_dma_region(struct gdma_dev *gd, + struct gdma_mem_info *gmi) +{ + unsigned int num_page = gmi->length / PAGE_SIZE; + struct gdma_create_dma_region_req *req = NULL; + struct gdma_create_dma_region_resp resp = {}; + struct gdma_context *gc = gd->gdma_context; + struct hw_channel_context *hwc; + u32 length = gmi->length; + size_t req_msg_size; + int err; + int i; + + if (length < PAGE_SIZE || !is_power_of_2(length)) + return -EINVAL; + + if (offset_in_page(gmi->virt_addr) != 0) + return -EINVAL; + + hwc = gc->hwc.driver_data; + req_msg_size = struct_size(req, page_addr_list, num_page); + if (req_msg_size > hwc->max_req_msg_size) + return -EINVAL; + + req = kzalloc(req_msg_size, GFP_KERNEL); + if (!req) + return -ENOMEM; + + mana_gd_init_req_hdr(&req->hdr, GDMA_CREATE_DMA_REGION, + req_msg_size, sizeof(resp)); + req->length = length; + req->offset_in_page = 0; + req->gdma_page_type = GDMA_PAGE_TYPE_4K; + req->page_count = num_page; + req->page_addr_list_len = num_page; + + for (i = 0; i < num_page; i++) + req->page_addr_list[i] = gmi->dma_handle + i * PAGE_SIZE; + + err = mana_gd_send_request(gc, req_msg_size, req, sizeof(resp), &resp); + if (err) + goto out; + + if (resp.hdr.status || + resp.dma_region_handle == GDMA_INVALID_DMA_REGION) { + dev_err(gc->dev, "Failed to create DMA region: 0x%x\n", + resp.hdr.status); + err = -EPROTO; + goto out; + } + + gmi->dma_region_handle = resp.dma_region_handle; +out: + kfree(req); + return err; +} + +int mana_gd_create_mana_eq(struct gdma_dev *gd, + const struct gdma_queue_spec *spec, + struct gdma_queue **queue_ptr) +{ + struct gdma_context *gc = gd->gdma_context; + struct gdma_mem_info *gmi; + struct gdma_queue *queue; + int err; + + if (spec->type != GDMA_EQ) + return -EINVAL; + + queue = kzalloc(sizeof(*queue), GFP_KERNEL); + if (!queue) + return -ENOMEM; + + gmi = &queue->mem_info; + err = mana_gd_alloc_memory(gc, spec->queue_size, gmi); + if (err) + goto free_q; + + err = mana_gd_create_dma_region(gd, gmi); + if (err) + goto out; + + queue->head = 0; + queue->tail = 0; + queue->queue_mem_ptr = gmi->virt_addr; + queue->queue_size = spec->queue_size; + queue->monitor_avl_buf = spec->monitor_avl_buf; + queue->type = spec->type; + queue->gdma_dev = gd; + + err = mana_gd_create_eq(gd, spec, true, queue); + if (err) + goto out; + + *queue_ptr = queue; + return 0; +out: + mana_gd_free_memory(gmi); +free_q: + kfree(queue); + return err; +} + +int mana_gd_create_mana_wq_cq(struct gdma_dev *gd, + const struct gdma_queue_spec *spec, + struct gdma_queue **queue_ptr) +{ + struct gdma_context *gc = gd->gdma_context; + struct gdma_mem_info *gmi; + struct gdma_queue *queue; + int err; + + if (spec->type != GDMA_CQ && spec->type != GDMA_SQ && + spec->type != GDMA_RQ) + return -EINVAL; + + queue = kzalloc(sizeof(*queue), GFP_KERNEL); + if (!queue) + return -ENOMEM; + + gmi = &queue->mem_info; + err = mana_gd_alloc_memory(gc, spec->queue_size, gmi); + if (err) + goto free_q; + + err = mana_gd_create_dma_region(gd, gmi); + if (err) + goto out; + + queue->head = 0; + queue->tail = 0; + queue->queue_mem_ptr = gmi->virt_addr; + queue->queue_size = spec->queue_size; + queue->monitor_avl_buf = spec->monitor_avl_buf; + queue->type = spec->type; + queue->gdma_dev = gd; + + if (spec->type == GDMA_CQ) + mana_gd_create_cq(spec, queue); + + *queue_ptr = queue; + return 0; +out: + mana_gd_free_memory(gmi); +free_q: + kfree(queue); + return err; +} + +void mana_gd_destroy_queue(struct gdma_context *gc, struct gdma_queue *queue) +{ + struct gdma_mem_info *gmi = &queue->mem_info; + + switch (queue->type) { + case GDMA_EQ: + mana_gd_destroy_eq(gc, queue->eq.disable_needed, queue); + break; + + case GDMA_CQ: + mana_gd_destroy_cq(gc, queue); + break; + + case GDMA_RQ: + break; + + case GDMA_SQ: + break; + + default: + dev_err(gc->dev, "Can't destroy unknown queue: type=%d\n", + queue->type); + return; + } + + mana_gd_destroy_dma_region(gc, gmi->dma_region_handle); + mana_gd_free_memory(gmi); + kfree(queue); +} + +int mana_gd_verify_vf_version(struct pci_dev *pdev) +{ + struct gdma_context *gc = pci_get_drvdata(pdev); + struct gdma_verify_ver_resp resp = {}; + struct gdma_verify_ver_req req = {}; + struct hw_channel_context *hwc; + int err; + + hwc = gc->hwc.driver_data; + mana_gd_init_req_hdr(&req.hdr, GDMA_VERIFY_VF_DRIVER_VERSION, + sizeof(req), sizeof(resp)); + + req.protocol_ver_min = GDMA_PROTOCOL_FIRST; + req.protocol_ver_max = GDMA_PROTOCOL_LAST; + + req.gd_drv_cap_flags1 = GDMA_DRV_CAP_FLAGS1; + req.gd_drv_cap_flags2 = GDMA_DRV_CAP_FLAGS2; + req.gd_drv_cap_flags3 = GDMA_DRV_CAP_FLAGS3; + req.gd_drv_cap_flags4 = GDMA_DRV_CAP_FLAGS4; + + req.drv_ver = 0; /* Unused*/ + req.os_type = 0x10; /* Linux */ + req.os_ver_major = LINUX_VERSION_MAJOR; + req.os_ver_minor = LINUX_VERSION_PATCHLEVEL; + req.os_ver_build = LINUX_VERSION_SUBLEVEL; + strscpy(req.os_ver_str1, utsname()->sysname, sizeof(req.os_ver_str1)); + strscpy(req.os_ver_str2, utsname()->release, sizeof(req.os_ver_str2)); + strscpy(req.os_ver_str3, utsname()->version, sizeof(req.os_ver_str3)); + + err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); + if (err || resp.hdr.status) { + dev_err(gc->dev, "VfVerifyVersionOutput: %d, status=0x%x\n", + err, resp.hdr.status); + return err ? err : -EPROTO; + } + if (resp.pf_cap_flags1 & GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECONFIG) { + err = mana_gd_query_hwc_timeout(pdev, &hwc->hwc_timeout); + if (err) { + dev_err(gc->dev, "Failed to set the hwc timeout %d\n", err); + return err; + } + dev_dbg(gc->dev, "set the hwc timeout to %u\n", hwc->hwc_timeout); + } + return 0; +} + +int mana_gd_register_device(struct gdma_dev *gd) +{ + struct gdma_context *gc = gd->gdma_context; + struct gdma_register_device_resp resp = {}; + struct gdma_general_req req = {}; + int err; + + gd->pdid = INVALID_PDID; + gd->doorbell = INVALID_DOORBELL; + gd->gpa_mkey = INVALID_MEM_KEY; + + mana_gd_init_req_hdr(&req.hdr, GDMA_REGISTER_DEVICE, sizeof(req), + sizeof(resp)); + + req.hdr.dev_id = gd->dev_id; + + err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); + if (err || resp.hdr.status) { + dev_err(gc->dev, "gdma_register_device_resp failed: %d, 0x%x\n", + err, resp.hdr.status); + return err ? err : -EPROTO; + } + + gd->pdid = resp.pdid; + gd->gpa_mkey = resp.gpa_mkey; + gd->doorbell = resp.db_id; + + return 0; +} + +int mana_gd_deregister_device(struct gdma_dev *gd) +{ + struct gdma_context *gc = gd->gdma_context; + struct gdma_general_resp resp = {}; + struct gdma_general_req req = {}; + int err; + + if (gd->pdid == INVALID_PDID) + return -EINVAL; + + mana_gd_init_req_hdr(&req.hdr, GDMA_DEREGISTER_DEVICE, sizeof(req), + sizeof(resp)); + + req.hdr.dev_id = gd->dev_id; + + err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); + if (err || resp.hdr.status) { + dev_err(gc->dev, "Failed to deregister device: %d, 0x%x\n", + err, resp.hdr.status); + if (!err) + err = -EPROTO; + } + + gd->pdid = INVALID_PDID; + gd->doorbell = INVALID_DOORBELL; + gd->gpa_mkey = INVALID_MEM_KEY; + + return err; +} + +u32 mana_gd_wq_avail_space(struct gdma_queue *wq) +{ + u32 used_space = (wq->head - wq->tail) * GDMA_WQE_BU_SIZE; + u32 wq_size = wq->queue_size; + + WARN_ON_ONCE(used_space > wq_size); + + return wq_size - used_space; +} + +u8 *mana_gd_get_wqe_ptr(const struct gdma_queue *wq, u32 wqe_offset) +{ + u32 offset = (wqe_offset * GDMA_WQE_BU_SIZE) & (wq->queue_size - 1); + + WARN_ON_ONCE((offset + GDMA_WQE_BU_SIZE) > wq->queue_size); + + return wq->queue_mem_ptr + offset; +} + +static u32 mana_gd_write_client_oob(const struct gdma_wqe_request *wqe_req, + enum gdma_queue_type q_type, + u32 client_oob_size, u32 sgl_data_size, + u8 *wqe_ptr) +{ + bool oob_in_sgl = !!(wqe_req->flags & GDMA_WR_OOB_IN_SGL); + bool pad_data = !!(wqe_req->flags & GDMA_WR_PAD_BY_SGE0); + struct gdma_wqe *header = (struct gdma_wqe *)wqe_ptr; + u8 *ptr; + + memset(header, 0, sizeof(struct gdma_wqe)); + header->num_sge = wqe_req->num_sge; + header->inline_oob_size_div4 = client_oob_size / sizeof(u32); + + if (oob_in_sgl) { + WARN_ON_ONCE(!pad_data || wqe_req->num_sge < 2); + + header->client_oob_in_sgl = 1; + + if (pad_data) + header->last_vbytes = wqe_req->sgl[0].size; + } + + if (q_type == GDMA_SQ) + header->client_data_unit = wqe_req->client_data_unit; + + /* The size of gdma_wqe + client_oob_size must be less than or equal + * to one Basic Unit (i.e. 32 bytes), so the pointer can't go beyond + * the queue memory buffer boundary. + */ + ptr = wqe_ptr + sizeof(header); + + if (wqe_req->inline_oob_data && wqe_req->inline_oob_size > 0) { + memcpy(ptr, wqe_req->inline_oob_data, wqe_req->inline_oob_size); + + if (client_oob_size > wqe_req->inline_oob_size) + memset(ptr + wqe_req->inline_oob_size, 0, + client_oob_size - wqe_req->inline_oob_size); + } + + return sizeof(header) + client_oob_size; +} + +static void mana_gd_write_sgl(struct gdma_queue *wq, u8 *wqe_ptr, + const struct gdma_wqe_request *wqe_req) +{ + u32 sgl_size = sizeof(struct gdma_sge) * wqe_req->num_sge; + const u8 *address = (u8 *)wqe_req->sgl; + u8 *base_ptr, *end_ptr; + u32 size_to_end; + + base_ptr = wq->queue_mem_ptr; + end_ptr = base_ptr + wq->queue_size; + size_to_end = (u32)(end_ptr - wqe_ptr); + + if (size_to_end < sgl_size) { + memcpy(wqe_ptr, address, size_to_end); + + wqe_ptr = base_ptr; + address += size_to_end; + sgl_size -= size_to_end; + } + + memcpy(wqe_ptr, address, sgl_size); +} + +int mana_gd_post_work_request(struct gdma_queue *wq, + const struct gdma_wqe_request *wqe_req, + struct gdma_posted_wqe_info *wqe_info) +{ + u32 client_oob_size = wqe_req->inline_oob_size; + struct gdma_context *gc; + u32 sgl_data_size; + u32 max_wqe_size; + u32 wqe_size; + u8 *wqe_ptr; + + if (wqe_req->num_sge == 0) + return -EINVAL; + + if (wq->type == GDMA_RQ) { + if (client_oob_size != 0) + return -EINVAL; + + client_oob_size = INLINE_OOB_SMALL_SIZE; + + max_wqe_size = GDMA_MAX_RQE_SIZE; + } else { + if (client_oob_size != INLINE_OOB_SMALL_SIZE && + client_oob_size != INLINE_OOB_LARGE_SIZE) + return -EINVAL; + + max_wqe_size = GDMA_MAX_SQE_SIZE; + } + + sgl_data_size = sizeof(struct gdma_sge) * wqe_req->num_sge; + wqe_size = ALIGN(sizeof(struct gdma_wqe) + client_oob_size + + sgl_data_size, GDMA_WQE_BU_SIZE); + if (wqe_size > max_wqe_size) + return -EINVAL; + + if (wq->monitor_avl_buf && wqe_size > mana_gd_wq_avail_space(wq)) { + gc = wq->gdma_dev->gdma_context; + dev_err(gc->dev, "unsuccessful flow control!\n"); + return -ENOSPC; + } + + if (wqe_info) + wqe_info->wqe_size_in_bu = wqe_size / GDMA_WQE_BU_SIZE; + + wqe_ptr = mana_gd_get_wqe_ptr(wq, wq->head); + wqe_ptr += mana_gd_write_client_oob(wqe_req, wq->type, client_oob_size, + sgl_data_size, wqe_ptr); + if (wqe_ptr >= (u8 *)wq->queue_mem_ptr + wq->queue_size) + wqe_ptr -= wq->queue_size; + + mana_gd_write_sgl(wq, wqe_ptr, wqe_req); + + wq->head += wqe_size / GDMA_WQE_BU_SIZE; + + return 0; +} + +int mana_gd_post_and_ring(struct gdma_queue *queue, + const struct gdma_wqe_request *wqe_req, + struct gdma_posted_wqe_info *wqe_info) +{ + struct gdma_context *gc = queue->gdma_dev->gdma_context; + int err; + + err = mana_gd_post_work_request(queue, wqe_req, wqe_info); + if (err) + return err; + + mana_gd_wq_ring_doorbell(gc, queue); + + return 0; +} + +static int mana_gd_read_cqe(struct gdma_queue *cq, struct gdma_comp *comp) +{ + unsigned int num_cqe = cq->queue_size / sizeof(struct gdma_cqe); + struct gdma_cqe *cq_cqe = cq->queue_mem_ptr; + u32 owner_bits, new_bits, old_bits; + struct gdma_cqe *cqe; + + cqe = &cq_cqe[cq->head % num_cqe]; + owner_bits = cqe->cqe_info.owner_bits; + + old_bits = (cq->head / num_cqe - 1) & GDMA_CQE_OWNER_MASK; + /* Return 0 if no more entries. */ + if (owner_bits == old_bits) + return 0; + + new_bits = (cq->head / num_cqe) & GDMA_CQE_OWNER_MASK; + /* Return -1 if overflow detected. */ + if (WARN_ON_ONCE(owner_bits != new_bits)) + return -1; + + /* Per GDMA spec, rmb is necessary after checking owner_bits, before + * reading completion info + */ + rmb(); + + comp->wq_num = cqe->cqe_info.wq_num; + comp->is_sq = cqe->cqe_info.is_sq; + memcpy(comp->cqe_data, cqe->cqe_data, GDMA_COMP_DATA_SIZE); + + return 1; +} + +int mana_gd_poll_cq(struct gdma_queue *cq, struct gdma_comp *comp, int num_cqe) +{ + int cqe_idx; + int ret; + + for (cqe_idx = 0; cqe_idx < num_cqe; cqe_idx++) { + ret = mana_gd_read_cqe(cq, &comp[cqe_idx]); + + if (ret < 0) { + cq->head -= cqe_idx; + return ret; + } + + if (ret == 0) + break; + + cq->head++; + } + + return cqe_idx; +} + +static irqreturn_t mana_gd_intr(int irq, void *arg) +{ + struct gdma_irq_context *gic = arg; + + if (gic->handler) + gic->handler(gic->arg); + + return IRQ_HANDLED; +} + +int mana_gd_alloc_res_map(u32 res_avail, struct gdma_resource *r) +{ + r->map = bitmap_zalloc(res_avail, GFP_KERNEL); + if (!r->map) + return -ENOMEM; + + r->size = res_avail; + spin_lock_init(&r->lock); + + return 0; +} + +void mana_gd_free_res_map(struct gdma_resource *r) +{ + bitmap_free(r->map); + r->map = NULL; + r->size = 0; +} + +static int mana_gd_setup_irqs(struct pci_dev *pdev) +{ + unsigned int max_queues_per_port = num_online_cpus(); + struct gdma_context *gc = pci_get_drvdata(pdev); + struct gdma_irq_context *gic; + unsigned int max_irqs, cpu; + int nvec, irq; + int err, i = 0, j; + + if (max_queues_per_port > MANA_MAX_NUM_QUEUES) + max_queues_per_port = MANA_MAX_NUM_QUEUES; + + /* Need 1 interrupt for the Hardware communication Channel (HWC) */ + max_irqs = max_queues_per_port + 1; + + nvec = pci_alloc_irq_vectors(pdev, 2, max_irqs, PCI_IRQ_MSIX); + if (nvec < 0) + return nvec; + + gc->irq_contexts = kcalloc(nvec, sizeof(struct gdma_irq_context), + GFP_KERNEL); + if (!gc->irq_contexts) { + err = -ENOMEM; + goto free_irq_vector; + } + + for (i = 0; i < nvec; i++) { + gic = &gc->irq_contexts[i]; + gic->handler = NULL; + gic->arg = NULL; + + if (!i) + snprintf(gic->name, MANA_IRQ_NAME_SZ, "mana_hwc@pci:%s", + pci_name(pdev)); + else + snprintf(gic->name, MANA_IRQ_NAME_SZ, "mana_q%d@pci:%s", + i - 1, pci_name(pdev)); + + irq = pci_irq_vector(pdev, i); + if (irq < 0) { + err = irq; + goto free_irq; + } + + err = request_irq(irq, mana_gd_intr, 0, gic->name, gic); + if (err) + goto free_irq; + + cpu = cpumask_local_spread(i, gc->numa_node); + irq_set_affinity_and_hint(irq, cpumask_of(cpu)); + } + + err = mana_gd_alloc_res_map(nvec, &gc->msix_resource); + if (err) + goto free_irq; + + gc->max_num_msix = nvec; + gc->num_msix_usable = nvec; + + return 0; + +free_irq: + for (j = i - 1; j >= 0; j--) { + irq = pci_irq_vector(pdev, j); + gic = &gc->irq_contexts[j]; + + irq_update_affinity_hint(irq, NULL); + free_irq(irq, gic); + } + + kfree(gc->irq_contexts); + gc->irq_contexts = NULL; +free_irq_vector: + pci_free_irq_vectors(pdev); + return err; +} + +static void mana_gd_remove_irqs(struct pci_dev *pdev) +{ + struct gdma_context *gc = pci_get_drvdata(pdev); + struct gdma_irq_context *gic; + int irq, i; + + if (gc->max_num_msix < 1) + return; + + mana_gd_free_res_map(&gc->msix_resource); + + for (i = 0; i < gc->max_num_msix; i++) { + irq = pci_irq_vector(pdev, i); + if (irq < 0) + continue; + + gic = &gc->irq_contexts[i]; + + /* Need to clear the hint before free_irq */ + irq_update_affinity_hint(irq, NULL); + free_irq(irq, gic); + } + + pci_free_irq_vectors(pdev); + + gc->max_num_msix = 0; + gc->num_msix_usable = 0; + kfree(gc->irq_contexts); + gc->irq_contexts = NULL; +} + +static int mana_gd_setup(struct pci_dev *pdev) +{ + struct gdma_context *gc = pci_get_drvdata(pdev); + int err; + + mana_gd_init_registers(pdev); + mana_smc_init(&gc->shm_channel, gc->dev, gc->shm_base); + + err = mana_gd_setup_irqs(pdev); + if (err) + return err; + + err = mana_hwc_create_channel(gc); + if (err) + goto remove_irq; + + err = mana_gd_verify_vf_version(pdev); + if (err) + goto destroy_hwc; + + err = mana_gd_query_max_resources(pdev); + if (err) + goto destroy_hwc; + + err = mana_gd_detect_devices(pdev); + if (err) + goto destroy_hwc; + + return 0; + +destroy_hwc: + mana_hwc_destroy_channel(gc); +remove_irq: + mana_gd_remove_irqs(pdev); + return err; +} + +static void mana_gd_cleanup(struct pci_dev *pdev) +{ + struct gdma_context *gc = pci_get_drvdata(pdev); + + mana_hwc_destroy_channel(gc); + + mana_gd_remove_irqs(pdev); +} + +static bool mana_is_pf(unsigned short dev_id) +{ + return dev_id == MANA_PF_DEVICE_ID; +} + +static int mana_gd_probe(struct pci_dev *pdev, const struct pci_device_id *ent) +{ + struct gdma_context *gc; + void __iomem *bar0_va; + int bar = 0; + int err; + + /* Each port has 2 CQs, each CQ has at most 1 EQE at a time */ + BUILD_BUG_ON(2 * MAX_PORTS_IN_MANA_DEV * GDMA_EQE_SIZE > EQ_SIZE); + + err = pci_enable_device(pdev); + if (err) + return -ENXIO; + + pci_set_master(pdev); + + err = pci_request_regions(pdev, "mana"); + if (err) + goto disable_dev; + + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (err) + goto release_region; + + err = dma_set_max_seg_size(&pdev->dev, UINT_MAX); + if (err) { + dev_err(&pdev->dev, "Failed to set dma device segment size\n"); + goto release_region; + } + + err = -ENOMEM; + gc = vzalloc(sizeof(*gc)); + if (!gc) + goto release_region; + + mutex_init(&gc->eq_test_event_mutex); + pci_set_drvdata(pdev, gc); + gc->bar0_pa = pci_resource_start(pdev, 0); + + bar0_va = pci_iomap(pdev, bar, 0); + if (!bar0_va) + goto free_gc; + + gc->numa_node = dev_to_node(&pdev->dev); + gc->is_pf = mana_is_pf(pdev->device); + gc->bar0_va = bar0_va; + gc->dev = &pdev->dev; + + err = mana_gd_setup(pdev); + if (err) + goto unmap_bar; + + err = mana_probe(&gc->mana, false); + if (err) + goto cleanup_gd; + + return 0; + +cleanup_gd: + mana_gd_cleanup(pdev); +unmap_bar: + pci_iounmap(pdev, bar0_va); +free_gc: + pci_set_drvdata(pdev, NULL); + vfree(gc); +release_region: + pci_release_regions(pdev); +disable_dev: + pci_disable_device(pdev); + dev_err(&pdev->dev, "gdma probe failed: err = %d\n", err); + return err; +} + +static void mana_gd_remove(struct pci_dev *pdev) +{ + struct gdma_context *gc = pci_get_drvdata(pdev); + + mana_remove(&gc->mana, false); + + mana_gd_cleanup(pdev); + + pci_iounmap(pdev, gc->bar0_va); + + vfree(gc); + + pci_release_regions(pdev); + pci_disable_device(pdev); +} + +/* The 'state' parameter is not used. */ +static int mana_gd_suspend(struct pci_dev *pdev, pm_message_t state) +{ + struct gdma_context *gc = pci_get_drvdata(pdev); + + mana_remove(&gc->mana, true); + + mana_gd_cleanup(pdev); + + return 0; +} + +/* In case the NIC hardware stops working, the suspend and resume callbacks will + * fail -- if this happens, it's safer to just report an error than try to undo + * what has been done. + */ +static int mana_gd_resume(struct pci_dev *pdev) +{ + struct gdma_context *gc = pci_get_drvdata(pdev); + int err; + + err = mana_gd_setup(pdev); + if (err) + return err; + + err = mana_probe(&gc->mana, true); + if (err) + return err; + + return 0; +} + +/* Quiesce the device for kexec. This is also called upon reboot/shutdown. */ +static void mana_gd_shutdown(struct pci_dev *pdev) +{ + struct gdma_context *gc = pci_get_drvdata(pdev); + + dev_info(&pdev->dev, "Shutdown was called\n"); + + mana_remove(&gc->mana, true); + + mana_gd_cleanup(pdev); + + pci_disable_device(pdev); +} + +static const struct pci_device_id mana_id_table[] = { + { PCI_DEVICE(PCI_VENDOR_ID_MICROSOFT, MANA_PF_DEVICE_ID) }, + { PCI_DEVICE(PCI_VENDOR_ID_MICROSOFT, MANA_VF_DEVICE_ID) }, + { } +}; + +static struct pci_driver mana_driver = { + .name = "mana", + .id_table = mana_id_table, + .probe = mana_gd_probe, + .remove = mana_gd_remove, + .suspend = mana_gd_suspend, + .resume = mana_gd_resume, + .shutdown = mana_gd_shutdown, +}; + +module_pci_driver(mana_driver); + +MODULE_DEVICE_TABLE(pci, mana_id_table); + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_DESCRIPTION("Microsoft Azure Network Adapter driver"); diff --git a/drivers/net/ethernet/microsoft/mana/hw_channel.c b/drivers/net/ethernet/microsoft/mana/hw_channel.c new file mode 100644 index 0000000000..9d1cd3bfcf --- /dev/null +++ b/drivers/net/ethernet/microsoft/mana/hw_channel.c @@ -0,0 +1,870 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +/* Copyright (c) 2021, Microsoft Corporation. */ + +#include +#include + +static int mana_hwc_get_msg_index(struct hw_channel_context *hwc, u16 *msg_id) +{ + struct gdma_resource *r = &hwc->inflight_msg_res; + unsigned long flags; + u32 index; + + down(&hwc->sema); + + spin_lock_irqsave(&r->lock, flags); + + index = find_first_zero_bit(hwc->inflight_msg_res.map, + hwc->inflight_msg_res.size); + + bitmap_set(hwc->inflight_msg_res.map, index, 1); + + spin_unlock_irqrestore(&r->lock, flags); + + *msg_id = index; + + return 0; +} + +static void mana_hwc_put_msg_index(struct hw_channel_context *hwc, u16 msg_id) +{ + struct gdma_resource *r = &hwc->inflight_msg_res; + unsigned long flags; + + spin_lock_irqsave(&r->lock, flags); + bitmap_clear(hwc->inflight_msg_res.map, msg_id, 1); + spin_unlock_irqrestore(&r->lock, flags); + + up(&hwc->sema); +} + +static int mana_hwc_verify_resp_msg(const struct hwc_caller_ctx *caller_ctx, + const struct gdma_resp_hdr *resp_msg, + u32 resp_len) +{ + if (resp_len < sizeof(*resp_msg)) + return -EPROTO; + + if (resp_len > caller_ctx->output_buflen) + return -EPROTO; + + return 0; +} + +static void mana_hwc_handle_resp(struct hw_channel_context *hwc, u32 resp_len, + const struct gdma_resp_hdr *resp_msg) +{ + struct hwc_caller_ctx *ctx; + int err; + + if (!test_bit(resp_msg->response.hwc_msg_id, + hwc->inflight_msg_res.map)) { + dev_err(hwc->dev, "hwc_rx: invalid msg_id = %u\n", + resp_msg->response.hwc_msg_id); + return; + } + + ctx = hwc->caller_ctx + resp_msg->response.hwc_msg_id; + err = mana_hwc_verify_resp_msg(ctx, resp_msg, resp_len); + if (err) + goto out; + + ctx->status_code = resp_msg->status; + + memcpy(ctx->output_buf, resp_msg, resp_len); +out: + ctx->error = err; + complete(&ctx->comp_event); +} + +static int mana_hwc_post_rx_wqe(const struct hwc_wq *hwc_rxq, + struct hwc_work_request *req) +{ + struct device *dev = hwc_rxq->hwc->dev; + struct gdma_sge *sge; + int err; + + sge = &req->sge; + sge->address = (u64)req->buf_sge_addr; + sge->mem_key = hwc_rxq->msg_buf->gpa_mkey; + sge->size = req->buf_len; + + memset(&req->wqe_req, 0, sizeof(struct gdma_wqe_request)); + req->wqe_req.sgl = sge; + req->wqe_req.num_sge = 1; + req->wqe_req.client_data_unit = 0; + + err = mana_gd_post_and_ring(hwc_rxq->gdma_wq, &req->wqe_req, NULL); + if (err) + dev_err(dev, "Failed to post WQE on HWC RQ: %d\n", err); + return err; +} + +static void mana_hwc_init_event_handler(void *ctx, struct gdma_queue *q_self, + struct gdma_event *event) +{ + struct hw_channel_context *hwc = ctx; + struct gdma_dev *gd = hwc->gdma_dev; + union hwc_init_type_data type_data; + union hwc_init_eq_id_db eq_db; + u32 type, val; + + switch (event->type) { + case GDMA_EQE_HWC_INIT_EQ_ID_DB: + eq_db.as_uint32 = event->details[0]; + hwc->cq->gdma_eq->id = eq_db.eq_id; + gd->doorbell = eq_db.doorbell; + break; + + case GDMA_EQE_HWC_INIT_DATA: + type_data.as_uint32 = event->details[0]; + type = type_data.type; + val = type_data.value; + + switch (type) { + case HWC_INIT_DATA_CQID: + hwc->cq->gdma_cq->id = val; + break; + + case HWC_INIT_DATA_RQID: + hwc->rxq->gdma_wq->id = val; + break; + + case HWC_INIT_DATA_SQID: + hwc->txq->gdma_wq->id = val; + break; + + case HWC_INIT_DATA_QUEUE_DEPTH: + hwc->hwc_init_q_depth_max = (u16)val; + break; + + case HWC_INIT_DATA_MAX_REQUEST: + hwc->hwc_init_max_req_msg_size = val; + break; + + case HWC_INIT_DATA_MAX_RESPONSE: + hwc->hwc_init_max_resp_msg_size = val; + break; + + case HWC_INIT_DATA_MAX_NUM_CQS: + gd->gdma_context->max_num_cqs = val; + break; + + case HWC_INIT_DATA_PDID: + hwc->gdma_dev->pdid = val; + break; + + case HWC_INIT_DATA_GPA_MKEY: + hwc->rxq->msg_buf->gpa_mkey = val; + hwc->txq->msg_buf->gpa_mkey = val; + break; + + case HWC_INIT_DATA_PF_DEST_RQ_ID: + hwc->pf_dest_vrq_id = val; + break; + + case HWC_INIT_DATA_PF_DEST_CQ_ID: + hwc->pf_dest_vrcq_id = val; + break; + } + + break; + + case GDMA_EQE_HWC_INIT_DONE: + complete(&hwc->hwc_init_eqe_comp); + break; + + case GDMA_EQE_HWC_SOC_RECONFIG_DATA: + type_data.as_uint32 = event->details[0]; + type = type_data.type; + val = type_data.value; + + switch (type) { + case HWC_DATA_CFG_HWC_TIMEOUT: + hwc->hwc_timeout = val; + break; + + default: + dev_warn(hwc->dev, "Received unknown reconfig type %u\n", type); + break; + } + + break; + + default: + dev_warn(hwc->dev, "Received unknown gdma event %u\n", event->type); + /* Ignore unknown events, which should never happen. */ + break; + } +} + +static void mana_hwc_rx_event_handler(void *ctx, u32 gdma_rxq_id, + const struct hwc_rx_oob *rx_oob) +{ + struct hw_channel_context *hwc = ctx; + struct hwc_wq *hwc_rxq = hwc->rxq; + struct hwc_work_request *rx_req; + struct gdma_resp_hdr *resp; + struct gdma_wqe *dma_oob; + struct gdma_queue *rq; + struct gdma_sge *sge; + u64 rq_base_addr; + u64 rx_req_idx; + u8 *wqe; + + if (WARN_ON_ONCE(hwc_rxq->gdma_wq->id != gdma_rxq_id)) + return; + + rq = hwc_rxq->gdma_wq; + wqe = mana_gd_get_wqe_ptr(rq, rx_oob->wqe_offset / GDMA_WQE_BU_SIZE); + dma_oob = (struct gdma_wqe *)wqe; + + sge = (struct gdma_sge *)(wqe + 8 + dma_oob->inline_oob_size_div4 * 4); + + /* Select the RX work request for virtual address and for reposting. */ + rq_base_addr = hwc_rxq->msg_buf->mem_info.dma_handle; + rx_req_idx = (sge->address - rq_base_addr) / hwc->max_req_msg_size; + + rx_req = &hwc_rxq->msg_buf->reqs[rx_req_idx]; + resp = (struct gdma_resp_hdr *)rx_req->buf_va; + + if (resp->response.hwc_msg_id >= hwc->num_inflight_msg) { + dev_err(hwc->dev, "HWC RX: wrong msg_id=%u\n", + resp->response.hwc_msg_id); + return; + } + + mana_hwc_handle_resp(hwc, rx_oob->tx_oob_data_size, resp); + + /* Do no longer use 'resp', because the buffer is posted to the HW + * in the below mana_hwc_post_rx_wqe(). + */ + resp = NULL; + + mana_hwc_post_rx_wqe(hwc_rxq, rx_req); +} + +static void mana_hwc_tx_event_handler(void *ctx, u32 gdma_txq_id, + const struct hwc_rx_oob *rx_oob) +{ + struct hw_channel_context *hwc = ctx; + struct hwc_wq *hwc_txq = hwc->txq; + + WARN_ON_ONCE(!hwc_txq || hwc_txq->gdma_wq->id != gdma_txq_id); +} + +static int mana_hwc_create_gdma_wq(struct hw_channel_context *hwc, + enum gdma_queue_type type, u64 queue_size, + struct gdma_queue **queue) +{ + struct gdma_queue_spec spec = {}; + + if (type != GDMA_SQ && type != GDMA_RQ) + return -EINVAL; + + spec.type = type; + spec.monitor_avl_buf = false; + spec.queue_size = queue_size; + + return mana_gd_create_hwc_queue(hwc->gdma_dev, &spec, queue); +} + +static int mana_hwc_create_gdma_cq(struct hw_channel_context *hwc, + u64 queue_size, + void *ctx, gdma_cq_callback *cb, + struct gdma_queue *parent_eq, + struct gdma_queue **queue) +{ + struct gdma_queue_spec spec = {}; + + spec.type = GDMA_CQ; + spec.monitor_avl_buf = false; + spec.queue_size = queue_size; + spec.cq.context = ctx; + spec.cq.callback = cb; + spec.cq.parent_eq = parent_eq; + + return mana_gd_create_hwc_queue(hwc->gdma_dev, &spec, queue); +} + +static int mana_hwc_create_gdma_eq(struct hw_channel_context *hwc, + u64 queue_size, + void *ctx, gdma_eq_callback *cb, + struct gdma_queue **queue) +{ + struct gdma_queue_spec spec = {}; + + spec.type = GDMA_EQ; + spec.monitor_avl_buf = false; + spec.queue_size = queue_size; + spec.eq.context = ctx; + spec.eq.callback = cb; + spec.eq.log2_throttle_limit = DEFAULT_LOG2_THROTTLING_FOR_ERROR_EQ; + + return mana_gd_create_hwc_queue(hwc->gdma_dev, &spec, queue); +} + +static void mana_hwc_comp_event(void *ctx, struct gdma_queue *q_self) +{ + struct hwc_rx_oob comp_data = {}; + struct gdma_comp *completions; + struct hwc_cq *hwc_cq = ctx; + int comp_read, i; + + WARN_ON_ONCE(hwc_cq->gdma_cq != q_self); + + completions = hwc_cq->comp_buf; + comp_read = mana_gd_poll_cq(q_self, completions, hwc_cq->queue_depth); + WARN_ON_ONCE(comp_read <= 0 || comp_read > hwc_cq->queue_depth); + + for (i = 0; i < comp_read; ++i) { + comp_data = *(struct hwc_rx_oob *)completions[i].cqe_data; + + if (completions[i].is_sq) + hwc_cq->tx_event_handler(hwc_cq->tx_event_ctx, + completions[i].wq_num, + &comp_data); + else + hwc_cq->rx_event_handler(hwc_cq->rx_event_ctx, + completions[i].wq_num, + &comp_data); + } + + mana_gd_ring_cq(q_self, SET_ARM_BIT); +} + +static void mana_hwc_destroy_cq(struct gdma_context *gc, struct hwc_cq *hwc_cq) +{ + kfree(hwc_cq->comp_buf); + + if (hwc_cq->gdma_cq) + mana_gd_destroy_queue(gc, hwc_cq->gdma_cq); + + if (hwc_cq->gdma_eq) + mana_gd_destroy_queue(gc, hwc_cq->gdma_eq); + + kfree(hwc_cq); +} + +static int mana_hwc_create_cq(struct hw_channel_context *hwc, u16 q_depth, + gdma_eq_callback *callback, void *ctx, + hwc_rx_event_handler_t *rx_ev_hdlr, + void *rx_ev_ctx, + hwc_tx_event_handler_t *tx_ev_hdlr, + void *tx_ev_ctx, struct hwc_cq **hwc_cq_ptr) +{ + struct gdma_queue *eq, *cq; + struct gdma_comp *comp_buf; + struct hwc_cq *hwc_cq; + u32 eq_size, cq_size; + int err; + + eq_size = roundup_pow_of_two(GDMA_EQE_SIZE * q_depth); + if (eq_size < MINIMUM_SUPPORTED_PAGE_SIZE) + eq_size = MINIMUM_SUPPORTED_PAGE_SIZE; + + cq_size = roundup_pow_of_two(GDMA_CQE_SIZE * q_depth); + if (cq_size < MINIMUM_SUPPORTED_PAGE_SIZE) + cq_size = MINIMUM_SUPPORTED_PAGE_SIZE; + + hwc_cq = kzalloc(sizeof(*hwc_cq), GFP_KERNEL); + if (!hwc_cq) + return -ENOMEM; + + err = mana_hwc_create_gdma_eq(hwc, eq_size, ctx, callback, &eq); + if (err) { + dev_err(hwc->dev, "Failed to create HWC EQ for RQ: %d\n", err); + goto out; + } + hwc_cq->gdma_eq = eq; + + err = mana_hwc_create_gdma_cq(hwc, cq_size, hwc_cq, mana_hwc_comp_event, + eq, &cq); + if (err) { + dev_err(hwc->dev, "Failed to create HWC CQ for RQ: %d\n", err); + goto out; + } + hwc_cq->gdma_cq = cq; + + comp_buf = kcalloc(q_depth, sizeof(*comp_buf), GFP_KERNEL); + if (!comp_buf) { + err = -ENOMEM; + goto out; + } + + hwc_cq->hwc = hwc; + hwc_cq->comp_buf = comp_buf; + hwc_cq->queue_depth = q_depth; + hwc_cq->rx_event_handler = rx_ev_hdlr; + hwc_cq->rx_event_ctx = rx_ev_ctx; + hwc_cq->tx_event_handler = tx_ev_hdlr; + hwc_cq->tx_event_ctx = tx_ev_ctx; + + *hwc_cq_ptr = hwc_cq; + return 0; +out: + mana_hwc_destroy_cq(hwc->gdma_dev->gdma_context, hwc_cq); + return err; +} + +static int mana_hwc_alloc_dma_buf(struct hw_channel_context *hwc, u16 q_depth, + u32 max_msg_size, + struct hwc_dma_buf **dma_buf_ptr) +{ + struct gdma_context *gc = hwc->gdma_dev->gdma_context; + struct hwc_work_request *hwc_wr; + struct hwc_dma_buf *dma_buf; + struct gdma_mem_info *gmi; + void *virt_addr; + u32 buf_size; + u8 *base_pa; + int err; + u16 i; + + dma_buf = kzalloc(struct_size(dma_buf, reqs, q_depth), GFP_KERNEL); + if (!dma_buf) + return -ENOMEM; + + dma_buf->num_reqs = q_depth; + + buf_size = PAGE_ALIGN(q_depth * max_msg_size); + + gmi = &dma_buf->mem_info; + err = mana_gd_alloc_memory(gc, buf_size, gmi); + if (err) { + dev_err(hwc->dev, "Failed to allocate DMA buffer: %d\n", err); + goto out; + } + + virt_addr = dma_buf->mem_info.virt_addr; + base_pa = (u8 *)dma_buf->mem_info.dma_handle; + + for (i = 0; i < q_depth; i++) { + hwc_wr = &dma_buf->reqs[i]; + + hwc_wr->buf_va = virt_addr + i * max_msg_size; + hwc_wr->buf_sge_addr = base_pa + i * max_msg_size; + + hwc_wr->buf_len = max_msg_size; + } + + *dma_buf_ptr = dma_buf; + return 0; +out: + kfree(dma_buf); + return err; +} + +static void mana_hwc_dealloc_dma_buf(struct hw_channel_context *hwc, + struct hwc_dma_buf *dma_buf) +{ + if (!dma_buf) + return; + + mana_gd_free_memory(&dma_buf->mem_info); + + kfree(dma_buf); +} + +static void mana_hwc_destroy_wq(struct hw_channel_context *hwc, + struct hwc_wq *hwc_wq) +{ + mana_hwc_dealloc_dma_buf(hwc, hwc_wq->msg_buf); + + if (hwc_wq->gdma_wq) + mana_gd_destroy_queue(hwc->gdma_dev->gdma_context, + hwc_wq->gdma_wq); + + kfree(hwc_wq); +} + +static int mana_hwc_create_wq(struct hw_channel_context *hwc, + enum gdma_queue_type q_type, u16 q_depth, + u32 max_msg_size, struct hwc_cq *hwc_cq, + struct hwc_wq **hwc_wq_ptr) +{ + struct gdma_queue *queue; + struct hwc_wq *hwc_wq; + u32 queue_size; + int err; + + WARN_ON(q_type != GDMA_SQ && q_type != GDMA_RQ); + + if (q_type == GDMA_RQ) + queue_size = roundup_pow_of_two(GDMA_MAX_RQE_SIZE * q_depth); + else + queue_size = roundup_pow_of_two(GDMA_MAX_SQE_SIZE * q_depth); + + if (queue_size < MINIMUM_SUPPORTED_PAGE_SIZE) + queue_size = MINIMUM_SUPPORTED_PAGE_SIZE; + + hwc_wq = kzalloc(sizeof(*hwc_wq), GFP_KERNEL); + if (!hwc_wq) + return -ENOMEM; + + err = mana_hwc_create_gdma_wq(hwc, q_type, queue_size, &queue); + if (err) + goto out; + + hwc_wq->hwc = hwc; + hwc_wq->gdma_wq = queue; + hwc_wq->queue_depth = q_depth; + hwc_wq->hwc_cq = hwc_cq; + + err = mana_hwc_alloc_dma_buf(hwc, q_depth, max_msg_size, + &hwc_wq->msg_buf); + if (err) + goto out; + + *hwc_wq_ptr = hwc_wq; + return 0; +out: + if (err) + mana_hwc_destroy_wq(hwc, hwc_wq); + return err; +} + +static int mana_hwc_post_tx_wqe(const struct hwc_wq *hwc_txq, + struct hwc_work_request *req, + u32 dest_virt_rq_id, u32 dest_virt_rcq_id, + bool dest_pf) +{ + struct device *dev = hwc_txq->hwc->dev; + struct hwc_tx_oob *tx_oob; + struct gdma_sge *sge; + int err; + + if (req->msg_size == 0 || req->msg_size > req->buf_len) { + dev_err(dev, "wrong msg_size: %u, buf_len: %u\n", + req->msg_size, req->buf_len); + return -EINVAL; + } + + tx_oob = &req->tx_oob; + + tx_oob->vrq_id = dest_virt_rq_id; + tx_oob->dest_vfid = 0; + tx_oob->vrcq_id = dest_virt_rcq_id; + tx_oob->vscq_id = hwc_txq->hwc_cq->gdma_cq->id; + tx_oob->loopback = false; + tx_oob->lso_override = false; + tx_oob->dest_pf = dest_pf; + tx_oob->vsq_id = hwc_txq->gdma_wq->id; + + sge = &req->sge; + sge->address = (u64)req->buf_sge_addr; + sge->mem_key = hwc_txq->msg_buf->gpa_mkey; + sge->size = req->msg_size; + + memset(&req->wqe_req, 0, sizeof(struct gdma_wqe_request)); + req->wqe_req.sgl = sge; + req->wqe_req.num_sge = 1; + req->wqe_req.inline_oob_size = sizeof(struct hwc_tx_oob); + req->wqe_req.inline_oob_data = tx_oob; + req->wqe_req.client_data_unit = 0; + + err = mana_gd_post_and_ring(hwc_txq->gdma_wq, &req->wqe_req, NULL); + if (err) + dev_err(dev, "Failed to post WQE on HWC SQ: %d\n", err); + return err; +} + +static int mana_hwc_init_inflight_msg(struct hw_channel_context *hwc, + u16 num_msg) +{ + int err; + + sema_init(&hwc->sema, num_msg); + + err = mana_gd_alloc_res_map(num_msg, &hwc->inflight_msg_res); + if (err) + dev_err(hwc->dev, "Failed to init inflight_msg_res: %d\n", err); + return err; +} + +static int mana_hwc_test_channel(struct hw_channel_context *hwc, u16 q_depth, + u32 max_req_msg_size, u32 max_resp_msg_size) +{ + struct gdma_context *gc = hwc->gdma_dev->gdma_context; + struct hwc_wq *hwc_rxq = hwc->rxq; + struct hwc_work_request *req; + struct hwc_caller_ctx *ctx; + int err; + int i; + + /* Post all WQEs on the RQ */ + for (i = 0; i < q_depth; i++) { + req = &hwc_rxq->msg_buf->reqs[i]; + err = mana_hwc_post_rx_wqe(hwc_rxq, req); + if (err) + return err; + } + + ctx = kcalloc(q_depth, sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + for (i = 0; i < q_depth; ++i) + init_completion(&ctx[i].comp_event); + + hwc->caller_ctx = ctx; + + return mana_gd_test_eq(gc, hwc->cq->gdma_eq); +} + +static int mana_hwc_establish_channel(struct gdma_context *gc, u16 *q_depth, + u32 *max_req_msg_size, + u32 *max_resp_msg_size) +{ + struct hw_channel_context *hwc = gc->hwc.driver_data; + struct gdma_queue *rq = hwc->rxq->gdma_wq; + struct gdma_queue *sq = hwc->txq->gdma_wq; + struct gdma_queue *eq = hwc->cq->gdma_eq; + struct gdma_queue *cq = hwc->cq->gdma_cq; + int err; + + init_completion(&hwc->hwc_init_eqe_comp); + + err = mana_smc_setup_hwc(&gc->shm_channel, false, + eq->mem_info.dma_handle, + cq->mem_info.dma_handle, + rq->mem_info.dma_handle, + sq->mem_info.dma_handle, + eq->eq.msix_index); + if (err) + return err; + + if (!wait_for_completion_timeout(&hwc->hwc_init_eqe_comp, 60 * HZ)) + return -ETIMEDOUT; + + *q_depth = hwc->hwc_init_q_depth_max; + *max_req_msg_size = hwc->hwc_init_max_req_msg_size; + *max_resp_msg_size = hwc->hwc_init_max_resp_msg_size; + + /* Both were set in mana_hwc_init_event_handler(). */ + if (WARN_ON(cq->id >= gc->max_num_cqs)) + return -EPROTO; + + gc->cq_table = vcalloc(gc->max_num_cqs, sizeof(struct gdma_queue *)); + if (!gc->cq_table) + return -ENOMEM; + + gc->cq_table[cq->id] = cq; + + return 0; +} + +static int mana_hwc_init_queues(struct hw_channel_context *hwc, u16 q_depth, + u32 max_req_msg_size, u32 max_resp_msg_size) +{ + int err; + + err = mana_hwc_init_inflight_msg(hwc, q_depth); + if (err) + return err; + + /* CQ is shared by SQ and RQ, so CQ's queue depth is the sum of SQ + * queue depth and RQ queue depth. + */ + err = mana_hwc_create_cq(hwc, q_depth * 2, + mana_hwc_init_event_handler, hwc, + mana_hwc_rx_event_handler, hwc, + mana_hwc_tx_event_handler, hwc, &hwc->cq); + if (err) { + dev_err(hwc->dev, "Failed to create HWC CQ: %d\n", err); + goto out; + } + + err = mana_hwc_create_wq(hwc, GDMA_RQ, q_depth, max_req_msg_size, + hwc->cq, &hwc->rxq); + if (err) { + dev_err(hwc->dev, "Failed to create HWC RQ: %d\n", err); + goto out; + } + + err = mana_hwc_create_wq(hwc, GDMA_SQ, q_depth, max_resp_msg_size, + hwc->cq, &hwc->txq); + if (err) { + dev_err(hwc->dev, "Failed to create HWC SQ: %d\n", err); + goto out; + } + + hwc->num_inflight_msg = q_depth; + hwc->max_req_msg_size = max_req_msg_size; + + return 0; +out: + /* mana_hwc_create_channel() will do the cleanup.*/ + return err; +} + +int mana_hwc_create_channel(struct gdma_context *gc) +{ + u32 max_req_msg_size, max_resp_msg_size; + struct gdma_dev *gd = &gc->hwc; + struct hw_channel_context *hwc; + u16 q_depth_max; + int err; + + hwc = kzalloc(sizeof(*hwc), GFP_KERNEL); + if (!hwc) + return -ENOMEM; + + gd->gdma_context = gc; + gd->driver_data = hwc; + hwc->gdma_dev = gd; + hwc->dev = gc->dev; + hwc->hwc_timeout = HW_CHANNEL_WAIT_RESOURCE_TIMEOUT_MS; + + /* HWC's instance number is always 0. */ + gd->dev_id.as_uint32 = 0; + gd->dev_id.type = GDMA_DEVICE_HWC; + + gd->pdid = INVALID_PDID; + gd->doorbell = INVALID_DOORBELL; + + /* mana_hwc_init_queues() only creates the required data structures, + * and doesn't touch the HWC device. + */ + err = mana_hwc_init_queues(hwc, HW_CHANNEL_VF_BOOTSTRAP_QUEUE_DEPTH, + HW_CHANNEL_MAX_REQUEST_SIZE, + HW_CHANNEL_MAX_RESPONSE_SIZE); + if (err) { + dev_err(hwc->dev, "Failed to initialize HWC: %d\n", err); + goto out; + } + + err = mana_hwc_establish_channel(gc, &q_depth_max, &max_req_msg_size, + &max_resp_msg_size); + if (err) { + dev_err(hwc->dev, "Failed to establish HWC: %d\n", err); + goto out; + } + + err = mana_hwc_test_channel(gc->hwc.driver_data, + HW_CHANNEL_VF_BOOTSTRAP_QUEUE_DEPTH, + max_req_msg_size, max_resp_msg_size); + if (err) { + dev_err(hwc->dev, "Failed to test HWC: %d\n", err); + goto out; + } + + return 0; +out: + mana_hwc_destroy_channel(gc); + return err; +} + +void mana_hwc_destroy_channel(struct gdma_context *gc) +{ + struct hw_channel_context *hwc = gc->hwc.driver_data; + + if (!hwc) + return; + + /* gc->max_num_cqs is set in mana_hwc_init_event_handler(). If it's + * non-zero, the HWC worked and we should tear down the HWC here. + */ + if (gc->max_num_cqs > 0) { + mana_smc_teardown_hwc(&gc->shm_channel, false); + gc->max_num_cqs = 0; + } + + kfree(hwc->caller_ctx); + hwc->caller_ctx = NULL; + + if (hwc->txq) + mana_hwc_destroy_wq(hwc, hwc->txq); + + if (hwc->rxq) + mana_hwc_destroy_wq(hwc, hwc->rxq); + + if (hwc->cq) + mana_hwc_destroy_cq(hwc->gdma_dev->gdma_context, hwc->cq); + + mana_gd_free_res_map(&hwc->inflight_msg_res); + + hwc->num_inflight_msg = 0; + + hwc->gdma_dev->doorbell = INVALID_DOORBELL; + hwc->gdma_dev->pdid = INVALID_PDID; + + hwc->hwc_timeout = 0; + + kfree(hwc); + gc->hwc.driver_data = NULL; + gc->hwc.gdma_context = NULL; + + vfree(gc->cq_table); + gc->cq_table = NULL; +} + +int mana_hwc_send_request(struct hw_channel_context *hwc, u32 req_len, + const void *req, u32 resp_len, void *resp) +{ + struct gdma_context *gc = hwc->gdma_dev->gdma_context; + struct hwc_work_request *tx_wr; + struct hwc_wq *txq = hwc->txq; + struct gdma_req_hdr *req_msg; + struct hwc_caller_ctx *ctx; + u32 dest_vrcq = 0; + u32 dest_vrq = 0; + u16 msg_id; + int err; + + mana_hwc_get_msg_index(hwc, &msg_id); + + tx_wr = &txq->msg_buf->reqs[msg_id]; + + if (req_len > tx_wr->buf_len) { + dev_err(hwc->dev, "HWC: req msg size: %d > %d\n", req_len, + tx_wr->buf_len); + err = -EINVAL; + goto out; + } + + ctx = hwc->caller_ctx + msg_id; + ctx->output_buf = resp; + ctx->output_buflen = resp_len; + + req_msg = (struct gdma_req_hdr *)tx_wr->buf_va; + if (req) + memcpy(req_msg, req, req_len); + + req_msg->req.hwc_msg_id = msg_id; + + tx_wr->msg_size = req_len; + + if (gc->is_pf) { + dest_vrq = hwc->pf_dest_vrq_id; + dest_vrcq = hwc->pf_dest_vrcq_id; + } + + err = mana_hwc_post_tx_wqe(txq, tx_wr, dest_vrq, dest_vrcq, false); + if (err) { + dev_err(hwc->dev, "HWC: Failed to post send WQE: %d\n", err); + goto out; + } + + if (!wait_for_completion_timeout(&ctx->comp_event, + (msecs_to_jiffies(hwc->hwc_timeout) * HZ))) { + dev_err(hwc->dev, "HWC: Request timed out!\n"); + err = -ETIMEDOUT; + goto out; + } + + if (ctx->error) { + err = ctx->error; + goto out; + } + + if (ctx->status_code && ctx->status_code != GDMA_STATUS_MORE_ENTRIES) { + dev_err(hwc->dev, "HWC: Failed hw_channel req: 0x%x\n", + ctx->status_code); + err = -EPROTO; + goto out; + } +out: + mana_hwc_put_msg_index(hwc, msg_id); + return err; +} diff --git a/drivers/net/ethernet/microsoft/mana/mana_bpf.c b/drivers/net/ethernet/microsoft/mana/mana_bpf.c new file mode 100644 index 0000000000..23b1521c0d --- /dev/null +++ b/drivers/net/ethernet/microsoft/mana/mana_bpf.c @@ -0,0 +1,226 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +/* Copyright (c) 2021, Microsoft Corporation. */ + +#include +#include +#include +#include +#include +#include + +#include + +void mana_xdp_tx(struct sk_buff *skb, struct net_device *ndev) +{ + u16 txq_idx = skb_get_queue_mapping(skb); + struct netdev_queue *ndevtxq; + int rc; + + __skb_push(skb, ETH_HLEN); + + ndevtxq = netdev_get_tx_queue(ndev, txq_idx); + __netif_tx_lock(ndevtxq, smp_processor_id()); + + rc = mana_start_xmit(skb, ndev); + + __netif_tx_unlock(ndevtxq); + + if (dev_xmit_complete(rc)) + return; + + dev_kfree_skb_any(skb); + ndev->stats.tx_dropped++; +} + +static int mana_xdp_xmit_fm(struct net_device *ndev, struct xdp_frame *frame, + u16 q_idx) +{ + struct sk_buff *skb; + + skb = xdp_build_skb_from_frame(frame, ndev); + if (unlikely(!skb)) + return -ENOMEM; + + skb_set_queue_mapping(skb, q_idx); + + mana_xdp_tx(skb, ndev); + + return 0; +} + +int mana_xdp_xmit(struct net_device *ndev, int n, struct xdp_frame **frames, + u32 flags) +{ + struct mana_port_context *apc = netdev_priv(ndev); + struct mana_stats_tx *tx_stats; + int i, count = 0; + u16 q_idx; + + if (unlikely(!apc->port_is_up)) + return 0; + + q_idx = smp_processor_id() % ndev->real_num_tx_queues; + + for (i = 0; i < n; i++) { + if (mana_xdp_xmit_fm(ndev, frames[i], q_idx)) + break; + + count++; + } + + tx_stats = &apc->tx_qp[q_idx].txq.stats; + + u64_stats_update_begin(&tx_stats->syncp); + tx_stats->xdp_xmit += count; + u64_stats_update_end(&tx_stats->syncp); + + return count; +} + +u32 mana_run_xdp(struct net_device *ndev, struct mana_rxq *rxq, + struct xdp_buff *xdp, void *buf_va, uint pkt_len) +{ + struct mana_stats_rx *rx_stats; + struct bpf_prog *prog; + u32 act = XDP_PASS; + + rcu_read_lock(); + prog = rcu_dereference(rxq->bpf_prog); + + if (!prog) + goto out; + + xdp_init_buff(xdp, PAGE_SIZE, &rxq->xdp_rxq); + xdp_prepare_buff(xdp, buf_va, XDP_PACKET_HEADROOM, pkt_len, false); + + act = bpf_prog_run_xdp(prog, xdp); + + rx_stats = &rxq->stats; + + switch (act) { + case XDP_PASS: + case XDP_TX: + case XDP_DROP: + break; + + case XDP_REDIRECT: + rxq->xdp_rc = xdp_do_redirect(ndev, xdp, prog); + if (!rxq->xdp_rc) { + rxq->xdp_flush = true; + + u64_stats_update_begin(&rx_stats->syncp); + rx_stats->packets++; + rx_stats->bytes += pkt_len; + rx_stats->xdp_redirect++; + u64_stats_update_end(&rx_stats->syncp); + + break; + } + + fallthrough; + + case XDP_ABORTED: + trace_xdp_exception(ndev, prog, act); + break; + + default: + bpf_warn_invalid_xdp_action(ndev, prog, act); + } + +out: + rcu_read_unlock(); + + return act; +} + +struct bpf_prog *mana_xdp_get(struct mana_port_context *apc) +{ + ASSERT_RTNL(); + + return apc->bpf_prog; +} + +static struct bpf_prog *mana_chn_xdp_get(struct mana_port_context *apc) +{ + return rtnl_dereference(apc->rxqs[0]->bpf_prog); +} + +/* Set xdp program on channels */ +void mana_chn_setxdp(struct mana_port_context *apc, struct bpf_prog *prog) +{ + struct bpf_prog *old_prog = mana_chn_xdp_get(apc); + unsigned int num_queues = apc->num_queues; + int i; + + ASSERT_RTNL(); + + if (old_prog == prog) + return; + + if (prog) + bpf_prog_add(prog, num_queues); + + for (i = 0; i < num_queues; i++) + rcu_assign_pointer(apc->rxqs[i]->bpf_prog, prog); + + if (old_prog) + for (i = 0; i < num_queues; i++) + bpf_prog_put(old_prog); +} + +static int mana_xdp_set(struct net_device *ndev, struct bpf_prog *prog, + struct netlink_ext_ack *extack) +{ + struct mana_port_context *apc = netdev_priv(ndev); + struct bpf_prog *old_prog; + struct gdma_context *gc; + + gc = apc->ac->gdma_dev->gdma_context; + + old_prog = mana_xdp_get(apc); + + if (!old_prog && !prog) + return 0; + + if (prog && ndev->mtu > MANA_XDP_MTU_MAX) { + netdev_err(ndev, "XDP: mtu:%u too large, mtu_max:%lu\n", + ndev->mtu, MANA_XDP_MTU_MAX); + NL_SET_ERR_MSG_MOD(extack, "XDP: mtu too large"); + + return -EOPNOTSUPP; + } + + /* One refcnt of the prog is hold by the caller already, so + * don't increase refcnt for this one. + */ + apc->bpf_prog = prog; + + if (old_prog) + bpf_prog_put(old_prog); + + if (apc->port_is_up) + mana_chn_setxdp(apc, prog); + + if (prog) + ndev->max_mtu = MANA_XDP_MTU_MAX; + else + ndev->max_mtu = gc->adapter_mtu - ETH_HLEN; + + return 0; +} + +int mana_bpf(struct net_device *ndev, struct netdev_bpf *bpf) +{ + struct netlink_ext_ack *extack = bpf->extack; + int ret; + + switch (bpf->command) { + case XDP_SETUP_PROG: + return mana_xdp_set(ndev, bpf->prog, extack); + + default: + return -EOPNOTSUPP; + } + + return ret; +} diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c new file mode 100644 index 0000000000..48ea4aeeea --- /dev/null +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -0,0 +1,2904 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +/* Copyright (c) 2021, Microsoft Corporation. */ + +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include + +static DEFINE_IDA(mana_adev_ida); + +static int mana_adev_idx_alloc(void) +{ + return ida_alloc(&mana_adev_ida, GFP_KERNEL); +} + +static void mana_adev_idx_free(int idx) +{ + ida_free(&mana_adev_ida, idx); +} + +/* Microsoft Azure Network Adapter (MANA) functions */ + +static int mana_open(struct net_device *ndev) +{ + struct mana_port_context *apc = netdev_priv(ndev); + int err; + + err = mana_alloc_queues(ndev); + if (err) + return err; + + apc->port_is_up = true; + + /* Ensure port state updated before txq state */ + smp_wmb(); + + netif_carrier_on(ndev); + netif_tx_wake_all_queues(ndev); + + return 0; +} + +static int mana_close(struct net_device *ndev) +{ + struct mana_port_context *apc = netdev_priv(ndev); + + if (!apc->port_is_up) + return 0; + + return mana_detach(ndev, true); +} + +static bool mana_can_tx(struct gdma_queue *wq) +{ + return mana_gd_wq_avail_space(wq) >= MAX_TX_WQE_SIZE; +} + +static unsigned int mana_checksum_info(struct sk_buff *skb) +{ + if (skb->protocol == htons(ETH_P_IP)) { + struct iphdr *ip = ip_hdr(skb); + + if (ip->protocol == IPPROTO_TCP) + return IPPROTO_TCP; + + if (ip->protocol == IPPROTO_UDP) + return IPPROTO_UDP; + } else if (skb->protocol == htons(ETH_P_IPV6)) { + struct ipv6hdr *ip6 = ipv6_hdr(skb); + + if (ip6->nexthdr == IPPROTO_TCP) + return IPPROTO_TCP; + + if (ip6->nexthdr == IPPROTO_UDP) + return IPPROTO_UDP; + } + + /* No csum offloading */ + return 0; +} + +static void mana_add_sge(struct mana_tx_package *tp, struct mana_skb_head *ash, + int sg_i, dma_addr_t da, int sge_len, u32 gpa_mkey) +{ + ash->dma_handle[sg_i] = da; + ash->size[sg_i] = sge_len; + + tp->wqe_req.sgl[sg_i].address = da; + tp->wqe_req.sgl[sg_i].mem_key = gpa_mkey; + tp->wqe_req.sgl[sg_i].size = sge_len; +} + +static int mana_map_skb(struct sk_buff *skb, struct mana_port_context *apc, + struct mana_tx_package *tp, int gso_hs) +{ + struct mana_skb_head *ash = (struct mana_skb_head *)skb->head; + int hsg = 1; /* num of SGEs of linear part */ + struct gdma_dev *gd = apc->ac->gdma_dev; + int skb_hlen = skb_headlen(skb); + int sge0_len, sge1_len = 0; + struct gdma_context *gc; + struct device *dev; + skb_frag_t *frag; + dma_addr_t da; + int sg_i; + int i; + + gc = gd->gdma_context; + dev = gc->dev; + + if (gso_hs && gso_hs < skb_hlen) { + sge0_len = gso_hs; + sge1_len = skb_hlen - gso_hs; + } else { + sge0_len = skb_hlen; + } + + da = dma_map_single(dev, skb->data, sge0_len, DMA_TO_DEVICE); + if (dma_mapping_error(dev, da)) + return -ENOMEM; + + mana_add_sge(tp, ash, 0, da, sge0_len, gd->gpa_mkey); + + if (sge1_len) { + sg_i = 1; + da = dma_map_single(dev, skb->data + sge0_len, sge1_len, + DMA_TO_DEVICE); + if (dma_mapping_error(dev, da)) + goto frag_err; + + mana_add_sge(tp, ash, sg_i, da, sge1_len, gd->gpa_mkey); + hsg = 2; + } + + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + sg_i = hsg + i; + + frag = &skb_shinfo(skb)->frags[i]; + da = skb_frag_dma_map(dev, frag, 0, skb_frag_size(frag), + DMA_TO_DEVICE); + if (dma_mapping_error(dev, da)) + goto frag_err; + + mana_add_sge(tp, ash, sg_i, da, skb_frag_size(frag), + gd->gpa_mkey); + } + + return 0; + +frag_err: + for (i = sg_i - 1; i >= hsg; i--) + dma_unmap_page(dev, ash->dma_handle[i], ash->size[i], + DMA_TO_DEVICE); + + for (i = hsg - 1; i >= 0; i--) + dma_unmap_single(dev, ash->dma_handle[i], ash->size[i], + DMA_TO_DEVICE); + + return -ENOMEM; +} + +/* Handle the case when GSO SKB linear length is too large. + * MANA NIC requires GSO packets to put only the packet header to SGE0. + * So, we need 2 SGEs for the skb linear part which contains more than the + * header. + * Return a positive value for the number of SGEs, or a negative value + * for an error. + */ +static int mana_fix_skb_head(struct net_device *ndev, struct sk_buff *skb, + int gso_hs) +{ + int num_sge = 1 + skb_shinfo(skb)->nr_frags; + int skb_hlen = skb_headlen(skb); + + if (gso_hs < skb_hlen) { + num_sge++; + } else if (gso_hs > skb_hlen) { + if (net_ratelimit()) + netdev_err(ndev, + "TX nonlinear head: hs:%d, skb_hlen:%d\n", + gso_hs, skb_hlen); + + return -EINVAL; + } + + return num_sge; +} + +/* Get the GSO packet's header size */ +static int mana_get_gso_hs(struct sk_buff *skb) +{ + int gso_hs; + + if (skb->encapsulation) { + gso_hs = skb_inner_tcp_all_headers(skb); + } else { + if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) { + gso_hs = skb_transport_offset(skb) + + sizeof(struct udphdr); + } else { + gso_hs = skb_tcp_all_headers(skb); + } + } + + return gso_hs; +} + +netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev) +{ + enum mana_tx_pkt_format pkt_fmt = MANA_SHORT_PKT_FMT; + struct mana_port_context *apc = netdev_priv(ndev); + int gso_hs = 0; /* zero for non-GSO pkts */ + u16 txq_idx = skb_get_queue_mapping(skb); + struct gdma_dev *gd = apc->ac->gdma_dev; + bool ipv4 = false, ipv6 = false; + struct mana_tx_package pkg = {}; + struct netdev_queue *net_txq; + struct mana_stats_tx *tx_stats; + struct gdma_queue *gdma_sq; + unsigned int csum_type; + struct mana_txq *txq; + struct mana_cq *cq; + int err, len; + + if (unlikely(!apc->port_is_up)) + goto tx_drop; + + if (skb_cow_head(skb, MANA_HEADROOM)) + goto tx_drop_count; + + txq = &apc->tx_qp[txq_idx].txq; + gdma_sq = txq->gdma_sq; + cq = &apc->tx_qp[txq_idx].tx_cq; + tx_stats = &txq->stats; + + pkg.tx_oob.s_oob.vcq_num = cq->gdma_id; + pkg.tx_oob.s_oob.vsq_frame = txq->vsq_frame; + + if (txq->vp_offset > MANA_SHORT_VPORT_OFFSET_MAX) { + pkg.tx_oob.l_oob.long_vp_offset = txq->vp_offset; + pkt_fmt = MANA_LONG_PKT_FMT; + } else { + pkg.tx_oob.s_oob.short_vp_offset = txq->vp_offset; + } + + if (skb_vlan_tag_present(skb)) { + pkt_fmt = MANA_LONG_PKT_FMT; + pkg.tx_oob.l_oob.inject_vlan_pri_tag = 1; + pkg.tx_oob.l_oob.pcp = skb_vlan_tag_get_prio(skb); + pkg.tx_oob.l_oob.dei = skb_vlan_tag_get_cfi(skb); + pkg.tx_oob.l_oob.vlan_id = skb_vlan_tag_get_id(skb); + } + + pkg.tx_oob.s_oob.pkt_fmt = pkt_fmt; + + if (pkt_fmt == MANA_SHORT_PKT_FMT) { + pkg.wqe_req.inline_oob_size = sizeof(struct mana_tx_short_oob); + u64_stats_update_begin(&tx_stats->syncp); + tx_stats->short_pkt_fmt++; + u64_stats_update_end(&tx_stats->syncp); + } else { + pkg.wqe_req.inline_oob_size = sizeof(struct mana_tx_oob); + u64_stats_update_begin(&tx_stats->syncp); + tx_stats->long_pkt_fmt++; + u64_stats_update_end(&tx_stats->syncp); + } + + pkg.wqe_req.inline_oob_data = &pkg.tx_oob; + pkg.wqe_req.flags = 0; + pkg.wqe_req.client_data_unit = 0; + + pkg.wqe_req.num_sge = 1 + skb_shinfo(skb)->nr_frags; + + if (skb->protocol == htons(ETH_P_IP)) + ipv4 = true; + else if (skb->protocol == htons(ETH_P_IPV6)) + ipv6 = true; + + if (skb_is_gso(skb)) { + int num_sge; + + gso_hs = mana_get_gso_hs(skb); + + num_sge = mana_fix_skb_head(ndev, skb, gso_hs); + if (num_sge > 0) + pkg.wqe_req.num_sge = num_sge; + else + goto tx_drop_count; + + u64_stats_update_begin(&tx_stats->syncp); + if (skb->encapsulation) { + tx_stats->tso_inner_packets++; + tx_stats->tso_inner_bytes += skb->len - gso_hs; + } else { + tx_stats->tso_packets++; + tx_stats->tso_bytes += skb->len - gso_hs; + } + u64_stats_update_end(&tx_stats->syncp); + + pkg.tx_oob.s_oob.is_outer_ipv4 = ipv4; + pkg.tx_oob.s_oob.is_outer_ipv6 = ipv6; + + pkg.tx_oob.s_oob.comp_iphdr_csum = 1; + pkg.tx_oob.s_oob.comp_tcp_csum = 1; + pkg.tx_oob.s_oob.trans_off = skb_transport_offset(skb); + + pkg.wqe_req.client_data_unit = skb_shinfo(skb)->gso_size; + pkg.wqe_req.flags = GDMA_WR_OOB_IN_SGL | GDMA_WR_PAD_BY_SGE0; + if (ipv4) { + ip_hdr(skb)->tot_len = 0; + ip_hdr(skb)->check = 0; + tcp_hdr(skb)->check = + ~csum_tcpudp_magic(ip_hdr(skb)->saddr, + ip_hdr(skb)->daddr, 0, + IPPROTO_TCP, 0); + } else { + ipv6_hdr(skb)->payload_len = 0; + tcp_hdr(skb)->check = + ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, + &ipv6_hdr(skb)->daddr, 0, + IPPROTO_TCP, 0); + } + } else if (skb->ip_summed == CHECKSUM_PARTIAL) { + csum_type = mana_checksum_info(skb); + + u64_stats_update_begin(&tx_stats->syncp); + tx_stats->csum_partial++; + u64_stats_update_end(&tx_stats->syncp); + + if (csum_type == IPPROTO_TCP) { + pkg.tx_oob.s_oob.is_outer_ipv4 = ipv4; + pkg.tx_oob.s_oob.is_outer_ipv6 = ipv6; + + pkg.tx_oob.s_oob.comp_tcp_csum = 1; + pkg.tx_oob.s_oob.trans_off = skb_transport_offset(skb); + + } else if (csum_type == IPPROTO_UDP) { + pkg.tx_oob.s_oob.is_outer_ipv4 = ipv4; + pkg.tx_oob.s_oob.is_outer_ipv6 = ipv6; + + pkg.tx_oob.s_oob.comp_udp_csum = 1; + } else { + /* Can't do offload of this type of checksum */ + if (skb_checksum_help(skb)) + goto tx_drop_count; + } + } + + WARN_ON_ONCE(pkg.wqe_req.num_sge > MAX_TX_WQE_SGL_ENTRIES); + + if (pkg.wqe_req.num_sge <= ARRAY_SIZE(pkg.sgl_array)) { + pkg.wqe_req.sgl = pkg.sgl_array; + } else { + pkg.sgl_ptr = kmalloc_array(pkg.wqe_req.num_sge, + sizeof(struct gdma_sge), + GFP_ATOMIC); + if (!pkg.sgl_ptr) + goto tx_drop_count; + + pkg.wqe_req.sgl = pkg.sgl_ptr; + } + + if (mana_map_skb(skb, apc, &pkg, gso_hs)) { + u64_stats_update_begin(&tx_stats->syncp); + tx_stats->mana_map_err++; + u64_stats_update_end(&tx_stats->syncp); + goto free_sgl_ptr; + } + + skb_queue_tail(&txq->pending_skbs, skb); + + len = skb->len; + net_txq = netdev_get_tx_queue(ndev, txq_idx); + + err = mana_gd_post_work_request(gdma_sq, &pkg.wqe_req, + (struct gdma_posted_wqe_info *)skb->cb); + if (!mana_can_tx(gdma_sq)) { + netif_tx_stop_queue(net_txq); + apc->eth_stats.stop_queue++; + } + + if (err) { + (void)skb_dequeue_tail(&txq->pending_skbs); + netdev_warn(ndev, "Failed to post TX OOB: %d\n", err); + err = NETDEV_TX_BUSY; + goto tx_busy; + } + + err = NETDEV_TX_OK; + atomic_inc(&txq->pending_sends); + + mana_gd_wq_ring_doorbell(gd->gdma_context, gdma_sq); + + /* skb may be freed after mana_gd_post_work_request. Do not use it. */ + skb = NULL; + + tx_stats = &txq->stats; + u64_stats_update_begin(&tx_stats->syncp); + tx_stats->packets++; + tx_stats->bytes += len; + u64_stats_update_end(&tx_stats->syncp); + +tx_busy: + if (netif_tx_queue_stopped(net_txq) && mana_can_tx(gdma_sq)) { + netif_tx_wake_queue(net_txq); + apc->eth_stats.wake_queue++; + } + + kfree(pkg.sgl_ptr); + return err; + +free_sgl_ptr: + kfree(pkg.sgl_ptr); +tx_drop_count: + ndev->stats.tx_dropped++; +tx_drop: + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; +} + +static void mana_get_stats64(struct net_device *ndev, + struct rtnl_link_stats64 *st) +{ + struct mana_port_context *apc = netdev_priv(ndev); + unsigned int num_queues = apc->num_queues; + struct mana_stats_rx *rx_stats; + struct mana_stats_tx *tx_stats; + unsigned int start; + u64 packets, bytes; + int q; + + if (!apc->port_is_up) + return; + + netdev_stats_to_stats64(st, &ndev->stats); + + for (q = 0; q < num_queues; q++) { + rx_stats = &apc->rxqs[q]->stats; + + do { + start = u64_stats_fetch_begin(&rx_stats->syncp); + packets = rx_stats->packets; + bytes = rx_stats->bytes; + } while (u64_stats_fetch_retry(&rx_stats->syncp, start)); + + st->rx_packets += packets; + st->rx_bytes += bytes; + } + + for (q = 0; q < num_queues; q++) { + tx_stats = &apc->tx_qp[q].txq.stats; + + do { + start = u64_stats_fetch_begin(&tx_stats->syncp); + packets = tx_stats->packets; + bytes = tx_stats->bytes; + } while (u64_stats_fetch_retry(&tx_stats->syncp, start)); + + st->tx_packets += packets; + st->tx_bytes += bytes; + } +} + +static int mana_get_tx_queue(struct net_device *ndev, struct sk_buff *skb, + int old_q) +{ + struct mana_port_context *apc = netdev_priv(ndev); + u32 hash = skb_get_hash(skb); + struct sock *sk = skb->sk; + int txq; + + txq = apc->indir_table[hash & MANA_INDIRECT_TABLE_MASK]; + + if (txq != old_q && sk && sk_fullsock(sk) && + rcu_access_pointer(sk->sk_dst_cache)) + sk_tx_queue_set(sk, txq); + + return txq; +} + +static u16 mana_select_queue(struct net_device *ndev, struct sk_buff *skb, + struct net_device *sb_dev) +{ + int txq; + + if (ndev->real_num_tx_queues == 1) + return 0; + + txq = sk_tx_queue_get(skb->sk); + + if (txq < 0 || skb->ooo_okay || txq >= ndev->real_num_tx_queues) { + if (skb_rx_queue_recorded(skb)) + txq = skb_get_rx_queue(skb); + else + txq = mana_get_tx_queue(ndev, skb, txq); + } + + return txq; +} + +/* Release pre-allocated RX buffers */ +static void mana_pre_dealloc_rxbufs(struct mana_port_context *mpc) +{ + struct device *dev; + int i; + + dev = mpc->ac->gdma_dev->gdma_context->dev; + + if (!mpc->rxbufs_pre) + goto out1; + + if (!mpc->das_pre) + goto out2; + + while (mpc->rxbpre_total) { + i = --mpc->rxbpre_total; + dma_unmap_single(dev, mpc->das_pre[i], mpc->rxbpre_datasize, + DMA_FROM_DEVICE); + put_page(virt_to_head_page(mpc->rxbufs_pre[i])); + } + + kfree(mpc->das_pre); + mpc->das_pre = NULL; + +out2: + kfree(mpc->rxbufs_pre); + mpc->rxbufs_pre = NULL; + +out1: + mpc->rxbpre_datasize = 0; + mpc->rxbpre_alloc_size = 0; + mpc->rxbpre_headroom = 0; +} + +/* Get a buffer from the pre-allocated RX buffers */ +static void *mana_get_rxbuf_pre(struct mana_rxq *rxq, dma_addr_t *da) +{ + struct net_device *ndev = rxq->ndev; + struct mana_port_context *mpc; + void *va; + + mpc = netdev_priv(ndev); + + if (!mpc->rxbufs_pre || !mpc->das_pre || !mpc->rxbpre_total) { + netdev_err(ndev, "No RX pre-allocated bufs\n"); + return NULL; + } + + /* Check sizes to catch unexpected coding error */ + if (mpc->rxbpre_datasize != rxq->datasize) { + netdev_err(ndev, "rxbpre_datasize mismatch: %u: %u\n", + mpc->rxbpre_datasize, rxq->datasize); + return NULL; + } + + if (mpc->rxbpre_alloc_size != rxq->alloc_size) { + netdev_err(ndev, "rxbpre_alloc_size mismatch: %u: %u\n", + mpc->rxbpre_alloc_size, rxq->alloc_size); + return NULL; + } + + if (mpc->rxbpre_headroom != rxq->headroom) { + netdev_err(ndev, "rxbpre_headroom mismatch: %u: %u\n", + mpc->rxbpre_headroom, rxq->headroom); + return NULL; + } + + mpc->rxbpre_total--; + + *da = mpc->das_pre[mpc->rxbpre_total]; + va = mpc->rxbufs_pre[mpc->rxbpre_total]; + mpc->rxbufs_pre[mpc->rxbpre_total] = NULL; + + /* Deallocate the array after all buffers are gone */ + if (!mpc->rxbpre_total) + mana_pre_dealloc_rxbufs(mpc); + + return va; +} + +/* Get RX buffer's data size, alloc size, XDP headroom based on MTU */ +static void mana_get_rxbuf_cfg(int mtu, u32 *datasize, u32 *alloc_size, + u32 *headroom) +{ + if (mtu > MANA_XDP_MTU_MAX) + *headroom = 0; /* no support for XDP */ + else + *headroom = XDP_PACKET_HEADROOM; + + *alloc_size = mtu + MANA_RXBUF_PAD + *headroom; + + *datasize = ALIGN(mtu + ETH_HLEN, MANA_RX_DATA_ALIGN); +} + +static int mana_pre_alloc_rxbufs(struct mana_port_context *mpc, int new_mtu) +{ + struct device *dev; + struct page *page; + dma_addr_t da; + int num_rxb; + void *va; + int i; + + mana_get_rxbuf_cfg(new_mtu, &mpc->rxbpre_datasize, + &mpc->rxbpre_alloc_size, &mpc->rxbpre_headroom); + + dev = mpc->ac->gdma_dev->gdma_context->dev; + + num_rxb = mpc->num_queues * RX_BUFFERS_PER_QUEUE; + + WARN(mpc->rxbufs_pre, "mana rxbufs_pre exists\n"); + mpc->rxbufs_pre = kmalloc_array(num_rxb, sizeof(void *), GFP_KERNEL); + if (!mpc->rxbufs_pre) + goto error; + + mpc->das_pre = kmalloc_array(num_rxb, sizeof(dma_addr_t), GFP_KERNEL); + if (!mpc->das_pre) + goto error; + + mpc->rxbpre_total = 0; + + for (i = 0; i < num_rxb; i++) { + if (mpc->rxbpre_alloc_size > PAGE_SIZE) { + va = netdev_alloc_frag(mpc->rxbpre_alloc_size); + if (!va) + goto error; + + page = virt_to_head_page(va); + /* Check if the frag falls back to single page */ + if (compound_order(page) < + get_order(mpc->rxbpre_alloc_size)) { + put_page(page); + goto error; + } + } else { + page = dev_alloc_page(); + if (!page) + goto error; + + va = page_to_virt(page); + } + + da = dma_map_single(dev, va + mpc->rxbpre_headroom, + mpc->rxbpre_datasize, DMA_FROM_DEVICE); + if (dma_mapping_error(dev, da)) { + put_page(virt_to_head_page(va)); + goto error; + } + + mpc->rxbufs_pre[i] = va; + mpc->das_pre[i] = da; + mpc->rxbpre_total = i + 1; + } + + return 0; + +error: + mana_pre_dealloc_rxbufs(mpc); + return -ENOMEM; +} + +static int mana_change_mtu(struct net_device *ndev, int new_mtu) +{ + struct mana_port_context *mpc = netdev_priv(ndev); + unsigned int old_mtu = ndev->mtu; + int err; + + /* Pre-allocate buffers to prevent failure in mana_attach later */ + err = mana_pre_alloc_rxbufs(mpc, new_mtu); + if (err) { + netdev_err(ndev, "Insufficient memory for new MTU\n"); + return err; + } + + err = mana_detach(ndev, false); + if (err) { + netdev_err(ndev, "mana_detach failed: %d\n", err); + goto out; + } + + ndev->mtu = new_mtu; + + err = mana_attach(ndev); + if (err) { + netdev_err(ndev, "mana_attach failed: %d\n", err); + ndev->mtu = old_mtu; + } + +out: + mana_pre_dealloc_rxbufs(mpc); + return err; +} + +static const struct net_device_ops mana_devops = { + .ndo_open = mana_open, + .ndo_stop = mana_close, + .ndo_select_queue = mana_select_queue, + .ndo_start_xmit = mana_start_xmit, + .ndo_validate_addr = eth_validate_addr, + .ndo_get_stats64 = mana_get_stats64, + .ndo_bpf = mana_bpf, + .ndo_xdp_xmit = mana_xdp_xmit, + .ndo_change_mtu = mana_change_mtu, +}; + +static void mana_cleanup_port_context(struct mana_port_context *apc) +{ + kfree(apc->rxqs); + apc->rxqs = NULL; +} + +static int mana_init_port_context(struct mana_port_context *apc) +{ + apc->rxqs = kcalloc(apc->num_queues, sizeof(struct mana_rxq *), + GFP_KERNEL); + + return !apc->rxqs ? -ENOMEM : 0; +} + +static int mana_send_request(struct mana_context *ac, void *in_buf, + u32 in_len, void *out_buf, u32 out_len) +{ + struct gdma_context *gc = ac->gdma_dev->gdma_context; + struct gdma_resp_hdr *resp = out_buf; + struct gdma_req_hdr *req = in_buf; + struct device *dev = gc->dev; + static atomic_t activity_id; + int err; + + req->dev_id = gc->mana.dev_id; + req->activity_id = atomic_inc_return(&activity_id); + + err = mana_gd_send_request(gc, in_len, in_buf, out_len, + out_buf); + if (err || resp->status) { + dev_err(dev, "Failed to send mana message: %d, 0x%x\n", + err, resp->status); + return err ? err : -EPROTO; + } + + if (req->dev_id.as_uint32 != resp->dev_id.as_uint32 || + req->activity_id != resp->activity_id) { + dev_err(dev, "Unexpected mana message response: %x,%x,%x,%x\n", + req->dev_id.as_uint32, resp->dev_id.as_uint32, + req->activity_id, resp->activity_id); + return -EPROTO; + } + + return 0; +} + +static int mana_verify_resp_hdr(const struct gdma_resp_hdr *resp_hdr, + const enum mana_command_code expected_code, + const u32 min_size) +{ + if (resp_hdr->response.msg_type != expected_code) + return -EPROTO; + + if (resp_hdr->response.msg_version < GDMA_MESSAGE_V1) + return -EPROTO; + + if (resp_hdr->response.msg_size < min_size) + return -EPROTO; + + return 0; +} + +static int mana_pf_register_hw_vport(struct mana_port_context *apc) +{ + struct mana_register_hw_vport_resp resp = {}; + struct mana_register_hw_vport_req req = {}; + int err; + + mana_gd_init_req_hdr(&req.hdr, MANA_REGISTER_HW_PORT, + sizeof(req), sizeof(resp)); + req.attached_gfid = 1; + req.is_pf_default_vport = 1; + req.allow_all_ether_types = 1; + + err = mana_send_request(apc->ac, &req, sizeof(req), &resp, + sizeof(resp)); + if (err) { + netdev_err(apc->ndev, "Failed to register hw vPort: %d\n", err); + return err; + } + + err = mana_verify_resp_hdr(&resp.hdr, MANA_REGISTER_HW_PORT, + sizeof(resp)); + if (err || resp.hdr.status) { + netdev_err(apc->ndev, "Failed to register hw vPort: %d, 0x%x\n", + err, resp.hdr.status); + return err ? err : -EPROTO; + } + + apc->port_handle = resp.hw_vport_handle; + return 0; +} + +static void mana_pf_deregister_hw_vport(struct mana_port_context *apc) +{ + struct mana_deregister_hw_vport_resp resp = {}; + struct mana_deregister_hw_vport_req req = {}; + int err; + + mana_gd_init_req_hdr(&req.hdr, MANA_DEREGISTER_HW_PORT, + sizeof(req), sizeof(resp)); + req.hw_vport_handle = apc->port_handle; + + err = mana_send_request(apc->ac, &req, sizeof(req), &resp, + sizeof(resp)); + if (err) { + netdev_err(apc->ndev, "Failed to unregister hw vPort: %d\n", + err); + return; + } + + err = mana_verify_resp_hdr(&resp.hdr, MANA_DEREGISTER_HW_PORT, + sizeof(resp)); + if (err || resp.hdr.status) + netdev_err(apc->ndev, + "Failed to deregister hw vPort: %d, 0x%x\n", + err, resp.hdr.status); +} + +static int mana_pf_register_filter(struct mana_port_context *apc) +{ + struct mana_register_filter_resp resp = {}; + struct mana_register_filter_req req = {}; + int err; + + mana_gd_init_req_hdr(&req.hdr, MANA_REGISTER_FILTER, + sizeof(req), sizeof(resp)); + req.vport = apc->port_handle; + memcpy(req.mac_addr, apc->mac_addr, ETH_ALEN); + + err = mana_send_request(apc->ac, &req, sizeof(req), &resp, + sizeof(resp)); + if (err) { + netdev_err(apc->ndev, "Failed to register filter: %d\n", err); + return err; + } + + err = mana_verify_resp_hdr(&resp.hdr, MANA_REGISTER_FILTER, + sizeof(resp)); + if (err || resp.hdr.status) { + netdev_err(apc->ndev, "Failed to register filter: %d, 0x%x\n", + err, resp.hdr.status); + return err ? err : -EPROTO; + } + + apc->pf_filter_handle = resp.filter_handle; + return 0; +} + +static void mana_pf_deregister_filter(struct mana_port_context *apc) +{ + struct mana_deregister_filter_resp resp = {}; + struct mana_deregister_filter_req req = {}; + int err; + + mana_gd_init_req_hdr(&req.hdr, MANA_DEREGISTER_FILTER, + sizeof(req), sizeof(resp)); + req.filter_handle = apc->pf_filter_handle; + + err = mana_send_request(apc->ac, &req, sizeof(req), &resp, + sizeof(resp)); + if (err) { + netdev_err(apc->ndev, "Failed to unregister filter: %d\n", + err); + return; + } + + err = mana_verify_resp_hdr(&resp.hdr, MANA_DEREGISTER_FILTER, + sizeof(resp)); + if (err || resp.hdr.status) + netdev_err(apc->ndev, + "Failed to deregister filter: %d, 0x%x\n", + err, resp.hdr.status); +} + +static int mana_query_device_cfg(struct mana_context *ac, u32 proto_major_ver, + u32 proto_minor_ver, u32 proto_micro_ver, + u16 *max_num_vports) +{ + struct gdma_context *gc = ac->gdma_dev->gdma_context; + struct mana_query_device_cfg_resp resp = {}; + struct mana_query_device_cfg_req req = {}; + struct device *dev = gc->dev; + int err = 0; + + mana_gd_init_req_hdr(&req.hdr, MANA_QUERY_DEV_CONFIG, + sizeof(req), sizeof(resp)); + + req.hdr.resp.msg_version = GDMA_MESSAGE_V2; + + req.proto_major_ver = proto_major_ver; + req.proto_minor_ver = proto_minor_ver; + req.proto_micro_ver = proto_micro_ver; + + err = mana_send_request(ac, &req, sizeof(req), &resp, sizeof(resp)); + if (err) { + dev_err(dev, "Failed to query config: %d", err); + return err; + } + + err = mana_verify_resp_hdr(&resp.hdr, MANA_QUERY_DEV_CONFIG, + sizeof(resp)); + if (err || resp.hdr.status) { + dev_err(dev, "Invalid query result: %d, 0x%x\n", err, + resp.hdr.status); + if (!err) + err = -EPROTO; + return err; + } + + *max_num_vports = resp.max_num_vports; + + if (resp.hdr.response.msg_version == GDMA_MESSAGE_V2) + gc->adapter_mtu = resp.adapter_mtu; + else + gc->adapter_mtu = ETH_FRAME_LEN; + + return 0; +} + +static int mana_query_vport_cfg(struct mana_port_context *apc, u32 vport_index, + u32 *max_sq, u32 *max_rq, u32 *num_indir_entry) +{ + struct mana_query_vport_cfg_resp resp = {}; + struct mana_query_vport_cfg_req req = {}; + int err; + + mana_gd_init_req_hdr(&req.hdr, MANA_QUERY_VPORT_CONFIG, + sizeof(req), sizeof(resp)); + + req.vport_index = vport_index; + + err = mana_send_request(apc->ac, &req, sizeof(req), &resp, + sizeof(resp)); + if (err) + return err; + + err = mana_verify_resp_hdr(&resp.hdr, MANA_QUERY_VPORT_CONFIG, + sizeof(resp)); + if (err) + return err; + + if (resp.hdr.status) + return -EPROTO; + + *max_sq = resp.max_num_sq; + *max_rq = resp.max_num_rq; + *num_indir_entry = resp.num_indirection_ent; + + apc->port_handle = resp.vport; + ether_addr_copy(apc->mac_addr, resp.mac_addr); + + return 0; +} + +void mana_uncfg_vport(struct mana_port_context *apc) +{ + mutex_lock(&apc->vport_mutex); + apc->vport_use_count--; + WARN_ON(apc->vport_use_count < 0); + mutex_unlock(&apc->vport_mutex); +} +EXPORT_SYMBOL_NS(mana_uncfg_vport, NET_MANA); + +int mana_cfg_vport(struct mana_port_context *apc, u32 protection_dom_id, + u32 doorbell_pg_id) +{ + struct mana_config_vport_resp resp = {}; + struct mana_config_vport_req req = {}; + int err; + + /* This function is used to program the Ethernet port in the hardware + * table. It can be called from the Ethernet driver or the RDMA driver. + * + * For Ethernet usage, the hardware supports only one active user on a + * physical port. The driver checks on the port usage before programming + * the hardware when creating the RAW QP (RDMA driver) or exposing the + * device to kernel NET layer (Ethernet driver). + * + * Because the RDMA driver doesn't know in advance which QP type the + * user will create, it exposes the device with all its ports. The user + * may not be able to create RAW QP on a port if this port is already + * in used by the Ethernet driver from the kernel. + * + * This physical port limitation only applies to the RAW QP. For RC QP, + * the hardware doesn't have this limitation. The user can create RC + * QPs on a physical port up to the hardware limits independent of the + * Ethernet usage on the same port. + */ + mutex_lock(&apc->vport_mutex); + if (apc->vport_use_count > 0) { + mutex_unlock(&apc->vport_mutex); + return -EBUSY; + } + apc->vport_use_count++; + mutex_unlock(&apc->vport_mutex); + + mana_gd_init_req_hdr(&req.hdr, MANA_CONFIG_VPORT_TX, + sizeof(req), sizeof(resp)); + req.vport = apc->port_handle; + req.pdid = protection_dom_id; + req.doorbell_pageid = doorbell_pg_id; + + err = mana_send_request(apc->ac, &req, sizeof(req), &resp, + sizeof(resp)); + if (err) { + netdev_err(apc->ndev, "Failed to configure vPort: %d\n", err); + goto out; + } + + err = mana_verify_resp_hdr(&resp.hdr, MANA_CONFIG_VPORT_TX, + sizeof(resp)); + if (err || resp.hdr.status) { + netdev_err(apc->ndev, "Failed to configure vPort: %d, 0x%x\n", + err, resp.hdr.status); + if (!err) + err = -EPROTO; + + goto out; + } + + apc->tx_shortform_allowed = resp.short_form_allowed; + apc->tx_vp_offset = resp.tx_vport_offset; + + netdev_info(apc->ndev, "Configured vPort %llu PD %u DB %u\n", + apc->port_handle, protection_dom_id, doorbell_pg_id); +out: + if (err) + mana_uncfg_vport(apc); + + return err; +} +EXPORT_SYMBOL_NS(mana_cfg_vport, NET_MANA); + +static int mana_cfg_vport_steering(struct mana_port_context *apc, + enum TRI_STATE rx, + bool update_default_rxobj, bool update_key, + bool update_tab) +{ + u16 num_entries = MANA_INDIRECT_TABLE_SIZE; + struct mana_cfg_rx_steer_req_v2 *req; + struct mana_cfg_rx_steer_resp resp = {}; + struct net_device *ndev = apc->ndev; + mana_handle_t *req_indir_tab; + u32 req_buf_size; + int err; + + req_buf_size = sizeof(*req) + sizeof(mana_handle_t) * num_entries; + req = kzalloc(req_buf_size, GFP_KERNEL); + if (!req) + return -ENOMEM; + + mana_gd_init_req_hdr(&req->hdr, MANA_CONFIG_VPORT_RX, req_buf_size, + sizeof(resp)); + + req->hdr.req.msg_version = GDMA_MESSAGE_V2; + + req->vport = apc->port_handle; + req->num_indir_entries = num_entries; + req->indir_tab_offset = sizeof(*req); + req->rx_enable = rx; + req->rss_enable = apc->rss_state; + req->update_default_rxobj = update_default_rxobj; + req->update_hashkey = update_key; + req->update_indir_tab = update_tab; + req->default_rxobj = apc->default_rxobj; + req->cqe_coalescing_enable = 0; + + if (update_key) + memcpy(&req->hashkey, apc->hashkey, MANA_HASH_KEY_SIZE); + + if (update_tab) { + req_indir_tab = (mana_handle_t *)(req + 1); + memcpy(req_indir_tab, apc->rxobj_table, + req->num_indir_entries * sizeof(mana_handle_t)); + } + + err = mana_send_request(apc->ac, req, req_buf_size, &resp, + sizeof(resp)); + if (err) { + netdev_err(ndev, "Failed to configure vPort RX: %d\n", err); + goto out; + } + + err = mana_verify_resp_hdr(&resp.hdr, MANA_CONFIG_VPORT_RX, + sizeof(resp)); + if (err) { + netdev_err(ndev, "vPort RX configuration failed: %d\n", err); + goto out; + } + + if (resp.hdr.status) { + netdev_err(ndev, "vPort RX configuration failed: 0x%x\n", + resp.hdr.status); + err = -EPROTO; + } + + netdev_info(ndev, "Configured steering vPort %llu entries %u\n", + apc->port_handle, num_entries); +out: + kfree(req); + return err; +} + +int mana_create_wq_obj(struct mana_port_context *apc, + mana_handle_t vport, + u32 wq_type, struct mana_obj_spec *wq_spec, + struct mana_obj_spec *cq_spec, + mana_handle_t *wq_obj) +{ + struct mana_create_wqobj_resp resp = {}; + struct mana_create_wqobj_req req = {}; + struct net_device *ndev = apc->ndev; + int err; + + mana_gd_init_req_hdr(&req.hdr, MANA_CREATE_WQ_OBJ, + sizeof(req), sizeof(resp)); + req.vport = vport; + req.wq_type = wq_type; + req.wq_gdma_region = wq_spec->gdma_region; + req.cq_gdma_region = cq_spec->gdma_region; + req.wq_size = wq_spec->queue_size; + req.cq_size = cq_spec->queue_size; + req.cq_moderation_ctx_id = cq_spec->modr_ctx_id; + req.cq_parent_qid = cq_spec->attached_eq; + + err = mana_send_request(apc->ac, &req, sizeof(req), &resp, + sizeof(resp)); + if (err) { + netdev_err(ndev, "Failed to create WQ object: %d\n", err); + goto out; + } + + err = mana_verify_resp_hdr(&resp.hdr, MANA_CREATE_WQ_OBJ, + sizeof(resp)); + if (err || resp.hdr.status) { + netdev_err(ndev, "Failed to create WQ object: %d, 0x%x\n", err, + resp.hdr.status); + if (!err) + err = -EPROTO; + goto out; + } + + if (resp.wq_obj == INVALID_MANA_HANDLE) { + netdev_err(ndev, "Got an invalid WQ object handle\n"); + err = -EPROTO; + goto out; + } + + *wq_obj = resp.wq_obj; + wq_spec->queue_index = resp.wq_id; + cq_spec->queue_index = resp.cq_id; + + return 0; +out: + return err; +} +EXPORT_SYMBOL_NS(mana_create_wq_obj, NET_MANA); + +void mana_destroy_wq_obj(struct mana_port_context *apc, u32 wq_type, + mana_handle_t wq_obj) +{ + struct mana_destroy_wqobj_resp resp = {}; + struct mana_destroy_wqobj_req req = {}; + struct net_device *ndev = apc->ndev; + int err; + + mana_gd_init_req_hdr(&req.hdr, MANA_DESTROY_WQ_OBJ, + sizeof(req), sizeof(resp)); + req.wq_type = wq_type; + req.wq_obj_handle = wq_obj; + + err = mana_send_request(apc->ac, &req, sizeof(req), &resp, + sizeof(resp)); + if (err) { + netdev_err(ndev, "Failed to destroy WQ object: %d\n", err); + return; + } + + err = mana_verify_resp_hdr(&resp.hdr, MANA_DESTROY_WQ_OBJ, + sizeof(resp)); + if (err || resp.hdr.status) + netdev_err(ndev, "Failed to destroy WQ object: %d, 0x%x\n", err, + resp.hdr.status); +} +EXPORT_SYMBOL_NS(mana_destroy_wq_obj, NET_MANA); + +static void mana_destroy_eq(struct mana_context *ac) +{ + struct gdma_context *gc = ac->gdma_dev->gdma_context; + struct gdma_queue *eq; + int i; + + if (!ac->eqs) + return; + + for (i = 0; i < gc->max_num_queues; i++) { + eq = ac->eqs[i].eq; + if (!eq) + continue; + + mana_gd_destroy_queue(gc, eq); + } + + kfree(ac->eqs); + ac->eqs = NULL; +} + +static int mana_create_eq(struct mana_context *ac) +{ + struct gdma_dev *gd = ac->gdma_dev; + struct gdma_context *gc = gd->gdma_context; + struct gdma_queue_spec spec = {}; + int err; + int i; + + ac->eqs = kcalloc(gc->max_num_queues, sizeof(struct mana_eq), + GFP_KERNEL); + if (!ac->eqs) + return -ENOMEM; + + spec.type = GDMA_EQ; + spec.monitor_avl_buf = false; + spec.queue_size = EQ_SIZE; + spec.eq.callback = NULL; + spec.eq.context = ac->eqs; + spec.eq.log2_throttle_limit = LOG2_EQ_THROTTLE; + + for (i = 0; i < gc->max_num_queues; i++) { + err = mana_gd_create_mana_eq(gd, &spec, &ac->eqs[i].eq); + if (err) + goto out; + } + + return 0; +out: + mana_destroy_eq(ac); + return err; +} + +static int mana_fence_rq(struct mana_port_context *apc, struct mana_rxq *rxq) +{ + struct mana_fence_rq_resp resp = {}; + struct mana_fence_rq_req req = {}; + int err; + + init_completion(&rxq->fence_event); + + mana_gd_init_req_hdr(&req.hdr, MANA_FENCE_RQ, + sizeof(req), sizeof(resp)); + req.wq_obj_handle = rxq->rxobj; + + err = mana_send_request(apc->ac, &req, sizeof(req), &resp, + sizeof(resp)); + if (err) { + netdev_err(apc->ndev, "Failed to fence RQ %u: %d\n", + rxq->rxq_idx, err); + return err; + } + + err = mana_verify_resp_hdr(&resp.hdr, MANA_FENCE_RQ, sizeof(resp)); + if (err || resp.hdr.status) { + netdev_err(apc->ndev, "Failed to fence RQ %u: %d, 0x%x\n", + rxq->rxq_idx, err, resp.hdr.status); + if (!err) + err = -EPROTO; + + return err; + } + + if (wait_for_completion_timeout(&rxq->fence_event, 10 * HZ) == 0) { + netdev_err(apc->ndev, "Failed to fence RQ %u: timed out\n", + rxq->rxq_idx); + return -ETIMEDOUT; + } + + return 0; +} + +static void mana_fence_rqs(struct mana_port_context *apc) +{ + unsigned int rxq_idx; + struct mana_rxq *rxq; + int err; + + for (rxq_idx = 0; rxq_idx < apc->num_queues; rxq_idx++) { + rxq = apc->rxqs[rxq_idx]; + err = mana_fence_rq(apc, rxq); + + /* In case of any error, use sleep instead. */ + if (err) + msleep(100); + } +} + +static int mana_move_wq_tail(struct gdma_queue *wq, u32 num_units) +{ + u32 used_space_old; + u32 used_space_new; + + used_space_old = wq->head - wq->tail; + used_space_new = wq->head - (wq->tail + num_units); + + if (WARN_ON_ONCE(used_space_new > used_space_old)) + return -ERANGE; + + wq->tail += num_units; + return 0; +} + +static void mana_unmap_skb(struct sk_buff *skb, struct mana_port_context *apc) +{ + struct mana_skb_head *ash = (struct mana_skb_head *)skb->head; + struct gdma_context *gc = apc->ac->gdma_dev->gdma_context; + struct device *dev = gc->dev; + int hsg, i; + + /* Number of SGEs of linear part */ + hsg = (skb_is_gso(skb) && skb_headlen(skb) > ash->size[0]) ? 2 : 1; + + for (i = 0; i < hsg; i++) + dma_unmap_single(dev, ash->dma_handle[i], ash->size[i], + DMA_TO_DEVICE); + + for (i = hsg; i < skb_shinfo(skb)->nr_frags + hsg; i++) + dma_unmap_page(dev, ash->dma_handle[i], ash->size[i], + DMA_TO_DEVICE); +} + +static void mana_poll_tx_cq(struct mana_cq *cq) +{ + struct gdma_comp *completions = cq->gdma_comp_buf; + struct gdma_posted_wqe_info *wqe_info; + unsigned int pkt_transmitted = 0; + unsigned int wqe_unit_cnt = 0; + struct mana_txq *txq = cq->txq; + struct mana_port_context *apc; + struct netdev_queue *net_txq; + struct gdma_queue *gdma_wq; + unsigned int avail_space; + struct net_device *ndev; + struct sk_buff *skb; + bool txq_stopped; + int comp_read; + int i; + + ndev = txq->ndev; + apc = netdev_priv(ndev); + + comp_read = mana_gd_poll_cq(cq->gdma_cq, completions, + CQE_POLLING_BUFFER); + + if (comp_read < 1) + return; + + for (i = 0; i < comp_read; i++) { + struct mana_tx_comp_oob *cqe_oob; + + if (WARN_ON_ONCE(!completions[i].is_sq)) + return; + + cqe_oob = (struct mana_tx_comp_oob *)completions[i].cqe_data; + if (WARN_ON_ONCE(cqe_oob->cqe_hdr.client_type != + MANA_CQE_COMPLETION)) + return; + + switch (cqe_oob->cqe_hdr.cqe_type) { + case CQE_TX_OKAY: + break; + + case CQE_TX_SA_DROP: + case CQE_TX_MTU_DROP: + case CQE_TX_INVALID_OOB: + case CQE_TX_INVALID_ETH_TYPE: + case CQE_TX_HDR_PROCESSING_ERROR: + case CQE_TX_VF_DISABLED: + case CQE_TX_VPORT_IDX_OUT_OF_RANGE: + case CQE_TX_VPORT_DISABLED: + case CQE_TX_VLAN_TAGGING_VIOLATION: + if (net_ratelimit()) + netdev_err(ndev, "TX: CQE error %d\n", + cqe_oob->cqe_hdr.cqe_type); + + apc->eth_stats.tx_cqe_err++; + break; + + default: + /* If the CQE type is unknown, log an error, + * and still free the SKB, update tail, etc. + */ + if (net_ratelimit()) + netdev_err(ndev, "TX: unknown CQE type %d\n", + cqe_oob->cqe_hdr.cqe_type); + + apc->eth_stats.tx_cqe_unknown_type++; + break; + } + + if (WARN_ON_ONCE(txq->gdma_txq_id != completions[i].wq_num)) + return; + + skb = skb_dequeue(&txq->pending_skbs); + if (WARN_ON_ONCE(!skb)) + return; + + wqe_info = (struct gdma_posted_wqe_info *)skb->cb; + wqe_unit_cnt += wqe_info->wqe_size_in_bu; + + mana_unmap_skb(skb, apc); + + napi_consume_skb(skb, cq->budget); + + pkt_transmitted++; + } + + if (WARN_ON_ONCE(wqe_unit_cnt == 0)) + return; + + mana_move_wq_tail(txq->gdma_sq, wqe_unit_cnt); + + gdma_wq = txq->gdma_sq; + avail_space = mana_gd_wq_avail_space(gdma_wq); + + /* Ensure tail updated before checking q stop */ + smp_mb(); + + net_txq = txq->net_txq; + txq_stopped = netif_tx_queue_stopped(net_txq); + + /* Ensure checking txq_stopped before apc->port_is_up. */ + smp_rmb(); + + if (txq_stopped && apc->port_is_up && avail_space >= MAX_TX_WQE_SIZE) { + netif_tx_wake_queue(net_txq); + apc->eth_stats.wake_queue++; + } + + if (atomic_sub_return(pkt_transmitted, &txq->pending_sends) < 0) + WARN_ON_ONCE(1); + + cq->work_done = pkt_transmitted; +} + +static void mana_post_pkt_rxq(struct mana_rxq *rxq) +{ + struct mana_recv_buf_oob *recv_buf_oob; + u32 curr_index; + int err; + + curr_index = rxq->buf_index++; + if (rxq->buf_index == rxq->num_rx_buf) + rxq->buf_index = 0; + + recv_buf_oob = &rxq->rx_oobs[curr_index]; + + err = mana_gd_post_work_request(rxq->gdma_rq, &recv_buf_oob->wqe_req, + &recv_buf_oob->wqe_inf); + if (WARN_ON_ONCE(err)) + return; + + WARN_ON_ONCE(recv_buf_oob->wqe_inf.wqe_size_in_bu != 1); +} + +static struct sk_buff *mana_build_skb(struct mana_rxq *rxq, void *buf_va, + uint pkt_len, struct xdp_buff *xdp) +{ + struct sk_buff *skb = napi_build_skb(buf_va, rxq->alloc_size); + + if (!skb) + return NULL; + + if (xdp->data_hard_start) { + skb_reserve(skb, xdp->data - xdp->data_hard_start); + skb_put(skb, xdp->data_end - xdp->data); + return skb; + } + + skb_reserve(skb, rxq->headroom); + skb_put(skb, pkt_len); + + return skb; +} + +static void mana_rx_skb(void *buf_va, bool from_pool, + struct mana_rxcomp_oob *cqe, struct mana_rxq *rxq) +{ + struct mana_stats_rx *rx_stats = &rxq->stats; + struct net_device *ndev = rxq->ndev; + uint pkt_len = cqe->ppi[0].pkt_len; + u16 rxq_idx = rxq->rxq_idx; + struct napi_struct *napi; + struct xdp_buff xdp = {}; + struct sk_buff *skb; + u32 hash_value; + u32 act; + + rxq->rx_cq.work_done++; + napi = &rxq->rx_cq.napi; + + if (!buf_va) { + ++ndev->stats.rx_dropped; + return; + } + + act = mana_run_xdp(ndev, rxq, &xdp, buf_va, pkt_len); + + if (act == XDP_REDIRECT && !rxq->xdp_rc) + return; + + if (act != XDP_PASS && act != XDP_TX) + goto drop_xdp; + + skb = mana_build_skb(rxq, buf_va, pkt_len, &xdp); + + if (!skb) + goto drop; + + if (from_pool) + skb_mark_for_recycle(skb); + + skb->dev = napi->dev; + + skb->protocol = eth_type_trans(skb, ndev); + skb_checksum_none_assert(skb); + skb_record_rx_queue(skb, rxq_idx); + + if ((ndev->features & NETIF_F_RXCSUM) && cqe->rx_iphdr_csum_succeed) { + if (cqe->rx_tcp_csum_succeed || cqe->rx_udp_csum_succeed) + skb->ip_summed = CHECKSUM_UNNECESSARY; + } + + if (cqe->rx_hashtype != 0 && (ndev->features & NETIF_F_RXHASH)) { + hash_value = cqe->ppi[0].pkt_hash; + + if (cqe->rx_hashtype & MANA_HASH_L4) + skb_set_hash(skb, hash_value, PKT_HASH_TYPE_L4); + else + skb_set_hash(skb, hash_value, PKT_HASH_TYPE_L3); + } + + if (cqe->rx_vlantag_present) { + u16 vlan_tci = cqe->rx_vlan_id; + + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tci); + } + + u64_stats_update_begin(&rx_stats->syncp); + rx_stats->packets++; + rx_stats->bytes += pkt_len; + + if (act == XDP_TX) + rx_stats->xdp_tx++; + u64_stats_update_end(&rx_stats->syncp); + + if (act == XDP_TX) { + skb_set_queue_mapping(skb, rxq_idx); + mana_xdp_tx(skb, ndev); + return; + } + + napi_gro_receive(napi, skb); + + return; + +drop_xdp: + u64_stats_update_begin(&rx_stats->syncp); + rx_stats->xdp_drop++; + u64_stats_update_end(&rx_stats->syncp); + +drop: + if (from_pool) { + page_pool_recycle_direct(rxq->page_pool, + virt_to_head_page(buf_va)); + } else { + WARN_ON_ONCE(rxq->xdp_save_va); + /* Save for reuse */ + rxq->xdp_save_va = buf_va; + } + + ++ndev->stats.rx_dropped; + + return; +} + +static void *mana_get_rxfrag(struct mana_rxq *rxq, struct device *dev, + dma_addr_t *da, bool *from_pool, bool is_napi) +{ + struct page *page; + void *va; + + *from_pool = false; + + /* Reuse XDP dropped page if available */ + if (rxq->xdp_save_va) { + va = rxq->xdp_save_va; + rxq->xdp_save_va = NULL; + } else if (rxq->alloc_size > PAGE_SIZE) { + if (is_napi) + va = napi_alloc_frag(rxq->alloc_size); + else + va = netdev_alloc_frag(rxq->alloc_size); + + if (!va) + return NULL; + + page = virt_to_head_page(va); + /* Check if the frag falls back to single page */ + if (compound_order(page) < get_order(rxq->alloc_size)) { + put_page(page); + return NULL; + } + } else { + page = page_pool_dev_alloc_pages(rxq->page_pool); + if (!page) + return NULL; + + *from_pool = true; + va = page_to_virt(page); + } + + *da = dma_map_single(dev, va + rxq->headroom, rxq->datasize, + DMA_FROM_DEVICE); + if (dma_mapping_error(dev, *da)) { + if (*from_pool) + page_pool_put_full_page(rxq->page_pool, page, false); + else + put_page(virt_to_head_page(va)); + + return NULL; + } + + return va; +} + +/* Allocate frag for rx buffer, and save the old buf */ +static void mana_refill_rx_oob(struct device *dev, struct mana_rxq *rxq, + struct mana_recv_buf_oob *rxoob, void **old_buf, + bool *old_fp) +{ + bool from_pool; + dma_addr_t da; + void *va; + + va = mana_get_rxfrag(rxq, dev, &da, &from_pool, true); + if (!va) + return; + + dma_unmap_single(dev, rxoob->sgl[0].address, rxq->datasize, + DMA_FROM_DEVICE); + *old_buf = rxoob->buf_va; + *old_fp = rxoob->from_pool; + + rxoob->buf_va = va; + rxoob->sgl[0].address = da; + rxoob->from_pool = from_pool; +} + +static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq, + struct gdma_comp *cqe) +{ + struct mana_rxcomp_oob *oob = (struct mana_rxcomp_oob *)cqe->cqe_data; + struct gdma_context *gc = rxq->gdma_rq->gdma_dev->gdma_context; + struct net_device *ndev = rxq->ndev; + struct mana_recv_buf_oob *rxbuf_oob; + struct mana_port_context *apc; + struct device *dev = gc->dev; + void *old_buf = NULL; + u32 curr, pktlen; + bool old_fp; + + apc = netdev_priv(ndev); + + switch (oob->cqe_hdr.cqe_type) { + case CQE_RX_OKAY: + break; + + case CQE_RX_TRUNCATED: + ++ndev->stats.rx_dropped; + rxbuf_oob = &rxq->rx_oobs[rxq->buf_index]; + netdev_warn_once(ndev, "Dropped a truncated packet\n"); + goto drop; + + case CQE_RX_COALESCED_4: + netdev_err(ndev, "RX coalescing is unsupported\n"); + apc->eth_stats.rx_coalesced_err++; + return; + + case CQE_RX_OBJECT_FENCE: + complete(&rxq->fence_event); + return; + + default: + netdev_err(ndev, "Unknown RX CQE type = %d\n", + oob->cqe_hdr.cqe_type); + apc->eth_stats.rx_cqe_unknown_type++; + return; + } + + pktlen = oob->ppi[0].pkt_len; + + if (pktlen == 0) { + /* data packets should never have packetlength of zero */ + netdev_err(ndev, "RX pkt len=0, rq=%u, cq=%u, rxobj=0x%llx\n", + rxq->gdma_id, cq->gdma_id, rxq->rxobj); + return; + } + + curr = rxq->buf_index; + rxbuf_oob = &rxq->rx_oobs[curr]; + WARN_ON_ONCE(rxbuf_oob->wqe_inf.wqe_size_in_bu != 1); + + mana_refill_rx_oob(dev, rxq, rxbuf_oob, &old_buf, &old_fp); + + /* Unsuccessful refill will have old_buf == NULL. + * In this case, mana_rx_skb() will drop the packet. + */ + mana_rx_skb(old_buf, old_fp, oob, rxq); + +drop: + mana_move_wq_tail(rxq->gdma_rq, rxbuf_oob->wqe_inf.wqe_size_in_bu); + + mana_post_pkt_rxq(rxq); +} + +static void mana_poll_rx_cq(struct mana_cq *cq) +{ + struct gdma_comp *comp = cq->gdma_comp_buf; + struct mana_rxq *rxq = cq->rxq; + int comp_read, i; + + comp_read = mana_gd_poll_cq(cq->gdma_cq, comp, CQE_POLLING_BUFFER); + WARN_ON_ONCE(comp_read > CQE_POLLING_BUFFER); + + rxq->xdp_flush = false; + + for (i = 0; i < comp_read; i++) { + if (WARN_ON_ONCE(comp[i].is_sq)) + return; + + /* verify recv cqe references the right rxq */ + if (WARN_ON_ONCE(comp[i].wq_num != cq->rxq->gdma_id)) + return; + + mana_process_rx_cqe(rxq, cq, &comp[i]); + } + + if (comp_read > 0) { + struct gdma_context *gc = rxq->gdma_rq->gdma_dev->gdma_context; + + mana_gd_wq_ring_doorbell(gc, rxq->gdma_rq); + } + + if (rxq->xdp_flush) + xdp_do_flush(); +} + +static int mana_cq_handler(void *context, struct gdma_queue *gdma_queue) +{ + struct mana_cq *cq = context; + u8 arm_bit; + int w; + + WARN_ON_ONCE(cq->gdma_cq != gdma_queue); + + if (cq->type == MANA_CQ_TYPE_RX) + mana_poll_rx_cq(cq); + else + mana_poll_tx_cq(cq); + + w = cq->work_done; + + if (w < cq->budget && + napi_complete_done(&cq->napi, w)) { + arm_bit = SET_ARM_BIT; + } else { + arm_bit = 0; + } + + mana_gd_ring_cq(gdma_queue, arm_bit); + + return w; +} + +static int mana_poll(struct napi_struct *napi, int budget) +{ + struct mana_cq *cq = container_of(napi, struct mana_cq, napi); + int w; + + cq->work_done = 0; + cq->budget = budget; + + w = mana_cq_handler(cq, cq->gdma_cq); + + return min(w, budget); +} + +static void mana_schedule_napi(void *context, struct gdma_queue *gdma_queue) +{ + struct mana_cq *cq = context; + + napi_schedule_irqoff(&cq->napi); +} + +static void mana_deinit_cq(struct mana_port_context *apc, struct mana_cq *cq) +{ + struct gdma_dev *gd = apc->ac->gdma_dev; + + if (!cq->gdma_cq) + return; + + mana_gd_destroy_queue(gd->gdma_context, cq->gdma_cq); +} + +static void mana_deinit_txq(struct mana_port_context *apc, struct mana_txq *txq) +{ + struct gdma_dev *gd = apc->ac->gdma_dev; + + if (!txq->gdma_sq) + return; + + mana_gd_destroy_queue(gd->gdma_context, txq->gdma_sq); +} + +static void mana_destroy_txq(struct mana_port_context *apc) +{ + struct napi_struct *napi; + int i; + + if (!apc->tx_qp) + return; + + for (i = 0; i < apc->num_queues; i++) { + napi = &apc->tx_qp[i].tx_cq.napi; + napi_synchronize(napi); + napi_disable(napi); + netif_napi_del(napi); + + mana_destroy_wq_obj(apc, GDMA_SQ, apc->tx_qp[i].tx_object); + + mana_deinit_cq(apc, &apc->tx_qp[i].tx_cq); + + mana_deinit_txq(apc, &apc->tx_qp[i].txq); + } + + kfree(apc->tx_qp); + apc->tx_qp = NULL; +} + +static int mana_create_txq(struct mana_port_context *apc, + struct net_device *net) +{ + struct mana_context *ac = apc->ac; + struct gdma_dev *gd = ac->gdma_dev; + struct mana_obj_spec wq_spec; + struct mana_obj_spec cq_spec; + struct gdma_queue_spec spec; + struct gdma_context *gc; + struct mana_txq *txq; + struct mana_cq *cq; + u32 txq_size; + u32 cq_size; + int err; + int i; + + apc->tx_qp = kcalloc(apc->num_queues, sizeof(struct mana_tx_qp), + GFP_KERNEL); + if (!apc->tx_qp) + return -ENOMEM; + + /* The minimum size of the WQE is 32 bytes, hence + * MAX_SEND_BUFFERS_PER_QUEUE represents the maximum number of WQEs + * the SQ can store. This value is then used to size other queues + * to prevent overflow. + */ + txq_size = MAX_SEND_BUFFERS_PER_QUEUE * 32; + BUILD_BUG_ON(!PAGE_ALIGNED(txq_size)); + + cq_size = MAX_SEND_BUFFERS_PER_QUEUE * COMP_ENTRY_SIZE; + cq_size = PAGE_ALIGN(cq_size); + + gc = gd->gdma_context; + + for (i = 0; i < apc->num_queues; i++) { + apc->tx_qp[i].tx_object = INVALID_MANA_HANDLE; + + /* Create SQ */ + txq = &apc->tx_qp[i].txq; + + u64_stats_init(&txq->stats.syncp); + txq->ndev = net; + txq->net_txq = netdev_get_tx_queue(net, i); + txq->vp_offset = apc->tx_vp_offset; + skb_queue_head_init(&txq->pending_skbs); + + memset(&spec, 0, sizeof(spec)); + spec.type = GDMA_SQ; + spec.monitor_avl_buf = true; + spec.queue_size = txq_size; + err = mana_gd_create_mana_wq_cq(gd, &spec, &txq->gdma_sq); + if (err) + goto out; + + /* Create SQ's CQ */ + cq = &apc->tx_qp[i].tx_cq; + cq->type = MANA_CQ_TYPE_TX; + + cq->txq = txq; + + memset(&spec, 0, sizeof(spec)); + spec.type = GDMA_CQ; + spec.monitor_avl_buf = false; + spec.queue_size = cq_size; + spec.cq.callback = mana_schedule_napi; + spec.cq.parent_eq = ac->eqs[i].eq; + spec.cq.context = cq; + err = mana_gd_create_mana_wq_cq(gd, &spec, &cq->gdma_cq); + if (err) + goto out; + + memset(&wq_spec, 0, sizeof(wq_spec)); + memset(&cq_spec, 0, sizeof(cq_spec)); + + wq_spec.gdma_region = txq->gdma_sq->mem_info.dma_region_handle; + wq_spec.queue_size = txq->gdma_sq->queue_size; + + cq_spec.gdma_region = cq->gdma_cq->mem_info.dma_region_handle; + cq_spec.queue_size = cq->gdma_cq->queue_size; + cq_spec.modr_ctx_id = 0; + cq_spec.attached_eq = cq->gdma_cq->cq.parent->id; + + err = mana_create_wq_obj(apc, apc->port_handle, GDMA_SQ, + &wq_spec, &cq_spec, + &apc->tx_qp[i].tx_object); + + if (err) + goto out; + + txq->gdma_sq->id = wq_spec.queue_index; + cq->gdma_cq->id = cq_spec.queue_index; + + txq->gdma_sq->mem_info.dma_region_handle = + GDMA_INVALID_DMA_REGION; + cq->gdma_cq->mem_info.dma_region_handle = + GDMA_INVALID_DMA_REGION; + + txq->gdma_txq_id = txq->gdma_sq->id; + + cq->gdma_id = cq->gdma_cq->id; + + if (WARN_ON(cq->gdma_id >= gc->max_num_cqs)) { + err = -EINVAL; + goto out; + } + + gc->cq_table[cq->gdma_id] = cq->gdma_cq; + + netif_napi_add_tx(net, &cq->napi, mana_poll); + napi_enable(&cq->napi); + + mana_gd_ring_cq(cq->gdma_cq, SET_ARM_BIT); + } + + return 0; +out: + mana_destroy_txq(apc); + return err; +} + +static void mana_destroy_rxq(struct mana_port_context *apc, + struct mana_rxq *rxq, bool validate_state) + +{ + struct gdma_context *gc = apc->ac->gdma_dev->gdma_context; + struct mana_recv_buf_oob *rx_oob; + struct device *dev = gc->dev; + struct napi_struct *napi; + struct page *page; + int i; + + if (!rxq) + return; + + napi = &rxq->rx_cq.napi; + + if (validate_state) + napi_synchronize(napi); + + napi_disable(napi); + + xdp_rxq_info_unreg(&rxq->xdp_rxq); + + netif_napi_del(napi); + + mana_destroy_wq_obj(apc, GDMA_RQ, rxq->rxobj); + + mana_deinit_cq(apc, &rxq->rx_cq); + + if (rxq->xdp_save_va) + put_page(virt_to_head_page(rxq->xdp_save_va)); + + for (i = 0; i < rxq->num_rx_buf; i++) { + rx_oob = &rxq->rx_oobs[i]; + + if (!rx_oob->buf_va) + continue; + + dma_unmap_single(dev, rx_oob->sgl[0].address, + rx_oob->sgl[0].size, DMA_FROM_DEVICE); + + page = virt_to_head_page(rx_oob->buf_va); + + if (rx_oob->from_pool) + page_pool_put_full_page(rxq->page_pool, page, false); + else + put_page(page); + + rx_oob->buf_va = NULL; + } + + page_pool_destroy(rxq->page_pool); + + if (rxq->gdma_rq) + mana_gd_destroy_queue(gc, rxq->gdma_rq); + + kfree(rxq); +} + +static int mana_fill_rx_oob(struct mana_recv_buf_oob *rx_oob, u32 mem_key, + struct mana_rxq *rxq, struct device *dev) +{ + struct mana_port_context *mpc = netdev_priv(rxq->ndev); + bool from_pool = false; + dma_addr_t da; + void *va; + + if (mpc->rxbufs_pre) + va = mana_get_rxbuf_pre(rxq, &da); + else + va = mana_get_rxfrag(rxq, dev, &da, &from_pool, false); + + if (!va) + return -ENOMEM; + + rx_oob->buf_va = va; + rx_oob->from_pool = from_pool; + + rx_oob->sgl[0].address = da; + rx_oob->sgl[0].size = rxq->datasize; + rx_oob->sgl[0].mem_key = mem_key; + + return 0; +} + +#define MANA_WQE_HEADER_SIZE 16 +#define MANA_WQE_SGE_SIZE 16 + +static int mana_alloc_rx_wqe(struct mana_port_context *apc, + struct mana_rxq *rxq, u32 *rxq_size, u32 *cq_size) +{ + struct gdma_context *gc = apc->ac->gdma_dev->gdma_context; + struct mana_recv_buf_oob *rx_oob; + struct device *dev = gc->dev; + u32 buf_idx; + int ret; + + WARN_ON(rxq->datasize == 0); + + *rxq_size = 0; + *cq_size = 0; + + for (buf_idx = 0; buf_idx < rxq->num_rx_buf; buf_idx++) { + rx_oob = &rxq->rx_oobs[buf_idx]; + memset(rx_oob, 0, sizeof(*rx_oob)); + + rx_oob->num_sge = 1; + + ret = mana_fill_rx_oob(rx_oob, apc->ac->gdma_dev->gpa_mkey, rxq, + dev); + if (ret) + return ret; + + rx_oob->wqe_req.sgl = rx_oob->sgl; + rx_oob->wqe_req.num_sge = rx_oob->num_sge; + rx_oob->wqe_req.inline_oob_size = 0; + rx_oob->wqe_req.inline_oob_data = NULL; + rx_oob->wqe_req.flags = 0; + rx_oob->wqe_req.client_data_unit = 0; + + *rxq_size += ALIGN(MANA_WQE_HEADER_SIZE + + MANA_WQE_SGE_SIZE * rx_oob->num_sge, 32); + *cq_size += COMP_ENTRY_SIZE; + } + + return 0; +} + +static int mana_push_wqe(struct mana_rxq *rxq) +{ + struct mana_recv_buf_oob *rx_oob; + u32 buf_idx; + int err; + + for (buf_idx = 0; buf_idx < rxq->num_rx_buf; buf_idx++) { + rx_oob = &rxq->rx_oobs[buf_idx]; + + err = mana_gd_post_and_ring(rxq->gdma_rq, &rx_oob->wqe_req, + &rx_oob->wqe_inf); + if (err) + return -ENOSPC; + } + + return 0; +} + +static int mana_create_page_pool(struct mana_rxq *rxq, struct gdma_context *gc) +{ + struct page_pool_params pprm = {}; + int ret; + + pprm.pool_size = RX_BUFFERS_PER_QUEUE; + pprm.nid = gc->numa_node; + pprm.napi = &rxq->rx_cq.napi; + + rxq->page_pool = page_pool_create(&pprm); + + if (IS_ERR(rxq->page_pool)) { + ret = PTR_ERR(rxq->page_pool); + rxq->page_pool = NULL; + return ret; + } + + return 0; +} + +static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc, + u32 rxq_idx, struct mana_eq *eq, + struct net_device *ndev) +{ + struct gdma_dev *gd = apc->ac->gdma_dev; + struct mana_obj_spec wq_spec; + struct mana_obj_spec cq_spec; + struct gdma_queue_spec spec; + struct mana_cq *cq = NULL; + struct gdma_context *gc; + u32 cq_size, rq_size; + struct mana_rxq *rxq; + int err; + + gc = gd->gdma_context; + + rxq = kzalloc(struct_size(rxq, rx_oobs, RX_BUFFERS_PER_QUEUE), + GFP_KERNEL); + if (!rxq) + return NULL; + + rxq->ndev = ndev; + rxq->num_rx_buf = RX_BUFFERS_PER_QUEUE; + rxq->rxq_idx = rxq_idx; + rxq->rxobj = INVALID_MANA_HANDLE; + + mana_get_rxbuf_cfg(ndev->mtu, &rxq->datasize, &rxq->alloc_size, + &rxq->headroom); + + /* Create page pool for RX queue */ + err = mana_create_page_pool(rxq, gc); + if (err) { + netdev_err(ndev, "Create page pool err:%d\n", err); + goto out; + } + + err = mana_alloc_rx_wqe(apc, rxq, &rq_size, &cq_size); + if (err) + goto out; + + rq_size = PAGE_ALIGN(rq_size); + cq_size = PAGE_ALIGN(cq_size); + + /* Create RQ */ + memset(&spec, 0, sizeof(spec)); + spec.type = GDMA_RQ; + spec.monitor_avl_buf = true; + spec.queue_size = rq_size; + err = mana_gd_create_mana_wq_cq(gd, &spec, &rxq->gdma_rq); + if (err) + goto out; + + /* Create RQ's CQ */ + cq = &rxq->rx_cq; + cq->type = MANA_CQ_TYPE_RX; + cq->rxq = rxq; + + memset(&spec, 0, sizeof(spec)); + spec.type = GDMA_CQ; + spec.monitor_avl_buf = false; + spec.queue_size = cq_size; + spec.cq.callback = mana_schedule_napi; + spec.cq.parent_eq = eq->eq; + spec.cq.context = cq; + err = mana_gd_create_mana_wq_cq(gd, &spec, &cq->gdma_cq); + if (err) + goto out; + + memset(&wq_spec, 0, sizeof(wq_spec)); + memset(&cq_spec, 0, sizeof(cq_spec)); + wq_spec.gdma_region = rxq->gdma_rq->mem_info.dma_region_handle; + wq_spec.queue_size = rxq->gdma_rq->queue_size; + + cq_spec.gdma_region = cq->gdma_cq->mem_info.dma_region_handle; + cq_spec.queue_size = cq->gdma_cq->queue_size; + cq_spec.modr_ctx_id = 0; + cq_spec.attached_eq = cq->gdma_cq->cq.parent->id; + + err = mana_create_wq_obj(apc, apc->port_handle, GDMA_RQ, + &wq_spec, &cq_spec, &rxq->rxobj); + if (err) + goto out; + + rxq->gdma_rq->id = wq_spec.queue_index; + cq->gdma_cq->id = cq_spec.queue_index; + + rxq->gdma_rq->mem_info.dma_region_handle = GDMA_INVALID_DMA_REGION; + cq->gdma_cq->mem_info.dma_region_handle = GDMA_INVALID_DMA_REGION; + + rxq->gdma_id = rxq->gdma_rq->id; + cq->gdma_id = cq->gdma_cq->id; + + err = mana_push_wqe(rxq); + if (err) + goto out; + + if (WARN_ON(cq->gdma_id >= gc->max_num_cqs)) { + err = -EINVAL; + goto out; + } + + gc->cq_table[cq->gdma_id] = cq->gdma_cq; + + netif_napi_add_weight(ndev, &cq->napi, mana_poll, 1); + + WARN_ON(xdp_rxq_info_reg(&rxq->xdp_rxq, ndev, rxq_idx, + cq->napi.napi_id)); + WARN_ON(xdp_rxq_info_reg_mem_model(&rxq->xdp_rxq, MEM_TYPE_PAGE_POOL, + rxq->page_pool)); + + napi_enable(&cq->napi); + + mana_gd_ring_cq(cq->gdma_cq, SET_ARM_BIT); +out: + if (!err) + return rxq; + + netdev_err(ndev, "Failed to create RXQ: err = %d\n", err); + + mana_destroy_rxq(apc, rxq, false); + + if (cq) + mana_deinit_cq(apc, cq); + + return NULL; +} + +static int mana_add_rx_queues(struct mana_port_context *apc, + struct net_device *ndev) +{ + struct mana_context *ac = apc->ac; + struct mana_rxq *rxq; + int err = 0; + int i; + + for (i = 0; i < apc->num_queues; i++) { + rxq = mana_create_rxq(apc, i, &ac->eqs[i], ndev); + if (!rxq) { + err = -ENOMEM; + goto out; + } + + u64_stats_init(&rxq->stats.syncp); + + apc->rxqs[i] = rxq; + } + + apc->default_rxobj = apc->rxqs[0]->rxobj; +out: + return err; +} + +static void mana_destroy_vport(struct mana_port_context *apc) +{ + struct gdma_dev *gd = apc->ac->gdma_dev; + struct mana_rxq *rxq; + u32 rxq_idx; + + for (rxq_idx = 0; rxq_idx < apc->num_queues; rxq_idx++) { + rxq = apc->rxqs[rxq_idx]; + if (!rxq) + continue; + + mana_destroy_rxq(apc, rxq, true); + apc->rxqs[rxq_idx] = NULL; + } + + mana_destroy_txq(apc); + mana_uncfg_vport(apc); + + if (gd->gdma_context->is_pf) + mana_pf_deregister_hw_vport(apc); +} + +static int mana_create_vport(struct mana_port_context *apc, + struct net_device *net) +{ + struct gdma_dev *gd = apc->ac->gdma_dev; + int err; + + apc->default_rxobj = INVALID_MANA_HANDLE; + + if (gd->gdma_context->is_pf) { + err = mana_pf_register_hw_vport(apc); + if (err) + return err; + } + + err = mana_cfg_vport(apc, gd->pdid, gd->doorbell); + if (err) + return err; + + return mana_create_txq(apc, net); +} + +static void mana_rss_table_init(struct mana_port_context *apc) +{ + int i; + + for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) + apc->indir_table[i] = + ethtool_rxfh_indir_default(i, apc->num_queues); +} + +int mana_config_rss(struct mana_port_context *apc, enum TRI_STATE rx, + bool update_hash, bool update_tab) +{ + u32 queue_idx; + int err; + int i; + + if (update_tab) { + for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) { + queue_idx = apc->indir_table[i]; + apc->rxobj_table[i] = apc->rxqs[queue_idx]->rxobj; + } + } + + err = mana_cfg_vport_steering(apc, rx, true, update_hash, update_tab); + if (err) + return err; + + mana_fence_rqs(apc); + + return 0; +} + +void mana_query_gf_stats(struct mana_port_context *apc) +{ + struct mana_query_gf_stat_resp resp = {}; + struct mana_query_gf_stat_req req = {}; + struct net_device *ndev = apc->ndev; + int err; + + mana_gd_init_req_hdr(&req.hdr, MANA_QUERY_GF_STAT, + sizeof(req), sizeof(resp)); + req.req_stats = STATISTICS_FLAGS_HC_TX_BYTES | + STATISTICS_FLAGS_HC_TX_UCAST_PACKETS | + STATISTICS_FLAGS_HC_TX_UCAST_BYTES | + STATISTICS_FLAGS_HC_TX_MCAST_PACKETS | + STATISTICS_FLAGS_HC_TX_MCAST_BYTES | + STATISTICS_FLAGS_HC_TX_BCAST_PACKETS | + STATISTICS_FLAGS_HC_TX_BCAST_BYTES; + + err = mana_send_request(apc->ac, &req, sizeof(req), &resp, + sizeof(resp)); + if (err) { + netdev_err(ndev, "Failed to query GF stats: %d\n", err); + return; + } + err = mana_verify_resp_hdr(&resp.hdr, MANA_QUERY_GF_STAT, + sizeof(resp)); + if (err || resp.hdr.status) { + netdev_err(ndev, "Failed to query GF stats: %d, 0x%x\n", err, + resp.hdr.status); + return; + } + + apc->eth_stats.hc_tx_bytes = resp.hc_tx_bytes; + apc->eth_stats.hc_tx_ucast_pkts = resp.hc_tx_ucast_pkts; + apc->eth_stats.hc_tx_ucast_bytes = resp.hc_tx_ucast_bytes; + apc->eth_stats.hc_tx_bcast_pkts = resp.hc_tx_bcast_pkts; + apc->eth_stats.hc_tx_bcast_bytes = resp.hc_tx_bcast_bytes; + apc->eth_stats.hc_tx_mcast_pkts = resp.hc_tx_mcast_pkts; + apc->eth_stats.hc_tx_mcast_bytes = resp.hc_tx_mcast_bytes; +} + +static int mana_init_port(struct net_device *ndev) +{ + struct mana_port_context *apc = netdev_priv(ndev); + u32 max_txq, max_rxq, max_queues; + int port_idx = apc->port_idx; + u32 num_indirect_entries; + int err; + + err = mana_init_port_context(apc); + if (err) + return err; + + err = mana_query_vport_cfg(apc, port_idx, &max_txq, &max_rxq, + &num_indirect_entries); + if (err) { + netdev_err(ndev, "Failed to query info for vPort %d\n", + port_idx); + goto reset_apc; + } + + max_queues = min_t(u32, max_txq, max_rxq); + if (apc->max_queues > max_queues) + apc->max_queues = max_queues; + + if (apc->num_queues > apc->max_queues) + apc->num_queues = apc->max_queues; + + eth_hw_addr_set(ndev, apc->mac_addr); + + return 0; + +reset_apc: + kfree(apc->rxqs); + apc->rxqs = NULL; + return err; +} + +int mana_alloc_queues(struct net_device *ndev) +{ + struct mana_port_context *apc = netdev_priv(ndev); + struct gdma_dev *gd = apc->ac->gdma_dev; + int err; + + err = mana_create_vport(apc, ndev); + if (err) + return err; + + err = netif_set_real_num_tx_queues(ndev, apc->num_queues); + if (err) + goto destroy_vport; + + err = mana_add_rx_queues(apc, ndev); + if (err) + goto destroy_vport; + + apc->rss_state = apc->num_queues > 1 ? TRI_STATE_TRUE : TRI_STATE_FALSE; + + err = netif_set_real_num_rx_queues(ndev, apc->num_queues); + if (err) + goto destroy_vport; + + mana_rss_table_init(apc); + + err = mana_config_rss(apc, TRI_STATE_TRUE, true, true); + if (err) + goto destroy_vport; + + if (gd->gdma_context->is_pf) { + err = mana_pf_register_filter(apc); + if (err) + goto destroy_vport; + } + + mana_chn_setxdp(apc, mana_xdp_get(apc)); + + return 0; + +destroy_vport: + mana_destroy_vport(apc); + return err; +} + +int mana_attach(struct net_device *ndev) +{ + struct mana_port_context *apc = netdev_priv(ndev); + int err; + + ASSERT_RTNL(); + + err = mana_init_port(ndev); + if (err) + return err; + + if (apc->port_st_save) { + err = mana_alloc_queues(ndev); + if (err) { + mana_cleanup_port_context(apc); + return err; + } + } + + apc->port_is_up = apc->port_st_save; + + /* Ensure port state updated before txq state */ + smp_wmb(); + + if (apc->port_is_up) + netif_carrier_on(ndev); + + netif_device_attach(ndev); + + return 0; +} + +static int mana_dealloc_queues(struct net_device *ndev) +{ + struct mana_port_context *apc = netdev_priv(ndev); + unsigned long timeout = jiffies + 120 * HZ; + struct gdma_dev *gd = apc->ac->gdma_dev; + struct mana_txq *txq; + struct sk_buff *skb; + int i, err; + u32 tsleep; + + if (apc->port_is_up) + return -EINVAL; + + mana_chn_setxdp(apc, NULL); + + if (gd->gdma_context->is_pf) + mana_pf_deregister_filter(apc); + + /* No packet can be transmitted now since apc->port_is_up is false. + * There is still a tiny chance that mana_poll_tx_cq() can re-enable + * a txq because it may not timely see apc->port_is_up being cleared + * to false, but it doesn't matter since mana_start_xmit() drops any + * new packets due to apc->port_is_up being false. + * + * Drain all the in-flight TX packets. + * A timeout of 120 seconds for all the queues is used. + * This will break the while loop when h/w is not responding. + * This value of 120 has been decided here considering max + * number of queues. + */ + + for (i = 0; i < apc->num_queues; i++) { + txq = &apc->tx_qp[i].txq; + tsleep = 1000; + while (atomic_read(&txq->pending_sends) > 0 && + time_before(jiffies, timeout)) { + usleep_range(tsleep, tsleep + 1000); + tsleep <<= 1; + } + if (atomic_read(&txq->pending_sends)) { + err = pcie_flr(to_pci_dev(gd->gdma_context->dev)); + if (err) { + netdev_err(ndev, "flr failed %d with %d pkts pending in txq %u\n", + err, atomic_read(&txq->pending_sends), + txq->gdma_txq_id); + } + break; + } + } + + for (i = 0; i < apc->num_queues; i++) { + txq = &apc->tx_qp[i].txq; + while ((skb = skb_dequeue(&txq->pending_skbs))) { + mana_unmap_skb(skb, apc); + dev_kfree_skb_any(skb); + } + atomic_set(&txq->pending_sends, 0); + } + /* We're 100% sure the queues can no longer be woken up, because + * we're sure now mana_poll_tx_cq() can't be running. + */ + + apc->rss_state = TRI_STATE_FALSE; + err = mana_config_rss(apc, TRI_STATE_FALSE, false, false); + if (err) { + netdev_err(ndev, "Failed to disable vPort: %d\n", err); + return err; + } + + mana_destroy_vport(apc); + + return 0; +} + +int mana_detach(struct net_device *ndev, bool from_close) +{ + struct mana_port_context *apc = netdev_priv(ndev); + int err; + + ASSERT_RTNL(); + + apc->port_st_save = apc->port_is_up; + apc->port_is_up = false; + + /* Ensure port state updated before txq state */ + smp_wmb(); + + netif_tx_disable(ndev); + netif_carrier_off(ndev); + + if (apc->port_st_save) { + err = mana_dealloc_queues(ndev); + if (err) + return err; + } + + if (!from_close) { + netif_device_detach(ndev); + mana_cleanup_port_context(apc); + } + + return 0; +} + +static int mana_probe_port(struct mana_context *ac, int port_idx, + struct net_device **ndev_storage) +{ + struct gdma_context *gc = ac->gdma_dev->gdma_context; + struct mana_port_context *apc; + struct net_device *ndev; + int err; + + ndev = alloc_etherdev_mq(sizeof(struct mana_port_context), + gc->max_num_queues); + if (!ndev) + return -ENOMEM; + + *ndev_storage = ndev; + + apc = netdev_priv(ndev); + apc->ac = ac; + apc->ndev = ndev; + apc->max_queues = gc->max_num_queues; + apc->num_queues = gc->max_num_queues; + apc->port_handle = INVALID_MANA_HANDLE; + apc->pf_filter_handle = INVALID_MANA_HANDLE; + apc->port_idx = port_idx; + + mutex_init(&apc->vport_mutex); + apc->vport_use_count = 0; + + ndev->netdev_ops = &mana_devops; + ndev->ethtool_ops = &mana_ethtool_ops; + ndev->mtu = ETH_DATA_LEN; + ndev->max_mtu = gc->adapter_mtu - ETH_HLEN; + ndev->min_mtu = ETH_MIN_MTU; + ndev->needed_headroom = MANA_HEADROOM; + ndev->dev_port = port_idx; + SET_NETDEV_DEV(ndev, gc->dev); + + netif_carrier_off(ndev); + + netdev_rss_key_fill(apc->hashkey, MANA_HASH_KEY_SIZE); + + err = mana_init_port(ndev); + if (err) + goto free_net; + + netdev_lockdep_set_classes(ndev); + + ndev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; + ndev->hw_features |= NETIF_F_RXCSUM; + ndev->hw_features |= NETIF_F_TSO | NETIF_F_TSO6; + ndev->hw_features |= NETIF_F_RXHASH; + ndev->features = ndev->hw_features | NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_CTAG_RX; + ndev->vlan_features = ndev->features; + ndev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_NDO_XMIT; + + err = register_netdev(ndev); + if (err) { + netdev_err(ndev, "Unable to register netdev.\n"); + goto reset_apc; + } + + return 0; + +reset_apc: + kfree(apc->rxqs); + apc->rxqs = NULL; +free_net: + *ndev_storage = NULL; + netdev_err(ndev, "Failed to probe vPort %d: %d\n", port_idx, err); + free_netdev(ndev); + return err; +} + +static void adev_release(struct device *dev) +{ + struct mana_adev *madev = container_of(dev, struct mana_adev, adev.dev); + + kfree(madev); +} + +static void remove_adev(struct gdma_dev *gd) +{ + struct auxiliary_device *adev = gd->adev; + int id = adev->id; + + auxiliary_device_delete(adev); + auxiliary_device_uninit(adev); + + mana_adev_idx_free(id); + gd->adev = NULL; +} + +static int add_adev(struct gdma_dev *gd) +{ + struct auxiliary_device *adev; + struct mana_adev *madev; + int ret; + + madev = kzalloc(sizeof(*madev), GFP_KERNEL); + if (!madev) + return -ENOMEM; + + adev = &madev->adev; + ret = mana_adev_idx_alloc(); + if (ret < 0) + goto idx_fail; + adev->id = ret; + + adev->name = "rdma"; + adev->dev.parent = gd->gdma_context->dev; + adev->dev.release = adev_release; + madev->mdev = gd; + + ret = auxiliary_device_init(adev); + if (ret) + goto init_fail; + + ret = auxiliary_device_add(adev); + if (ret) + goto add_fail; + + gd->adev = adev; + return 0; + +add_fail: + auxiliary_device_uninit(adev); + +init_fail: + mana_adev_idx_free(adev->id); + +idx_fail: + kfree(madev); + + return ret; +} + +int mana_probe(struct gdma_dev *gd, bool resuming) +{ + struct gdma_context *gc = gd->gdma_context; + struct mana_context *ac = gd->driver_data; + struct device *dev = gc->dev; + u16 num_ports = 0; + int err; + int i; + + dev_info(dev, + "Microsoft Azure Network Adapter protocol version: %d.%d.%d\n", + MANA_MAJOR_VERSION, MANA_MINOR_VERSION, MANA_MICRO_VERSION); + + err = mana_gd_register_device(gd); + if (err) + return err; + + if (!resuming) { + ac = kzalloc(sizeof(*ac), GFP_KERNEL); + if (!ac) + return -ENOMEM; + + ac->gdma_dev = gd; + gd->driver_data = ac; + } + + err = mana_create_eq(ac); + if (err) + goto out; + + err = mana_query_device_cfg(ac, MANA_MAJOR_VERSION, MANA_MINOR_VERSION, + MANA_MICRO_VERSION, &num_ports); + if (err) + goto out; + + if (!resuming) { + ac->num_ports = num_ports; + } else { + if (ac->num_ports != num_ports) { + dev_err(dev, "The number of vPorts changed: %d->%d\n", + ac->num_ports, num_ports); + err = -EPROTO; + goto out; + } + } + + if (ac->num_ports == 0) + dev_err(dev, "Failed to detect any vPort\n"); + + if (ac->num_ports > MAX_PORTS_IN_MANA_DEV) + ac->num_ports = MAX_PORTS_IN_MANA_DEV; + + if (!resuming) { + for (i = 0; i < ac->num_ports; i++) { + err = mana_probe_port(ac, i, &ac->ports[i]); + if (err) + break; + } + } else { + for (i = 0; i < ac->num_ports; i++) { + rtnl_lock(); + err = mana_attach(ac->ports[i]); + rtnl_unlock(); + if (err) + break; + } + } + + err = add_adev(gd); +out: + if (err) + mana_remove(gd, false); + + return err; +} + +void mana_remove(struct gdma_dev *gd, bool suspending) +{ + struct gdma_context *gc = gd->gdma_context; + struct mana_context *ac = gd->driver_data; + struct device *dev = gc->dev; + struct net_device *ndev; + int err; + int i; + + /* adev currently doesn't support suspending, always remove it */ + if (gd->adev) + remove_adev(gd); + + for (i = 0; i < ac->num_ports; i++) { + ndev = ac->ports[i]; + if (!ndev) { + if (i == 0) + dev_err(dev, "No net device to remove\n"); + goto out; + } + + /* All cleanup actions should stay after rtnl_lock(), otherwise + * other functions may access partially cleaned up data. + */ + rtnl_lock(); + + err = mana_detach(ndev, false); + if (err) + netdev_err(ndev, "Failed to detach vPort %d: %d\n", + i, err); + + if (suspending) { + /* No need to unregister the ndev. */ + rtnl_unlock(); + continue; + } + + unregister_netdevice(ndev); + + rtnl_unlock(); + + free_netdev(ndev); + } + + mana_destroy_eq(ac); +out: + mana_gd_deregister_device(gd); + + if (suspending) + return; + + gd->driver_data = NULL; + gd->gdma_context = NULL; + kfree(ac); +} diff --git a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c new file mode 100644 index 0000000000..607150165a --- /dev/null +++ b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c @@ -0,0 +1,331 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +/* Copyright (c) 2021, Microsoft Corporation. */ + +#include +#include +#include + +#include + +static const struct { + char name[ETH_GSTRING_LEN]; + u16 offset; +} mana_eth_stats[] = { + {"stop_queue", offsetof(struct mana_ethtool_stats, stop_queue)}, + {"wake_queue", offsetof(struct mana_ethtool_stats, wake_queue)}, + {"hc_tx_bytes", offsetof(struct mana_ethtool_stats, hc_tx_bytes)}, + {"hc_tx_ucast_pkts", offsetof(struct mana_ethtool_stats, + hc_tx_ucast_pkts)}, + {"hc_tx_ucast_bytes", offsetof(struct mana_ethtool_stats, + hc_tx_ucast_bytes)}, + {"hc_tx_bcast_pkts", offsetof(struct mana_ethtool_stats, + hc_tx_bcast_pkts)}, + {"hc_tx_bcast_bytes", offsetof(struct mana_ethtool_stats, + hc_tx_bcast_bytes)}, + {"hc_tx_mcast_pkts", offsetof(struct mana_ethtool_stats, + hc_tx_mcast_pkts)}, + {"hc_tx_mcast_bytes", offsetof(struct mana_ethtool_stats, + hc_tx_mcast_bytes)}, + {"tx_cq_err", offsetof(struct mana_ethtool_stats, tx_cqe_err)}, + {"tx_cqe_unknown_type", offsetof(struct mana_ethtool_stats, + tx_cqe_unknown_type)}, + {"rx_coalesced_err", offsetof(struct mana_ethtool_stats, + rx_coalesced_err)}, + {"rx_cqe_unknown_type", offsetof(struct mana_ethtool_stats, + rx_cqe_unknown_type)}, +}; + +static int mana_get_sset_count(struct net_device *ndev, int stringset) +{ + struct mana_port_context *apc = netdev_priv(ndev); + unsigned int num_queues = apc->num_queues; + + if (stringset != ETH_SS_STATS) + return -EINVAL; + + return ARRAY_SIZE(mana_eth_stats) + num_queues * + (MANA_STATS_RX_COUNT + MANA_STATS_TX_COUNT); +} + +static void mana_get_strings(struct net_device *ndev, u32 stringset, u8 *data) +{ + struct mana_port_context *apc = netdev_priv(ndev); + unsigned int num_queues = apc->num_queues; + u8 *p = data; + int i; + + if (stringset != ETH_SS_STATS) + return; + + for (i = 0; i < ARRAY_SIZE(mana_eth_stats); i++) { + memcpy(p, mana_eth_stats[i].name, ETH_GSTRING_LEN); + p += ETH_GSTRING_LEN; + } + + for (i = 0; i < num_queues; i++) { + sprintf(p, "rx_%d_packets", i); + p += ETH_GSTRING_LEN; + sprintf(p, "rx_%d_bytes", i); + p += ETH_GSTRING_LEN; + sprintf(p, "rx_%d_xdp_drop", i); + p += ETH_GSTRING_LEN; + sprintf(p, "rx_%d_xdp_tx", i); + p += ETH_GSTRING_LEN; + sprintf(p, "rx_%d_xdp_redirect", i); + p += ETH_GSTRING_LEN; + } + + for (i = 0; i < num_queues; i++) { + sprintf(p, "tx_%d_packets", i); + p += ETH_GSTRING_LEN; + sprintf(p, "tx_%d_bytes", i); + p += ETH_GSTRING_LEN; + sprintf(p, "tx_%d_xdp_xmit", i); + p += ETH_GSTRING_LEN; + sprintf(p, "tx_%d_tso_packets", i); + p += ETH_GSTRING_LEN; + sprintf(p, "tx_%d_tso_bytes", i); + p += ETH_GSTRING_LEN; + sprintf(p, "tx_%d_tso_inner_packets", i); + p += ETH_GSTRING_LEN; + sprintf(p, "tx_%d_tso_inner_bytes", i); + p += ETH_GSTRING_LEN; + sprintf(p, "tx_%d_long_pkt_fmt", i); + p += ETH_GSTRING_LEN; + sprintf(p, "tx_%d_short_pkt_fmt", i); + p += ETH_GSTRING_LEN; + sprintf(p, "tx_%d_csum_partial", i); + p += ETH_GSTRING_LEN; + sprintf(p, "tx_%d_mana_map_err", i); + p += ETH_GSTRING_LEN; + } +} + +static void mana_get_ethtool_stats(struct net_device *ndev, + struct ethtool_stats *e_stats, u64 *data) +{ + struct mana_port_context *apc = netdev_priv(ndev); + unsigned int num_queues = apc->num_queues; + void *eth_stats = &apc->eth_stats; + struct mana_stats_rx *rx_stats; + struct mana_stats_tx *tx_stats; + unsigned int start; + u64 packets, bytes; + u64 xdp_redirect; + u64 xdp_xmit; + u64 xdp_drop; + u64 xdp_tx; + u64 tso_packets; + u64 tso_bytes; + u64 tso_inner_packets; + u64 tso_inner_bytes; + u64 long_pkt_fmt; + u64 short_pkt_fmt; + u64 csum_partial; + u64 mana_map_err; + int q, i = 0; + + if (!apc->port_is_up) + return; + /* we call mana function to update stats from GDMA */ + mana_query_gf_stats(apc); + + for (q = 0; q < ARRAY_SIZE(mana_eth_stats); q++) + data[i++] = *(u64 *)(eth_stats + mana_eth_stats[q].offset); + + for (q = 0; q < num_queues; q++) { + rx_stats = &apc->rxqs[q]->stats; + + do { + start = u64_stats_fetch_begin(&rx_stats->syncp); + packets = rx_stats->packets; + bytes = rx_stats->bytes; + xdp_drop = rx_stats->xdp_drop; + xdp_tx = rx_stats->xdp_tx; + xdp_redirect = rx_stats->xdp_redirect; + } while (u64_stats_fetch_retry(&rx_stats->syncp, start)); + + data[i++] = packets; + data[i++] = bytes; + data[i++] = xdp_drop; + data[i++] = xdp_tx; + data[i++] = xdp_redirect; + } + + for (q = 0; q < num_queues; q++) { + tx_stats = &apc->tx_qp[q].txq.stats; + + do { + start = u64_stats_fetch_begin(&tx_stats->syncp); + packets = tx_stats->packets; + bytes = tx_stats->bytes; + xdp_xmit = tx_stats->xdp_xmit; + tso_packets = tx_stats->tso_packets; + tso_bytes = tx_stats->tso_bytes; + tso_inner_packets = tx_stats->tso_inner_packets; + tso_inner_bytes = tx_stats->tso_inner_bytes; + long_pkt_fmt = tx_stats->long_pkt_fmt; + short_pkt_fmt = tx_stats->short_pkt_fmt; + csum_partial = tx_stats->csum_partial; + mana_map_err = tx_stats->mana_map_err; + } while (u64_stats_fetch_retry(&tx_stats->syncp, start)); + + data[i++] = packets; + data[i++] = bytes; + data[i++] = xdp_xmit; + data[i++] = tso_packets; + data[i++] = tso_bytes; + data[i++] = tso_inner_packets; + data[i++] = tso_inner_bytes; + data[i++] = long_pkt_fmt; + data[i++] = short_pkt_fmt; + data[i++] = csum_partial; + data[i++] = mana_map_err; + } +} + +static int mana_get_rxnfc(struct net_device *ndev, struct ethtool_rxnfc *cmd, + u32 *rules) +{ + struct mana_port_context *apc = netdev_priv(ndev); + + switch (cmd->cmd) { + case ETHTOOL_GRXRINGS: + cmd->data = apc->num_queues; + return 0; + } + + return -EOPNOTSUPP; +} + +static u32 mana_get_rxfh_key_size(struct net_device *ndev) +{ + return MANA_HASH_KEY_SIZE; +} + +static u32 mana_rss_indir_size(struct net_device *ndev) +{ + return MANA_INDIRECT_TABLE_SIZE; +} + +static int mana_get_rxfh(struct net_device *ndev, u32 *indir, u8 *key, + u8 *hfunc) +{ + struct mana_port_context *apc = netdev_priv(ndev); + int i; + + if (hfunc) + *hfunc = ETH_RSS_HASH_TOP; /* Toeplitz */ + + if (indir) { + for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) + indir[i] = apc->indir_table[i]; + } + + if (key) + memcpy(key, apc->hashkey, MANA_HASH_KEY_SIZE); + + return 0; +} + +static int mana_set_rxfh(struct net_device *ndev, const u32 *indir, + const u8 *key, const u8 hfunc) +{ + struct mana_port_context *apc = netdev_priv(ndev); + bool update_hash = false, update_table = false; + u32 save_table[MANA_INDIRECT_TABLE_SIZE]; + u8 save_key[MANA_HASH_KEY_SIZE]; + int i, err; + + if (!apc->port_is_up) + return -EOPNOTSUPP; + + if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP) + return -EOPNOTSUPP; + + if (indir) { + for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) + if (indir[i] >= apc->num_queues) + return -EINVAL; + + update_table = true; + for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) { + save_table[i] = apc->indir_table[i]; + apc->indir_table[i] = indir[i]; + } + } + + if (key) { + update_hash = true; + memcpy(save_key, apc->hashkey, MANA_HASH_KEY_SIZE); + memcpy(apc->hashkey, key, MANA_HASH_KEY_SIZE); + } + + err = mana_config_rss(apc, TRI_STATE_TRUE, update_hash, update_table); + + if (err) { /* recover to original values */ + if (update_table) { + for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) + apc->indir_table[i] = save_table[i]; + } + + if (update_hash) + memcpy(apc->hashkey, save_key, MANA_HASH_KEY_SIZE); + + mana_config_rss(apc, TRI_STATE_TRUE, update_hash, update_table); + } + + return err; +} + +static void mana_get_channels(struct net_device *ndev, + struct ethtool_channels *channel) +{ + struct mana_port_context *apc = netdev_priv(ndev); + + channel->max_combined = apc->max_queues; + channel->combined_count = apc->num_queues; +} + +static int mana_set_channels(struct net_device *ndev, + struct ethtool_channels *channels) +{ + struct mana_port_context *apc = netdev_priv(ndev); + unsigned int new_count = channels->combined_count; + unsigned int old_count = apc->num_queues; + int err, err2; + + err = mana_detach(ndev, false); + if (err) { + netdev_err(ndev, "mana_detach failed: %d\n", err); + return err; + } + + apc->num_queues = new_count; + err = mana_attach(ndev); + if (!err) + return 0; + + netdev_err(ndev, "mana_attach failed: %d\n", err); + + /* Try to roll it back to the old configuration. */ + apc->num_queues = old_count; + err2 = mana_attach(ndev); + if (err2) + netdev_err(ndev, "mana re-attach failed: %d\n", err2); + + return err; +} + +const struct ethtool_ops mana_ethtool_ops = { + .get_ethtool_stats = mana_get_ethtool_stats, + .get_sset_count = mana_get_sset_count, + .get_strings = mana_get_strings, + .get_rxnfc = mana_get_rxnfc, + .get_rxfh_key_size = mana_get_rxfh_key_size, + .get_rxfh_indir_size = mana_rss_indir_size, + .get_rxfh = mana_get_rxfh, + .set_rxfh = mana_set_rxfh, + .get_channels = mana_get_channels, + .set_channels = mana_set_channels, +}; diff --git a/drivers/net/ethernet/microsoft/mana/shm_channel.c b/drivers/net/ethernet/microsoft/mana/shm_channel.c new file mode 100644 index 0000000000..5553af9c80 --- /dev/null +++ b/drivers/net/ethernet/microsoft/mana/shm_channel.c @@ -0,0 +1,291 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +/* Copyright (c) 2021, Microsoft Corporation. */ + +#include +#include +#include +#include + +#include + +#define PAGE_FRAME_L48_WIDTH_BYTES 6 +#define PAGE_FRAME_L48_WIDTH_BITS (PAGE_FRAME_L48_WIDTH_BYTES * 8) +#define PAGE_FRAME_L48_MASK 0x0000FFFFFFFFFFFF +#define PAGE_FRAME_H4_WIDTH_BITS 4 +#define VECTOR_MASK 0xFFFF +#define SHMEM_VF_RESET_STATE ((u32)-1) + +#define SMC_MSG_TYPE_ESTABLISH_HWC 1 +#define SMC_MSG_TYPE_ESTABLISH_HWC_VERSION 0 + +#define SMC_MSG_TYPE_DESTROY_HWC 2 +#define SMC_MSG_TYPE_DESTROY_HWC_VERSION 0 + +#define SMC_MSG_DIRECTION_REQUEST 0 +#define SMC_MSG_DIRECTION_RESPONSE 1 + +/* Structures labeled with "HW DATA" are exchanged with the hardware. All of + * them are naturally aligned and hence don't need __packed. + */ + +/* Shared memory channel protocol header + * + * msg_type: set on request and response; response matches request. + * msg_version: newer PF writes back older response (matching request) + * older PF acts on latest version known and sets that version in result + * (less than request). + * direction: 0 for request, VF->PF; 1 for response, PF->VF. + * status: 0 on request, + * operation result on response (success = 0, failure = 1 or greater). + * reset_vf: If set on either establish or destroy request, indicates perform + * FLR before/after the operation. + * owner_is_pf: 1 indicates PF owned, 0 indicates VF owned. + */ +union smc_proto_hdr { + u32 as_uint32; + + struct { + u8 msg_type : 3; + u8 msg_version : 3; + u8 reserved_1 : 1; + u8 direction : 1; + + u8 status; + + u8 reserved_2; + + u8 reset_vf : 1; + u8 reserved_3 : 6; + u8 owner_is_pf : 1; + }; +}; /* HW DATA */ + +#define SMC_APERTURE_BITS 256 +#define SMC_BASIC_UNIT (sizeof(u32)) +#define SMC_APERTURE_DWORDS (SMC_APERTURE_BITS / (SMC_BASIC_UNIT * 8)) +#define SMC_LAST_DWORD (SMC_APERTURE_DWORDS - 1) + +static int mana_smc_poll_register(void __iomem *base, bool reset) +{ + void __iomem *ptr = base + SMC_LAST_DWORD * SMC_BASIC_UNIT; + u32 last_dword; + int i; + + /* Poll the hardware for the ownership bit. This should be pretty fast, + * but let's do it in a loop just in case the hardware or the PF + * driver are temporarily busy. + */ + for (i = 0; i < 20 * 1000; i++) { + last_dword = readl(ptr); + + /* shmem reads as 0xFFFFFFFF in the reset case */ + if (reset && last_dword == SHMEM_VF_RESET_STATE) + return 0; + + /* If bit_31 is set, the PF currently owns the SMC. */ + if (!(last_dword & BIT(31))) + return 0; + + usleep_range(1000, 2000); + } + + return -ETIMEDOUT; +} + +static int mana_smc_read_response(struct shm_channel *sc, u32 msg_type, + u32 msg_version, bool reset_vf) +{ + void __iomem *base = sc->base; + union smc_proto_hdr hdr; + int err; + + /* Wait for PF to respond. */ + err = mana_smc_poll_register(base, reset_vf); + if (err) + return err; + + hdr.as_uint32 = readl(base + SMC_LAST_DWORD * SMC_BASIC_UNIT); + + if (reset_vf && hdr.as_uint32 == SHMEM_VF_RESET_STATE) + return 0; + + /* Validate protocol fields from the PF driver */ + if (hdr.msg_type != msg_type || hdr.msg_version > msg_version || + hdr.direction != SMC_MSG_DIRECTION_RESPONSE) { + dev_err(sc->dev, "Wrong SMC response 0x%x, type=%d, ver=%d\n", + hdr.as_uint32, msg_type, msg_version); + return -EPROTO; + } + + /* Validate the operation result */ + if (hdr.status != 0) { + dev_err(sc->dev, "SMC operation failed: 0x%x\n", hdr.status); + return -EPROTO; + } + + return 0; +} + +void mana_smc_init(struct shm_channel *sc, struct device *dev, + void __iomem *base) +{ + sc->dev = dev; + sc->base = base; +} + +int mana_smc_setup_hwc(struct shm_channel *sc, bool reset_vf, u64 eq_addr, + u64 cq_addr, u64 rq_addr, u64 sq_addr, + u32 eq_msix_index) +{ + union smc_proto_hdr *hdr; + u16 all_addr_h4bits = 0; + u16 frame_addr_seq = 0; + u64 frame_addr = 0; + u8 shm_buf[32]; + u64 *shmem; + u32 *dword; + u8 *ptr; + int err; + int i; + + /* Ensure VF already has possession of shared memory */ + err = mana_smc_poll_register(sc->base, false); + if (err) { + dev_err(sc->dev, "Timeout when setting up HWC: %d\n", err); + return err; + } + + if (!PAGE_ALIGNED(eq_addr) || !PAGE_ALIGNED(cq_addr) || + !PAGE_ALIGNED(rq_addr) || !PAGE_ALIGNED(sq_addr)) + return -EINVAL; + + if ((eq_msix_index & VECTOR_MASK) != eq_msix_index) + return -EINVAL; + + /* Scheme for packing four addresses and extra info into 256 bits. + * + * Addresses must be page frame aligned, so only frame address bits + * are transferred. + * + * 52-bit frame addresses are split into the lower 48 bits and upper + * 4 bits. Lower 48 bits of 4 address are written sequentially from + * the start of the 256-bit shared memory region followed by 16 bits + * containing the upper 4 bits of the 4 addresses in sequence. + * + * A 16 bit EQ vector number fills out the next-to-last 32-bit dword. + * + * The final 32-bit dword is used for protocol control information as + * defined in smc_proto_hdr. + */ + + memset(shm_buf, 0, sizeof(shm_buf)); + ptr = shm_buf; + + /* EQ addr: low 48 bits of frame address */ + shmem = (u64 *)ptr; + frame_addr = PHYS_PFN(eq_addr); + *shmem = frame_addr & PAGE_FRAME_L48_MASK; + all_addr_h4bits |= (frame_addr >> PAGE_FRAME_L48_WIDTH_BITS) << + (frame_addr_seq++ * PAGE_FRAME_H4_WIDTH_BITS); + ptr += PAGE_FRAME_L48_WIDTH_BYTES; + + /* CQ addr: low 48 bits of frame address */ + shmem = (u64 *)ptr; + frame_addr = PHYS_PFN(cq_addr); + *shmem = frame_addr & PAGE_FRAME_L48_MASK; + all_addr_h4bits |= (frame_addr >> PAGE_FRAME_L48_WIDTH_BITS) << + (frame_addr_seq++ * PAGE_FRAME_H4_WIDTH_BITS); + ptr += PAGE_FRAME_L48_WIDTH_BYTES; + + /* RQ addr: low 48 bits of frame address */ + shmem = (u64 *)ptr; + frame_addr = PHYS_PFN(rq_addr); + *shmem = frame_addr & PAGE_FRAME_L48_MASK; + all_addr_h4bits |= (frame_addr >> PAGE_FRAME_L48_WIDTH_BITS) << + (frame_addr_seq++ * PAGE_FRAME_H4_WIDTH_BITS); + ptr += PAGE_FRAME_L48_WIDTH_BYTES; + + /* SQ addr: low 48 bits of frame address */ + shmem = (u64 *)ptr; + frame_addr = PHYS_PFN(sq_addr); + *shmem = frame_addr & PAGE_FRAME_L48_MASK; + all_addr_h4bits |= (frame_addr >> PAGE_FRAME_L48_WIDTH_BITS) << + (frame_addr_seq++ * PAGE_FRAME_H4_WIDTH_BITS); + ptr += PAGE_FRAME_L48_WIDTH_BYTES; + + /* High 4 bits of the four frame addresses */ + *((u16 *)ptr) = all_addr_h4bits; + ptr += sizeof(u16); + + /* EQ MSIX vector number */ + *((u16 *)ptr) = (u16)eq_msix_index; + ptr += sizeof(u16); + + /* 32-bit protocol header in final dword */ + *((u32 *)ptr) = 0; + + hdr = (union smc_proto_hdr *)ptr; + hdr->msg_type = SMC_MSG_TYPE_ESTABLISH_HWC; + hdr->msg_version = SMC_MSG_TYPE_ESTABLISH_HWC_VERSION; + hdr->direction = SMC_MSG_DIRECTION_REQUEST; + hdr->reset_vf = reset_vf; + + /* Write 256-message buffer to shared memory (final 32-bit write + * triggers HW to set possession bit to PF). + */ + dword = (u32 *)shm_buf; + for (i = 0; i < SMC_APERTURE_DWORDS; i++) + writel(*dword++, sc->base + i * SMC_BASIC_UNIT); + + /* Read shmem response (polling for VF possession) and validate. + * For setup, waiting for response on shared memory is not strictly + * necessary, since wait occurs later for results to appear in EQE's. + */ + err = mana_smc_read_response(sc, SMC_MSG_TYPE_ESTABLISH_HWC, + SMC_MSG_TYPE_ESTABLISH_HWC_VERSION, + reset_vf); + if (err) { + dev_err(sc->dev, "Error when setting up HWC: %d\n", err); + return err; + } + + return 0; +} + +int mana_smc_teardown_hwc(struct shm_channel *sc, bool reset_vf) +{ + union smc_proto_hdr hdr = {}; + int err; + + /* Ensure already has possession of shared memory */ + err = mana_smc_poll_register(sc->base, false); + if (err) { + dev_err(sc->dev, "Timeout when tearing down HWC\n"); + return err; + } + + /* Set up protocol header for HWC destroy message */ + hdr.msg_type = SMC_MSG_TYPE_DESTROY_HWC; + hdr.msg_version = SMC_MSG_TYPE_DESTROY_HWC_VERSION; + hdr.direction = SMC_MSG_DIRECTION_REQUEST; + hdr.reset_vf = reset_vf; + + /* Write message in high 32 bits of 256-bit shared memory, causing HW + * to set possession bit to PF. + */ + writel(hdr.as_uint32, sc->base + SMC_LAST_DWORD * SMC_BASIC_UNIT); + + /* Read shmem response (polling for VF possession) and validate. + * For teardown, waiting for response is required to ensure hardware + * invalidates MST entries before software frees memory. + */ + err = mana_smc_read_response(sc, SMC_MSG_TYPE_DESTROY_HWC, + SMC_MSG_TYPE_DESTROY_HWC_VERSION, + reset_vf); + if (err) { + dev_err(sc->dev, "Error when tearing down HWC: %d\n", err); + return err; + } + + return 0; +} -- cgit v1.2.3