diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 10:05:51 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 10:05:51 +0000 |
commit | 5d1646d90e1f2cceb9f0828f4b28318cd0ec7744 (patch) | |
tree | a94efe259b9009378be6d90eb30d2b019d95c194 /drivers/vdpa | |
parent | Initial commit. (diff) | |
download | linux-5d1646d90e1f2cceb9f0828f4b28318cd0ec7744.tar.xz linux-5d1646d90e1f2cceb9f0828f4b28318cd0ec7744.zip |
Adding upstream version 5.10.209.upstream/5.10.209
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'drivers/vdpa')
-rw-r--r-- | drivers/vdpa/Kconfig | 52 | ||||
-rw-r--r-- | drivers/vdpa/Makefile | 5 | ||||
-rw-r--r-- | drivers/vdpa/ifcvf/Makefile | 3 | ||||
-rw-r--r-- | drivers/vdpa/ifcvf/ifcvf_base.c | 390 | ||||
-rw-r--r-- | drivers/vdpa/ifcvf/ifcvf_base.h | 122 | ||||
-rw-r--r-- | drivers/vdpa/ifcvf/ifcvf_main.c | 505 | ||||
-rw-r--r-- | drivers/vdpa/mlx5/Makefile | 4 | ||||
-rw-r--r-- | drivers/vdpa/mlx5/core/mlx5_vdpa.h | 96 | ||||
-rw-r--r-- | drivers/vdpa/mlx5/core/mlx5_vdpa_ifc.h | 168 | ||||
-rw-r--r-- | drivers/vdpa/mlx5/core/mr.c | 473 | ||||
-rw-r--r-- | drivers/vdpa/mlx5/core/resources.c | 284 | ||||
-rw-r--r-- | drivers/vdpa/mlx5/net/main.c | 76 | ||||
-rw-r--r-- | drivers/vdpa/mlx5/net/mlx5_vnet.c | 2053 | ||||
-rw-r--r-- | drivers/vdpa/mlx5/net/mlx5_vnet.h | 24 | ||||
-rw-r--r-- | drivers/vdpa/vdpa.c | 184 | ||||
-rw-r--r-- | drivers/vdpa/vdpa_sim/Makefile | 2 | ||||
-rw-r--r-- | drivers/vdpa/vdpa_sim/vdpa_sim.c | 780 |
17 files changed, 5221 insertions, 0 deletions
diff --git a/drivers/vdpa/Kconfig b/drivers/vdpa/Kconfig new file mode 100644 index 000000000..6caf53909 --- /dev/null +++ b/drivers/vdpa/Kconfig @@ -0,0 +1,52 @@ +# SPDX-License-Identifier: GPL-2.0-only +menuconfig VDPA + tristate "vDPA drivers" + help + Enable this module to support vDPA device that uses a + datapath which complies with virtio specifications with + vendor specific control path. + +if VDPA + +config VDPA_SIM + tristate "vDPA device simulator" + depends on RUNTIME_TESTING_MENU && HAS_DMA + select DMA_OPS + select VHOST_RING + select GENERIC_NET_UTILS + default n + help + vDPA networking device simulator which loop TX traffic back + to RX. This device is used for testing, prototyping and + development of vDPA. + +config IFCVF + tristate "Intel IFC VF vDPA driver" + depends on PCI_MSI + default n + help + This kernel module can drive Intel IFC VF NIC to offload + virtio dataplane traffic to hardware. + To compile this driver as a module, choose M here: the module will + be called ifcvf. + +config MLX5_VDPA + bool + select VHOST_IOTLB + help + Support library for Mellanox VDPA drivers. Provides code that is + common for all types of VDPA drivers. The following drivers are planned: + net, block. + +config MLX5_VDPA_NET + tristate "vDPA driver for ConnectX devices" + select MLX5_VDPA + depends on MLX5_CORE + default n + help + VDPA network driver for ConnectX6 and newer. Provides offloading + of virtio net datapath such that descriptors put on the ring will + be executed by the hardware. It also supports a variety of stateless + offloads depending on the actual device used and firmware version. + +endif # VDPA diff --git a/drivers/vdpa/Makefile b/drivers/vdpa/Makefile new file mode 100644 index 000000000..d160e9b63 --- /dev/null +++ b/drivers/vdpa/Makefile @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_VDPA) += vdpa.o +obj-$(CONFIG_VDPA_SIM) += vdpa_sim/ +obj-$(CONFIG_IFCVF) += ifcvf/ +obj-$(CONFIG_MLX5_VDPA) += mlx5/ diff --git a/drivers/vdpa/ifcvf/Makefile b/drivers/vdpa/ifcvf/Makefile new file mode 100644 index 000000000..d70991599 --- /dev/null +++ b/drivers/vdpa/ifcvf/Makefile @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_IFCVF) += ifcvf.o +ifcvf-$(CONFIG_IFCVF) += ifcvf_main.o ifcvf_base.o diff --git a/drivers/vdpa/ifcvf/ifcvf_base.c b/drivers/vdpa/ifcvf/ifcvf_base.c new file mode 100644 index 000000000..f2a128e56 --- /dev/null +++ b/drivers/vdpa/ifcvf/ifcvf_base.c @@ -0,0 +1,390 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Intel IFC VF NIC driver for virtio dataplane offloading + * + * Copyright (C) 2020 Intel Corporation. + * + * Author: Zhu Lingshan <lingshan.zhu@intel.com> + * + */ + +#include "ifcvf_base.h" + +static inline u8 ifc_ioread8(u8 __iomem *addr) +{ + return ioread8(addr); +} +static inline u16 ifc_ioread16 (__le16 __iomem *addr) +{ + return ioread16(addr); +} + +static inline u32 ifc_ioread32(__le32 __iomem *addr) +{ + return ioread32(addr); +} + +static inline void ifc_iowrite8(u8 value, u8 __iomem *addr) +{ + iowrite8(value, addr); +} + +static inline void ifc_iowrite16(u16 value, __le16 __iomem *addr) +{ + iowrite16(value, addr); +} + +static inline void ifc_iowrite32(u32 value, __le32 __iomem *addr) +{ + iowrite32(value, addr); +} + +static void ifc_iowrite64_twopart(u64 val, + __le32 __iomem *lo, __le32 __iomem *hi) +{ + ifc_iowrite32((u32)val, lo); + ifc_iowrite32(val >> 32, hi); +} + +struct ifcvf_adapter *vf_to_adapter(struct ifcvf_hw *hw) +{ + return container_of(hw, struct ifcvf_adapter, vf); +} + +static void __iomem *get_cap_addr(struct ifcvf_hw *hw, + struct virtio_pci_cap *cap) +{ + struct ifcvf_adapter *ifcvf; + struct pci_dev *pdev; + u32 length, offset; + u8 bar; + + length = le32_to_cpu(cap->length); + offset = le32_to_cpu(cap->offset); + bar = cap->bar; + + ifcvf= vf_to_adapter(hw); + pdev = ifcvf->pdev; + + if (bar >= IFCVF_PCI_MAX_RESOURCE) { + IFCVF_DBG(pdev, + "Invalid bar number %u to get capabilities\n", bar); + return NULL; + } + + if (offset + length > pci_resource_len(pdev, bar)) { + IFCVF_DBG(pdev, + "offset(%u) + len(%u) overflows bar%u's capability\n", + offset, length, bar); + return NULL; + } + + return hw->base[bar] + offset; +} + +static int ifcvf_read_config_range(struct pci_dev *dev, + uint32_t *val, int size, int where) +{ + int ret, i; + + for (i = 0; i < size; i += 4) { + ret = pci_read_config_dword(dev, where + i, val + i / 4); + if (ret < 0) + return ret; + } + + return 0; +} + +int ifcvf_init_hw(struct ifcvf_hw *hw, struct pci_dev *pdev) +{ + struct virtio_pci_cap cap; + u16 notify_off; + int ret; + u8 pos; + u32 i; + + ret = pci_read_config_byte(pdev, PCI_CAPABILITY_LIST, &pos); + if (ret < 0) { + IFCVF_ERR(pdev, "Failed to read PCI capability list\n"); + return -EIO; + } + + while (pos) { + ret = ifcvf_read_config_range(pdev, (u32 *)&cap, + sizeof(cap), pos); + if (ret < 0) { + IFCVF_ERR(pdev, + "Failed to get PCI capability at %x\n", pos); + break; + } + + if (cap.cap_vndr != PCI_CAP_ID_VNDR) + goto next; + + switch (cap.cfg_type) { + case VIRTIO_PCI_CAP_COMMON_CFG: + hw->common_cfg = get_cap_addr(hw, &cap); + IFCVF_DBG(pdev, "hw->common_cfg = %p\n", + hw->common_cfg); + break; + case VIRTIO_PCI_CAP_NOTIFY_CFG: + pci_read_config_dword(pdev, pos + sizeof(cap), + &hw->notify_off_multiplier); + hw->notify_bar = cap.bar; + hw->notify_base = get_cap_addr(hw, &cap); + IFCVF_DBG(pdev, "hw->notify_base = %p\n", + hw->notify_base); + break; + case VIRTIO_PCI_CAP_ISR_CFG: + hw->isr = get_cap_addr(hw, &cap); + IFCVF_DBG(pdev, "hw->isr = %p\n", hw->isr); + break; + case VIRTIO_PCI_CAP_DEVICE_CFG: + hw->net_cfg = get_cap_addr(hw, &cap); + IFCVF_DBG(pdev, "hw->net_cfg = %p\n", hw->net_cfg); + break; + } + +next: + pos = cap.cap_next; + } + + if (hw->common_cfg == NULL || hw->notify_base == NULL || + hw->isr == NULL || hw->net_cfg == NULL) { + IFCVF_ERR(pdev, "Incomplete PCI capabilities\n"); + return -EIO; + } + + for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++) { + ifc_iowrite16(i, &hw->common_cfg->queue_select); + notify_off = ifc_ioread16(&hw->common_cfg->queue_notify_off); + hw->vring[i].notify_addr = hw->notify_base + + notify_off * hw->notify_off_multiplier; + } + + hw->lm_cfg = hw->base[IFCVF_LM_BAR]; + + IFCVF_DBG(pdev, + "PCI capability mapping: common cfg: %p, notify base: %p\n, isr cfg: %p, device cfg: %p, multiplier: %u\n", + hw->common_cfg, hw->notify_base, hw->isr, + hw->net_cfg, hw->notify_off_multiplier); + + return 0; +} + +u8 ifcvf_get_status(struct ifcvf_hw *hw) +{ + return ifc_ioread8(&hw->common_cfg->device_status); +} + +void ifcvf_set_status(struct ifcvf_hw *hw, u8 status) +{ + ifc_iowrite8(status, &hw->common_cfg->device_status); +} + +void ifcvf_reset(struct ifcvf_hw *hw) +{ + hw->config_cb.callback = NULL; + hw->config_cb.private = NULL; + + ifcvf_set_status(hw, 0); + /* flush set_status, make sure VF is stopped, reset */ + ifcvf_get_status(hw); +} + +static void ifcvf_add_status(struct ifcvf_hw *hw, u8 status) +{ + if (status != 0) + status |= ifcvf_get_status(hw); + + ifcvf_set_status(hw, status); + ifcvf_get_status(hw); +} + +u64 ifcvf_get_features(struct ifcvf_hw *hw) +{ + struct virtio_pci_common_cfg __iomem *cfg = hw->common_cfg; + u32 features_lo, features_hi; + + ifc_iowrite32(0, &cfg->device_feature_select); + features_lo = ifc_ioread32(&cfg->device_feature); + + ifc_iowrite32(1, &cfg->device_feature_select); + features_hi = ifc_ioread32(&cfg->device_feature); + + return ((u64)features_hi << 32) | features_lo; +} + +void ifcvf_read_net_config(struct ifcvf_hw *hw, u64 offset, + void *dst, int length) +{ + u8 old_gen, new_gen, *p; + int i; + + WARN_ON(offset + length > sizeof(struct virtio_net_config)); + do { + old_gen = ifc_ioread8(&hw->common_cfg->config_generation); + p = dst; + for (i = 0; i < length; i++) + *p++ = ifc_ioread8(hw->net_cfg + offset + i); + + new_gen = ifc_ioread8(&hw->common_cfg->config_generation); + } while (old_gen != new_gen); +} + +void ifcvf_write_net_config(struct ifcvf_hw *hw, u64 offset, + const void *src, int length) +{ + const u8 *p; + int i; + + p = src; + WARN_ON(offset + length > sizeof(struct virtio_net_config)); + for (i = 0; i < length; i++) + ifc_iowrite8(*p++, hw->net_cfg + offset + i); +} + +static void ifcvf_set_features(struct ifcvf_hw *hw, u64 features) +{ + struct virtio_pci_common_cfg __iomem *cfg = hw->common_cfg; + + ifc_iowrite32(0, &cfg->guest_feature_select); + ifc_iowrite32((u32)features, &cfg->guest_feature); + + ifc_iowrite32(1, &cfg->guest_feature_select); + ifc_iowrite32(features >> 32, &cfg->guest_feature); +} + +static int ifcvf_config_features(struct ifcvf_hw *hw) +{ + struct ifcvf_adapter *ifcvf; + + ifcvf = vf_to_adapter(hw); + ifcvf_set_features(hw, hw->req_features); + ifcvf_add_status(hw, VIRTIO_CONFIG_S_FEATURES_OK); + + if (!(ifcvf_get_status(hw) & VIRTIO_CONFIG_S_FEATURES_OK)) { + IFCVF_ERR(ifcvf->pdev, "Failed to set FEATURES_OK status\n"); + return -EIO; + } + + return 0; +} + +u16 ifcvf_get_vq_state(struct ifcvf_hw *hw, u16 qid) +{ + struct ifcvf_lm_cfg __iomem *ifcvf_lm; + void __iomem *avail_idx_addr; + u16 last_avail_idx; + u32 q_pair_id; + + ifcvf_lm = (struct ifcvf_lm_cfg __iomem *)hw->lm_cfg; + q_pair_id = qid / (IFCVF_MAX_QUEUE_PAIRS * 2); + avail_idx_addr = &ifcvf_lm->vring_lm_cfg[q_pair_id].idx_addr[qid % 2]; + last_avail_idx = ifc_ioread16(avail_idx_addr); + + return last_avail_idx; +} + +int ifcvf_set_vq_state(struct ifcvf_hw *hw, u16 qid, u16 num) +{ + struct ifcvf_lm_cfg __iomem *ifcvf_lm; + void __iomem *avail_idx_addr; + u32 q_pair_id; + + ifcvf_lm = (struct ifcvf_lm_cfg __iomem *)hw->lm_cfg; + q_pair_id = qid / (IFCVF_MAX_QUEUE_PAIRS * 2); + avail_idx_addr = &ifcvf_lm->vring_lm_cfg[q_pair_id].idx_addr[qid % 2]; + hw->vring[qid].last_avail_idx = num; + ifc_iowrite16(num, avail_idx_addr); + + return 0; +} + +static int ifcvf_hw_enable(struct ifcvf_hw *hw) +{ + struct virtio_pci_common_cfg __iomem *cfg; + struct ifcvf_adapter *ifcvf; + u32 i; + + ifcvf = vf_to_adapter(hw); + cfg = hw->common_cfg; + ifc_iowrite16(IFCVF_MSI_CONFIG_OFF, &cfg->msix_config); + + if (ifc_ioread16(&cfg->msix_config) == VIRTIO_MSI_NO_VECTOR) { + IFCVF_ERR(ifcvf->pdev, "No msix vector for device config\n"); + return -EINVAL; + } + + for (i = 0; i < hw->nr_vring; i++) { + if (!hw->vring[i].ready) + break; + + ifc_iowrite16(i, &cfg->queue_select); + ifc_iowrite64_twopart(hw->vring[i].desc, &cfg->queue_desc_lo, + &cfg->queue_desc_hi); + ifc_iowrite64_twopart(hw->vring[i].avail, &cfg->queue_avail_lo, + &cfg->queue_avail_hi); + ifc_iowrite64_twopart(hw->vring[i].used, &cfg->queue_used_lo, + &cfg->queue_used_hi); + ifc_iowrite16(hw->vring[i].size, &cfg->queue_size); + ifc_iowrite16(i + IFCVF_MSI_QUEUE_OFF, &cfg->queue_msix_vector); + + if (ifc_ioread16(&cfg->queue_msix_vector) == + VIRTIO_MSI_NO_VECTOR) { + IFCVF_ERR(ifcvf->pdev, + "No msix vector for queue %u\n", i); + return -EINVAL; + } + + ifcvf_set_vq_state(hw, i, hw->vring[i].last_avail_idx); + ifc_iowrite16(1, &cfg->queue_enable); + } + + return 0; +} + +static void ifcvf_hw_disable(struct ifcvf_hw *hw) +{ + struct virtio_pci_common_cfg __iomem *cfg; + u32 i; + + cfg = hw->common_cfg; + ifc_iowrite16(VIRTIO_MSI_NO_VECTOR, &cfg->msix_config); + + for (i = 0; i < hw->nr_vring; i++) { + ifc_iowrite16(i, &cfg->queue_select); + ifc_iowrite16(VIRTIO_MSI_NO_VECTOR, &cfg->queue_msix_vector); + } + + ifc_ioread16(&cfg->queue_msix_vector); +} + +int ifcvf_start_hw(struct ifcvf_hw *hw) +{ + ifcvf_reset(hw); + ifcvf_add_status(hw, VIRTIO_CONFIG_S_ACKNOWLEDGE); + ifcvf_add_status(hw, VIRTIO_CONFIG_S_DRIVER); + + if (ifcvf_config_features(hw) < 0) + return -EINVAL; + + if (ifcvf_hw_enable(hw) < 0) + return -EINVAL; + + ifcvf_add_status(hw, VIRTIO_CONFIG_S_DRIVER_OK); + + return 0; +} + +void ifcvf_stop_hw(struct ifcvf_hw *hw) +{ + ifcvf_hw_disable(hw); + ifcvf_reset(hw); +} + +void ifcvf_notify_queue(struct ifcvf_hw *hw, u16 qid) +{ + ifc_iowrite16(qid, hw->vring[qid].notify_addr); +} diff --git a/drivers/vdpa/ifcvf/ifcvf_base.h b/drivers/vdpa/ifcvf/ifcvf_base.h new file mode 100644 index 000000000..64696d63f --- /dev/null +++ b/drivers/vdpa/ifcvf/ifcvf_base.h @@ -0,0 +1,122 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Intel IFC VF NIC driver for virtio dataplane offloading + * + * Copyright (C) 2020 Intel Corporation. + * + * Author: Zhu Lingshan <lingshan.zhu@intel.com> + * + */ + +#ifndef _IFCVF_H_ +#define _IFCVF_H_ + +#include <linux/pci.h> +#include <linux/pci_regs.h> +#include <linux/vdpa.h> +#include <uapi/linux/virtio_net.h> +#include <uapi/linux/virtio_config.h> +#include <uapi/linux/virtio_pci.h> + +#define IFCVF_VENDOR_ID 0x1AF4 +#define IFCVF_DEVICE_ID 0x1041 +#define IFCVF_SUBSYS_VENDOR_ID 0x8086 +#define IFCVF_SUBSYS_DEVICE_ID 0x001A + +#define IFCVF_SUPPORTED_FEATURES \ + ((1ULL << VIRTIO_NET_F_MAC) | \ + (1ULL << VIRTIO_F_ANY_LAYOUT) | \ + (1ULL << VIRTIO_F_VERSION_1) | \ + (1ULL << VIRTIO_NET_F_STATUS) | \ + (1ULL << VIRTIO_F_ORDER_PLATFORM) | \ + (1ULL << VIRTIO_F_ACCESS_PLATFORM) | \ + (1ULL << VIRTIO_NET_F_MRG_RXBUF)) + +/* Only one queue pair for now. */ +#define IFCVF_MAX_QUEUE_PAIRS 1 + +#define IFCVF_QUEUE_ALIGNMENT PAGE_SIZE +#define IFCVF_QUEUE_MAX 32768 +#define IFCVF_MSI_CONFIG_OFF 0 +#define IFCVF_MSI_QUEUE_OFF 1 +#define IFCVF_PCI_MAX_RESOURCE 6 + +#define IFCVF_LM_CFG_SIZE 0x40 +#define IFCVF_LM_RING_STATE_OFFSET 0x20 +#define IFCVF_LM_BAR 4 + +#define IFCVF_ERR(pdev, fmt, ...) dev_err(&pdev->dev, fmt, ##__VA_ARGS__) +#define IFCVF_DBG(pdev, fmt, ...) dev_dbg(&pdev->dev, fmt, ##__VA_ARGS__) +#define IFCVF_INFO(pdev, fmt, ...) dev_info(&pdev->dev, fmt, ##__VA_ARGS__) + +#define ifcvf_private_to_vf(adapter) \ + (&((struct ifcvf_adapter *)adapter)->vf) + +#define IFCVF_MAX_INTR (IFCVF_MAX_QUEUE_PAIRS * 2 + 1) + +struct vring_info { + u64 desc; + u64 avail; + u64 used; + u16 size; + u16 last_avail_idx; + bool ready; + void __iomem *notify_addr; + u32 irq; + struct vdpa_callback cb; + char msix_name[256]; +}; + +struct ifcvf_hw { + u8 __iomem *isr; + /* Live migration */ + u8 __iomem *lm_cfg; + u16 nr_vring; + /* Notification bar number */ + u8 notify_bar; + /* Notificaiton bar address */ + void __iomem *notify_base; + u32 notify_off_multiplier; + u64 req_features; + struct virtio_pci_common_cfg __iomem *common_cfg; + void __iomem *net_cfg; + struct vring_info vring[IFCVF_MAX_QUEUE_PAIRS * 2]; + void __iomem * const *base; + char config_msix_name[256]; + struct vdpa_callback config_cb; + unsigned int config_irq; +}; + +struct ifcvf_adapter { + struct vdpa_device vdpa; + struct pci_dev *pdev; + struct ifcvf_hw vf; +}; + +struct ifcvf_vring_lm_cfg { + u32 idx_addr[2]; + u8 reserved[IFCVF_LM_CFG_SIZE - 8]; +}; + +struct ifcvf_lm_cfg { + u8 reserved[IFCVF_LM_RING_STATE_OFFSET]; + struct ifcvf_vring_lm_cfg vring_lm_cfg[IFCVF_MAX_QUEUE_PAIRS]; +}; + +int ifcvf_init_hw(struct ifcvf_hw *hw, struct pci_dev *dev); +int ifcvf_start_hw(struct ifcvf_hw *hw); +void ifcvf_stop_hw(struct ifcvf_hw *hw); +void ifcvf_notify_queue(struct ifcvf_hw *hw, u16 qid); +void ifcvf_read_net_config(struct ifcvf_hw *hw, u64 offset, + void *dst, int length); +void ifcvf_write_net_config(struct ifcvf_hw *hw, u64 offset, + const void *src, int length); +u8 ifcvf_get_status(struct ifcvf_hw *hw); +void ifcvf_set_status(struct ifcvf_hw *hw, u8 status); +void io_write64_twopart(u64 val, u32 *lo, u32 *hi); +void ifcvf_reset(struct ifcvf_hw *hw); +u64 ifcvf_get_features(struct ifcvf_hw *hw); +u16 ifcvf_get_vq_state(struct ifcvf_hw *hw, u16 qid); +int ifcvf_set_vq_state(struct ifcvf_hw *hw, u16 qid, u16 num); +struct ifcvf_adapter *vf_to_adapter(struct ifcvf_hw *hw); +#endif /* _IFCVF_H_ */ diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c new file mode 100644 index 000000000..8b4028556 --- /dev/null +++ b/drivers/vdpa/ifcvf/ifcvf_main.c @@ -0,0 +1,505 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Intel IFC VF NIC driver for virtio dataplane offloading + * + * Copyright (C) 2020 Intel Corporation. + * + * Author: Zhu Lingshan <lingshan.zhu@intel.com> + * + */ + +#include <linux/interrupt.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/sysfs.h> +#include "ifcvf_base.h" + +#define VERSION_STRING "0.1" +#define DRIVER_AUTHOR "Intel Corporation" +#define IFCVF_DRIVER_NAME "ifcvf" + +static irqreturn_t ifcvf_config_changed(int irq, void *arg) +{ + struct ifcvf_hw *vf = arg; + + if (vf->config_cb.callback) + return vf->config_cb.callback(vf->config_cb.private); + + return IRQ_HANDLED; +} + +static irqreturn_t ifcvf_intr_handler(int irq, void *arg) +{ + struct vring_info *vring = arg; + + if (vring->cb.callback) + return vring->cb.callback(vring->cb.private); + + return IRQ_HANDLED; +} + +static void ifcvf_free_irq_vectors(void *data) +{ + pci_free_irq_vectors(data); +} + +static void ifcvf_free_irq(struct ifcvf_adapter *adapter, int queues) +{ + struct pci_dev *pdev = adapter->pdev; + struct ifcvf_hw *vf = &adapter->vf; + int i; + + + for (i = 0; i < queues; i++) { + devm_free_irq(&pdev->dev, vf->vring[i].irq, &vf->vring[i]); + vf->vring[i].irq = -EINVAL; + } + + devm_free_irq(&pdev->dev, vf->config_irq, vf); + ifcvf_free_irq_vectors(pdev); +} + +static int ifcvf_request_irq(struct ifcvf_adapter *adapter) +{ + struct pci_dev *pdev = adapter->pdev; + struct ifcvf_hw *vf = &adapter->vf; + int vector, i, ret, irq; + + ret = pci_alloc_irq_vectors(pdev, IFCVF_MAX_INTR, + IFCVF_MAX_INTR, PCI_IRQ_MSIX); + if (ret < 0) { + IFCVF_ERR(pdev, "Failed to alloc IRQ vectors\n"); + return ret; + } + + snprintf(vf->config_msix_name, 256, "ifcvf[%s]-config\n", + pci_name(pdev)); + vector = 0; + vf->config_irq = pci_irq_vector(pdev, vector); + ret = devm_request_irq(&pdev->dev, vf->config_irq, + ifcvf_config_changed, 0, + vf->config_msix_name, vf); + if (ret) { + IFCVF_ERR(pdev, "Failed to request config irq\n"); + return ret; + } + + for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++) { + snprintf(vf->vring[i].msix_name, 256, "ifcvf[%s]-%d\n", + pci_name(pdev), i); + vector = i + IFCVF_MSI_QUEUE_OFF; + irq = pci_irq_vector(pdev, vector); + ret = devm_request_irq(&pdev->dev, irq, + ifcvf_intr_handler, 0, + vf->vring[i].msix_name, + &vf->vring[i]); + if (ret) { + IFCVF_ERR(pdev, + "Failed to request irq for vq %d\n", i); + ifcvf_free_irq(adapter, i); + + return ret; + } + + vf->vring[i].irq = irq; + } + + return 0; +} + +static int ifcvf_start_datapath(void *private) +{ + struct ifcvf_hw *vf = ifcvf_private_to_vf(private); + u8 status; + int ret; + + vf->nr_vring = IFCVF_MAX_QUEUE_PAIRS * 2; + ret = ifcvf_start_hw(vf); + if (ret < 0) { + status = ifcvf_get_status(vf); + status |= VIRTIO_CONFIG_S_FAILED; + ifcvf_set_status(vf, status); + } + + return ret; +} + +static int ifcvf_stop_datapath(void *private) +{ + struct ifcvf_hw *vf = ifcvf_private_to_vf(private); + int i; + + for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++) + vf->vring[i].cb.callback = NULL; + + ifcvf_stop_hw(vf); + + return 0; +} + +static void ifcvf_reset_vring(struct ifcvf_adapter *adapter) +{ + struct ifcvf_hw *vf = ifcvf_private_to_vf(adapter); + int i; + + for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++) { + vf->vring[i].last_avail_idx = 0; + vf->vring[i].desc = 0; + vf->vring[i].avail = 0; + vf->vring[i].used = 0; + vf->vring[i].ready = 0; + vf->vring[i].cb.callback = NULL; + vf->vring[i].cb.private = NULL; + } + + ifcvf_reset(vf); +} + +static struct ifcvf_adapter *vdpa_to_adapter(struct vdpa_device *vdpa_dev) +{ + return container_of(vdpa_dev, struct ifcvf_adapter, vdpa); +} + +static struct ifcvf_hw *vdpa_to_vf(struct vdpa_device *vdpa_dev) +{ + struct ifcvf_adapter *adapter = vdpa_to_adapter(vdpa_dev); + + return &adapter->vf; +} + +static u64 ifcvf_vdpa_get_features(struct vdpa_device *vdpa_dev) +{ + struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); + u64 features; + + features = ifcvf_get_features(vf) & IFCVF_SUPPORTED_FEATURES; + + return features; +} + +static int ifcvf_vdpa_set_features(struct vdpa_device *vdpa_dev, u64 features) +{ + struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); + + vf->req_features = features; + + return 0; +} + +static u8 ifcvf_vdpa_get_status(struct vdpa_device *vdpa_dev) +{ + struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); + + return ifcvf_get_status(vf); +} + +static void ifcvf_vdpa_set_status(struct vdpa_device *vdpa_dev, u8 status) +{ + struct ifcvf_adapter *adapter; + struct ifcvf_hw *vf; + u8 status_old; + int ret; + + vf = vdpa_to_vf(vdpa_dev); + adapter = dev_get_drvdata(vdpa_dev->dev.parent); + status_old = ifcvf_get_status(vf); + + if (status_old == status) + return; + + if ((status_old & VIRTIO_CONFIG_S_DRIVER_OK) && + !(status & VIRTIO_CONFIG_S_DRIVER_OK)) { + ifcvf_stop_datapath(adapter); + ifcvf_free_irq(adapter, IFCVF_MAX_QUEUE_PAIRS * 2); + } + + if (status == 0) { + ifcvf_reset_vring(adapter); + return; + } + + if ((status & VIRTIO_CONFIG_S_DRIVER_OK) && + !(status_old & VIRTIO_CONFIG_S_DRIVER_OK)) { + ret = ifcvf_request_irq(adapter); + if (ret) { + status = ifcvf_get_status(vf); + status |= VIRTIO_CONFIG_S_FAILED; + ifcvf_set_status(vf, status); + return; + } + + if (ifcvf_start_datapath(adapter) < 0) + IFCVF_ERR(adapter->pdev, + "Failed to set ifcvf vdpa status %u\n", + status); + } + + ifcvf_set_status(vf, status); +} + +static u16 ifcvf_vdpa_get_vq_num_max(struct vdpa_device *vdpa_dev) +{ + return IFCVF_QUEUE_MAX; +} + +static int ifcvf_vdpa_get_vq_state(struct vdpa_device *vdpa_dev, u16 qid, + struct vdpa_vq_state *state) +{ + struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); + + state->avail_index = ifcvf_get_vq_state(vf, qid); + return 0; +} + +static int ifcvf_vdpa_set_vq_state(struct vdpa_device *vdpa_dev, u16 qid, + const struct vdpa_vq_state *state) +{ + struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); + + return ifcvf_set_vq_state(vf, qid, state->avail_index); +} + +static void ifcvf_vdpa_set_vq_cb(struct vdpa_device *vdpa_dev, u16 qid, + struct vdpa_callback *cb) +{ + struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); + + vf->vring[qid].cb = *cb; +} + +static void ifcvf_vdpa_set_vq_ready(struct vdpa_device *vdpa_dev, + u16 qid, bool ready) +{ + struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); + + vf->vring[qid].ready = ready; +} + +static bool ifcvf_vdpa_get_vq_ready(struct vdpa_device *vdpa_dev, u16 qid) +{ + struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); + + return vf->vring[qid].ready; +} + +static void ifcvf_vdpa_set_vq_num(struct vdpa_device *vdpa_dev, u16 qid, + u32 num) +{ + struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); + + vf->vring[qid].size = num; +} + +static int ifcvf_vdpa_set_vq_address(struct vdpa_device *vdpa_dev, u16 qid, + u64 desc_area, u64 driver_area, + u64 device_area) +{ + struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); + + vf->vring[qid].desc = desc_area; + vf->vring[qid].avail = driver_area; + vf->vring[qid].used = device_area; + + return 0; +} + +static void ifcvf_vdpa_kick_vq(struct vdpa_device *vdpa_dev, u16 qid) +{ + struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); + + ifcvf_notify_queue(vf, qid); +} + +static u32 ifcvf_vdpa_get_generation(struct vdpa_device *vdpa_dev) +{ + struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); + + return ioread8(&vf->common_cfg->config_generation); +} + +static u32 ifcvf_vdpa_get_device_id(struct vdpa_device *vdpa_dev) +{ + return VIRTIO_ID_NET; +} + +static u32 ifcvf_vdpa_get_vendor_id(struct vdpa_device *vdpa_dev) +{ + return IFCVF_SUBSYS_VENDOR_ID; +} + +static u32 ifcvf_vdpa_get_vq_align(struct vdpa_device *vdpa_dev) +{ + return IFCVF_QUEUE_ALIGNMENT; +} + +static void ifcvf_vdpa_get_config(struct vdpa_device *vdpa_dev, + unsigned int offset, + void *buf, unsigned int len) +{ + struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); + + WARN_ON(offset + len > sizeof(struct virtio_net_config)); + ifcvf_read_net_config(vf, offset, buf, len); +} + +static void ifcvf_vdpa_set_config(struct vdpa_device *vdpa_dev, + unsigned int offset, const void *buf, + unsigned int len) +{ + struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); + + WARN_ON(offset + len > sizeof(struct virtio_net_config)); + ifcvf_write_net_config(vf, offset, buf, len); +} + +static void ifcvf_vdpa_set_config_cb(struct vdpa_device *vdpa_dev, + struct vdpa_callback *cb) +{ + struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); + + vf->config_cb.callback = cb->callback; + vf->config_cb.private = cb->private; +} + +static int ifcvf_vdpa_get_vq_irq(struct vdpa_device *vdpa_dev, + u16 qid) +{ + struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); + + return vf->vring[qid].irq; +} + +/* + * IFCVF currently does't have on-chip IOMMU, so not + * implemented set_map()/dma_map()/dma_unmap() + */ +static const struct vdpa_config_ops ifc_vdpa_ops = { + .get_features = ifcvf_vdpa_get_features, + .set_features = ifcvf_vdpa_set_features, + .get_status = ifcvf_vdpa_get_status, + .set_status = ifcvf_vdpa_set_status, + .get_vq_num_max = ifcvf_vdpa_get_vq_num_max, + .get_vq_state = ifcvf_vdpa_get_vq_state, + .set_vq_state = ifcvf_vdpa_set_vq_state, + .set_vq_cb = ifcvf_vdpa_set_vq_cb, + .set_vq_ready = ifcvf_vdpa_set_vq_ready, + .get_vq_ready = ifcvf_vdpa_get_vq_ready, + .set_vq_num = ifcvf_vdpa_set_vq_num, + .set_vq_address = ifcvf_vdpa_set_vq_address, + .get_vq_irq = ifcvf_vdpa_get_vq_irq, + .kick_vq = ifcvf_vdpa_kick_vq, + .get_generation = ifcvf_vdpa_get_generation, + .get_device_id = ifcvf_vdpa_get_device_id, + .get_vendor_id = ifcvf_vdpa_get_vendor_id, + .get_vq_align = ifcvf_vdpa_get_vq_align, + .get_config = ifcvf_vdpa_get_config, + .set_config = ifcvf_vdpa_set_config, + .set_config_cb = ifcvf_vdpa_set_config_cb, +}; + +static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + struct device *dev = &pdev->dev; + struct ifcvf_adapter *adapter; + struct ifcvf_hw *vf; + int ret, i; + + ret = pcim_enable_device(pdev); + if (ret) { + IFCVF_ERR(pdev, "Failed to enable device\n"); + return ret; + } + + ret = pcim_iomap_regions(pdev, BIT(0) | BIT(2) | BIT(4), + IFCVF_DRIVER_NAME); + if (ret) { + IFCVF_ERR(pdev, "Failed to request MMIO region\n"); + return ret; + } + + ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); + if (ret) { + IFCVF_ERR(pdev, "No usable DMA confiugration\n"); + return ret; + } + + ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); + if (ret) { + IFCVF_ERR(pdev, + "No usable coherent DMA confiugration\n"); + return ret; + } + + ret = devm_add_action_or_reset(dev, ifcvf_free_irq_vectors, pdev); + if (ret) { + IFCVF_ERR(pdev, + "Failed for adding devres for freeing irq vectors\n"); + return ret; + } + + adapter = vdpa_alloc_device(struct ifcvf_adapter, vdpa, + dev, &ifc_vdpa_ops, + IFCVF_MAX_QUEUE_PAIRS * 2); + if (adapter == NULL) { + IFCVF_ERR(pdev, "Failed to allocate vDPA structure"); + return -ENOMEM; + } + + pci_set_master(pdev); + pci_set_drvdata(pdev, adapter); + + vf = &adapter->vf; + vf->base = pcim_iomap_table(pdev); + + adapter->pdev = pdev; + adapter->vdpa.dma_dev = &pdev->dev; + + ret = ifcvf_init_hw(vf, pdev); + if (ret) { + IFCVF_ERR(pdev, "Failed to init IFCVF hw\n"); + goto err; + } + + for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++) + vf->vring[i].irq = -EINVAL; + + ret = vdpa_register_device(&adapter->vdpa); + if (ret) { + IFCVF_ERR(pdev, "Failed to register ifcvf to vdpa bus"); + goto err; + } + + return 0; + +err: + put_device(&adapter->vdpa.dev); + return ret; +} + +static void ifcvf_remove(struct pci_dev *pdev) +{ + struct ifcvf_adapter *adapter = pci_get_drvdata(pdev); + + vdpa_unregister_device(&adapter->vdpa); +} + +static struct pci_device_id ifcvf_pci_ids[] = { + { PCI_DEVICE_SUB(IFCVF_VENDOR_ID, + IFCVF_DEVICE_ID, + IFCVF_SUBSYS_VENDOR_ID, + IFCVF_SUBSYS_DEVICE_ID) }, + { 0 }, +}; +MODULE_DEVICE_TABLE(pci, ifcvf_pci_ids); + +static struct pci_driver ifcvf_driver = { + .name = IFCVF_DRIVER_NAME, + .id_table = ifcvf_pci_ids, + .probe = ifcvf_probe, + .remove = ifcvf_remove, +}; + +module_pci_driver(ifcvf_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_VERSION(VERSION_STRING); diff --git a/drivers/vdpa/mlx5/Makefile b/drivers/vdpa/mlx5/Makefile new file mode 100644 index 000000000..89a5beded --- /dev/null +++ b/drivers/vdpa/mlx5/Makefile @@ -0,0 +1,4 @@ +subdir-ccflags-y += -I$(srctree)/drivers/vdpa/mlx5/core + +obj-$(CONFIG_MLX5_VDPA_NET) += mlx5_vdpa.o +mlx5_vdpa-$(CONFIG_MLX5_VDPA_NET) += net/main.o net/mlx5_vnet.o core/resources.o core/mr.o diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h b/drivers/vdpa/mlx5/core/mlx5_vdpa.h new file mode 100644 index 000000000..b6cc53ba9 --- /dev/null +++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h @@ -0,0 +1,96 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020 Mellanox Technologies Ltd. */ + +#ifndef __MLX5_VDPA_H__ +#define __MLX5_VDPA_H__ + +#include <linux/etherdevice.h> +#include <linux/if_vlan.h> +#include <linux/vdpa.h> +#include <linux/mlx5/driver.h> + +#define MLX5V_ETH_HARD_MTU (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN) + +struct mlx5_vdpa_direct_mr { + u64 start; + u64 end; + u32 perm; + struct mlx5_core_mkey mr; + struct sg_table sg_head; + int log_size; + int nsg; + int nent; + struct list_head list; + u64 offset; +}; + +struct mlx5_vdpa_mr { + struct mlx5_core_mkey mkey; + + /* list of direct MRs descendants of this indirect mr */ + struct list_head head; + unsigned long num_directs; + unsigned long num_klms; + bool initialized; + + /* serialize mkey creation and destruction */ + struct mutex mkey_mtx; +}; + +struct mlx5_vdpa_resources { + u32 pdn; + struct mlx5_uars_page *uar; + void __iomem *kick_addr; + u16 uid; + u32 null_mkey; + bool valid; +}; + +struct mlx5_vdpa_dev { + struct vdpa_device vdev; + struct mlx5_core_dev *mdev; + struct mlx5_vdpa_resources res; + + u64 mlx_features; + u64 actual_features; + u8 status; + u32 max_vqs; + u32 generation; + + struct mlx5_vdpa_mr mr; +}; + +int mlx5_vdpa_alloc_pd(struct mlx5_vdpa_dev *dev, u32 *pdn, u16 uid); +int mlx5_vdpa_dealloc_pd(struct mlx5_vdpa_dev *dev, u32 pdn, u16 uid); +int mlx5_vdpa_get_null_mkey(struct mlx5_vdpa_dev *dev, u32 *null_mkey); +int mlx5_vdpa_create_tis(struct mlx5_vdpa_dev *mvdev, void *in, u32 *tisn); +void mlx5_vdpa_destroy_tis(struct mlx5_vdpa_dev *mvdev, u32 tisn); +int mlx5_vdpa_create_rqt(struct mlx5_vdpa_dev *mvdev, void *in, int inlen, u32 *rqtn); +void mlx5_vdpa_destroy_rqt(struct mlx5_vdpa_dev *mvdev, u32 rqtn); +int mlx5_vdpa_create_tir(struct mlx5_vdpa_dev *mvdev, void *in, u32 *tirn); +void mlx5_vdpa_destroy_tir(struct mlx5_vdpa_dev *mvdev, u32 tirn); +int mlx5_vdpa_alloc_transport_domain(struct mlx5_vdpa_dev *mvdev, u32 *tdn); +void mlx5_vdpa_dealloc_transport_domain(struct mlx5_vdpa_dev *mvdev, u32 tdn); +int mlx5_vdpa_alloc_resources(struct mlx5_vdpa_dev *mvdev); +void mlx5_vdpa_free_resources(struct mlx5_vdpa_dev *mvdev); +int mlx5_vdpa_create_mkey(struct mlx5_vdpa_dev *mvdev, struct mlx5_core_mkey *mkey, u32 *in, + int inlen); +int mlx5_vdpa_destroy_mkey(struct mlx5_vdpa_dev *mvdev, struct mlx5_core_mkey *mkey); +int mlx5_vdpa_handle_set_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb, + bool *change_map); +int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb); +void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev); + +#define mlx5_vdpa_warn(__dev, format, ...) \ + dev_warn((__dev)->mdev->device, "%s:%d:(pid %d) warning: " format, __func__, __LINE__, \ + current->pid, ##__VA_ARGS__) + +#define mlx5_vdpa_info(__dev, format, ...) \ + dev_info((__dev)->mdev->device, "%s:%d:(pid %d): " format, __func__, __LINE__, \ + current->pid, ##__VA_ARGS__) + +#define mlx5_vdpa_dbg(__dev, format, ...) \ + dev_debug((__dev)->mdev->device, "%s:%d:(pid %d): " format, __func__, __LINE__, \ + current->pid, ##__VA_ARGS__) + +#endif /* __MLX5_VDPA_H__ */ diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa_ifc.h b/drivers/vdpa/mlx5/core/mlx5_vdpa_ifc.h new file mode 100644 index 000000000..f6f57a29b --- /dev/null +++ b/drivers/vdpa/mlx5/core/mlx5_vdpa_ifc.h @@ -0,0 +1,168 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020 Mellanox Technologies Ltd. */ + +#ifndef __MLX5_VDPA_IFC_H_ +#define __MLX5_VDPA_IFC_H_ + +#include <linux/mlx5/mlx5_ifc.h> + +enum { + MLX5_VIRTIO_Q_EVENT_MODE_NO_MSIX_MODE = 0x0, + MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE = 0x1, + MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE = 0x2, +}; + +enum { + MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT = 0x1, // do I check this caps? + MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_PACKED = 0x2, +}; + +enum { + MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT = 0, + MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED = 1, +}; + +struct mlx5_ifc_virtio_q_bits { + u8 virtio_q_type[0x8]; + u8 reserved_at_8[0x5]; + u8 event_mode[0x3]; + u8 queue_index[0x10]; + + u8 full_emulation[0x1]; + u8 virtio_version_1_0[0x1]; + u8 reserved_at_22[0x2]; + u8 offload_type[0x4]; + u8 event_qpn_or_msix[0x18]; + + u8 doorbell_stride_index[0x10]; + u8 queue_size[0x10]; + + u8 device_emulation_id[0x20]; + + u8 desc_addr[0x40]; + + u8 used_addr[0x40]; + + u8 available_addr[0x40]; + + u8 virtio_q_mkey[0x20]; + + u8 max_tunnel_desc[0x10]; + u8 reserved_at_170[0x8]; + u8 error_type[0x8]; + + u8 umem_1_id[0x20]; + + u8 umem_1_size[0x20]; + + u8 umem_1_offset[0x40]; + + u8 umem_2_id[0x20]; + + u8 umem_2_size[0x20]; + + u8 umem_2_offset[0x40]; + + u8 umem_3_id[0x20]; + + u8 umem_3_size[0x20]; + + u8 umem_3_offset[0x40]; + + u8 counter_set_id[0x20]; + + u8 reserved_at_320[0x8]; + u8 pd[0x18]; + + u8 reserved_at_340[0xc0]; +}; + +struct mlx5_ifc_virtio_net_q_object_bits { + u8 modify_field_select[0x40]; + + u8 reserved_at_40[0x20]; + + u8 vhca_id[0x10]; + u8 reserved_at_70[0x10]; + + u8 queue_feature_bit_mask_12_3[0xa]; + u8 dirty_bitmap_dump_enable[0x1]; + u8 vhost_log_page[0x5]; + u8 reserved_at_90[0xc]; + u8 state[0x4]; + + u8 reserved_at_a0[0x5]; + u8 queue_feature_bit_mask_2_0[0x3]; + u8 tisn_or_qpn[0x18]; + + u8 dirty_bitmap_mkey[0x20]; + + u8 dirty_bitmap_size[0x20]; + + u8 dirty_bitmap_addr[0x40]; + + u8 hw_available_index[0x10]; + u8 hw_used_index[0x10]; + + u8 reserved_at_160[0xa0]; + + struct mlx5_ifc_virtio_q_bits virtio_q_context; +}; + +struct mlx5_ifc_create_virtio_net_q_in_bits { + struct mlx5_ifc_general_obj_in_cmd_hdr_bits general_obj_in_cmd_hdr; + + struct mlx5_ifc_virtio_net_q_object_bits obj_context; +}; + +struct mlx5_ifc_create_virtio_net_q_out_bits { + struct mlx5_ifc_general_obj_out_cmd_hdr_bits general_obj_out_cmd_hdr; +}; + +struct mlx5_ifc_destroy_virtio_net_q_in_bits { + struct mlx5_ifc_general_obj_in_cmd_hdr_bits general_obj_out_cmd_hdr; +}; + +struct mlx5_ifc_destroy_virtio_net_q_out_bits { + struct mlx5_ifc_general_obj_out_cmd_hdr_bits general_obj_out_cmd_hdr; +}; + +struct mlx5_ifc_query_virtio_net_q_in_bits { + struct mlx5_ifc_general_obj_in_cmd_hdr_bits general_obj_in_cmd_hdr; +}; + +struct mlx5_ifc_query_virtio_net_q_out_bits { + struct mlx5_ifc_general_obj_out_cmd_hdr_bits general_obj_out_cmd_hdr; + + struct mlx5_ifc_virtio_net_q_object_bits obj_context; +}; + +enum { + MLX5_VIRTQ_MODIFY_MASK_STATE = (u64)1 << 0, + MLX5_VIRTQ_MODIFY_MASK_DIRTY_BITMAP_PARAMS = (u64)1 << 3, + MLX5_VIRTQ_MODIFY_MASK_DIRTY_BITMAP_DUMP_ENABLE = (u64)1 << 4, +}; + +enum { + MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT = 0x0, + MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY = 0x1, + MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND = 0x2, + MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR = 0x3, +}; + +enum { + MLX5_RQTC_LIST_Q_TYPE_RQ = 0x0, + MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q = 0x1, +}; + +struct mlx5_ifc_modify_virtio_net_q_in_bits { + struct mlx5_ifc_general_obj_in_cmd_hdr_bits general_obj_in_cmd_hdr; + + struct mlx5_ifc_virtio_net_q_object_bits obj_context; +}; + +struct mlx5_ifc_modify_virtio_net_q_out_bits { + struct mlx5_ifc_general_obj_out_cmd_hdr_bits general_obj_out_cmd_hdr; +}; + +#endif /* __MLX5_VDPA_IFC_H_ */ diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c new file mode 100644 index 000000000..1f94ea46c --- /dev/null +++ b/drivers/vdpa/mlx5/core/mr.c @@ -0,0 +1,473 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020 Mellanox Technologies Ltd. */ + +#include <linux/vdpa.h> +#include <linux/gcd.h> +#include <linux/string.h> +#include <linux/mlx5/qp.h> +#include "mlx5_vdpa.h" + +/* DIV_ROUND_UP where the divider is a power of 2 give by its log base 2 value */ +#define MLX5_DIV_ROUND_UP_POW2(_n, _s) \ +({ \ + u64 __s = _s; \ + u64 _res; \ + _res = (((_n) + (1 << (__s)) - 1) >> (__s)); \ + _res; \ +}) + +static int get_octo_len(u64 len, int page_shift) +{ + u64 page_size = 1ULL << page_shift; + int npages; + + npages = ALIGN(len, page_size) >> page_shift; + return (npages + 1) / 2; +} + +static void mlx5_set_access_mode(void *mkc, int mode) +{ + MLX5_SET(mkc, mkc, access_mode_1_0, mode & 0x3); + MLX5_SET(mkc, mkc, access_mode_4_2, mode >> 2); +} + +static void populate_mtts(struct mlx5_vdpa_direct_mr *mr, __be64 *mtt) +{ + struct scatterlist *sg; + int nsg = mr->nsg; + u64 dma_addr; + u64 dma_len; + int j = 0; + int i; + + for_each_sg(mr->sg_head.sgl, sg, mr->nent, i) { + for (dma_addr = sg_dma_address(sg), dma_len = sg_dma_len(sg); + nsg && dma_len; + nsg--, dma_addr += BIT(mr->log_size), dma_len -= BIT(mr->log_size)) + mtt[j++] = cpu_to_be64(dma_addr); + } +} + +static int create_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr) +{ + int inlen; + void *mkc; + void *in; + int err; + + inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + roundup(MLX5_ST_SZ_BYTES(mtt) * mr->nsg, 16); + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(create_mkey_in, in, uid, mvdev->res.uid); + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + MLX5_SET(mkc, mkc, lw, !!(mr->perm & VHOST_MAP_WO)); + MLX5_SET(mkc, mkc, lr, !!(mr->perm & VHOST_MAP_RO)); + mlx5_set_access_mode(mkc, MLX5_MKC_ACCESS_MODE_MTT); + MLX5_SET(mkc, mkc, qpn, 0xffffff); + MLX5_SET(mkc, mkc, pd, mvdev->res.pdn); + MLX5_SET64(mkc, mkc, start_addr, mr->offset); + MLX5_SET64(mkc, mkc, len, mr->end - mr->start); + MLX5_SET(mkc, mkc, log_page_size, mr->log_size); + MLX5_SET(mkc, mkc, translations_octword_size, + get_octo_len(mr->end - mr->start, mr->log_size)); + MLX5_SET(create_mkey_in, in, translations_octword_actual_size, + get_octo_len(mr->end - mr->start, mr->log_size)); + populate_mtts(mr, MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt)); + err = mlx5_vdpa_create_mkey(mvdev, &mr->mr, in, inlen); + kvfree(in); + if (err) { + mlx5_vdpa_warn(mvdev, "Failed to create direct MR\n"); + return err; + } + + return 0; +} + +static void destroy_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr) +{ + mlx5_vdpa_destroy_mkey(mvdev, &mr->mr); +} + +static u64 map_start(struct vhost_iotlb_map *map, struct mlx5_vdpa_direct_mr *mr) +{ + return max_t(u64, map->start, mr->start); +} + +static u64 map_end(struct vhost_iotlb_map *map, struct mlx5_vdpa_direct_mr *mr) +{ + return min_t(u64, map->last + 1, mr->end); +} + +static u64 maplen(struct vhost_iotlb_map *map, struct mlx5_vdpa_direct_mr *mr) +{ + return map_end(map, mr) - map_start(map, mr); +} + +#define MLX5_VDPA_INVALID_START_ADDR ((u64)-1) +#define MLX5_VDPA_INVALID_LEN ((u64)-1) + +static u64 indir_start_addr(struct mlx5_vdpa_mr *mkey) +{ + struct mlx5_vdpa_direct_mr *s; + + s = list_first_entry_or_null(&mkey->head, struct mlx5_vdpa_direct_mr, list); + if (!s) + return MLX5_VDPA_INVALID_START_ADDR; + + return s->start; +} + +static u64 indir_len(struct mlx5_vdpa_mr *mkey) +{ + struct mlx5_vdpa_direct_mr *s; + struct mlx5_vdpa_direct_mr *e; + + s = list_first_entry_or_null(&mkey->head, struct mlx5_vdpa_direct_mr, list); + if (!s) + return MLX5_VDPA_INVALID_LEN; + + e = list_last_entry(&mkey->head, struct mlx5_vdpa_direct_mr, list); + + return e->end - s->start; +} + +#define LOG_MAX_KLM_SIZE 30 +#define MAX_KLM_SIZE BIT(LOG_MAX_KLM_SIZE) + +static u32 klm_bcount(u64 size) +{ + return (u32)size; +} + +static void fill_indir(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mkey, void *in) +{ + struct mlx5_vdpa_direct_mr *dmr; + struct mlx5_klm *klmarr; + struct mlx5_klm *klm; + bool first = true; + u64 preve; + int i; + + klmarr = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt); + i = 0; + list_for_each_entry(dmr, &mkey->head, list) { +again: + klm = &klmarr[i++]; + if (first) { + preve = dmr->start; + first = false; + } + + if (preve == dmr->start) { + klm->key = cpu_to_be32(dmr->mr.key); + klm->bcount = cpu_to_be32(klm_bcount(dmr->end - dmr->start)); + preve = dmr->end; + } else { + klm->key = cpu_to_be32(mvdev->res.null_mkey); + klm->bcount = cpu_to_be32(klm_bcount(dmr->start - preve)); + preve = dmr->start; + goto again; + } + } +} + +static int klm_byte_size(int nklms) +{ + return 16 * ALIGN(nklms, 4); +} + +static int create_indirect_key(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr) +{ + int inlen; + void *mkc; + void *in; + int err; + u64 start; + u64 len; + + start = indir_start_addr(mr); + len = indir_len(mr); + if (start == MLX5_VDPA_INVALID_START_ADDR || len == MLX5_VDPA_INVALID_LEN) + return -EINVAL; + + inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + klm_byte_size(mr->num_klms); + in = kzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(create_mkey_in, in, uid, mvdev->res.uid); + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + MLX5_SET(mkc, mkc, lw, 1); + MLX5_SET(mkc, mkc, lr, 1); + mlx5_set_access_mode(mkc, MLX5_MKC_ACCESS_MODE_KLMS); + MLX5_SET(mkc, mkc, qpn, 0xffffff); + MLX5_SET(mkc, mkc, pd, mvdev->res.pdn); + MLX5_SET64(mkc, mkc, start_addr, start); + MLX5_SET64(mkc, mkc, len, len); + MLX5_SET(mkc, mkc, translations_octword_size, klm_byte_size(mr->num_klms) / 16); + MLX5_SET(create_mkey_in, in, translations_octword_actual_size, mr->num_klms); + fill_indir(mvdev, mr, in); + err = mlx5_vdpa_create_mkey(mvdev, &mr->mkey, in, inlen); + kfree(in); + return err; +} + +static void destroy_indirect_key(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mkey) +{ + mlx5_vdpa_destroy_mkey(mvdev, &mkey->mkey); +} + +static int map_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr, + struct vhost_iotlb *iotlb) +{ + struct vhost_iotlb_map *map; + unsigned long lgcd = 0; + int log_entity_size; + unsigned long size; + u64 start = 0; + int err; + struct page *pg; + unsigned int nsg; + int sglen; + u64 pa; + u64 paend; + struct scatterlist *sg; + struct device *dma = mvdev->mdev->device; + + for (map = vhost_iotlb_itree_first(iotlb, mr->start, mr->end - 1); + map; map = vhost_iotlb_itree_next(map, start, mr->end - 1)) { + size = maplen(map, mr); + lgcd = gcd(lgcd, size); + start += size; + } + log_entity_size = ilog2(lgcd); + + sglen = 1 << log_entity_size; + nsg = MLX5_DIV_ROUND_UP_POW2(mr->end - mr->start, log_entity_size); + + err = sg_alloc_table(&mr->sg_head, nsg, GFP_KERNEL); + if (err) + return err; + + sg = mr->sg_head.sgl; + for (map = vhost_iotlb_itree_first(iotlb, mr->start, mr->end - 1); + map; map = vhost_iotlb_itree_next(map, mr->start, mr->end - 1)) { + paend = map->addr + maplen(map, mr); + for (pa = map->addr; pa < paend; pa += sglen) { + pg = pfn_to_page(__phys_to_pfn(pa)); + if (!sg) { + mlx5_vdpa_warn(mvdev, "sg null. start 0x%llx, end 0x%llx\n", + map->start, map->last + 1); + err = -ENOMEM; + goto err_map; + } + sg_set_page(sg, pg, sglen, 0); + sg = sg_next(sg); + if (!sg) + goto done; + } + } +done: + mr->log_size = log_entity_size; + mr->nsg = nsg; + mr->nent = dma_map_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0); + if (!mr->nent) { + err = -ENOMEM; + goto err_map; + } + + err = create_direct_mr(mvdev, mr); + if (err) + goto err_direct; + + return 0; + +err_direct: + dma_unmap_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0); +err_map: + sg_free_table(&mr->sg_head); + return err; +} + +static void unmap_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr) +{ + struct device *dma = mvdev->mdev->device; + + destroy_direct_mr(mvdev, mr); + dma_unmap_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0); + sg_free_table(&mr->sg_head); +} + +static int add_direct_chain(struct mlx5_vdpa_dev *mvdev, u64 start, u64 size, u8 perm, + struct vhost_iotlb *iotlb) +{ + struct mlx5_vdpa_mr *mr = &mvdev->mr; + struct mlx5_vdpa_direct_mr *dmr; + struct mlx5_vdpa_direct_mr *n; + LIST_HEAD(tmp); + u64 st; + u64 sz; + int err; + int i = 0; + + st = start; + while (size) { + sz = (u32)min_t(u64, MAX_KLM_SIZE, size); + dmr = kzalloc(sizeof(*dmr), GFP_KERNEL); + if (!dmr) { + err = -ENOMEM; + goto err_alloc; + } + + dmr->start = st; + dmr->end = st + sz; + dmr->perm = perm; + err = map_direct_mr(mvdev, dmr, iotlb); + if (err) { + kfree(dmr); + goto err_alloc; + } + + list_add_tail(&dmr->list, &tmp); + size -= sz; + mr->num_directs++; + mr->num_klms++; + st += sz; + i++; + } + list_splice_tail(&tmp, &mr->head); + return 0; + +err_alloc: + list_for_each_entry_safe(dmr, n, &mr->head, list) { + list_del_init(&dmr->list); + unmap_direct_mr(mvdev, dmr); + kfree(dmr); + } + return err; +} + +/* The iotlb pointer contains a list of maps. Go over the maps, possibly + * merging mergeable maps, and create direct memory keys that provide the + * device access to memory. The direct mkeys are then referred to by the + * indirect memory key that provides access to the enitre address space given + * by iotlb. + */ +static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb) +{ + struct mlx5_vdpa_mr *mr = &mvdev->mr; + struct mlx5_vdpa_direct_mr *dmr; + struct mlx5_vdpa_direct_mr *n; + struct vhost_iotlb_map *map; + u32 pperm = U16_MAX; + u64 last = U64_MAX; + u64 ps = U64_MAX; + u64 pe = U64_MAX; + u64 start = 0; + int err = 0; + int nnuls; + + if (mr->initialized) + return 0; + + INIT_LIST_HEAD(&mr->head); + for (map = vhost_iotlb_itree_first(iotlb, start, last); map; + map = vhost_iotlb_itree_next(map, start, last)) { + start = map->start; + if (pe == map->start && pperm == map->perm) { + pe = map->last + 1; + } else { + if (ps != U64_MAX) { + if (pe < map->start) { + /* We have a hole in the map. Check how + * many null keys are required to fill it. + */ + nnuls = MLX5_DIV_ROUND_UP_POW2(map->start - pe, + LOG_MAX_KLM_SIZE); + mr->num_klms += nnuls; + } + err = add_direct_chain(mvdev, ps, pe - ps, pperm, iotlb); + if (err) + goto err_chain; + } + ps = map->start; + pe = map->last + 1; + pperm = map->perm; + } + } + err = add_direct_chain(mvdev, ps, pe - ps, pperm, iotlb); + if (err) + goto err_chain; + + /* Create the memory key that defines the guests's address space. This + * memory key refers to the direct keys that contain the MTT + * translations + */ + err = create_indirect_key(mvdev, mr); + if (err) + goto err_chain; + + mr->initialized = true; + return 0; + +err_chain: + list_for_each_entry_safe_reverse(dmr, n, &mr->head, list) { + list_del_init(&dmr->list); + unmap_direct_mr(mvdev, dmr); + kfree(dmr); + } + return err; +} + +int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb) +{ + struct mlx5_vdpa_mr *mr = &mvdev->mr; + int err; + + mutex_lock(&mr->mkey_mtx); + err = _mlx5_vdpa_create_mr(mvdev, iotlb); + mutex_unlock(&mr->mkey_mtx); + return err; +} + +void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev) +{ + struct mlx5_vdpa_mr *mr = &mvdev->mr; + struct mlx5_vdpa_direct_mr *dmr; + struct mlx5_vdpa_direct_mr *n; + + mutex_lock(&mr->mkey_mtx); + if (!mr->initialized) + goto out; + + destroy_indirect_key(mvdev, mr); + list_for_each_entry_safe_reverse(dmr, n, &mr->head, list) { + list_del_init(&dmr->list); + unmap_direct_mr(mvdev, dmr); + kfree(dmr); + } + mr->initialized = false; +out: + mutex_unlock(&mr->mkey_mtx); +} + +int mlx5_vdpa_handle_set_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb, + bool *change_map) +{ + struct mlx5_vdpa_mr *mr = &mvdev->mr; + int err = 0; + + *change_map = false; + mutex_lock(&mr->mkey_mtx); + if (mr->initialized) { + mlx5_vdpa_info(mvdev, "memory map update\n"); + *change_map = true; + } + if (!*change_map) + err = _mlx5_vdpa_create_mr(mvdev, iotlb); + mutex_unlock(&mr->mkey_mtx); + + return err; +} diff --git a/drivers/vdpa/mlx5/core/resources.c b/drivers/vdpa/mlx5/core/resources.c new file mode 100644 index 000000000..96e6421c5 --- /dev/null +++ b/drivers/vdpa/mlx5/core/resources.c @@ -0,0 +1,284 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020 Mellanox Technologies Ltd. */ + +#include <linux/mlx5/driver.h> +#include "mlx5_vdpa.h" + +static int alloc_pd(struct mlx5_vdpa_dev *dev, u32 *pdn, u16 uid) +{ + struct mlx5_core_dev *mdev = dev->mdev; + + u32 out[MLX5_ST_SZ_DW(alloc_pd_out)] = {}; + u32 in[MLX5_ST_SZ_DW(alloc_pd_in)] = {}; + int err; + + MLX5_SET(alloc_pd_in, in, opcode, MLX5_CMD_OP_ALLOC_PD); + MLX5_SET(alloc_pd_in, in, uid, uid); + + err = mlx5_cmd_exec_inout(mdev, alloc_pd, in, out); + if (!err) + *pdn = MLX5_GET(alloc_pd_out, out, pd); + + return err; +} + +static int dealloc_pd(struct mlx5_vdpa_dev *dev, u32 pdn, u16 uid) +{ + u32 in[MLX5_ST_SZ_DW(dealloc_pd_in)] = {}; + struct mlx5_core_dev *mdev = dev->mdev; + + MLX5_SET(dealloc_pd_in, in, opcode, MLX5_CMD_OP_DEALLOC_PD); + MLX5_SET(dealloc_pd_in, in, pd, pdn); + MLX5_SET(dealloc_pd_in, in, uid, uid); + return mlx5_cmd_exec_in(mdev, dealloc_pd, in); +} + +static int get_null_mkey(struct mlx5_vdpa_dev *dev, u32 *null_mkey) +{ + u32 out[MLX5_ST_SZ_DW(query_special_contexts_out)] = {}; + u32 in[MLX5_ST_SZ_DW(query_special_contexts_in)] = {}; + struct mlx5_core_dev *mdev = dev->mdev; + int err; + + MLX5_SET(query_special_contexts_in, in, opcode, MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS); + err = mlx5_cmd_exec_inout(mdev, query_special_contexts, in, out); + if (!err) + *null_mkey = MLX5_GET(query_special_contexts_out, out, null_mkey); + return err; +} + +static int create_uctx(struct mlx5_vdpa_dev *mvdev, u16 *uid) +{ + u32 out[MLX5_ST_SZ_DW(create_uctx_out)] = {}; + int inlen; + void *in; + int err; + + /* 0 means not supported */ + if (!MLX5_CAP_GEN(mvdev->mdev, log_max_uctx)) + return -EOPNOTSUPP; + + inlen = MLX5_ST_SZ_BYTES(create_uctx_in); + in = kzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(create_uctx_in, in, opcode, MLX5_CMD_OP_CREATE_UCTX); + MLX5_SET(create_uctx_in, in, uctx.cap, MLX5_UCTX_CAP_RAW_TX); + + err = mlx5_cmd_exec(mvdev->mdev, in, inlen, out, sizeof(out)); + kfree(in); + if (!err) + *uid = MLX5_GET(create_uctx_out, out, uid); + + return err; +} + +static void destroy_uctx(struct mlx5_vdpa_dev *mvdev, u32 uid) +{ + u32 out[MLX5_ST_SZ_DW(destroy_uctx_out)] = {}; + u32 in[MLX5_ST_SZ_DW(destroy_uctx_in)] = {}; + + MLX5_SET(destroy_uctx_in, in, opcode, MLX5_CMD_OP_DESTROY_UCTX); + MLX5_SET(destroy_uctx_in, in, uid, uid); + + mlx5_cmd_exec(mvdev->mdev, in, sizeof(in), out, sizeof(out)); +} + +int mlx5_vdpa_create_tis(struct mlx5_vdpa_dev *mvdev, void *in, u32 *tisn) +{ + u32 out[MLX5_ST_SZ_DW(create_tis_out)] = {}; + int err; + + MLX5_SET(create_tis_in, in, opcode, MLX5_CMD_OP_CREATE_TIS); + MLX5_SET(create_tis_in, in, uid, mvdev->res.uid); + err = mlx5_cmd_exec_inout(mvdev->mdev, create_tis, in, out); + if (!err) + *tisn = MLX5_GET(create_tis_out, out, tisn); + + return err; +} + +void mlx5_vdpa_destroy_tis(struct mlx5_vdpa_dev *mvdev, u32 tisn) +{ + u32 in[MLX5_ST_SZ_DW(destroy_tis_in)] = {}; + + MLX5_SET(destroy_tis_in, in, opcode, MLX5_CMD_OP_DESTROY_TIS); + MLX5_SET(destroy_tis_in, in, uid, mvdev->res.uid); + MLX5_SET(destroy_tis_in, in, tisn, tisn); + mlx5_cmd_exec_in(mvdev->mdev, destroy_tis, in); +} + +int mlx5_vdpa_create_rqt(struct mlx5_vdpa_dev *mvdev, void *in, int inlen, u32 *rqtn) +{ + u32 out[MLX5_ST_SZ_DW(create_rqt_out)] = {}; + int err; + + MLX5_SET(create_rqt_in, in, opcode, MLX5_CMD_OP_CREATE_RQT); + err = mlx5_cmd_exec(mvdev->mdev, in, inlen, out, sizeof(out)); + if (!err) + *rqtn = MLX5_GET(create_rqt_out, out, rqtn); + + return err; +} + +void mlx5_vdpa_destroy_rqt(struct mlx5_vdpa_dev *mvdev, u32 rqtn) +{ + u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)] = {}; + + MLX5_SET(destroy_rqt_in, in, opcode, MLX5_CMD_OP_DESTROY_RQT); + MLX5_SET(destroy_rqt_in, in, uid, mvdev->res.uid); + MLX5_SET(destroy_rqt_in, in, rqtn, rqtn); + mlx5_cmd_exec_in(mvdev->mdev, destroy_rqt, in); +} + +int mlx5_vdpa_create_tir(struct mlx5_vdpa_dev *mvdev, void *in, u32 *tirn) +{ + u32 out[MLX5_ST_SZ_DW(create_tir_out)] = {}; + int err; + + MLX5_SET(create_tir_in, in, opcode, MLX5_CMD_OP_CREATE_TIR); + err = mlx5_cmd_exec_inout(mvdev->mdev, create_tir, in, out); + if (!err) + *tirn = MLX5_GET(create_tir_out, out, tirn); + + return err; +} + +void mlx5_vdpa_destroy_tir(struct mlx5_vdpa_dev *mvdev, u32 tirn) +{ + u32 in[MLX5_ST_SZ_DW(destroy_tir_in)] = {}; + + MLX5_SET(destroy_tir_in, in, opcode, MLX5_CMD_OP_DESTROY_TIR); + MLX5_SET(destroy_tir_in, in, uid, mvdev->res.uid); + MLX5_SET(destroy_tir_in, in, tirn, tirn); + mlx5_cmd_exec_in(mvdev->mdev, destroy_tir, in); +} + +int mlx5_vdpa_alloc_transport_domain(struct mlx5_vdpa_dev *mvdev, u32 *tdn) +{ + u32 out[MLX5_ST_SZ_DW(alloc_transport_domain_out)] = {}; + u32 in[MLX5_ST_SZ_DW(alloc_transport_domain_in)] = {}; + int err; + + MLX5_SET(alloc_transport_domain_in, in, opcode, MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN); + MLX5_SET(alloc_transport_domain_in, in, uid, mvdev->res.uid); + + err = mlx5_cmd_exec_inout(mvdev->mdev, alloc_transport_domain, in, out); + if (!err) + *tdn = MLX5_GET(alloc_transport_domain_out, out, transport_domain); + + return err; +} + +void mlx5_vdpa_dealloc_transport_domain(struct mlx5_vdpa_dev *mvdev, u32 tdn) +{ + u32 in[MLX5_ST_SZ_DW(dealloc_transport_domain_in)] = {}; + + MLX5_SET(dealloc_transport_domain_in, in, opcode, MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN); + MLX5_SET(dealloc_transport_domain_in, in, uid, mvdev->res.uid); + MLX5_SET(dealloc_transport_domain_in, in, transport_domain, tdn); + mlx5_cmd_exec_in(mvdev->mdev, dealloc_transport_domain, in); +} + +int mlx5_vdpa_create_mkey(struct mlx5_vdpa_dev *mvdev, struct mlx5_core_mkey *mkey, u32 *in, + int inlen) +{ + u32 lout[MLX5_ST_SZ_DW(create_mkey_out)] = {}; + u32 mkey_index; + void *mkc; + int err; + + MLX5_SET(create_mkey_in, in, opcode, MLX5_CMD_OP_CREATE_MKEY); + MLX5_SET(create_mkey_in, in, uid, mvdev->res.uid); + + err = mlx5_cmd_exec(mvdev->mdev, in, inlen, lout, sizeof(lout)); + if (err) + return err; + + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + mkey_index = MLX5_GET(create_mkey_out, lout, mkey_index); + mkey->iova = MLX5_GET64(mkc, mkc, start_addr); + mkey->size = MLX5_GET64(mkc, mkc, len); + mkey->key |= mlx5_idx_to_mkey(mkey_index); + mkey->pd = MLX5_GET(mkc, mkc, pd); + return 0; +} + +int mlx5_vdpa_destroy_mkey(struct mlx5_vdpa_dev *mvdev, struct mlx5_core_mkey *mkey) +{ + u32 in[MLX5_ST_SZ_DW(destroy_mkey_in)] = {}; + + MLX5_SET(destroy_mkey_in, in, uid, mvdev->res.uid); + MLX5_SET(destroy_mkey_in, in, opcode, MLX5_CMD_OP_DESTROY_MKEY); + MLX5_SET(destroy_mkey_in, in, mkey_index, mlx5_mkey_to_idx(mkey->key)); + return mlx5_cmd_exec_in(mvdev->mdev, destroy_mkey, in); +} + +int mlx5_vdpa_alloc_resources(struct mlx5_vdpa_dev *mvdev) +{ + u64 offset = MLX5_CAP64_DEV_VDPA_EMULATION(mvdev->mdev, doorbell_bar_offset); + struct mlx5_vdpa_resources *res = &mvdev->res; + struct mlx5_core_dev *mdev = mvdev->mdev; + u64 kick_addr; + int err; + + if (res->valid) { + mlx5_vdpa_warn(mvdev, "resources already allocated\n"); + return -EINVAL; + } + mutex_init(&mvdev->mr.mkey_mtx); + res->uar = mlx5_get_uars_page(mdev); + if (IS_ERR(res->uar)) { + err = PTR_ERR(res->uar); + goto err_uars; + } + + err = create_uctx(mvdev, &res->uid); + if (err) + goto err_uctx; + + err = alloc_pd(mvdev, &res->pdn, res->uid); + if (err) + goto err_pd; + + err = get_null_mkey(mvdev, &res->null_mkey); + if (err) + goto err_key; + + kick_addr = pci_resource_start(mdev->pdev, 0) + offset; + res->kick_addr = ioremap(kick_addr, PAGE_SIZE); + if (!res->kick_addr) { + err = -ENOMEM; + goto err_key; + } + res->valid = true; + + return 0; + +err_key: + dealloc_pd(mvdev, res->pdn, res->uid); +err_pd: + destroy_uctx(mvdev, res->uid); +err_uctx: + mlx5_put_uars_page(mdev, res->uar); +err_uars: + mutex_destroy(&mvdev->mr.mkey_mtx); + return err; +} + +void mlx5_vdpa_free_resources(struct mlx5_vdpa_dev *mvdev) +{ + struct mlx5_vdpa_resources *res = &mvdev->res; + + if (!res->valid) + return; + + iounmap(res->kick_addr); + res->kick_addr = NULL; + dealloc_pd(mvdev, res->pdn, res->uid); + destroy_uctx(mvdev, res->uid); + mlx5_put_uars_page(mvdev->mdev, res->uar); + mutex_destroy(&mvdev->mr.mkey_mtx); + res->valid = false; +} diff --git a/drivers/vdpa/mlx5/net/main.c b/drivers/vdpa/mlx5/net/main.c new file mode 100644 index 000000000..838cd9838 --- /dev/null +++ b/drivers/vdpa/mlx5/net/main.c @@ -0,0 +1,76 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020 Mellanox Technologies Ltd. */ + +#include <linux/module.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/device.h> +#include "mlx5_vdpa_ifc.h" +#include "mlx5_vnet.h" + +MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>"); +MODULE_DESCRIPTION("Mellanox VDPA driver"); +MODULE_LICENSE("Dual BSD/GPL"); + +static bool required_caps_supported(struct mlx5_core_dev *mdev) +{ + u8 event_mode; + u64 got; + + got = MLX5_CAP_GEN_64(mdev, general_obj_types); + + if (!(got & MLX5_GENERAL_OBJ_TYPES_CAP_VIRTIO_NET_Q)) + return false; + + event_mode = MLX5_CAP_DEV_VDPA_EMULATION(mdev, event_mode); + if (!(event_mode & MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE)) + return false; + + if (!MLX5_CAP_DEV_VDPA_EMULATION(mdev, eth_frame_offload_type)) + return false; + + return true; +} + +static void *mlx5_vdpa_add(struct mlx5_core_dev *mdev) +{ + struct mlx5_vdpa_dev *vdev; + + if (mlx5_core_is_pf(mdev)) + return NULL; + + if (!required_caps_supported(mdev)) { + dev_info(mdev->device, "virtio net emulation not supported\n"); + return NULL; + } + vdev = mlx5_vdpa_add_dev(mdev); + if (IS_ERR(vdev)) + return NULL; + + return vdev; +} + +static void mlx5_vdpa_remove(struct mlx5_core_dev *mdev, void *context) +{ + struct mlx5_vdpa_dev *vdev = context; + + mlx5_vdpa_remove_dev(vdev); +} + +static struct mlx5_interface mlx5_vdpa_interface = { + .add = mlx5_vdpa_add, + .remove = mlx5_vdpa_remove, + .protocol = MLX5_INTERFACE_PROTOCOL_VDPA, +}; + +static int __init mlx5_vdpa_init(void) +{ + return mlx5_register_interface(&mlx5_vdpa_interface); +} + +static void __exit mlx5_vdpa_exit(void) +{ + mlx5_unregister_interface(&mlx5_vdpa_interface); +} + +module_init(mlx5_vdpa_init); +module_exit(mlx5_vdpa_exit); diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c new file mode 100644 index 000000000..577ff786f --- /dev/null +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -0,0 +1,2053 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2020 Mellanox Technologies Ltd. */ + +#include <linux/vdpa.h> +#include <uapi/linux/virtio_ids.h> +#include <linux/virtio_config.h> +#include <linux/mlx5/qp.h> +#include <linux/mlx5/device.h> +#include <linux/mlx5/vport.h> +#include <linux/mlx5/fs.h> +#include <linux/mlx5/device.h> +#include <linux/mlx5/mpfs.h> +#include "mlx5_vnet.h" +#include "mlx5_vdpa_ifc.h" +#include "mlx5_vdpa.h" + +#define to_mvdev(__vdev) container_of((__vdev), struct mlx5_vdpa_dev, vdev) + +#define VALID_FEATURES_MASK \ + (BIT_ULL(VIRTIO_NET_F_CSUM) | BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) | \ + BIT_ULL(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) | BIT_ULL(VIRTIO_NET_F_MTU) | BIT_ULL(VIRTIO_NET_F_MAC) | \ + BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) | BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) | \ + BIT_ULL(VIRTIO_NET_F_GUEST_ECN) | BIT_ULL(VIRTIO_NET_F_GUEST_UFO) | BIT_ULL(VIRTIO_NET_F_HOST_TSO4) | \ + BIT_ULL(VIRTIO_NET_F_HOST_TSO6) | BIT_ULL(VIRTIO_NET_F_HOST_ECN) | BIT_ULL(VIRTIO_NET_F_HOST_UFO) | \ + BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) | BIT_ULL(VIRTIO_NET_F_STATUS) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ) | \ + BIT_ULL(VIRTIO_NET_F_CTRL_RX) | BIT_ULL(VIRTIO_NET_F_CTRL_VLAN) | \ + BIT_ULL(VIRTIO_NET_F_CTRL_RX_EXTRA) | BIT_ULL(VIRTIO_NET_F_GUEST_ANNOUNCE) | \ + BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) | BIT_ULL(VIRTIO_NET_F_HASH_REPORT) | \ + BIT_ULL(VIRTIO_NET_F_RSS) | BIT_ULL(VIRTIO_NET_F_RSC_EXT) | BIT_ULL(VIRTIO_NET_F_STANDBY) | \ + BIT_ULL(VIRTIO_NET_F_SPEED_DUPLEX) | BIT_ULL(VIRTIO_F_NOTIFY_ON_EMPTY) | \ + BIT_ULL(VIRTIO_F_ANY_LAYOUT) | BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM) | \ + BIT_ULL(VIRTIO_F_RING_PACKED) | BIT_ULL(VIRTIO_F_ORDER_PLATFORM) | BIT_ULL(VIRTIO_F_SR_IOV)) + +#define VALID_STATUS_MASK \ + (VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK | \ + VIRTIO_CONFIG_S_FEATURES_OK | VIRTIO_CONFIG_S_NEEDS_RESET | VIRTIO_CONFIG_S_FAILED) + +struct mlx5_vdpa_net_resources { + u32 tisn; + u32 tdn; + u32 tirn; + u32 rqtn; + bool valid; +}; + +struct mlx5_vdpa_cq_buf { + struct mlx5_frag_buf_ctrl fbc; + struct mlx5_frag_buf frag_buf; + int cqe_size; + int nent; +}; + +struct mlx5_vdpa_cq { + struct mlx5_core_cq mcq; + struct mlx5_vdpa_cq_buf buf; + struct mlx5_db db; + int cqe; +}; + +struct mlx5_vdpa_umem { + struct mlx5_frag_buf_ctrl fbc; + struct mlx5_frag_buf frag_buf; + int size; + u32 id; +}; + +struct mlx5_vdpa_qp { + struct mlx5_core_qp mqp; + struct mlx5_frag_buf frag_buf; + struct mlx5_db db; + u16 head; + bool fw; +}; + +struct mlx5_vq_restore_info { + u32 num_ent; + u64 desc_addr; + u64 device_addr; + u64 driver_addr; + u16 avail_index; + u16 used_index; + bool ready; + struct vdpa_callback cb; + bool restore; +}; + +struct mlx5_vdpa_virtqueue { + bool ready; + u64 desc_addr; + u64 device_addr; + u64 driver_addr; + u32 num_ent; + struct vdpa_callback event_cb; + + /* Resources for implementing the notification channel from the device + * to the driver. fwqp is the firmware end of an RC connection; the + * other end is vqqp used by the driver. cq is is where completions are + * reported. + */ + struct mlx5_vdpa_cq cq; + struct mlx5_vdpa_qp fwqp; + struct mlx5_vdpa_qp vqqp; + + /* umem resources are required for the virtqueue operation. They're use + * is internal and they must be provided by the driver. + */ + struct mlx5_vdpa_umem umem1; + struct mlx5_vdpa_umem umem2; + struct mlx5_vdpa_umem umem3; + + bool initialized; + int index; + u32 virtq_id; + struct mlx5_vdpa_net *ndev; + u16 avail_idx; + u16 used_idx; + int fw_state; + + /* keep last in the struct */ + struct mlx5_vq_restore_info ri; +}; + +/* We will remove this limitation once mlx5_vdpa_alloc_resources() + * provides for driver space allocation + */ +#define MLX5_MAX_SUPPORTED_VQS 16 + +struct mlx5_vdpa_net { + struct mlx5_vdpa_dev mvdev; + struct mlx5_vdpa_net_resources res; + struct virtio_net_config config; + struct mlx5_vdpa_virtqueue vqs[MLX5_MAX_SUPPORTED_VQS]; + + /* Serialize vq resources creation and destruction. This is required + * since memory map might change and we need to destroy and create + * resources while driver in operational. + */ + struct mutex reslock; + struct mlx5_flow_table *rxft; + struct mlx5_fc *rx_counter; + struct mlx5_flow_handle *rx_rule; + bool setup; + u16 mtu; +}; + +static void free_resources(struct mlx5_vdpa_net *ndev); +static void init_mvqs(struct mlx5_vdpa_net *ndev); +static int setup_driver(struct mlx5_vdpa_net *ndev); +static void teardown_driver(struct mlx5_vdpa_net *ndev); + +static bool mlx5_vdpa_debug; + +#define MLX5_LOG_VIO_FLAG(_feature) \ + do { \ + if (features & BIT_ULL(_feature)) \ + mlx5_vdpa_info(mvdev, "%s\n", #_feature); \ + } while (0) + +#define MLX5_LOG_VIO_STAT(_status) \ + do { \ + if (status & (_status)) \ + mlx5_vdpa_info(mvdev, "%s\n", #_status); \ + } while (0) + +static void print_status(struct mlx5_vdpa_dev *mvdev, u8 status, bool set) +{ + if (status & ~VALID_STATUS_MASK) + mlx5_vdpa_warn(mvdev, "Warning: there are invalid status bits 0x%x\n", + status & ~VALID_STATUS_MASK); + + if (!mlx5_vdpa_debug) + return; + + mlx5_vdpa_info(mvdev, "driver status %s", set ? "set" : "get"); + if (set && !status) { + mlx5_vdpa_info(mvdev, "driver resets the device\n"); + return; + } + + MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_ACKNOWLEDGE); + MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER); + MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER_OK); + MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FEATURES_OK); + MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_NEEDS_RESET); + MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FAILED); +} + +static void print_features(struct mlx5_vdpa_dev *mvdev, u64 features, bool set) +{ + if (features & ~VALID_FEATURES_MASK) + mlx5_vdpa_warn(mvdev, "There are invalid feature bits 0x%llx\n", + features & ~VALID_FEATURES_MASK); + + if (!mlx5_vdpa_debug) + return; + + mlx5_vdpa_info(mvdev, "driver %s feature bits:\n", set ? "sets" : "reads"); + if (!features) + mlx5_vdpa_info(mvdev, "all feature bits are cleared\n"); + + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CSUM); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_CSUM); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MTU); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MAC); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO4); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO6); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ECN); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_UFO); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO4); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO6); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_ECN); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_UFO); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MRG_RXBUF); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STATUS); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VQ); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VLAN); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX_EXTRA); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ANNOUNCE); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MQ); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_MAC_ADDR); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HASH_REPORT); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSS); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSC_EXT); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STANDBY); + MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_SPEED_DUPLEX); + MLX5_LOG_VIO_FLAG(VIRTIO_F_NOTIFY_ON_EMPTY); + MLX5_LOG_VIO_FLAG(VIRTIO_F_ANY_LAYOUT); + MLX5_LOG_VIO_FLAG(VIRTIO_F_VERSION_1); + MLX5_LOG_VIO_FLAG(VIRTIO_F_ACCESS_PLATFORM); + MLX5_LOG_VIO_FLAG(VIRTIO_F_RING_PACKED); + MLX5_LOG_VIO_FLAG(VIRTIO_F_ORDER_PLATFORM); + MLX5_LOG_VIO_FLAG(VIRTIO_F_SR_IOV); +} + +static int create_tis(struct mlx5_vdpa_net *ndev) +{ + struct mlx5_vdpa_dev *mvdev = &ndev->mvdev; + u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {}; + void *tisc; + int err; + + tisc = MLX5_ADDR_OF(create_tis_in, in, ctx); + MLX5_SET(tisc, tisc, transport_domain, ndev->res.tdn); + err = mlx5_vdpa_create_tis(mvdev, in, &ndev->res.tisn); + if (err) + mlx5_vdpa_warn(mvdev, "create TIS (%d)\n", err); + + return err; +} + +static void destroy_tis(struct mlx5_vdpa_net *ndev) +{ + mlx5_vdpa_destroy_tis(&ndev->mvdev, ndev->res.tisn); +} + +#define MLX5_VDPA_CQE_SIZE 64 +#define MLX5_VDPA_LOG_CQE_SIZE ilog2(MLX5_VDPA_CQE_SIZE) + +static int cq_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf, int nent) +{ + struct mlx5_frag_buf *frag_buf = &buf->frag_buf; + u8 log_wq_stride = MLX5_VDPA_LOG_CQE_SIZE; + u8 log_wq_sz = MLX5_VDPA_LOG_CQE_SIZE; + int err; + + err = mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, nent * MLX5_VDPA_CQE_SIZE, frag_buf, + ndev->mvdev.mdev->priv.numa_node); + if (err) + return err; + + mlx5_init_fbc(frag_buf->frags, log_wq_stride, log_wq_sz, &buf->fbc); + + buf->cqe_size = MLX5_VDPA_CQE_SIZE; + buf->nent = nent; + + return 0; +} + +static int umem_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem, int size) +{ + struct mlx5_frag_buf *frag_buf = &umem->frag_buf; + + return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, size, frag_buf, + ndev->mvdev.mdev->priv.numa_node); +} + +static void cq_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf) +{ + mlx5_frag_buf_free(ndev->mvdev.mdev, &buf->frag_buf); +} + +static void *get_cqe(struct mlx5_vdpa_cq *vcq, int n) +{ + return mlx5_frag_buf_get_wqe(&vcq->buf.fbc, n); +} + +static void cq_frag_buf_init(struct mlx5_vdpa_cq *vcq, struct mlx5_vdpa_cq_buf *buf) +{ + struct mlx5_cqe64 *cqe64; + void *cqe; + int i; + + for (i = 0; i < buf->nent; i++) { + cqe = get_cqe(vcq, i); + cqe64 = cqe; + cqe64->op_own = MLX5_CQE_INVALID << 4; + } +} + +static void *get_sw_cqe(struct mlx5_vdpa_cq *cq, int n) +{ + struct mlx5_cqe64 *cqe64 = get_cqe(cq, n & (cq->cqe - 1)); + + if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) && + !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & cq->cqe))) + return cqe64; + + return NULL; +} + +static void rx_post(struct mlx5_vdpa_qp *vqp, int n) +{ + vqp->head += n; + vqp->db.db[0] = cpu_to_be32(vqp->head); +} + +static void qp_prepare(struct mlx5_vdpa_net *ndev, bool fw, void *in, + struct mlx5_vdpa_virtqueue *mvq, u32 num_ent) +{ + struct mlx5_vdpa_qp *vqp; + __be64 *pas; + void *qpc; + + vqp = fw ? &mvq->fwqp : &mvq->vqqp; + MLX5_SET(create_qp_in, in, uid, ndev->mvdev.res.uid); + qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); + if (vqp->fw) { + /* Firmware QP is allocated by the driver for the firmware's + * use so we can skip part of the params as they will be chosen by firmware + */ + qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); + MLX5_SET(qpc, qpc, rq_type, MLX5_ZERO_LEN_RQ); + MLX5_SET(qpc, qpc, no_sq, 1); + return; + } + + MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC); + MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED); + MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn); + MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES); + MLX5_SET(qpc, qpc, uar_page, ndev->mvdev.res.uar->index); + MLX5_SET(qpc, qpc, log_page_size, vqp->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET(qpc, qpc, no_sq, 1); + MLX5_SET(qpc, qpc, cqn_rcv, mvq->cq.mcq.cqn); + MLX5_SET(qpc, qpc, log_rq_size, ilog2(num_ent)); + MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ); + pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, in, pas); + mlx5_fill_page_frag_array(&vqp->frag_buf, pas); +} + +static int rq_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp, u32 num_ent) +{ + return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, + num_ent * sizeof(struct mlx5_wqe_data_seg), &vqp->frag_buf, + ndev->mvdev.mdev->priv.numa_node); +} + +static void rq_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp) +{ + mlx5_frag_buf_free(ndev->mvdev.mdev, &vqp->frag_buf); +} + +static int qp_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, + struct mlx5_vdpa_qp *vqp) +{ + struct mlx5_core_dev *mdev = ndev->mvdev.mdev; + int inlen = MLX5_ST_SZ_BYTES(create_qp_in); + u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {}; + void *qpc; + void *in; + int err; + + if (!vqp->fw) { + vqp = &mvq->vqqp; + err = rq_buf_alloc(ndev, vqp, mvq->num_ent); + if (err) + return err; + + err = mlx5_db_alloc(ndev->mvdev.mdev, &vqp->db); + if (err) + goto err_db; + inlen += vqp->frag_buf.npages * sizeof(__be64); + } + + in = kzalloc(inlen, GFP_KERNEL); + if (!in) { + err = -ENOMEM; + goto err_kzalloc; + } + + qp_prepare(ndev, vqp->fw, in, mvq, mvq->num_ent); + qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); + MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC); + MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED); + MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn); + MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES); + if (!vqp->fw) + MLX5_SET64(qpc, qpc, dbr_addr, vqp->db.dma); + MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP); + err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out)); + kfree(in); + if (err) + goto err_kzalloc; + + vqp->mqp.uid = ndev->mvdev.res.uid; + vqp->mqp.qpn = MLX5_GET(create_qp_out, out, qpn); + + if (!vqp->fw) + rx_post(vqp, mvq->num_ent); + + return 0; + +err_kzalloc: + if (!vqp->fw) + mlx5_db_free(ndev->mvdev.mdev, &vqp->db); +err_db: + if (!vqp->fw) + rq_buf_free(ndev, vqp); + + return err; +} + +static void qp_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp) +{ + u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {}; + + MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP); + MLX5_SET(destroy_qp_in, in, qpn, vqp->mqp.qpn); + MLX5_SET(destroy_qp_in, in, uid, ndev->mvdev.res.uid); + if (mlx5_cmd_exec_in(ndev->mvdev.mdev, destroy_qp, in)) + mlx5_vdpa_warn(&ndev->mvdev, "destroy qp 0x%x\n", vqp->mqp.qpn); + if (!vqp->fw) { + mlx5_db_free(ndev->mvdev.mdev, &vqp->db); + rq_buf_free(ndev, vqp); + } +} + +static void *next_cqe_sw(struct mlx5_vdpa_cq *cq) +{ + return get_sw_cqe(cq, cq->mcq.cons_index); +} + +static int mlx5_vdpa_poll_one(struct mlx5_vdpa_cq *vcq) +{ + struct mlx5_cqe64 *cqe64; + + cqe64 = next_cqe_sw(vcq); + if (!cqe64) + return -EAGAIN; + + vcq->mcq.cons_index++; + return 0; +} + +static void mlx5_vdpa_handle_completions(struct mlx5_vdpa_virtqueue *mvq, int num) +{ + mlx5_cq_set_ci(&mvq->cq.mcq); + + /* make sure CQ cosumer update is visible to the hardware before updating + * RX doorbell record. + */ + dma_wmb(); + rx_post(&mvq->vqqp, num); + if (mvq->event_cb.callback) + mvq->event_cb.callback(mvq->event_cb.private); +} + +static void mlx5_vdpa_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe) +{ + struct mlx5_vdpa_virtqueue *mvq = container_of(mcq, struct mlx5_vdpa_virtqueue, cq.mcq); + struct mlx5_vdpa_net *ndev = mvq->ndev; + void __iomem *uar_page = ndev->mvdev.res.uar->map; + int num = 0; + + while (!mlx5_vdpa_poll_one(&mvq->cq)) { + num++; + if (num > mvq->num_ent / 2) { + /* If completions keep coming while we poll, we want to + * let the hardware know that we consumed them by + * updating the doorbell record. We also let vdpa core + * know about this so it passes it on the virtio driver + * on the guest. + */ + mlx5_vdpa_handle_completions(mvq, num); + num = 0; + } + } + + if (num) + mlx5_vdpa_handle_completions(mvq, num); + + mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index); +} + +static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent) +{ + struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx]; + struct mlx5_core_dev *mdev = ndev->mvdev.mdev; + void __iomem *uar_page = ndev->mvdev.res.uar->map; + u32 out[MLX5_ST_SZ_DW(create_cq_out)]; + struct mlx5_vdpa_cq *vcq = &mvq->cq; + __be64 *pas; + int inlen; + void *cqc; + void *in; + int err; + int eqn; + + err = mlx5_db_alloc(mdev, &vcq->db); + if (err) + return err; + + vcq->mcq.set_ci_db = vcq->db.db; + vcq->mcq.arm_db = vcq->db.db + 1; + vcq->mcq.cqe_sz = 64; + + err = cq_frag_buf_alloc(ndev, &vcq->buf, num_ent); + if (err) + goto err_db; + + cq_frag_buf_init(vcq, &vcq->buf); + + inlen = MLX5_ST_SZ_BYTES(create_cq_in) + + MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * vcq->buf.frag_buf.npages; + in = kzalloc(inlen, GFP_KERNEL); + if (!in) { + err = -ENOMEM; + goto err_vzalloc; + } + + MLX5_SET(create_cq_in, in, uid, ndev->mvdev.res.uid); + pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas); + mlx5_fill_page_frag_array(&vcq->buf.frag_buf, pas); + + cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context); + MLX5_SET(cqc, cqc, log_page_size, vcq->buf.frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); + + /* Use vector 0 by default. Consider adding code to choose least used + * vector. + */ + err = mlx5_vector2eqn(mdev, 0, &eqn); + if (err) + goto err_vec; + + cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context); + MLX5_SET(cqc, cqc, log_cq_size, ilog2(num_ent)); + MLX5_SET(cqc, cqc, uar_page, ndev->mvdev.res.uar->index); + MLX5_SET(cqc, cqc, c_eqn, eqn); + MLX5_SET64(cqc, cqc, dbr_addr, vcq->db.dma); + + err = mlx5_core_create_cq(mdev, &vcq->mcq, in, inlen, out, sizeof(out)); + if (err) + goto err_vec; + + vcq->mcq.comp = mlx5_vdpa_cq_comp; + vcq->cqe = num_ent; + vcq->mcq.set_ci_db = vcq->db.db; + vcq->mcq.arm_db = vcq->db.db + 1; + mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index); + kfree(in); + return 0; + +err_vec: + kfree(in); +err_vzalloc: + cq_frag_buf_free(ndev, &vcq->buf); +err_db: + mlx5_db_free(ndev->mvdev.mdev, &vcq->db); + return err; +} + +static void cq_destroy(struct mlx5_vdpa_net *ndev, u16 idx) +{ + struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx]; + struct mlx5_core_dev *mdev = ndev->mvdev.mdev; + struct mlx5_vdpa_cq *vcq = &mvq->cq; + + if (mlx5_core_destroy_cq(mdev, &vcq->mcq)) { + mlx5_vdpa_warn(&ndev->mvdev, "destroy CQ 0x%x\n", vcq->mcq.cqn); + return; + } + cq_frag_buf_free(ndev, &vcq->buf); + mlx5_db_free(ndev->mvdev.mdev, &vcq->db); +} + +static void set_umem_size(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num, + struct mlx5_vdpa_umem **umemp) +{ + struct mlx5_core_dev *mdev = ndev->mvdev.mdev; + int p_a; + int p_b; + + switch (num) { + case 1: + p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_1_buffer_param_a); + p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_1_buffer_param_b); + *umemp = &mvq->umem1; + break; + case 2: + p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_2_buffer_param_a); + p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_2_buffer_param_b); + *umemp = &mvq->umem2; + break; + case 3: + p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_3_buffer_param_a); + p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_3_buffer_param_b); + *umemp = &mvq->umem3; + break; + } + (*umemp)->size = p_a * mvq->num_ent + p_b; +} + +static void umem_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem) +{ + mlx5_frag_buf_free(ndev->mvdev.mdev, &umem->frag_buf); +} + +static int create_umem(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num) +{ + int inlen; + u32 out[MLX5_ST_SZ_DW(create_umem_out)] = {}; + void *um; + void *in; + int err; + __be64 *pas; + struct mlx5_vdpa_umem *umem; + + set_umem_size(ndev, mvq, num, &umem); + err = umem_frag_buf_alloc(ndev, umem, umem->size); + if (err) + return err; + + inlen = MLX5_ST_SZ_BYTES(create_umem_in) + MLX5_ST_SZ_BYTES(mtt) * umem->frag_buf.npages; + + in = kzalloc(inlen, GFP_KERNEL); + if (!in) { + err = -ENOMEM; + goto err_in; + } + + MLX5_SET(create_umem_in, in, opcode, MLX5_CMD_OP_CREATE_UMEM); + MLX5_SET(create_umem_in, in, uid, ndev->mvdev.res.uid); + um = MLX5_ADDR_OF(create_umem_in, in, umem); + MLX5_SET(umem, um, log_page_size, umem->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET64(umem, um, num_of_mtt, umem->frag_buf.npages); + + pas = (__be64 *)MLX5_ADDR_OF(umem, um, mtt[0]); + mlx5_fill_page_frag_array_perm(&umem->frag_buf, pas, MLX5_MTT_PERM_RW); + + err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out)); + if (err) { + mlx5_vdpa_warn(&ndev->mvdev, "create umem(%d)\n", err); + goto err_cmd; + } + + kfree(in); + umem->id = MLX5_GET(create_umem_out, out, umem_id); + + return 0; + +err_cmd: + kfree(in); +err_in: + umem_frag_buf_free(ndev, umem); + return err; +} + +static void umem_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num) +{ + u32 in[MLX5_ST_SZ_DW(destroy_umem_in)] = {}; + u32 out[MLX5_ST_SZ_DW(destroy_umem_out)] = {}; + struct mlx5_vdpa_umem *umem; + + switch (num) { + case 1: + umem = &mvq->umem1; + break; + case 2: + umem = &mvq->umem2; + break; + case 3: + umem = &mvq->umem3; + break; + } + + MLX5_SET(destroy_umem_in, in, opcode, MLX5_CMD_OP_DESTROY_UMEM); + MLX5_SET(destroy_umem_in, in, umem_id, umem->id); + if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out))) + return; + + umem_frag_buf_free(ndev, umem); +} + +static int umems_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) +{ + int num; + int err; + + for (num = 1; num <= 3; num++) { + err = create_umem(ndev, mvq, num); + if (err) + goto err_umem; + } + return 0; + +err_umem: + for (num--; num > 0; num--) + umem_destroy(ndev, mvq, num); + + return err; +} + +static void umems_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) +{ + int num; + + for (num = 3; num > 0; num--) + umem_destroy(ndev, mvq, num); +} + +static int get_queue_type(struct mlx5_vdpa_net *ndev) +{ + u32 type_mask; + + type_mask = MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, virtio_queue_type); + + /* prefer split queue */ + if (type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_PACKED) + return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED; + + WARN_ON(!(type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)); + + return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT; +} + +static bool vq_is_tx(u16 idx) +{ + return idx % 2; +} + +static u16 get_features_12_3(u64 features) +{ + return (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO4)) << 9) | + (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO6)) << 8) | + (!!(features & BIT_ULL(VIRTIO_NET_F_CSUM)) << 7) | + (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_CSUM)) << 6); +} + +static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) +{ + int inlen = MLX5_ST_SZ_BYTES(create_virtio_net_q_in); + u32 out[MLX5_ST_SZ_DW(create_virtio_net_q_out)] = {}; + void *obj_context; + void *cmd_hdr; + void *vq_ctx; + void *in; + int err; + + err = umems_create(ndev, mvq); + if (err) + return err; + + in = kzalloc(inlen, GFP_KERNEL); + if (!in) { + err = -ENOMEM; + goto err_alloc; + } + + cmd_hdr = MLX5_ADDR_OF(create_virtio_net_q_in, in, general_obj_in_cmd_hdr); + + MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q); + MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid); + + obj_context = MLX5_ADDR_OF(create_virtio_net_q_in, in, obj_context); + MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx); + MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx); + MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3, + get_features_12_3(ndev->mvdev.actual_features)); + vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context); + MLX5_SET(virtio_q, vq_ctx, virtio_q_type, get_queue_type(ndev)); + + if (vq_is_tx(mvq->index)) + MLX5_SET(virtio_net_q_object, obj_context, tisn_or_qpn, ndev->res.tisn); + + MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE); + MLX5_SET(virtio_q, vq_ctx, queue_index, mvq->index); + MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->fwqp.mqp.qpn); + MLX5_SET(virtio_q, vq_ctx, queue_size, mvq->num_ent); + MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0, + !!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_F_VERSION_1))); + MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr); + MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr); + MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr); + MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, ndev->mvdev.mr.mkey.key); + MLX5_SET(virtio_q, vq_ctx, umem_1_id, mvq->umem1.id); + MLX5_SET(virtio_q, vq_ctx, umem_1_size, mvq->umem1.size); + MLX5_SET(virtio_q, vq_ctx, umem_2_id, mvq->umem2.id); + MLX5_SET(virtio_q, vq_ctx, umem_2_size, mvq->umem2.size); + MLX5_SET(virtio_q, vq_ctx, umem_3_id, mvq->umem3.id); + MLX5_SET(virtio_q, vq_ctx, umem_3_size, mvq->umem3.size); + MLX5_SET(virtio_q, vq_ctx, pd, ndev->mvdev.res.pdn); + + err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out)); + if (err) + goto err_cmd; + + kfree(in); + mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + + return 0; + +err_cmd: + kfree(in); +err_alloc: + umems_destroy(ndev, mvq); + return err; +} + +static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) +{ + u32 in[MLX5_ST_SZ_DW(destroy_virtio_net_q_in)] = {}; + u32 out[MLX5_ST_SZ_DW(destroy_virtio_net_q_out)] = {}; + + MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.opcode, + MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); + MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_id, mvq->virtq_id); + MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.uid, ndev->mvdev.res.uid); + MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_type, + MLX5_OBJ_TYPE_VIRTIO_NET_Q); + if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out))) { + mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id); + return; + } + umems_destroy(ndev, mvq); +} + +static u32 get_rqpn(struct mlx5_vdpa_virtqueue *mvq, bool fw) +{ + return fw ? mvq->vqqp.mqp.qpn : mvq->fwqp.mqp.qpn; +} + +static u32 get_qpn(struct mlx5_vdpa_virtqueue *mvq, bool fw) +{ + return fw ? mvq->fwqp.mqp.qpn : mvq->vqqp.mqp.qpn; +} + +static void alloc_inout(struct mlx5_vdpa_net *ndev, int cmd, void **in, int *inlen, void **out, + int *outlen, u32 qpn, u32 rqpn) +{ + void *qpc; + void *pp; + + switch (cmd) { + case MLX5_CMD_OP_2RST_QP: + *inlen = MLX5_ST_SZ_BYTES(qp_2rst_in); + *outlen = MLX5_ST_SZ_BYTES(qp_2rst_out); + *in = kzalloc(*inlen, GFP_KERNEL); + *out = kzalloc(*outlen, GFP_KERNEL); + if (!*in || !*out) + goto outerr; + + MLX5_SET(qp_2rst_in, *in, opcode, cmd); + MLX5_SET(qp_2rst_in, *in, uid, ndev->mvdev.res.uid); + MLX5_SET(qp_2rst_in, *in, qpn, qpn); + break; + case MLX5_CMD_OP_RST2INIT_QP: + *inlen = MLX5_ST_SZ_BYTES(rst2init_qp_in); + *outlen = MLX5_ST_SZ_BYTES(rst2init_qp_out); + *in = kzalloc(*inlen, GFP_KERNEL); + *out = kzalloc(MLX5_ST_SZ_BYTES(rst2init_qp_out), GFP_KERNEL); + if (!*in || !*out) + goto outerr; + + MLX5_SET(rst2init_qp_in, *in, opcode, cmd); + MLX5_SET(rst2init_qp_in, *in, uid, ndev->mvdev.res.uid); + MLX5_SET(rst2init_qp_in, *in, qpn, qpn); + qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc); + MLX5_SET(qpc, qpc, remote_qpn, rqpn); + MLX5_SET(qpc, qpc, rwe, 1); + pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path); + MLX5_SET(ads, pp, vhca_port_num, 1); + break; + case MLX5_CMD_OP_INIT2RTR_QP: + *inlen = MLX5_ST_SZ_BYTES(init2rtr_qp_in); + *outlen = MLX5_ST_SZ_BYTES(init2rtr_qp_out); + *in = kzalloc(*inlen, GFP_KERNEL); + *out = kzalloc(MLX5_ST_SZ_BYTES(init2rtr_qp_out), GFP_KERNEL); + if (!*in || !*out) + goto outerr; + + MLX5_SET(init2rtr_qp_in, *in, opcode, cmd); + MLX5_SET(init2rtr_qp_in, *in, uid, ndev->mvdev.res.uid); + MLX5_SET(init2rtr_qp_in, *in, qpn, qpn); + qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc); + MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES); + MLX5_SET(qpc, qpc, log_msg_max, 30); + MLX5_SET(qpc, qpc, remote_qpn, rqpn); + pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path); + MLX5_SET(ads, pp, fl, 1); + break; + case MLX5_CMD_OP_RTR2RTS_QP: + *inlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_in); + *outlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_out); + *in = kzalloc(*inlen, GFP_KERNEL); + *out = kzalloc(MLX5_ST_SZ_BYTES(rtr2rts_qp_out), GFP_KERNEL); + if (!*in || !*out) + goto outerr; + + MLX5_SET(rtr2rts_qp_in, *in, opcode, cmd); + MLX5_SET(rtr2rts_qp_in, *in, uid, ndev->mvdev.res.uid); + MLX5_SET(rtr2rts_qp_in, *in, qpn, qpn); + qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc); + pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path); + MLX5_SET(ads, pp, ack_timeout, 14); + MLX5_SET(qpc, qpc, retry_count, 7); + MLX5_SET(qpc, qpc, rnr_retry, 7); + break; + default: + goto outerr_nullify; + } + + return; + +outerr: + kfree(*in); + kfree(*out); +outerr_nullify: + *in = NULL; + *out = NULL; +} + +static void free_inout(void *in, void *out) +{ + kfree(in); + kfree(out); +} + +/* Two QPs are used by each virtqueue. One is used by the driver and one by + * firmware. The fw argument indicates whether the subjected QP is the one used + * by firmware. + */ +static int modify_qp(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, bool fw, int cmd) +{ + int outlen; + int inlen; + void *out; + void *in; + int err; + + alloc_inout(ndev, cmd, &in, &inlen, &out, &outlen, get_qpn(mvq, fw), get_rqpn(mvq, fw)); + if (!in || !out) + return -ENOMEM; + + err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, outlen); + free_inout(in, out); + return err; +} + +static int connect_qps(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) +{ + int err; + + err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_2RST_QP); + if (err) + return err; + + err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_2RST_QP); + if (err) + return err; + + err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_RST2INIT_QP); + if (err) + return err; + + err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_RST2INIT_QP); + if (err) + return err; + + err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_INIT2RTR_QP); + if (err) + return err; + + err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_INIT2RTR_QP); + if (err) + return err; + + return modify_qp(ndev, mvq, true, MLX5_CMD_OP_RTR2RTS_QP); +} + +struct mlx5_virtq_attr { + u8 state; + u16 available_index; + u16 used_index; +}; + +static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, + struct mlx5_virtq_attr *attr) +{ + int outlen = MLX5_ST_SZ_BYTES(query_virtio_net_q_out); + u32 in[MLX5_ST_SZ_DW(query_virtio_net_q_in)] = {}; + void *out; + void *obj_context; + void *cmd_hdr; + int err; + + out = kzalloc(outlen, GFP_KERNEL); + if (!out) + return -ENOMEM; + + cmd_hdr = MLX5_ADDR_OF(query_virtio_net_q_in, in, general_obj_in_cmd_hdr); + + MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q); + MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id); + MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid); + err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, outlen); + if (err) + goto err_cmd; + + obj_context = MLX5_ADDR_OF(query_virtio_net_q_out, out, obj_context); + memset(attr, 0, sizeof(*attr)); + attr->state = MLX5_GET(virtio_net_q_object, obj_context, state); + attr->available_index = MLX5_GET(virtio_net_q_object, obj_context, hw_available_index); + attr->used_index = MLX5_GET(virtio_net_q_object, obj_context, hw_used_index); + kfree(out); + return 0; + +err_cmd: + kfree(out); + return err; +} + +static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int state) +{ + int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in); + u32 out[MLX5_ST_SZ_DW(modify_virtio_net_q_out)] = {}; + void *obj_context; + void *cmd_hdr; + void *in; + int err; + + in = kzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + cmd_hdr = MLX5_ADDR_OF(modify_virtio_net_q_in, in, general_obj_in_cmd_hdr); + + MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q); + MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id); + MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid); + + obj_context = MLX5_ADDR_OF(modify_virtio_net_q_in, in, obj_context); + MLX5_SET64(virtio_net_q_object, obj_context, modify_field_select, + MLX5_VIRTQ_MODIFY_MASK_STATE); + MLX5_SET(virtio_net_q_object, obj_context, state, state); + err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out)); + kfree(in); + if (!err) + mvq->fw_state = state; + + return err; +} + +static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) +{ + u16 idx = mvq->index; + int err; + + if (!mvq->num_ent) + return 0; + + if (mvq->initialized) { + mlx5_vdpa_warn(&ndev->mvdev, "attempt re init\n"); + return -EINVAL; + } + + err = cq_create(ndev, idx, mvq->num_ent); + if (err) + return err; + + err = qp_create(ndev, mvq, &mvq->fwqp); + if (err) + goto err_fwqp; + + err = qp_create(ndev, mvq, &mvq->vqqp); + if (err) + goto err_vqqp; + + err = connect_qps(ndev, mvq); + if (err) + goto err_connect; + + err = create_virtqueue(ndev, mvq); + if (err) + goto err_connect; + + if (mvq->ready) { + err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY); + if (err) { + mlx5_vdpa_warn(&ndev->mvdev, "failed to modify to ready vq idx %d(%d)\n", + idx, err); + goto err_connect; + } + } + + mvq->initialized = true; + return 0; + +err_connect: + qp_destroy(ndev, &mvq->vqqp); +err_vqqp: + qp_destroy(ndev, &mvq->fwqp); +err_fwqp: + cq_destroy(ndev, idx); + return err; +} + +static void suspend_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) +{ + struct mlx5_virtq_attr attr; + + if (!mvq->initialized) + return; + + if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) + return; + + if (modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND)) + mlx5_vdpa_warn(&ndev->mvdev, "modify to suspend failed\n"); + + if (query_virtqueue(ndev, mvq, &attr)) { + mlx5_vdpa_warn(&ndev->mvdev, "failed to query virtqueue\n"); + return; + } + mvq->avail_idx = attr.available_index; + mvq->used_idx = attr.used_index; +} + +static void suspend_vqs(struct mlx5_vdpa_net *ndev) +{ + int i; + + for (i = 0; i < MLX5_MAX_SUPPORTED_VQS; i++) + suspend_vq(ndev, &ndev->vqs[i]); +} + +static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) +{ + if (!mvq->initialized) + return; + + suspend_vq(ndev, mvq); + destroy_virtqueue(ndev, mvq); + qp_destroy(ndev, &mvq->vqqp); + qp_destroy(ndev, &mvq->fwqp); + cq_destroy(ndev, mvq->index); + mvq->initialized = false; +} + +static int create_rqt(struct mlx5_vdpa_net *ndev) +{ + int log_max_rqt; + __be32 *list; + void *rqtc; + int inlen; + void *in; + int i, j; + int err; + + log_max_rqt = min_t(int, 1, MLX5_CAP_GEN(ndev->mvdev.mdev, log_max_rqt_size)); + if (log_max_rqt < 1) + return -EOPNOTSUPP; + + inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + (1 << log_max_rqt) * MLX5_ST_SZ_BYTES(rq_num); + in = kzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(create_rqt_in, in, uid, ndev->mvdev.res.uid); + rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context); + + MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q); + MLX5_SET(rqtc, rqtc, rqt_max_size, 1 << log_max_rqt); + MLX5_SET(rqtc, rqtc, rqt_actual_size, 1); + list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]); + for (i = 0, j = 0; j < ndev->mvdev.max_vqs; j++) { + if (!ndev->vqs[j].initialized) + continue; + + if (!vq_is_tx(ndev->vqs[j].index)) { + list[i] = cpu_to_be32(ndev->vqs[j].virtq_id); + i++; + } + } + + err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen, &ndev->res.rqtn); + kfree(in); + if (err) + return err; + + return 0; +} + +static void destroy_rqt(struct mlx5_vdpa_net *ndev) +{ + mlx5_vdpa_destroy_rqt(&ndev->mvdev, ndev->res.rqtn); +} + +static int create_tir(struct mlx5_vdpa_net *ndev) +{ +#define HASH_IP_L4PORTS \ + (MLX5_HASH_FIELD_SEL_SRC_IP | MLX5_HASH_FIELD_SEL_DST_IP | MLX5_HASH_FIELD_SEL_L4_SPORT | \ + MLX5_HASH_FIELD_SEL_L4_DPORT) + static const u8 rx_hash_toeplitz_key[] = { 0x2c, 0xc6, 0x81, 0xd1, 0x5b, 0xdb, 0xf4, 0xf7, + 0xfc, 0xa2, 0x83, 0x19, 0xdb, 0x1a, 0x3e, 0x94, + 0x6b, 0x9e, 0x38, 0xd9, 0x2c, 0x9c, 0x03, 0xd1, + 0xad, 0x99, 0x44, 0xa7, 0xd9, 0x56, 0x3d, 0x59, + 0x06, 0x3c, 0x25, 0xf3, 0xfc, 0x1f, 0xdc, 0x2a }; + void *rss_key; + void *outer; + void *tirc; + void *in; + int err; + + in = kzalloc(MLX5_ST_SZ_BYTES(create_tir_in), GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(create_tir_in, in, uid, ndev->mvdev.res.uid); + tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); + MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); + + MLX5_SET(tirc, tirc, rx_hash_symmetric, 1); + MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_TOEPLITZ); + rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key); + memcpy(rss_key, rx_hash_toeplitz_key, sizeof(rx_hash_toeplitz_key)); + + outer = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer); + MLX5_SET(rx_hash_field_select, outer, l3_prot_type, MLX5_L3_PROT_TYPE_IPV4); + MLX5_SET(rx_hash_field_select, outer, l4_prot_type, MLX5_L4_PROT_TYPE_TCP); + MLX5_SET(rx_hash_field_select, outer, selected_fields, HASH_IP_L4PORTS); + + MLX5_SET(tirc, tirc, indirect_table, ndev->res.rqtn); + MLX5_SET(tirc, tirc, transport_domain, ndev->res.tdn); + + err = mlx5_vdpa_create_tir(&ndev->mvdev, in, &ndev->res.tirn); + kfree(in); + return err; +} + +static void destroy_tir(struct mlx5_vdpa_net *ndev) +{ + mlx5_vdpa_destroy_tir(&ndev->mvdev, ndev->res.tirn); +} + +static int add_fwd_to_tir(struct mlx5_vdpa_net *ndev) +{ + struct mlx5_flow_destination dest[2] = {}; + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_namespace *ns; + int err; + + /* for now, one entry, match all, forward to tir */ + ft_attr.max_fte = 1; + ft_attr.autogroup.max_num_groups = 1; + + ns = mlx5_get_flow_namespace(ndev->mvdev.mdev, MLX5_FLOW_NAMESPACE_BYPASS); + if (!ns) { + mlx5_vdpa_warn(&ndev->mvdev, "get flow namespace\n"); + return -EOPNOTSUPP; + } + + ndev->rxft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); + if (IS_ERR(ndev->rxft)) + return PTR_ERR(ndev->rxft); + + ndev->rx_counter = mlx5_fc_create(ndev->mvdev.mdev, false); + if (IS_ERR(ndev->rx_counter)) { + err = PTR_ERR(ndev->rx_counter); + goto err_fc; + } + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT; + dest[0].type = MLX5_FLOW_DESTINATION_TYPE_TIR; + dest[0].tir_num = ndev->res.tirn; + dest[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dest[1].counter_id = mlx5_fc_id(ndev->rx_counter); + ndev->rx_rule = mlx5_add_flow_rules(ndev->rxft, NULL, &flow_act, dest, 2); + if (IS_ERR(ndev->rx_rule)) { + err = PTR_ERR(ndev->rx_rule); + ndev->rx_rule = NULL; + goto err_rule; + } + + return 0; + +err_rule: + mlx5_fc_destroy(ndev->mvdev.mdev, ndev->rx_counter); +err_fc: + mlx5_destroy_flow_table(ndev->rxft); + return err; +} + +static void remove_fwd_to_tir(struct mlx5_vdpa_net *ndev) +{ + if (!ndev->rx_rule) + return; + + mlx5_del_flow_rules(ndev->rx_rule); + mlx5_fc_destroy(ndev->mvdev.mdev, ndev->rx_counter); + mlx5_destroy_flow_table(ndev->rxft); + + ndev->rx_rule = NULL; +} + +static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx) +{ + struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); + struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx]; + + if (unlikely(!mvq->ready)) + return; + + iowrite16(idx, ndev->mvdev.res.kick_addr); +} + +static int mlx5_vdpa_set_vq_address(struct vdpa_device *vdev, u16 idx, u64 desc_area, + u64 driver_area, u64 device_area) +{ + struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); + struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx]; + + mvq->desc_addr = desc_area; + mvq->device_addr = device_area; + mvq->driver_addr = driver_area; + return 0; +} + +static void mlx5_vdpa_set_vq_num(struct vdpa_device *vdev, u16 idx, u32 num) +{ + struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); + struct mlx5_vdpa_virtqueue *mvq; + + mvq = &ndev->vqs[idx]; + mvq->num_ent = num; +} + +static void mlx5_vdpa_set_vq_cb(struct vdpa_device *vdev, u16 idx, struct vdpa_callback *cb) +{ + struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); + struct mlx5_vdpa_virtqueue *vq = &ndev->vqs[idx]; + + vq->event_cb = *cb; +} + +static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready) +{ + struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); + struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx]; + + if (!ready) + suspend_vq(ndev, mvq); + + mvq->ready = ready; +} + +static bool mlx5_vdpa_get_vq_ready(struct vdpa_device *vdev, u16 idx) +{ + struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); + struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx]; + + return mvq->ready; +} + +static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx, + const struct vdpa_vq_state *state) +{ + struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); + struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx]; + + if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) { + mlx5_vdpa_warn(mvdev, "can't modify available index\n"); + return -EINVAL; + } + + mvq->used_idx = state->avail_index; + mvq->avail_idx = state->avail_index; + return 0; +} + +static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa_vq_state *state) +{ + struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); + struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx]; + struct mlx5_virtq_attr attr; + int err; + + /* If the virtq object was destroyed, use the value saved at + * the last minute of suspend_vq. This caters for userspace + * that cares about emulating the index after vq is stopped. + */ + if (!mvq->initialized) { + /* Firmware returns a wrong value for the available index. + * Since both values should be identical, we take the value of + * used_idx which is reported correctly. + */ + state->avail_index = mvq->used_idx; + return 0; + } + + err = query_virtqueue(ndev, mvq, &attr); + if (err) { + mlx5_vdpa_warn(mvdev, "failed to query virtqueue\n"); + return err; + } + state->avail_index = attr.used_index; + return 0; +} + +static u32 mlx5_vdpa_get_vq_align(struct vdpa_device *vdev) +{ + return PAGE_SIZE; +} + +enum { MLX5_VIRTIO_NET_F_GUEST_CSUM = 1 << 9, + MLX5_VIRTIO_NET_F_CSUM = 1 << 10, + MLX5_VIRTIO_NET_F_HOST_TSO6 = 1 << 11, + MLX5_VIRTIO_NET_F_HOST_TSO4 = 1 << 12, +}; + +static u64 mlx_to_vritio_features(u16 dev_features) +{ + u64 result = 0; + + if (dev_features & MLX5_VIRTIO_NET_F_GUEST_CSUM) + result |= BIT_ULL(VIRTIO_NET_F_GUEST_CSUM); + if (dev_features & MLX5_VIRTIO_NET_F_CSUM) + result |= BIT_ULL(VIRTIO_NET_F_CSUM); + if (dev_features & MLX5_VIRTIO_NET_F_HOST_TSO6) + result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO6); + if (dev_features & MLX5_VIRTIO_NET_F_HOST_TSO4) + result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO4); + + return result; +} + +static u64 mlx5_vdpa_get_features(struct vdpa_device *vdev) +{ + struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); + u16 dev_features; + + dev_features = MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, device_features_bits_mask); + ndev->mvdev.mlx_features = mlx_to_vritio_features(dev_features); + if (MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, virtio_version_1_0)) + ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_F_VERSION_1); + ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_F_ACCESS_PLATFORM); + print_features(mvdev, ndev->mvdev.mlx_features, false); + return ndev->mvdev.mlx_features; +} + +static int verify_driver_features(struct mlx5_vdpa_dev *mvdev, u64 features) +{ + /* Minimum features to expect */ + if (!(features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM))) + return -EOPNOTSUPP; + + /* Double check features combination sent down by the driver. + * Fail invalid features due to absence of the depended feature. + * + * Per VIRTIO v1.1 specification, section 5.1.3.1 Feature bit + * requirements: "VIRTIO_NET_F_MQ Requires VIRTIO_NET_F_CTRL_VQ". + * By failing the invalid features sent down by untrusted drivers, + * we're assured the assumption made upon is_index_valid() and + * is_ctrl_vq_idx() will not be compromised. + */ + if ((features & (BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ))) == + BIT_ULL(VIRTIO_NET_F_MQ)) + return -EINVAL; + + return 0; +} + +static int setup_virtqueues(struct mlx5_vdpa_net *ndev) +{ + int err; + int i; + + for (i = 0; i < 2 * mlx5_vdpa_max_qps(ndev->mvdev.max_vqs); i++) { + err = setup_vq(ndev, &ndev->vqs[i]); + if (err) + goto err_vq; + } + + return 0; + +err_vq: + for (--i; i >= 0; i--) + teardown_vq(ndev, &ndev->vqs[i]); + + return err; +} + +static void teardown_virtqueues(struct mlx5_vdpa_net *ndev) +{ + struct mlx5_vdpa_virtqueue *mvq; + int i; + + for (i = ndev->mvdev.max_vqs - 1; i >= 0; i--) { + mvq = &ndev->vqs[i]; + if (!mvq->initialized) + continue; + + teardown_vq(ndev, mvq); + } +} + +/* TODO: cross-endian support */ +static inline bool mlx5_vdpa_is_little_endian(struct mlx5_vdpa_dev *mvdev) +{ + return virtio_legacy_is_little_endian() || + (mvdev->actual_features & BIT_ULL(VIRTIO_F_VERSION_1)); +} + +static __virtio16 cpu_to_mlx5vdpa16(struct mlx5_vdpa_dev *mvdev, u16 val) +{ + return __cpu_to_virtio16(mlx5_vdpa_is_little_endian(mvdev), val); +} + +static int mlx5_vdpa_set_features(struct vdpa_device *vdev, u64 features) +{ + struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); + int err; + + print_features(mvdev, features, true); + + err = verify_driver_features(mvdev, features); + if (err) + return err; + + ndev->mvdev.actual_features = features & ndev->mvdev.mlx_features; + ndev->config.mtu = cpu_to_mlx5vdpa16(mvdev, ndev->mtu); + ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP); + return err; +} + +static void mlx5_vdpa_set_config_cb(struct vdpa_device *vdev, struct vdpa_callback *cb) +{ + /* not implemented */ + mlx5_vdpa_warn(to_mvdev(vdev), "set config callback not supported\n"); +} + +#define MLX5_VDPA_MAX_VQ_ENTRIES 256 +static u16 mlx5_vdpa_get_vq_num_max(struct vdpa_device *vdev) +{ + return MLX5_VDPA_MAX_VQ_ENTRIES; +} + +static u32 mlx5_vdpa_get_device_id(struct vdpa_device *vdev) +{ + return VIRTIO_ID_NET; +} + +static u32 mlx5_vdpa_get_vendor_id(struct vdpa_device *vdev) +{ + return PCI_VENDOR_ID_MELLANOX; +} + +static u8 mlx5_vdpa_get_status(struct vdpa_device *vdev) +{ + struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); + + print_status(mvdev, ndev->mvdev.status, false); + return ndev->mvdev.status; +} + +static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq) +{ + struct mlx5_vq_restore_info *ri = &mvq->ri; + struct mlx5_virtq_attr attr; + int err; + + if (!mvq->initialized) + return 0; + + err = query_virtqueue(ndev, mvq, &attr); + if (err) + return err; + + ri->avail_index = attr.available_index; + ri->used_index = attr.used_index; + ri->ready = mvq->ready; + ri->num_ent = mvq->num_ent; + ri->desc_addr = mvq->desc_addr; + ri->device_addr = mvq->device_addr; + ri->driver_addr = mvq->driver_addr; + ri->cb = mvq->event_cb; + ri->restore = true; + return 0; +} + +static int save_channels_info(struct mlx5_vdpa_net *ndev) +{ + int i; + + for (i = 0; i < ndev->mvdev.max_vqs; i++) { + memset(&ndev->vqs[i].ri, 0, sizeof(ndev->vqs[i].ri)); + save_channel_info(ndev, &ndev->vqs[i]); + } + return 0; +} + +static void mlx5_clear_vqs(struct mlx5_vdpa_net *ndev) +{ + int i; + + for (i = 0; i < ndev->mvdev.max_vqs; i++) + memset(&ndev->vqs[i], 0, offsetof(struct mlx5_vdpa_virtqueue, ri)); +} + +static void restore_channels_info(struct mlx5_vdpa_net *ndev) +{ + struct mlx5_vdpa_virtqueue *mvq; + struct mlx5_vq_restore_info *ri; + int i; + + mlx5_clear_vqs(ndev); + init_mvqs(ndev); + for (i = 0; i < ndev->mvdev.max_vqs; i++) { + mvq = &ndev->vqs[i]; + ri = &mvq->ri; + if (!ri->restore) + continue; + + mvq->avail_idx = ri->avail_index; + mvq->used_idx = ri->used_index; + mvq->ready = ri->ready; + mvq->num_ent = ri->num_ent; + mvq->desc_addr = ri->desc_addr; + mvq->device_addr = ri->device_addr; + mvq->driver_addr = ri->driver_addr; + mvq->event_cb = ri->cb; + } +} + +static int mlx5_vdpa_change_map(struct mlx5_vdpa_net *ndev, struct vhost_iotlb *iotlb) +{ + int err; + + suspend_vqs(ndev); + err = save_channels_info(ndev); + if (err) + goto err_mr; + + teardown_driver(ndev); + mlx5_vdpa_destroy_mr(&ndev->mvdev); + err = mlx5_vdpa_create_mr(&ndev->mvdev, iotlb); + if (err) + goto err_mr; + + if (!(ndev->mvdev.status & VIRTIO_CONFIG_S_DRIVER_OK)) + return 0; + + restore_channels_info(ndev); + err = setup_driver(ndev); + if (err) + goto err_setup; + + return 0; + +err_setup: + mlx5_vdpa_destroy_mr(&ndev->mvdev); +err_mr: + return err; +} + +static int setup_driver(struct mlx5_vdpa_net *ndev) +{ + int err; + + mutex_lock(&ndev->reslock); + if (ndev->setup) { + mlx5_vdpa_warn(&ndev->mvdev, "setup driver called for already setup driver\n"); + err = 0; + goto out; + } + err = setup_virtqueues(ndev); + if (err) { + mlx5_vdpa_warn(&ndev->mvdev, "setup_virtqueues\n"); + goto out; + } + + err = create_rqt(ndev); + if (err) { + mlx5_vdpa_warn(&ndev->mvdev, "create_rqt\n"); + goto err_rqt; + } + + err = create_tir(ndev); + if (err) { + mlx5_vdpa_warn(&ndev->mvdev, "create_tir\n"); + goto err_tir; + } + + err = add_fwd_to_tir(ndev); + if (err) { + mlx5_vdpa_warn(&ndev->mvdev, "add_fwd_to_tir\n"); + goto err_fwd; + } + ndev->setup = true; + mutex_unlock(&ndev->reslock); + + return 0; + +err_fwd: + destroy_tir(ndev); +err_tir: + destroy_rqt(ndev); +err_rqt: + teardown_virtqueues(ndev); +out: + mutex_unlock(&ndev->reslock); + return err; +} + +static void teardown_driver(struct mlx5_vdpa_net *ndev) +{ + mutex_lock(&ndev->reslock); + if (!ndev->setup) + goto out; + + remove_fwd_to_tir(ndev); + destroy_tir(ndev); + destroy_rqt(ndev); + teardown_virtqueues(ndev); + ndev->setup = false; +out: + mutex_unlock(&ndev->reslock); +} + +static void clear_vqs_ready(struct mlx5_vdpa_net *ndev) +{ + int i; + + for (i = 0; i < ndev->mvdev.max_vqs; i++) + ndev->vqs[i].ready = false; +} + +static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status) +{ + struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); + int err; + + print_status(mvdev, status, true); + if (!status) { + mlx5_vdpa_info(mvdev, "performing device reset\n"); + teardown_driver(ndev); + clear_vqs_ready(ndev); + mlx5_vdpa_destroy_mr(&ndev->mvdev); + ndev->mvdev.status = 0; + ndev->mvdev.mlx_features = 0; + ++mvdev->generation; + return; + } + + if ((status ^ ndev->mvdev.status) & VIRTIO_CONFIG_S_DRIVER_OK) { + if (status & VIRTIO_CONFIG_S_DRIVER_OK) { + err = setup_driver(ndev); + if (err) { + mlx5_vdpa_warn(mvdev, "failed to setup driver\n"); + goto err_setup; + } + } else { + mlx5_vdpa_warn(mvdev, "did not expect DRIVER_OK to be cleared\n"); + return; + } + } + + ndev->mvdev.status = status; + return; + +err_setup: + mlx5_vdpa_destroy_mr(&ndev->mvdev); + ndev->mvdev.status |= VIRTIO_CONFIG_S_FAILED; +} + +static void mlx5_vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, void *buf, + unsigned int len) +{ + struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); + + if (offset + len <= sizeof(struct virtio_net_config)) + memcpy(buf, (u8 *)&ndev->config + offset, len); +} + +static void mlx5_vdpa_set_config(struct vdpa_device *vdev, unsigned int offset, const void *buf, + unsigned int len) +{ + /* not supported */ +} + +static u32 mlx5_vdpa_get_generation(struct vdpa_device *vdev) +{ + struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); + + return mvdev->generation; +} + +static int mlx5_vdpa_set_map(struct vdpa_device *vdev, struct vhost_iotlb *iotlb) +{ + struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); + bool change_map; + int err; + + err = mlx5_vdpa_handle_set_map(mvdev, iotlb, &change_map); + if (err) { + mlx5_vdpa_warn(mvdev, "set map failed(%d)\n", err); + return err; + } + + if (change_map) + return mlx5_vdpa_change_map(ndev, iotlb); + + return 0; +} + +static void mlx5_vdpa_free(struct vdpa_device *vdev) +{ + struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); + struct mlx5_core_dev *pfmdev; + struct mlx5_vdpa_net *ndev; + + ndev = to_mlx5_vdpa_ndev(mvdev); + + free_resources(ndev); + if (!is_zero_ether_addr(ndev->config.mac)) { + pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev)); + mlx5_mpfs_del_mac(pfmdev, ndev->config.mac); + } + mlx5_vdpa_free_resources(&ndev->mvdev); + mutex_destroy(&ndev->reslock); +} + +static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device *vdev, u16 idx) +{ + struct vdpa_notification_area ret = {}; + + return ret; +} + +static int mlx5_get_vq_irq(struct vdpa_device *vdv, u16 idx) +{ + return -EOPNOTSUPP; +} + +static const struct vdpa_config_ops mlx5_vdpa_ops = { + .set_vq_address = mlx5_vdpa_set_vq_address, + .set_vq_num = mlx5_vdpa_set_vq_num, + .kick_vq = mlx5_vdpa_kick_vq, + .set_vq_cb = mlx5_vdpa_set_vq_cb, + .set_vq_ready = mlx5_vdpa_set_vq_ready, + .get_vq_ready = mlx5_vdpa_get_vq_ready, + .set_vq_state = mlx5_vdpa_set_vq_state, + .get_vq_state = mlx5_vdpa_get_vq_state, + .get_vq_notification = mlx5_get_vq_notification, + .get_vq_irq = mlx5_get_vq_irq, + .get_vq_align = mlx5_vdpa_get_vq_align, + .get_features = mlx5_vdpa_get_features, + .set_features = mlx5_vdpa_set_features, + .set_config_cb = mlx5_vdpa_set_config_cb, + .get_vq_num_max = mlx5_vdpa_get_vq_num_max, + .get_device_id = mlx5_vdpa_get_device_id, + .get_vendor_id = mlx5_vdpa_get_vendor_id, + .get_status = mlx5_vdpa_get_status, + .set_status = mlx5_vdpa_set_status, + .get_config = mlx5_vdpa_get_config, + .set_config = mlx5_vdpa_set_config, + .get_generation = mlx5_vdpa_get_generation, + .set_map = mlx5_vdpa_set_map, + .free = mlx5_vdpa_free, +}; + +static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu) +{ + u16 hw_mtu; + int err; + + err = mlx5_query_nic_vport_mtu(mdev, &hw_mtu); + if (err) + return err; + + *mtu = hw_mtu - MLX5V_ETH_HARD_MTU; + return 0; +} + +static int alloc_resources(struct mlx5_vdpa_net *ndev) +{ + struct mlx5_vdpa_net_resources *res = &ndev->res; + int err; + + if (res->valid) { + mlx5_vdpa_warn(&ndev->mvdev, "resources already allocated\n"); + return -EEXIST; + } + + err = mlx5_vdpa_alloc_transport_domain(&ndev->mvdev, &res->tdn); + if (err) + return err; + + err = create_tis(ndev); + if (err) + goto err_tis; + + res->valid = true; + + return 0; + +err_tis: + mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn); + return err; +} + +static void free_resources(struct mlx5_vdpa_net *ndev) +{ + struct mlx5_vdpa_net_resources *res = &ndev->res; + + if (!res->valid) + return; + + destroy_tis(ndev); + mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn); + res->valid = false; +} + +static void init_mvqs(struct mlx5_vdpa_net *ndev) +{ + struct mlx5_vdpa_virtqueue *mvq; + int i; + + for (i = 0; i < 2 * mlx5_vdpa_max_qps(ndev->mvdev.max_vqs); ++i) { + mvq = &ndev->vqs[i]; + memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri)); + mvq->index = i; + mvq->ndev = ndev; + mvq->fwqp.fw = true; + } + for (; i < ndev->mvdev.max_vqs; i++) { + mvq = &ndev->vqs[i]; + memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri)); + mvq->index = i; + mvq->ndev = ndev; + } +} + +void *mlx5_vdpa_add_dev(struct mlx5_core_dev *mdev) +{ + struct virtio_net_config *config; + struct mlx5_core_dev *pfmdev; + struct mlx5_vdpa_dev *mvdev; + struct mlx5_vdpa_net *ndev; + u32 max_vqs; + int err; + + /* we save one virtqueue for control virtqueue should we require it */ + max_vqs = MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues); + max_vqs = min_t(u32, max_vqs, MLX5_MAX_SUPPORTED_VQS); + + ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops, + 2 * mlx5_vdpa_max_qps(max_vqs)); + if (IS_ERR(ndev)) + return ndev; + + ndev->mvdev.max_vqs = max_vqs; + mvdev = &ndev->mvdev; + mvdev->mdev = mdev; + init_mvqs(ndev); + mutex_init(&ndev->reslock); + config = &ndev->config; + err = query_mtu(mdev, &ndev->mtu); + if (err) + goto err_mtu; + + err = mlx5_query_nic_vport_mac_address(mdev, 0, 0, config->mac); + if (err) + goto err_mtu; + + if (!is_zero_ether_addr(config->mac)) { + pfmdev = pci_get_drvdata(pci_physfn(mdev->pdev)); + err = mlx5_mpfs_add_mac(pfmdev, config->mac); + if (err) + goto err_mtu; + } + + mvdev->vdev.dma_dev = mdev->device; + err = mlx5_vdpa_alloc_resources(&ndev->mvdev); + if (err) + goto err_mpfs; + + err = alloc_resources(ndev); + if (err) + goto err_res; + + err = vdpa_register_device(&mvdev->vdev); + if (err) + goto err_reg; + + return ndev; + +err_reg: + free_resources(ndev); +err_res: + mlx5_vdpa_free_resources(&ndev->mvdev); +err_mpfs: + if (!is_zero_ether_addr(config->mac)) + mlx5_mpfs_del_mac(pfmdev, config->mac); +err_mtu: + mutex_destroy(&ndev->reslock); + put_device(&mvdev->vdev.dev); + return ERR_PTR(err); +} + +void mlx5_vdpa_remove_dev(struct mlx5_vdpa_dev *mvdev) +{ + vdpa_unregister_device(&mvdev->vdev); +} diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.h b/drivers/vdpa/mlx5/net/mlx5_vnet.h new file mode 100644 index 000000000..f2d6d68b0 --- /dev/null +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020 Mellanox Technologies Ltd. */ + +#ifndef __MLX5_VNET_H_ +#define __MLX5_VNET_H_ + +#include <linux/vdpa.h> +#include <linux/virtio_net.h> +#include <linux/vringh.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/cq.h> +#include <linux/mlx5/qp.h> +#include "mlx5_vdpa.h" + +static inline u32 mlx5_vdpa_max_qps(int max_vqs) +{ + return max_vqs / 2; +} + +#define to_mlx5_vdpa_ndev(__mvdev) container_of(__mvdev, struct mlx5_vdpa_net, mvdev) +void *mlx5_vdpa_add_dev(struct mlx5_core_dev *mdev); +void mlx5_vdpa_remove_dev(struct mlx5_vdpa_dev *mvdev); + +#endif /* __MLX5_VNET_H_ */ diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c new file mode 100644 index 000000000..a69ffc991 --- /dev/null +++ b/drivers/vdpa/vdpa.c @@ -0,0 +1,184 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * vDPA bus. + * + * Copyright (c) 2020, Red Hat. All rights reserved. + * Author: Jason Wang <jasowang@redhat.com> + * + */ + +#include <linux/module.h> +#include <linux/idr.h> +#include <linux/slab.h> +#include <linux/vdpa.h> + +static DEFINE_IDA(vdpa_index_ida); + +static int vdpa_dev_probe(struct device *d) +{ + struct vdpa_device *vdev = dev_to_vdpa(d); + struct vdpa_driver *drv = drv_to_vdpa(vdev->dev.driver); + int ret = 0; + + if (drv && drv->probe) + ret = drv->probe(vdev); + + return ret; +} + +static int vdpa_dev_remove(struct device *d) +{ + struct vdpa_device *vdev = dev_to_vdpa(d); + struct vdpa_driver *drv = drv_to_vdpa(vdev->dev.driver); + + if (drv && drv->remove) + drv->remove(vdev); + + return 0; +} + +static struct bus_type vdpa_bus = { + .name = "vdpa", + .probe = vdpa_dev_probe, + .remove = vdpa_dev_remove, +}; + +static void vdpa_release_dev(struct device *d) +{ + struct vdpa_device *vdev = dev_to_vdpa(d); + const struct vdpa_config_ops *ops = vdev->config; + + if (ops->free) + ops->free(vdev); + + ida_simple_remove(&vdpa_index_ida, vdev->index); + kfree(vdev); +} + +/** + * __vdpa_alloc_device - allocate and initilaize a vDPA device + * This allows driver to some prepartion after device is + * initialized but before registered. + * @parent: the parent device + * @config: the bus operations that is supported by this device + * @nvqs: number of virtqueues supported by this device + * @size: size of the parent structure that contains private data + * + * Driver should use vdpa_alloc_device() wrapper macro instead of + * using this directly. + * + * Returns an error when parent/config/dma_dev is not set or fail to get + * ida. + */ +struct vdpa_device *__vdpa_alloc_device(struct device *parent, + const struct vdpa_config_ops *config, + int nvqs, + size_t size) +{ + struct vdpa_device *vdev; + int err = -EINVAL; + + if (!config) + goto err; + + if (!!config->dma_map != !!config->dma_unmap) + goto err; + + err = -ENOMEM; + vdev = kzalloc(size, GFP_KERNEL); + if (!vdev) + goto err; + + err = ida_simple_get(&vdpa_index_ida, 0, 0, GFP_KERNEL); + if (err < 0) + goto err_ida; + + vdev->dev.bus = &vdpa_bus; + vdev->dev.parent = parent; + vdev->dev.release = vdpa_release_dev; + vdev->index = err; + vdev->config = config; + vdev->features_valid = false; + vdev->nvqs = nvqs; + + err = dev_set_name(&vdev->dev, "vdpa%u", vdev->index); + if (err) + goto err_name; + + device_initialize(&vdev->dev); + + return vdev; + +err_name: + ida_simple_remove(&vdpa_index_ida, vdev->index); +err_ida: + kfree(vdev); +err: + return ERR_PTR(err); +} +EXPORT_SYMBOL_GPL(__vdpa_alloc_device); + +/** + * vdpa_register_device - register a vDPA device + * Callers must have a succeed call of vdpa_alloc_device() before. + * @vdev: the vdpa device to be registered to vDPA bus + * + * Returns an error when fail to add to vDPA bus + */ +int vdpa_register_device(struct vdpa_device *vdev) +{ + return device_add(&vdev->dev); +} +EXPORT_SYMBOL_GPL(vdpa_register_device); + +/** + * vdpa_unregister_device - unregister a vDPA device + * @vdev: the vdpa device to be unregisted from vDPA bus + */ +void vdpa_unregister_device(struct vdpa_device *vdev) +{ + device_unregister(&vdev->dev); +} +EXPORT_SYMBOL_GPL(vdpa_unregister_device); + +/** + * __vdpa_register_driver - register a vDPA device driver + * @drv: the vdpa device driver to be registered + * @owner: module owner of the driver + * + * Returns an err when fail to do the registration + */ +int __vdpa_register_driver(struct vdpa_driver *drv, struct module *owner) +{ + drv->driver.bus = &vdpa_bus; + drv->driver.owner = owner; + + return driver_register(&drv->driver); +} +EXPORT_SYMBOL_GPL(__vdpa_register_driver); + +/** + * vdpa_unregister_driver - unregister a vDPA device driver + * @drv: the vdpa device driver to be unregistered + */ +void vdpa_unregister_driver(struct vdpa_driver *drv) +{ + driver_unregister(&drv->driver); +} +EXPORT_SYMBOL_GPL(vdpa_unregister_driver); + +static int vdpa_init(void) +{ + return bus_register(&vdpa_bus); +} + +static void __exit vdpa_exit(void) +{ + bus_unregister(&vdpa_bus); + ida_destroy(&vdpa_index_ida); +} +core_initcall(vdpa_init); +module_exit(vdpa_exit); + +MODULE_AUTHOR("Jason Wang <jasowang@redhat.com>"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/vdpa/vdpa_sim/Makefile b/drivers/vdpa/vdpa_sim/Makefile new file mode 100644 index 000000000..b40278f65 --- /dev/null +++ b/drivers/vdpa/vdpa_sim/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_VDPA_SIM) += vdpa_sim.o diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c new file mode 100644 index 000000000..e65c0fa95 --- /dev/null +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -0,0 +1,780 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * VDPA networking device simulator. + * + * Copyright (c) 2020, Red Hat Inc. All rights reserved. + * Author: Jason Wang <jasowang@redhat.com> + * + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/device.h> +#include <linux/kernel.h> +#include <linux/fs.h> +#include <linux/poll.h> +#include <linux/slab.h> +#include <linux/sched.h> +#include <linux/wait.h> +#include <linux/uuid.h> +#include <linux/iommu.h> +#include <linux/dma-map-ops.h> +#include <linux/sysfs.h> +#include <linux/file.h> +#include <linux/etherdevice.h> +#include <linux/vringh.h> +#include <linux/vdpa.h> +#include <linux/virtio_byteorder.h> +#include <linux/vhost_iotlb.h> +#include <uapi/linux/virtio_config.h> +#include <uapi/linux/virtio_net.h> + +#define DRV_VERSION "0.1" +#define DRV_AUTHOR "Jason Wang <jasowang@redhat.com>" +#define DRV_DESC "vDPA Device Simulator" +#define DRV_LICENSE "GPL v2" + +static int batch_mapping = 1; +module_param(batch_mapping, int, 0444); +MODULE_PARM_DESC(batch_mapping, "Batched mapping 1 -Enable; 0 - Disable"); + +static char *macaddr; +module_param(macaddr, charp, 0); +MODULE_PARM_DESC(macaddr, "Ethernet MAC address"); + +u8 macaddr_buf[ETH_ALEN]; + +struct vdpasim_virtqueue { + struct vringh vring; + struct vringh_kiov iov; + unsigned short head; + bool ready; + u64 desc_addr; + u64 device_addr; + u64 driver_addr; + u32 num; + void *private; + irqreturn_t (*cb)(void *data); +}; + +#define VDPASIM_QUEUE_ALIGN PAGE_SIZE +#define VDPASIM_QUEUE_MAX 256 +#define VDPASIM_DEVICE_ID 0x1 +#define VDPASIM_VENDOR_ID 0 +#define VDPASIM_VQ_NUM 0x2 +#define VDPASIM_NAME "vdpasim-netdev" + +static u64 vdpasim_features = (1ULL << VIRTIO_F_ANY_LAYOUT) | + (1ULL << VIRTIO_F_VERSION_1) | + (1ULL << VIRTIO_F_ACCESS_PLATFORM) | + (1ULL << VIRTIO_NET_F_MAC); + +struct vdpasim; + +struct vdpasim_dev_attr { + size_t config_size; + int nvqs; + void (*get_config)(struct vdpasim *vdpasim, void *config); +}; + +/* State of each vdpasim device */ +struct vdpasim { + struct vdpa_device vdpa; + struct vdpasim_virtqueue *vqs; + struct work_struct work; + struct vdpasim_dev_attr dev_attr; + /* spinlock to synchronize virtqueue state */ + spinlock_t lock; + /* virtio config according to device type */ + void *config; + struct vhost_iotlb *iommu; + void *buffer; + u32 status; + u32 generation; + u64 features; + /* spinlock to synchronize iommu table */ + spinlock_t iommu_lock; +}; + +/* TODO: cross-endian support */ +static inline bool vdpasim_is_little_endian(struct vdpasim *vdpasim) +{ + return virtio_legacy_is_little_endian() || + (vdpasim->features & (1ULL << VIRTIO_F_VERSION_1)); +} + +static inline u16 vdpasim16_to_cpu(struct vdpasim *vdpasim, __virtio16 val) +{ + return __virtio16_to_cpu(vdpasim_is_little_endian(vdpasim), val); +} + +static inline __virtio16 cpu_to_vdpasim16(struct vdpasim *vdpasim, u16 val) +{ + return __cpu_to_virtio16(vdpasim_is_little_endian(vdpasim), val); +} + +static struct vdpasim *vdpasim_dev; + +static struct vdpasim *vdpa_to_sim(struct vdpa_device *vdpa) +{ + return container_of(vdpa, struct vdpasim, vdpa); +} + +static struct vdpasim *dev_to_sim(struct device *dev) +{ + struct vdpa_device *vdpa = dev_to_vdpa(dev); + + return vdpa_to_sim(vdpa); +} + +static void vdpasim_queue_ready(struct vdpasim *vdpasim, unsigned int idx) +{ + struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; + + vringh_init_iotlb(&vq->vring, vdpasim_features, + VDPASIM_QUEUE_MAX, false, + (struct vring_desc *)(uintptr_t)vq->desc_addr, + (struct vring_avail *) + (uintptr_t)vq->driver_addr, + (struct vring_used *) + (uintptr_t)vq->device_addr); +} + +static void vdpasim_vq_reset(struct vdpasim_virtqueue *vq) +{ + vq->ready = false; + vq->desc_addr = 0; + vq->driver_addr = 0; + vq->device_addr = 0; + vq->cb = NULL; + vq->private = NULL; + vringh_init_iotlb(&vq->vring, vdpasim_features, VDPASIM_QUEUE_MAX, + false, NULL, NULL, NULL); +} + +static void vdpasim_reset(struct vdpasim *vdpasim) +{ + int i; + + for (i = 0; i < vdpasim->dev_attr.nvqs; i++) + vdpasim_vq_reset(&vdpasim->vqs[i]); + + spin_lock(&vdpasim->iommu_lock); + vhost_iotlb_reset(vdpasim->iommu); + spin_unlock(&vdpasim->iommu_lock); + + vdpasim->features = 0; + vdpasim->status = 0; + ++vdpasim->generation; +} + +static void vdpasim_work(struct work_struct *work) +{ + struct vdpasim *vdpasim = container_of(work, struct + vdpasim, work); + struct vdpasim_virtqueue *txq = &vdpasim->vqs[1]; + struct vdpasim_virtqueue *rxq = &vdpasim->vqs[0]; + ssize_t read, write; + size_t total_write; + int pkts = 0; + int err; + + spin_lock(&vdpasim->lock); + + if (!(vdpasim->status & VIRTIO_CONFIG_S_DRIVER_OK)) + goto out; + + if (!txq->ready || !rxq->ready) + goto out; + + while (true) { + total_write = 0; + err = vringh_getdesc_iotlb(&txq->vring, &txq->iov, NULL, + &txq->head, GFP_ATOMIC); + if (err <= 0) + break; + + err = vringh_getdesc_iotlb(&rxq->vring, NULL, &rxq->iov, + &rxq->head, GFP_ATOMIC); + if (err <= 0) { + vringh_complete_iotlb(&txq->vring, txq->head, 0); + break; + } + + while (true) { + read = vringh_iov_pull_iotlb(&txq->vring, &txq->iov, + vdpasim->buffer, + PAGE_SIZE); + if (read <= 0) + break; + + write = vringh_iov_push_iotlb(&rxq->vring, &rxq->iov, + vdpasim->buffer, read); + if (write <= 0) + break; + + total_write += write; + } + + /* Make sure data is wrote before advancing index */ + smp_wmb(); + + vringh_complete_iotlb(&txq->vring, txq->head, 0); + vringh_complete_iotlb(&rxq->vring, rxq->head, total_write); + + /* Make sure used is visible before rasing the interrupt. */ + smp_wmb(); + + local_bh_disable(); + if (txq->cb) + txq->cb(txq->private); + if (rxq->cb) + rxq->cb(rxq->private); + local_bh_enable(); + + if (++pkts > 4) { + schedule_work(&vdpasim->work); + goto out; + } + } + +out: + spin_unlock(&vdpasim->lock); +} + +static int dir_to_perm(enum dma_data_direction dir) +{ + int perm = -EFAULT; + + switch (dir) { + case DMA_FROM_DEVICE: + perm = VHOST_MAP_WO; + break; + case DMA_TO_DEVICE: + perm = VHOST_MAP_RO; + break; + case DMA_BIDIRECTIONAL: + perm = VHOST_MAP_RW; + break; + default: + break; + } + + return perm; +} + +static dma_addr_t vdpasim_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction dir, + unsigned long attrs) +{ + struct vdpasim *vdpasim = dev_to_sim(dev); + struct vhost_iotlb *iommu = vdpasim->iommu; + u64 pa = (page_to_pfn(page) << PAGE_SHIFT) + offset; + int ret, perm = dir_to_perm(dir); + + if (perm < 0) + return DMA_MAPPING_ERROR; + + /* For simplicity, use identical mapping to avoid e.g iova + * allocator. + */ + spin_lock(&vdpasim->iommu_lock); + ret = vhost_iotlb_add_range(iommu, pa, pa + size - 1, + pa, dir_to_perm(dir)); + spin_unlock(&vdpasim->iommu_lock); + if (ret) + return DMA_MAPPING_ERROR; + + return (dma_addr_t)(pa); +} + +static void vdpasim_unmap_page(struct device *dev, dma_addr_t dma_addr, + size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ + struct vdpasim *vdpasim = dev_to_sim(dev); + struct vhost_iotlb *iommu = vdpasim->iommu; + + spin_lock(&vdpasim->iommu_lock); + vhost_iotlb_del_range(iommu, (u64)dma_addr, + (u64)dma_addr + size - 1); + spin_unlock(&vdpasim->iommu_lock); +} + +static void *vdpasim_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_addr, gfp_t flag, + unsigned long attrs) +{ + struct vdpasim *vdpasim = dev_to_sim(dev); + struct vhost_iotlb *iommu = vdpasim->iommu; + void *addr = kmalloc(size, flag); + int ret; + + spin_lock(&vdpasim->iommu_lock); + if (!addr) { + *dma_addr = DMA_MAPPING_ERROR; + } else { + u64 pa = virt_to_phys(addr); + + ret = vhost_iotlb_add_range(iommu, (u64)pa, + (u64)pa + size - 1, + pa, VHOST_MAP_RW); + if (ret) { + *dma_addr = DMA_MAPPING_ERROR; + kfree(addr); + addr = NULL; + } else + *dma_addr = (dma_addr_t)pa; + } + spin_unlock(&vdpasim->iommu_lock); + + return addr; +} + +static void vdpasim_free_coherent(struct device *dev, size_t size, + void *vaddr, dma_addr_t dma_addr, + unsigned long attrs) +{ + struct vdpasim *vdpasim = dev_to_sim(dev); + struct vhost_iotlb *iommu = vdpasim->iommu; + + spin_lock(&vdpasim->iommu_lock); + vhost_iotlb_del_range(iommu, (u64)dma_addr, + (u64)dma_addr + size - 1); + spin_unlock(&vdpasim->iommu_lock); + + kfree(phys_to_virt((uintptr_t)dma_addr)); +} + +static const struct dma_map_ops vdpasim_dma_ops = { + .map_page = vdpasim_map_page, + .unmap_page = vdpasim_unmap_page, + .alloc = vdpasim_alloc_coherent, + .free = vdpasim_free_coherent, +}; + +static const struct vdpa_config_ops vdpasim_net_config_ops; +static const struct vdpa_config_ops vdpasim_net_batch_config_ops; + +static struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr) +{ + const struct vdpa_config_ops *ops; + struct vdpasim *vdpasim; + struct device *dev; + int i, ret = -ENOMEM; + + if (batch_mapping) + ops = &vdpasim_net_batch_config_ops; + else + ops = &vdpasim_net_config_ops; + + vdpasim = vdpa_alloc_device(struct vdpasim, vdpa, NULL, ops, + dev_attr->nvqs); + if (!vdpasim) + goto err_alloc; + + vdpasim->dev_attr = *dev_attr; + INIT_WORK(&vdpasim->work, vdpasim_work); + spin_lock_init(&vdpasim->lock); + spin_lock_init(&vdpasim->iommu_lock); + + dev = &vdpasim->vdpa.dev; + dev->dma_mask = &dev->coherent_dma_mask; + if (dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64))) + goto err_iommu; + set_dma_ops(dev, &vdpasim_dma_ops); + + vdpasim->config = kzalloc(dev_attr->config_size, GFP_KERNEL); + if (!vdpasim->config) + goto err_iommu; + + vdpasim->vqs = kcalloc(dev_attr->nvqs, sizeof(struct vdpasim_virtqueue), + GFP_KERNEL); + if (!vdpasim->vqs) + goto err_iommu; + + vdpasim->iommu = vhost_iotlb_alloc(2048, 0); + if (!vdpasim->iommu) + goto err_iommu; + + vdpasim->buffer = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!vdpasim->buffer) + goto err_iommu; + + if (macaddr) { + mac_pton(macaddr, macaddr_buf); + if (!is_valid_ether_addr(macaddr_buf)) { + ret = -EADDRNOTAVAIL; + goto err_iommu; + } + } else { + eth_random_addr(macaddr_buf); + } + + for (i = 0; i < dev_attr->nvqs; i++) + vringh_set_iotlb(&vdpasim->vqs[i].vring, vdpasim->iommu); + + vdpasim->vdpa.dma_dev = dev; + ret = vdpa_register_device(&vdpasim->vdpa); + if (ret) + goto err_iommu; + + return vdpasim; + +err_iommu: + put_device(dev); +err_alloc: + return ERR_PTR(ret); +} + +static int vdpasim_set_vq_address(struct vdpa_device *vdpa, u16 idx, + u64 desc_area, u64 driver_area, + u64 device_area) +{ + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; + + vq->desc_addr = desc_area; + vq->driver_addr = driver_area; + vq->device_addr = device_area; + + return 0; +} + +static void vdpasim_set_vq_num(struct vdpa_device *vdpa, u16 idx, u32 num) +{ + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; + + vq->num = num; +} + +static void vdpasim_kick_vq(struct vdpa_device *vdpa, u16 idx) +{ + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; + + if (vq->ready) + schedule_work(&vdpasim->work); +} + +static void vdpasim_set_vq_cb(struct vdpa_device *vdpa, u16 idx, + struct vdpa_callback *cb) +{ + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; + + vq->cb = cb->callback; + vq->private = cb->private; +} + +static void vdpasim_set_vq_ready(struct vdpa_device *vdpa, u16 idx, bool ready) +{ + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; + bool old_ready; + + spin_lock(&vdpasim->lock); + old_ready = vq->ready; + vq->ready = ready; + if (vq->ready && !old_ready) { + vdpasim_queue_ready(vdpasim, idx); + } + spin_unlock(&vdpasim->lock); +} + +static bool vdpasim_get_vq_ready(struct vdpa_device *vdpa, u16 idx) +{ + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; + + return vq->ready; +} + +static int vdpasim_set_vq_state(struct vdpa_device *vdpa, u16 idx, + const struct vdpa_vq_state *state) +{ + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; + struct vringh *vrh = &vq->vring; + + spin_lock(&vdpasim->lock); + vrh->last_avail_idx = state->avail_index; + spin_unlock(&vdpasim->lock); + + return 0; +} + +static int vdpasim_get_vq_state(struct vdpa_device *vdpa, u16 idx, + struct vdpa_vq_state *state) +{ + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; + struct vringh *vrh = &vq->vring; + + state->avail_index = vrh->last_avail_idx; + return 0; +} + +static u32 vdpasim_get_vq_align(struct vdpa_device *vdpa) +{ + return VDPASIM_QUEUE_ALIGN; +} + +static u64 vdpasim_get_features(struct vdpa_device *vdpa) +{ + return vdpasim_features; +} + +static int vdpasim_set_features(struct vdpa_device *vdpa, u64 features) +{ + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + + /* DMA mapping must be done by driver */ + if (!(features & (1ULL << VIRTIO_F_ACCESS_PLATFORM))) + return -EINVAL; + + vdpasim->features = features & vdpasim_features; + + return 0; +} + +static void vdpasim_set_config_cb(struct vdpa_device *vdpa, + struct vdpa_callback *cb) +{ + /* We don't support config interrupt */ +} + +static u16 vdpasim_get_vq_num_max(struct vdpa_device *vdpa) +{ + return VDPASIM_QUEUE_MAX; +} + +static u32 vdpasim_get_device_id(struct vdpa_device *vdpa) +{ + return VDPASIM_DEVICE_ID; +} + +static u32 vdpasim_get_vendor_id(struct vdpa_device *vdpa) +{ + return VDPASIM_VENDOR_ID; +} + +static u8 vdpasim_get_status(struct vdpa_device *vdpa) +{ + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + u8 status; + + spin_lock(&vdpasim->lock); + status = vdpasim->status; + spin_unlock(&vdpasim->lock); + + return status; +} + +static void vdpasim_set_status(struct vdpa_device *vdpa, u8 status) +{ + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + + spin_lock(&vdpasim->lock); + vdpasim->status = status; + if (status == 0) + vdpasim_reset(vdpasim); + spin_unlock(&vdpasim->lock); +} + +static void vdpasim_get_config(struct vdpa_device *vdpa, unsigned int offset, + void *buf, unsigned int len) +{ + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + + if (offset + len > vdpasim->dev_attr.config_size) + return; + + if (vdpasim->dev_attr.get_config) + vdpasim->dev_attr.get_config(vdpasim, vdpasim->config); + + memcpy(buf, vdpasim->config + offset, len); +} + +static void vdpasim_set_config(struct vdpa_device *vdpa, unsigned int offset, + const void *buf, unsigned int len) +{ + /* No writable config supportted by vdpasim */ +} + +static u32 vdpasim_get_generation(struct vdpa_device *vdpa) +{ + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + + return vdpasim->generation; +} + +static struct vdpa_iova_range vdpasim_get_iova_range(struct vdpa_device *vdpa) +{ + struct vdpa_iova_range range = { + .first = 0ULL, + .last = ULLONG_MAX, + }; + + return range; +} + +static int vdpasim_set_map(struct vdpa_device *vdpa, + struct vhost_iotlb *iotlb) +{ + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + struct vhost_iotlb_map *map; + u64 start = 0ULL, last = 0ULL - 1; + int ret; + + spin_lock(&vdpasim->iommu_lock); + vhost_iotlb_reset(vdpasim->iommu); + + for (map = vhost_iotlb_itree_first(iotlb, start, last); map; + map = vhost_iotlb_itree_next(map, start, last)) { + ret = vhost_iotlb_add_range(vdpasim->iommu, map->start, + map->last, map->addr, map->perm); + if (ret) + goto err; + } + spin_unlock(&vdpasim->iommu_lock); + return 0; + +err: + vhost_iotlb_reset(vdpasim->iommu); + spin_unlock(&vdpasim->iommu_lock); + return ret; +} + +static int vdpasim_dma_map(struct vdpa_device *vdpa, u64 iova, u64 size, + u64 pa, u32 perm) +{ + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + int ret; + + spin_lock(&vdpasim->iommu_lock); + ret = vhost_iotlb_add_range(vdpasim->iommu, iova, iova + size - 1, pa, + perm); + spin_unlock(&vdpasim->iommu_lock); + + return ret; +} + +static int vdpasim_dma_unmap(struct vdpa_device *vdpa, u64 iova, u64 size) +{ + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + + spin_lock(&vdpasim->iommu_lock); + vhost_iotlb_del_range(vdpasim->iommu, iova, iova + size - 1); + spin_unlock(&vdpasim->iommu_lock); + + return 0; +} + +static void vdpasim_free(struct vdpa_device *vdpa) +{ + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + + cancel_work_sync(&vdpasim->work); + kfree(vdpasim->buffer); + if (vdpasim->iommu) + vhost_iotlb_free(vdpasim->iommu); + kfree(vdpasim->vqs); + kfree(vdpasim->config); +} + +static const struct vdpa_config_ops vdpasim_net_config_ops = { + .set_vq_address = vdpasim_set_vq_address, + .set_vq_num = vdpasim_set_vq_num, + .kick_vq = vdpasim_kick_vq, + .set_vq_cb = vdpasim_set_vq_cb, + .set_vq_ready = vdpasim_set_vq_ready, + .get_vq_ready = vdpasim_get_vq_ready, + .set_vq_state = vdpasim_set_vq_state, + .get_vq_state = vdpasim_get_vq_state, + .get_vq_align = vdpasim_get_vq_align, + .get_features = vdpasim_get_features, + .set_features = vdpasim_set_features, + .set_config_cb = vdpasim_set_config_cb, + .get_vq_num_max = vdpasim_get_vq_num_max, + .get_device_id = vdpasim_get_device_id, + .get_vendor_id = vdpasim_get_vendor_id, + .get_status = vdpasim_get_status, + .set_status = vdpasim_set_status, + .get_config = vdpasim_get_config, + .set_config = vdpasim_set_config, + .get_generation = vdpasim_get_generation, + .get_iova_range = vdpasim_get_iova_range, + .dma_map = vdpasim_dma_map, + .dma_unmap = vdpasim_dma_unmap, + .free = vdpasim_free, +}; + +static const struct vdpa_config_ops vdpasim_net_batch_config_ops = { + .set_vq_address = vdpasim_set_vq_address, + .set_vq_num = vdpasim_set_vq_num, + .kick_vq = vdpasim_kick_vq, + .set_vq_cb = vdpasim_set_vq_cb, + .set_vq_ready = vdpasim_set_vq_ready, + .get_vq_ready = vdpasim_get_vq_ready, + .set_vq_state = vdpasim_set_vq_state, + .get_vq_state = vdpasim_get_vq_state, + .get_vq_align = vdpasim_get_vq_align, + .get_features = vdpasim_get_features, + .set_features = vdpasim_set_features, + .set_config_cb = vdpasim_set_config_cb, + .get_vq_num_max = vdpasim_get_vq_num_max, + .get_device_id = vdpasim_get_device_id, + .get_vendor_id = vdpasim_get_vendor_id, + .get_status = vdpasim_get_status, + .set_status = vdpasim_set_status, + .get_config = vdpasim_get_config, + .set_config = vdpasim_set_config, + .get_generation = vdpasim_get_generation, + .get_iova_range = vdpasim_get_iova_range, + .set_map = vdpasim_set_map, + .free = vdpasim_free, +}; + +static void vdpasim_net_get_config(struct vdpasim *vdpasim, void *config) +{ + struct virtio_net_config *net_config = + (struct virtio_net_config *)config; + + net_config->mtu = cpu_to_vdpasim16(vdpasim, 1500); + net_config->status = cpu_to_vdpasim16(vdpasim, VIRTIO_NET_S_LINK_UP); + memcpy(net_config->mac, macaddr_buf, ETH_ALEN); +} + +static int __init vdpasim_dev_init(void) +{ + struct vdpasim_dev_attr dev_attr = {}; + + dev_attr.nvqs = VDPASIM_VQ_NUM; + dev_attr.config_size = sizeof(struct virtio_net_config); + dev_attr.get_config = vdpasim_net_get_config; + + vdpasim_dev = vdpasim_create(&dev_attr); + + if (!IS_ERR(vdpasim_dev)) + return 0; + + return PTR_ERR(vdpasim_dev); +} + +static void __exit vdpasim_dev_exit(void) +{ + struct vdpa_device *vdpa = &vdpasim_dev->vdpa; + + vdpa_unregister_device(vdpa); +} + +module_init(vdpasim_dev_init) +module_exit(vdpasim_dev_exit) + +MODULE_VERSION(DRV_VERSION); +MODULE_LICENSE(DRV_LICENSE); +MODULE_AUTHOR(DRV_AUTHOR); +MODULE_DESCRIPTION(DRV_DESC); |