diff options
Diffstat (limited to 'drivers/vdpa/vdpa_sim')
-rw-r--r-- | drivers/vdpa/vdpa_sim/Makefile | 4 | ||||
-rw-r--r-- | drivers/vdpa/vdpa_sim/vdpa_sim.c | 807 | ||||
-rw-r--r-- | drivers/vdpa/vdpa_sim/vdpa_sim.h | 121 | ||||
-rw-r--r-- | drivers/vdpa/vdpa_sim/vdpa_sim_blk.c | 527 | ||||
-rw-r--r-- | drivers/vdpa/vdpa_sim/vdpa_sim_net.c | 564 |
5 files changed, 2023 insertions, 0 deletions
diff --git a/drivers/vdpa/vdpa_sim/Makefile b/drivers/vdpa/vdpa_sim/Makefile new file mode 100644 index 0000000000..d458103302 --- /dev/null +++ b/drivers/vdpa/vdpa_sim/Makefile @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_VDPA_SIM) += vdpa_sim.o +obj-$(CONFIG_VDPA_SIM_NET) += vdpa_sim_net.o +obj-$(CONFIG_VDPA_SIM_BLOCK) += vdpa_sim_blk.o diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c new file mode 100644 index 0000000000..76d41058ad --- /dev/null +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -0,0 +1,807 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * VDPA device simulator core. + * + * Copyright (c) 2020, Red Hat Inc. All rights reserved. + * Author: Jason Wang <jasowang@redhat.com> + * + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/device.h> +#include <linux/kernel.h> +#include <linux/kthread.h> +#include <linux/slab.h> +#include <linux/dma-map-ops.h> +#include <linux/vringh.h> +#include <linux/vdpa.h> +#include <linux/vhost_iotlb.h> +#include <uapi/linux/vdpa.h> +#include <uapi/linux/vhost_types.h> + +#include "vdpa_sim.h" + +#define DRV_VERSION "0.1" +#define DRV_AUTHOR "Jason Wang <jasowang@redhat.com>" +#define DRV_DESC "vDPA Device Simulator core" +#define DRV_LICENSE "GPL v2" + +static int batch_mapping = 1; +module_param(batch_mapping, int, 0444); +MODULE_PARM_DESC(batch_mapping, "Batched mapping 1 -Enable; 0 - Disable"); + +static int max_iotlb_entries = 2048; +module_param(max_iotlb_entries, int, 0444); +MODULE_PARM_DESC(max_iotlb_entries, + "Maximum number of iotlb entries for each address space. 0 means unlimited. (default: 2048)"); + +static bool use_va = true; +module_param(use_va, bool, 0444); +MODULE_PARM_DESC(use_va, "Enable/disable the device's ability to use VA"); + +#define VDPASIM_QUEUE_ALIGN PAGE_SIZE +#define VDPASIM_QUEUE_MAX 256 +#define VDPASIM_VENDOR_ID 0 + +struct vdpasim_mm_work { + struct kthread_work work; + struct vdpasim *vdpasim; + struct mm_struct *mm_to_bind; + int ret; +}; + +static void vdpasim_mm_work_fn(struct kthread_work *work) +{ + struct vdpasim_mm_work *mm_work = + container_of(work, struct vdpasim_mm_work, work); + struct vdpasim *vdpasim = mm_work->vdpasim; + + mm_work->ret = 0; + + //TODO: should we attach the cgroup of the mm owner? + vdpasim->mm_bound = mm_work->mm_to_bind; +} + +static void vdpasim_worker_change_mm_sync(struct vdpasim *vdpasim, + struct vdpasim_mm_work *mm_work) +{ + struct kthread_work *work = &mm_work->work; + + kthread_init_work(work, vdpasim_mm_work_fn); + kthread_queue_work(vdpasim->worker, work); + + kthread_flush_work(work); +} + +static struct vdpasim *vdpa_to_sim(struct vdpa_device *vdpa) +{ + return container_of(vdpa, struct vdpasim, vdpa); +} + +static void vdpasim_vq_notify(struct vringh *vring) +{ + struct vdpasim_virtqueue *vq = + container_of(vring, struct vdpasim_virtqueue, vring); + + if (!vq->cb) + return; + + vq->cb(vq->private); +} + +static void vdpasim_queue_ready(struct vdpasim *vdpasim, unsigned int idx) +{ + struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; + uint16_t last_avail_idx = vq->vring.last_avail_idx; + struct vring_desc *desc = (struct vring_desc *) + (uintptr_t)vq->desc_addr; + struct vring_avail *avail = (struct vring_avail *) + (uintptr_t)vq->driver_addr; + struct vring_used *used = (struct vring_used *) + (uintptr_t)vq->device_addr; + + if (use_va && vdpasim->mm_bound) { + vringh_init_iotlb_va(&vq->vring, vdpasim->features, vq->num, + true, desc, avail, used); + } else { + vringh_init_iotlb(&vq->vring, vdpasim->features, vq->num, + true, desc, avail, used); + } + + vq->vring.last_avail_idx = last_avail_idx; + + /* + * Since vdpa_sim does not support receive inflight descriptors as a + * destination of a migration, let's set both avail_idx and used_idx + * the same at vq start. This is how vhost-user works in a + * VHOST_SET_VRING_BASE call. + * + * Although the simple fix is to set last_used_idx at + * vdpasim_set_vq_state, it would be reset at vdpasim_queue_ready. + */ + vq->vring.last_used_idx = last_avail_idx; + vq->vring.notify = vdpasim_vq_notify; +} + +static void vdpasim_vq_reset(struct vdpasim *vdpasim, + struct vdpasim_virtqueue *vq) +{ + vq->ready = false; + vq->desc_addr = 0; + vq->driver_addr = 0; + vq->device_addr = 0; + vq->cb = NULL; + vq->private = NULL; + vringh_init_iotlb(&vq->vring, vdpasim->dev_attr.supported_features, + VDPASIM_QUEUE_MAX, false, NULL, NULL, NULL); + + vq->vring.notify = NULL; +} + +static void vdpasim_do_reset(struct vdpasim *vdpasim) +{ + int i; + + spin_lock(&vdpasim->iommu_lock); + + for (i = 0; i < vdpasim->dev_attr.nvqs; i++) { + vdpasim_vq_reset(vdpasim, &vdpasim->vqs[i]); + vringh_set_iotlb(&vdpasim->vqs[i].vring, &vdpasim->iommu[0], + &vdpasim->iommu_lock); + } + + for (i = 0; i < vdpasim->dev_attr.nas; i++) { + vhost_iotlb_reset(&vdpasim->iommu[i]); + vhost_iotlb_add_range(&vdpasim->iommu[i], 0, ULONG_MAX, + 0, VHOST_MAP_RW); + vdpasim->iommu_pt[i] = true; + } + + vdpasim->running = true; + spin_unlock(&vdpasim->iommu_lock); + + vdpasim->features = 0; + vdpasim->status = 0; + ++vdpasim->generation; +} + +static const struct vdpa_config_ops vdpasim_config_ops; +static const struct vdpa_config_ops vdpasim_batch_config_ops; + +static void vdpasim_work_fn(struct kthread_work *work) +{ + struct vdpasim *vdpasim = container_of(work, struct vdpasim, work); + struct mm_struct *mm = vdpasim->mm_bound; + + if (use_va && mm) { + if (!mmget_not_zero(mm)) + return; + kthread_use_mm(mm); + } + + vdpasim->dev_attr.work_fn(vdpasim); + + if (use_va && mm) { + kthread_unuse_mm(mm); + mmput(mm); + } +} + +struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr, + const struct vdpa_dev_set_config *config) +{ + const struct vdpa_config_ops *ops; + struct vdpa_device *vdpa; + struct vdpasim *vdpasim; + struct device *dev; + int i, ret = -ENOMEM; + + if (!dev_attr->alloc_size) + return ERR_PTR(-EINVAL); + + if (config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) { + if (config->device_features & + ~dev_attr->supported_features) + return ERR_PTR(-EINVAL); + dev_attr->supported_features = + config->device_features; + } + + if (batch_mapping) + ops = &vdpasim_batch_config_ops; + else + ops = &vdpasim_config_ops; + + vdpa = __vdpa_alloc_device(NULL, ops, + dev_attr->ngroups, dev_attr->nas, + dev_attr->alloc_size, + dev_attr->name, use_va); + if (IS_ERR(vdpa)) { + ret = PTR_ERR(vdpa); + goto err_alloc; + } + + vdpasim = vdpa_to_sim(vdpa); + vdpasim->dev_attr = *dev_attr; + dev = &vdpasim->vdpa.dev; + + kthread_init_work(&vdpasim->work, vdpasim_work_fn); + vdpasim->worker = kthread_create_worker(0, "vDPA sim worker: %s", + dev_attr->name); + if (IS_ERR(vdpasim->worker)) + goto err_iommu; + + mutex_init(&vdpasim->mutex); + spin_lock_init(&vdpasim->iommu_lock); + + dev->dma_mask = &dev->coherent_dma_mask; + if (dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64))) + goto err_iommu; + vdpasim->vdpa.mdev = dev_attr->mgmt_dev; + + vdpasim->config = kzalloc(dev_attr->config_size, GFP_KERNEL); + if (!vdpasim->config) + goto err_iommu; + + vdpasim->vqs = kcalloc(dev_attr->nvqs, sizeof(struct vdpasim_virtqueue), + GFP_KERNEL); + if (!vdpasim->vqs) + goto err_iommu; + + vdpasim->iommu = kmalloc_array(vdpasim->dev_attr.nas, + sizeof(*vdpasim->iommu), GFP_KERNEL); + if (!vdpasim->iommu) + goto err_iommu; + + vdpasim->iommu_pt = kmalloc_array(vdpasim->dev_attr.nas, + sizeof(*vdpasim->iommu_pt), GFP_KERNEL); + if (!vdpasim->iommu_pt) + goto err_iommu; + + for (i = 0; i < vdpasim->dev_attr.nas; i++) + vhost_iotlb_init(&vdpasim->iommu[i], max_iotlb_entries, 0); + + for (i = 0; i < dev_attr->nvqs; i++) + vringh_set_iotlb(&vdpasim->vqs[i].vring, &vdpasim->iommu[0], + &vdpasim->iommu_lock); + + vdpasim->vdpa.dma_dev = dev; + + return vdpasim; + +err_iommu: + put_device(dev); +err_alloc: + return ERR_PTR(ret); +} +EXPORT_SYMBOL_GPL(vdpasim_create); + +void vdpasim_schedule_work(struct vdpasim *vdpasim) +{ + kthread_queue_work(vdpasim->worker, &vdpasim->work); +} +EXPORT_SYMBOL_GPL(vdpasim_schedule_work); + +static int vdpasim_set_vq_address(struct vdpa_device *vdpa, u16 idx, + u64 desc_area, u64 driver_area, + u64 device_area) +{ + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; + + vq->desc_addr = desc_area; + vq->driver_addr = driver_area; + vq->device_addr = device_area; + + return 0; +} + +static void vdpasim_set_vq_num(struct vdpa_device *vdpa, u16 idx, u32 num) +{ + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; + + vq->num = num; +} + +static void vdpasim_kick_vq(struct vdpa_device *vdpa, u16 idx) +{ + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; + + if (!vdpasim->running && + (vdpasim->status & VIRTIO_CONFIG_S_DRIVER_OK)) { + vdpasim->pending_kick = true; + return; + } + + if (vq->ready) + vdpasim_schedule_work(vdpasim); +} + +static void vdpasim_set_vq_cb(struct vdpa_device *vdpa, u16 idx, + struct vdpa_callback *cb) +{ + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; + + vq->cb = cb->callback; + vq->private = cb->private; +} + +static void vdpasim_set_vq_ready(struct vdpa_device *vdpa, u16 idx, bool ready) +{ + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; + bool old_ready; + + mutex_lock(&vdpasim->mutex); + old_ready = vq->ready; + vq->ready = ready; + if (vq->ready && !old_ready) { + vdpasim_queue_ready(vdpasim, idx); + } + mutex_unlock(&vdpasim->mutex); +} + +static bool vdpasim_get_vq_ready(struct vdpa_device *vdpa, u16 idx) +{ + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; + + return vq->ready; +} + +static int vdpasim_set_vq_state(struct vdpa_device *vdpa, u16 idx, + const struct vdpa_vq_state *state) +{ + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; + struct vringh *vrh = &vq->vring; + + mutex_lock(&vdpasim->mutex); + vrh->last_avail_idx = state->split.avail_index; + mutex_unlock(&vdpasim->mutex); + + return 0; +} + +static int vdpasim_get_vq_state(struct vdpa_device *vdpa, u16 idx, + struct vdpa_vq_state *state) +{ + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; + struct vringh *vrh = &vq->vring; + + state->split.avail_index = vrh->last_avail_idx; + return 0; +} + +static int vdpasim_get_vq_stats(struct vdpa_device *vdpa, u16 idx, + struct sk_buff *msg, + struct netlink_ext_ack *extack) +{ + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + + if (vdpasim->dev_attr.get_stats) + return vdpasim->dev_attr.get_stats(vdpasim, idx, + msg, extack); + return -EOPNOTSUPP; +} + +static u32 vdpasim_get_vq_align(struct vdpa_device *vdpa) +{ + return VDPASIM_QUEUE_ALIGN; +} + +static u32 vdpasim_get_vq_group(struct vdpa_device *vdpa, u16 idx) +{ + /* RX and TX belongs to group 0, CVQ belongs to group 1 */ + if (idx == 2) + return 1; + else + return 0; +} + +static u64 vdpasim_get_device_features(struct vdpa_device *vdpa) +{ + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + + return vdpasim->dev_attr.supported_features; +} + +static u64 vdpasim_get_backend_features(const struct vdpa_device *vdpa) +{ + return BIT_ULL(VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK); +} + +static int vdpasim_set_driver_features(struct vdpa_device *vdpa, u64 features) +{ + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + + /* DMA mapping must be done by driver */ + if (!(features & (1ULL << VIRTIO_F_ACCESS_PLATFORM))) + return -EINVAL; + + vdpasim->features = features & vdpasim->dev_attr.supported_features; + + return 0; +} + +static u64 vdpasim_get_driver_features(struct vdpa_device *vdpa) +{ + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + + return vdpasim->features; +} + +static void vdpasim_set_config_cb(struct vdpa_device *vdpa, + struct vdpa_callback *cb) +{ + /* We don't support config interrupt */ +} + +static u16 vdpasim_get_vq_num_max(struct vdpa_device *vdpa) +{ + return VDPASIM_QUEUE_MAX; +} + +static u32 vdpasim_get_device_id(struct vdpa_device *vdpa) +{ + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + + return vdpasim->dev_attr.id; +} + +static u32 vdpasim_get_vendor_id(struct vdpa_device *vdpa) +{ + return VDPASIM_VENDOR_ID; +} + +static u8 vdpasim_get_status(struct vdpa_device *vdpa) +{ + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + u8 status; + + mutex_lock(&vdpasim->mutex); + status = vdpasim->status; + mutex_unlock(&vdpasim->mutex); + + return status; +} + +static void vdpasim_set_status(struct vdpa_device *vdpa, u8 status) +{ + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + + mutex_lock(&vdpasim->mutex); + vdpasim->status = status; + mutex_unlock(&vdpasim->mutex); +} + +static int vdpasim_reset(struct vdpa_device *vdpa) +{ + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + + mutex_lock(&vdpasim->mutex); + vdpasim->status = 0; + vdpasim_do_reset(vdpasim); + mutex_unlock(&vdpasim->mutex); + + return 0; +} + +static int vdpasim_suspend(struct vdpa_device *vdpa) +{ + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + + mutex_lock(&vdpasim->mutex); + vdpasim->running = false; + mutex_unlock(&vdpasim->mutex); + + return 0; +} + +static int vdpasim_resume(struct vdpa_device *vdpa) +{ + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + int i; + + mutex_lock(&vdpasim->mutex); + vdpasim->running = true; + + if (vdpasim->pending_kick) { + /* Process pending descriptors */ + for (i = 0; i < vdpasim->dev_attr.nvqs; ++i) + vdpasim_kick_vq(vdpa, i); + + vdpasim->pending_kick = false; + } + + mutex_unlock(&vdpasim->mutex); + + return 0; +} + +static size_t vdpasim_get_config_size(struct vdpa_device *vdpa) +{ + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + + return vdpasim->dev_attr.config_size; +} + +static void vdpasim_get_config(struct vdpa_device *vdpa, unsigned int offset, + void *buf, unsigned int len) +{ + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + + if (offset + len > vdpasim->dev_attr.config_size) + return; + + if (vdpasim->dev_attr.get_config) + vdpasim->dev_attr.get_config(vdpasim, vdpasim->config); + + memcpy(buf, vdpasim->config + offset, len); +} + +static void vdpasim_set_config(struct vdpa_device *vdpa, unsigned int offset, + const void *buf, unsigned int len) +{ + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + + if (offset + len > vdpasim->dev_attr.config_size) + return; + + memcpy(vdpasim->config + offset, buf, len); + + if (vdpasim->dev_attr.set_config) + vdpasim->dev_attr.set_config(vdpasim, vdpasim->config); +} + +static u32 vdpasim_get_generation(struct vdpa_device *vdpa) +{ + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + + return vdpasim->generation; +} + +static struct vdpa_iova_range vdpasim_get_iova_range(struct vdpa_device *vdpa) +{ + struct vdpa_iova_range range = { + .first = 0ULL, + .last = ULLONG_MAX, + }; + + return range; +} + +static int vdpasim_set_group_asid(struct vdpa_device *vdpa, unsigned int group, + unsigned int asid) +{ + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + struct vhost_iotlb *iommu; + int i; + + if (group > vdpasim->dev_attr.ngroups) + return -EINVAL; + + if (asid >= vdpasim->dev_attr.nas) + return -EINVAL; + + iommu = &vdpasim->iommu[asid]; + + mutex_lock(&vdpasim->mutex); + + for (i = 0; i < vdpasim->dev_attr.nvqs; i++) + if (vdpasim_get_vq_group(vdpa, i) == group) + vringh_set_iotlb(&vdpasim->vqs[i].vring, iommu, + &vdpasim->iommu_lock); + + mutex_unlock(&vdpasim->mutex); + + return 0; +} + +static int vdpasim_set_map(struct vdpa_device *vdpa, unsigned int asid, + struct vhost_iotlb *iotlb) +{ + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + struct vhost_iotlb_map *map; + struct vhost_iotlb *iommu; + u64 start = 0ULL, last = 0ULL - 1; + int ret; + + if (asid >= vdpasim->dev_attr.nas) + return -EINVAL; + + spin_lock(&vdpasim->iommu_lock); + + iommu = &vdpasim->iommu[asid]; + vhost_iotlb_reset(iommu); + vdpasim->iommu_pt[asid] = false; + + for (map = vhost_iotlb_itree_first(iotlb, start, last); map; + map = vhost_iotlb_itree_next(map, start, last)) { + ret = vhost_iotlb_add_range(iommu, map->start, + map->last, map->addr, map->perm); + if (ret) + goto err; + } + spin_unlock(&vdpasim->iommu_lock); + return 0; + +err: + vhost_iotlb_reset(iommu); + spin_unlock(&vdpasim->iommu_lock); + return ret; +} + +static int vdpasim_bind_mm(struct vdpa_device *vdpa, struct mm_struct *mm) +{ + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + struct vdpasim_mm_work mm_work; + + mm_work.vdpasim = vdpasim; + mm_work.mm_to_bind = mm; + + vdpasim_worker_change_mm_sync(vdpasim, &mm_work); + + return mm_work.ret; +} + +static void vdpasim_unbind_mm(struct vdpa_device *vdpa) +{ + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + struct vdpasim_mm_work mm_work; + + mm_work.vdpasim = vdpasim; + mm_work.mm_to_bind = NULL; + + vdpasim_worker_change_mm_sync(vdpasim, &mm_work); +} + +static int vdpasim_dma_map(struct vdpa_device *vdpa, unsigned int asid, + u64 iova, u64 size, + u64 pa, u32 perm, void *opaque) +{ + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + int ret; + + if (asid >= vdpasim->dev_attr.nas) + return -EINVAL; + + spin_lock(&vdpasim->iommu_lock); + if (vdpasim->iommu_pt[asid]) { + vhost_iotlb_reset(&vdpasim->iommu[asid]); + vdpasim->iommu_pt[asid] = false; + } + ret = vhost_iotlb_add_range_ctx(&vdpasim->iommu[asid], iova, + iova + size - 1, pa, perm, opaque); + spin_unlock(&vdpasim->iommu_lock); + + return ret; +} + +static int vdpasim_dma_unmap(struct vdpa_device *vdpa, unsigned int asid, + u64 iova, u64 size) +{ + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + + if (asid >= vdpasim->dev_attr.nas) + return -EINVAL; + + if (vdpasim->iommu_pt[asid]) { + vhost_iotlb_reset(&vdpasim->iommu[asid]); + vdpasim->iommu_pt[asid] = false; + } + + spin_lock(&vdpasim->iommu_lock); + vhost_iotlb_del_range(&vdpasim->iommu[asid], iova, iova + size - 1); + spin_unlock(&vdpasim->iommu_lock); + + return 0; +} + +static void vdpasim_free(struct vdpa_device *vdpa) +{ + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + int i; + + kthread_cancel_work_sync(&vdpasim->work); + kthread_destroy_worker(vdpasim->worker); + + for (i = 0; i < vdpasim->dev_attr.nvqs; i++) { + vringh_kiov_cleanup(&vdpasim->vqs[i].out_iov); + vringh_kiov_cleanup(&vdpasim->vqs[i].in_iov); + } + + vdpasim->dev_attr.free(vdpasim); + + for (i = 0; i < vdpasim->dev_attr.nas; i++) + vhost_iotlb_reset(&vdpasim->iommu[i]); + kfree(vdpasim->iommu); + kfree(vdpasim->iommu_pt); + kfree(vdpasim->vqs); + kfree(vdpasim->config); +} + +static const struct vdpa_config_ops vdpasim_config_ops = { + .set_vq_address = vdpasim_set_vq_address, + .set_vq_num = vdpasim_set_vq_num, + .kick_vq = vdpasim_kick_vq, + .set_vq_cb = vdpasim_set_vq_cb, + .set_vq_ready = vdpasim_set_vq_ready, + .get_vq_ready = vdpasim_get_vq_ready, + .set_vq_state = vdpasim_set_vq_state, + .get_vendor_vq_stats = vdpasim_get_vq_stats, + .get_vq_state = vdpasim_get_vq_state, + .get_vq_align = vdpasim_get_vq_align, + .get_vq_group = vdpasim_get_vq_group, + .get_device_features = vdpasim_get_device_features, + .get_backend_features = vdpasim_get_backend_features, + .set_driver_features = vdpasim_set_driver_features, + .get_driver_features = vdpasim_get_driver_features, + .set_config_cb = vdpasim_set_config_cb, + .get_vq_num_max = vdpasim_get_vq_num_max, + .get_device_id = vdpasim_get_device_id, + .get_vendor_id = vdpasim_get_vendor_id, + .get_status = vdpasim_get_status, + .set_status = vdpasim_set_status, + .reset = vdpasim_reset, + .suspend = vdpasim_suspend, + .resume = vdpasim_resume, + .get_config_size = vdpasim_get_config_size, + .get_config = vdpasim_get_config, + .set_config = vdpasim_set_config, + .get_generation = vdpasim_get_generation, + .get_iova_range = vdpasim_get_iova_range, + .set_group_asid = vdpasim_set_group_asid, + .dma_map = vdpasim_dma_map, + .dma_unmap = vdpasim_dma_unmap, + .bind_mm = vdpasim_bind_mm, + .unbind_mm = vdpasim_unbind_mm, + .free = vdpasim_free, +}; + +static const struct vdpa_config_ops vdpasim_batch_config_ops = { + .set_vq_address = vdpasim_set_vq_address, + .set_vq_num = vdpasim_set_vq_num, + .kick_vq = vdpasim_kick_vq, + .set_vq_cb = vdpasim_set_vq_cb, + .set_vq_ready = vdpasim_set_vq_ready, + .get_vq_ready = vdpasim_get_vq_ready, + .set_vq_state = vdpasim_set_vq_state, + .get_vendor_vq_stats = vdpasim_get_vq_stats, + .get_vq_state = vdpasim_get_vq_state, + .get_vq_align = vdpasim_get_vq_align, + .get_vq_group = vdpasim_get_vq_group, + .get_device_features = vdpasim_get_device_features, + .get_backend_features = vdpasim_get_backend_features, + .set_driver_features = vdpasim_set_driver_features, + .get_driver_features = vdpasim_get_driver_features, + .set_config_cb = vdpasim_set_config_cb, + .get_vq_num_max = vdpasim_get_vq_num_max, + .get_device_id = vdpasim_get_device_id, + .get_vendor_id = vdpasim_get_vendor_id, + .get_status = vdpasim_get_status, + .set_status = vdpasim_set_status, + .reset = vdpasim_reset, + .suspend = vdpasim_suspend, + .resume = vdpasim_resume, + .get_config_size = vdpasim_get_config_size, + .get_config = vdpasim_get_config, + .set_config = vdpasim_set_config, + .get_generation = vdpasim_get_generation, + .get_iova_range = vdpasim_get_iova_range, + .set_group_asid = vdpasim_set_group_asid, + .set_map = vdpasim_set_map, + .bind_mm = vdpasim_bind_mm, + .unbind_mm = vdpasim_unbind_mm, + .free = vdpasim_free, +}; + +MODULE_VERSION(DRV_VERSION); +MODULE_LICENSE(DRV_LICENSE); +MODULE_AUTHOR(DRV_AUTHOR); +MODULE_DESCRIPTION(DRV_DESC); diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.h b/drivers/vdpa/vdpa_sim/vdpa_sim.h new file mode 100644 index 0000000000..bb137e4797 --- /dev/null +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.h @@ -0,0 +1,121 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2020, Red Hat Inc. All rights reserved. + */ + +#ifndef _VDPA_SIM_H +#define _VDPA_SIM_H + +#include <linux/iova.h> +#include <linux/vringh.h> +#include <linux/vdpa.h> +#include <linux/virtio_byteorder.h> +#include <linux/vhost_iotlb.h> +#include <uapi/linux/virtio_config.h> + +#define VDPASIM_FEATURES ((1ULL << VIRTIO_F_ANY_LAYOUT) | \ + (1ULL << VIRTIO_F_VERSION_1) | \ + (1ULL << VIRTIO_F_ACCESS_PLATFORM)) + +struct vdpasim; + +struct vdpasim_virtqueue { + struct vringh vring; + struct vringh_kiov in_iov; + struct vringh_kiov out_iov; + unsigned short head; + bool ready; + u64 desc_addr; + u64 device_addr; + u64 driver_addr; + u32 num; + void *private; + irqreturn_t (*cb)(void *data); +}; + +struct vdpasim_dev_attr { + struct vdpa_mgmt_dev *mgmt_dev; + const char *name; + u64 supported_features; + size_t alloc_size; + size_t config_size; + int nvqs; + u32 id; + u32 ngroups; + u32 nas; + + void (*work_fn)(struct vdpasim *vdpasim); + void (*get_config)(struct vdpasim *vdpasim, void *config); + void (*set_config)(struct vdpasim *vdpasim, const void *config); + int (*get_stats)(struct vdpasim *vdpasim, u16 idx, + struct sk_buff *msg, + struct netlink_ext_ack *extack); + void (*free)(struct vdpasim *vdpasim); +}; + +/* State of each vdpasim device */ +struct vdpasim { + struct vdpa_device vdpa; + struct vdpasim_virtqueue *vqs; + struct kthread_worker *worker; + struct kthread_work work; + struct mm_struct *mm_bound; + struct vdpasim_dev_attr dev_attr; + /* mutex to synchronize virtqueue state */ + struct mutex mutex; + /* virtio config according to device type */ + void *config; + struct vhost_iotlb *iommu; + bool *iommu_pt; + u32 status; + u32 generation; + u64 features; + u32 groups; + bool running; + bool pending_kick; + /* spinlock to synchronize iommu table */ + spinlock_t iommu_lock; +}; + +struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *attr, + const struct vdpa_dev_set_config *config); +void vdpasim_schedule_work(struct vdpasim *vdpasim); + +/* TODO: cross-endian support */ +static inline bool vdpasim_is_little_endian(struct vdpasim *vdpasim) +{ + return virtio_legacy_is_little_endian() || + (vdpasim->features & (1ULL << VIRTIO_F_VERSION_1)); +} + +static inline u16 vdpasim16_to_cpu(struct vdpasim *vdpasim, __virtio16 val) +{ + return __virtio16_to_cpu(vdpasim_is_little_endian(vdpasim), val); +} + +static inline __virtio16 cpu_to_vdpasim16(struct vdpasim *vdpasim, u16 val) +{ + return __cpu_to_virtio16(vdpasim_is_little_endian(vdpasim), val); +} + +static inline u32 vdpasim32_to_cpu(struct vdpasim *vdpasim, __virtio32 val) +{ + return __virtio32_to_cpu(vdpasim_is_little_endian(vdpasim), val); +} + +static inline __virtio32 cpu_to_vdpasim32(struct vdpasim *vdpasim, u32 val) +{ + return __cpu_to_virtio32(vdpasim_is_little_endian(vdpasim), val); +} + +static inline u64 vdpasim64_to_cpu(struct vdpasim *vdpasim, __virtio64 val) +{ + return __virtio64_to_cpu(vdpasim_is_little_endian(vdpasim), val); +} + +static inline __virtio64 cpu_to_vdpasim64(struct vdpasim *vdpasim, u64 val) +{ + return __cpu_to_virtio64(vdpasim_is_little_endian(vdpasim), val); +} + +#endif diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c b/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c new file mode 100644 index 0000000000..b137f36793 --- /dev/null +++ b/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c @@ -0,0 +1,527 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * VDPA simulator for block device. + * + * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2021, Red Hat Inc. All rights reserved. + * + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/device.h> +#include <linux/kernel.h> +#include <linux/blkdev.h> +#include <linux/vringh.h> +#include <linux/vdpa.h> +#include <uapi/linux/virtio_blk.h> + +#include "vdpa_sim.h" + +#define DRV_VERSION "0.1" +#define DRV_AUTHOR "Max Gurtovoy <mgurtovoy@nvidia.com>" +#define DRV_DESC "vDPA Device Simulator for block device" +#define DRV_LICENSE "GPL v2" + +#define VDPASIM_BLK_FEATURES (VDPASIM_FEATURES | \ + (1ULL << VIRTIO_BLK_F_FLUSH) | \ + (1ULL << VIRTIO_BLK_F_SIZE_MAX) | \ + (1ULL << VIRTIO_BLK_F_SEG_MAX) | \ + (1ULL << VIRTIO_BLK_F_BLK_SIZE) | \ + (1ULL << VIRTIO_BLK_F_TOPOLOGY) | \ + (1ULL << VIRTIO_BLK_F_MQ) | \ + (1ULL << VIRTIO_BLK_F_DISCARD) | \ + (1ULL << VIRTIO_BLK_F_WRITE_ZEROES)) + +#define VDPASIM_BLK_CAPACITY 0x40000 +#define VDPASIM_BLK_SIZE_MAX 0x1000 +#define VDPASIM_BLK_SEG_MAX 32 +#define VDPASIM_BLK_DWZ_MAX_SECTORS UINT_MAX + +/* 1 virtqueue, 1 address space, 1 virtqueue group */ +#define VDPASIM_BLK_VQ_NUM 1 +#define VDPASIM_BLK_AS_NUM 1 +#define VDPASIM_BLK_GROUP_NUM 1 + +struct vdpasim_blk { + struct vdpasim vdpasim; + void *buffer; + bool shared_backend; +}; + +static struct vdpasim_blk *sim_to_blk(struct vdpasim *vdpasim) +{ + return container_of(vdpasim, struct vdpasim_blk, vdpasim); +} + +static char vdpasim_blk_id[VIRTIO_BLK_ID_BYTES] = "vdpa_blk_sim"; + +static bool shared_backend; +module_param(shared_backend, bool, 0444); +MODULE_PARM_DESC(shared_backend, "Enable the shared backend between virtio-blk devices"); + +static void *shared_buffer; +/* mutex to synchronize shared_buffer access */ +static DEFINE_MUTEX(shared_buffer_mutex); + +static void vdpasim_blk_buffer_lock(struct vdpasim_blk *blk) +{ + if (blk->shared_backend) + mutex_lock(&shared_buffer_mutex); +} + +static void vdpasim_blk_buffer_unlock(struct vdpasim_blk *blk) +{ + if (blk->shared_backend) + mutex_unlock(&shared_buffer_mutex); +} + +static bool vdpasim_blk_check_range(struct vdpasim *vdpasim, u64 start_sector, + u64 num_sectors, u64 max_sectors) +{ + if (start_sector > VDPASIM_BLK_CAPACITY) { + dev_dbg(&vdpasim->vdpa.dev, + "starting sector exceeds the capacity - start: 0x%llx capacity: 0x%x\n", + start_sector, VDPASIM_BLK_CAPACITY); + } + + if (num_sectors > max_sectors) { + dev_dbg(&vdpasim->vdpa.dev, + "number of sectors exceeds the max allowed in a request - num: 0x%llx max: 0x%llx\n", + num_sectors, max_sectors); + return false; + } + + if (num_sectors > VDPASIM_BLK_CAPACITY - start_sector) { + dev_dbg(&vdpasim->vdpa.dev, + "request exceeds the capacity - start: 0x%llx num: 0x%llx capacity: 0x%x\n", + start_sector, num_sectors, VDPASIM_BLK_CAPACITY); + return false; + } + + return true; +} + +/* Returns 'true' if the request is handled (with or without an I/O error) + * and the status is correctly written in the last byte of the 'in iov', + * 'false' otherwise. + */ +static bool vdpasim_blk_handle_req(struct vdpasim *vdpasim, + struct vdpasim_virtqueue *vq) +{ + struct vdpasim_blk *blk = sim_to_blk(vdpasim); + size_t pushed = 0, to_pull, to_push; + struct virtio_blk_outhdr hdr; + bool handled = false; + ssize_t bytes; + loff_t offset; + u64 sector; + u8 status; + u32 type; + int ret; + + ret = vringh_getdesc_iotlb(&vq->vring, &vq->out_iov, &vq->in_iov, + &vq->head, GFP_ATOMIC); + if (ret != 1) + return false; + + if (vq->out_iov.used < 1 || vq->in_iov.used < 1) { + dev_dbg(&vdpasim->vdpa.dev, "missing headers - out_iov: %u in_iov %u\n", + vq->out_iov.used, vq->in_iov.used); + goto err; + } + + if (vq->in_iov.iov[vq->in_iov.used - 1].iov_len < 1) { + dev_dbg(&vdpasim->vdpa.dev, "request in header too short\n"); + goto err; + } + + /* The last byte is the status and we checked if the last iov has + * enough room for it. + */ + to_push = vringh_kiov_length(&vq->in_iov) - 1; + + to_pull = vringh_kiov_length(&vq->out_iov); + + bytes = vringh_iov_pull_iotlb(&vq->vring, &vq->out_iov, &hdr, + sizeof(hdr)); + if (bytes != sizeof(hdr)) { + dev_dbg(&vdpasim->vdpa.dev, "request out header too short\n"); + goto err; + } + + to_pull -= bytes; + + type = vdpasim32_to_cpu(vdpasim, hdr.type); + sector = vdpasim64_to_cpu(vdpasim, hdr.sector); + offset = sector << SECTOR_SHIFT; + status = VIRTIO_BLK_S_OK; + + if (type != VIRTIO_BLK_T_IN && type != VIRTIO_BLK_T_OUT && + sector != 0) { + dev_dbg(&vdpasim->vdpa.dev, + "sector must be 0 for %u request - sector: 0x%llx\n", + type, sector); + status = VIRTIO_BLK_S_IOERR; + goto err_status; + } + + switch (type) { + case VIRTIO_BLK_T_IN: + if (!vdpasim_blk_check_range(vdpasim, sector, + to_push >> SECTOR_SHIFT, + VDPASIM_BLK_SIZE_MAX * VDPASIM_BLK_SEG_MAX)) { + status = VIRTIO_BLK_S_IOERR; + break; + } + + vdpasim_blk_buffer_lock(blk); + bytes = vringh_iov_push_iotlb(&vq->vring, &vq->in_iov, + blk->buffer + offset, to_push); + vdpasim_blk_buffer_unlock(blk); + if (bytes < 0) { + dev_dbg(&vdpasim->vdpa.dev, + "vringh_iov_push_iotlb() error: %zd offset: 0x%llx len: 0x%zx\n", + bytes, offset, to_push); + status = VIRTIO_BLK_S_IOERR; + break; + } + + pushed += bytes; + break; + + case VIRTIO_BLK_T_OUT: + if (!vdpasim_blk_check_range(vdpasim, sector, + to_pull >> SECTOR_SHIFT, + VDPASIM_BLK_SIZE_MAX * VDPASIM_BLK_SEG_MAX)) { + status = VIRTIO_BLK_S_IOERR; + break; + } + + vdpasim_blk_buffer_lock(blk); + bytes = vringh_iov_pull_iotlb(&vq->vring, &vq->out_iov, + blk->buffer + offset, to_pull); + vdpasim_blk_buffer_unlock(blk); + if (bytes < 0) { + dev_dbg(&vdpasim->vdpa.dev, + "vringh_iov_pull_iotlb() error: %zd offset: 0x%llx len: 0x%zx\n", + bytes, offset, to_pull); + status = VIRTIO_BLK_S_IOERR; + break; + } + break; + + case VIRTIO_BLK_T_GET_ID: + bytes = vringh_iov_push_iotlb(&vq->vring, &vq->in_iov, + vdpasim_blk_id, + VIRTIO_BLK_ID_BYTES); + if (bytes < 0) { + dev_dbg(&vdpasim->vdpa.dev, + "vringh_iov_push_iotlb() error: %zd\n", bytes); + status = VIRTIO_BLK_S_IOERR; + break; + } + + pushed += bytes; + break; + + case VIRTIO_BLK_T_FLUSH: + /* nothing to do */ + break; + + case VIRTIO_BLK_T_DISCARD: + case VIRTIO_BLK_T_WRITE_ZEROES: { + struct virtio_blk_discard_write_zeroes range; + u32 num_sectors, flags; + + if (to_pull != sizeof(range)) { + dev_dbg(&vdpasim->vdpa.dev, + "discard/write_zeroes header len: 0x%zx [expected: 0x%zx]\n", + to_pull, sizeof(range)); + status = VIRTIO_BLK_S_IOERR; + break; + } + + bytes = vringh_iov_pull_iotlb(&vq->vring, &vq->out_iov, &range, + to_pull); + if (bytes < 0) { + dev_dbg(&vdpasim->vdpa.dev, + "vringh_iov_pull_iotlb() error: %zd offset: 0x%llx len: 0x%zx\n", + bytes, offset, to_pull); + status = VIRTIO_BLK_S_IOERR; + break; + } + + sector = le64_to_cpu(range.sector); + offset = sector << SECTOR_SHIFT; + num_sectors = le32_to_cpu(range.num_sectors); + flags = le32_to_cpu(range.flags); + + if (type == VIRTIO_BLK_T_DISCARD && flags != 0) { + dev_dbg(&vdpasim->vdpa.dev, + "discard unexpected flags set - flags: 0x%x\n", + flags); + status = VIRTIO_BLK_S_UNSUPP; + break; + } + + if (type == VIRTIO_BLK_T_WRITE_ZEROES && + flags & ~VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP) { + dev_dbg(&vdpasim->vdpa.dev, + "write_zeroes unexpected flags set - flags: 0x%x\n", + flags); + status = VIRTIO_BLK_S_UNSUPP; + break; + } + + if (!vdpasim_blk_check_range(vdpasim, sector, num_sectors, + VDPASIM_BLK_DWZ_MAX_SECTORS)) { + status = VIRTIO_BLK_S_IOERR; + break; + } + + if (type == VIRTIO_BLK_T_WRITE_ZEROES) { + vdpasim_blk_buffer_lock(blk); + memset(blk->buffer + offset, 0, + num_sectors << SECTOR_SHIFT); + vdpasim_blk_buffer_unlock(blk); + } + + break; + } + default: + dev_dbg(&vdpasim->vdpa.dev, + "Unsupported request type %d\n", type); + status = VIRTIO_BLK_S_IOERR; + break; + } + +err_status: + /* If some operations fail, we need to skip the remaining bytes + * to put the status in the last byte + */ + if (to_push - pushed > 0) + vringh_kiov_advance(&vq->in_iov, to_push - pushed); + + /* Last byte is the status */ + bytes = vringh_iov_push_iotlb(&vq->vring, &vq->in_iov, &status, 1); + if (bytes != 1) + goto err; + + pushed += bytes; + + /* Make sure data is wrote before advancing index */ + smp_wmb(); + + handled = true; + +err: + vringh_complete_iotlb(&vq->vring, vq->head, pushed); + + return handled; +} + +static void vdpasim_blk_work(struct vdpasim *vdpasim) +{ + bool reschedule = false; + int i; + + mutex_lock(&vdpasim->mutex); + + if (!(vdpasim->status & VIRTIO_CONFIG_S_DRIVER_OK)) + goto out; + + if (!vdpasim->running) + goto out; + + for (i = 0; i < VDPASIM_BLK_VQ_NUM; i++) { + struct vdpasim_virtqueue *vq = &vdpasim->vqs[i]; + int reqs = 0; + + if (!vq->ready) + continue; + + while (vdpasim_blk_handle_req(vdpasim, vq)) { + /* Make sure used is visible before rasing the interrupt. */ + smp_wmb(); + + local_bh_disable(); + if (vringh_need_notify_iotlb(&vq->vring) > 0) + vringh_notify(&vq->vring); + local_bh_enable(); + + if (++reqs > 4) { + reschedule = true; + break; + } + } + } +out: + mutex_unlock(&vdpasim->mutex); + + if (reschedule) + vdpasim_schedule_work(vdpasim); +} + +static void vdpasim_blk_get_config(struct vdpasim *vdpasim, void *config) +{ + struct virtio_blk_config *blk_config = config; + + memset(config, 0, sizeof(struct virtio_blk_config)); + + blk_config->capacity = cpu_to_vdpasim64(vdpasim, VDPASIM_BLK_CAPACITY); + blk_config->size_max = cpu_to_vdpasim32(vdpasim, VDPASIM_BLK_SIZE_MAX); + blk_config->seg_max = cpu_to_vdpasim32(vdpasim, VDPASIM_BLK_SEG_MAX); + blk_config->num_queues = cpu_to_vdpasim16(vdpasim, VDPASIM_BLK_VQ_NUM); + blk_config->min_io_size = cpu_to_vdpasim16(vdpasim, 1); + blk_config->opt_io_size = cpu_to_vdpasim32(vdpasim, 1); + blk_config->blk_size = cpu_to_vdpasim32(vdpasim, SECTOR_SIZE); + /* VIRTIO_BLK_F_DISCARD */ + blk_config->discard_sector_alignment = + cpu_to_vdpasim32(vdpasim, SECTOR_SIZE); + blk_config->max_discard_sectors = + cpu_to_vdpasim32(vdpasim, VDPASIM_BLK_DWZ_MAX_SECTORS); + blk_config->max_discard_seg = cpu_to_vdpasim32(vdpasim, 1); + /* VIRTIO_BLK_F_WRITE_ZEROES */ + blk_config->max_write_zeroes_sectors = + cpu_to_vdpasim32(vdpasim, VDPASIM_BLK_DWZ_MAX_SECTORS); + blk_config->max_write_zeroes_seg = cpu_to_vdpasim32(vdpasim, 1); + +} + +static void vdpasim_blk_free(struct vdpasim *vdpasim) +{ + struct vdpasim_blk *blk = sim_to_blk(vdpasim); + + if (!blk->shared_backend) + kvfree(blk->buffer); +} + +static void vdpasim_blk_mgmtdev_release(struct device *dev) +{ +} + +static struct device vdpasim_blk_mgmtdev = { + .init_name = "vdpasim_blk", + .release = vdpasim_blk_mgmtdev_release, +}; + +static int vdpasim_blk_dev_add(struct vdpa_mgmt_dev *mdev, const char *name, + const struct vdpa_dev_set_config *config) +{ + struct vdpasim_dev_attr dev_attr = {}; + struct vdpasim_blk *blk; + struct vdpasim *simdev; + int ret; + + dev_attr.mgmt_dev = mdev; + dev_attr.name = name; + dev_attr.id = VIRTIO_ID_BLOCK; + dev_attr.supported_features = VDPASIM_BLK_FEATURES; + dev_attr.nvqs = VDPASIM_BLK_VQ_NUM; + dev_attr.ngroups = VDPASIM_BLK_GROUP_NUM; + dev_attr.nas = VDPASIM_BLK_AS_NUM; + dev_attr.alloc_size = sizeof(struct vdpasim_blk); + dev_attr.config_size = sizeof(struct virtio_blk_config); + dev_attr.get_config = vdpasim_blk_get_config; + dev_attr.work_fn = vdpasim_blk_work; + dev_attr.free = vdpasim_blk_free; + + simdev = vdpasim_create(&dev_attr, config); + if (IS_ERR(simdev)) + return PTR_ERR(simdev); + + blk = sim_to_blk(simdev); + blk->shared_backend = shared_backend; + + if (blk->shared_backend) { + blk->buffer = shared_buffer; + } else { + blk->buffer = kvzalloc(VDPASIM_BLK_CAPACITY << SECTOR_SHIFT, + GFP_KERNEL); + if (!blk->buffer) { + ret = -ENOMEM; + goto put_dev; + } + } + + ret = _vdpa_register_device(&simdev->vdpa, VDPASIM_BLK_VQ_NUM); + if (ret) + goto put_dev; + + return 0; + +put_dev: + put_device(&simdev->vdpa.dev); + return ret; +} + +static void vdpasim_blk_dev_del(struct vdpa_mgmt_dev *mdev, + struct vdpa_device *dev) +{ + struct vdpasim *simdev = container_of(dev, struct vdpasim, vdpa); + + _vdpa_unregister_device(&simdev->vdpa); +} + +static const struct vdpa_mgmtdev_ops vdpasim_blk_mgmtdev_ops = { + .dev_add = vdpasim_blk_dev_add, + .dev_del = vdpasim_blk_dev_del +}; + +static struct virtio_device_id id_table[] = { + { VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID }, + { 0 }, +}; + +static struct vdpa_mgmt_dev mgmt_dev = { + .device = &vdpasim_blk_mgmtdev, + .id_table = id_table, + .ops = &vdpasim_blk_mgmtdev_ops, +}; + +static int __init vdpasim_blk_init(void) +{ + int ret; + + ret = device_register(&vdpasim_blk_mgmtdev); + if (ret) { + put_device(&vdpasim_blk_mgmtdev); + return ret; + } + + ret = vdpa_mgmtdev_register(&mgmt_dev); + if (ret) + goto parent_err; + + if (shared_backend) { + shared_buffer = kvzalloc(VDPASIM_BLK_CAPACITY << SECTOR_SHIFT, + GFP_KERNEL); + if (!shared_buffer) { + ret = -ENOMEM; + goto mgmt_dev_err; + } + } + + return 0; +mgmt_dev_err: + vdpa_mgmtdev_unregister(&mgmt_dev); +parent_err: + device_unregister(&vdpasim_blk_mgmtdev); + return ret; +} + +static void __exit vdpasim_blk_exit(void) +{ + kvfree(shared_buffer); + vdpa_mgmtdev_unregister(&mgmt_dev); + device_unregister(&vdpasim_blk_mgmtdev); +} + +module_init(vdpasim_blk_init) +module_exit(vdpasim_blk_exit) + +MODULE_VERSION(DRV_VERSION); +MODULE_LICENSE(DRV_LICENSE); +MODULE_AUTHOR(DRV_AUTHOR); +MODULE_DESCRIPTION(DRV_DESC); diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c new file mode 100644 index 0000000000..cfe9629118 --- /dev/null +++ b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c @@ -0,0 +1,564 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * VDPA simulator for networking device. + * + * Copyright (c) 2020, Red Hat Inc. All rights reserved. + * Author: Jason Wang <jasowang@redhat.com> + * + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/device.h> +#include <linux/kernel.h> +#include <linux/etherdevice.h> +#include <linux/vringh.h> +#include <linux/vdpa.h> +#include <net/netlink.h> +#include <uapi/linux/virtio_net.h> +#include <uapi/linux/vdpa.h> + +#include "vdpa_sim.h" + +#define DRV_VERSION "0.1" +#define DRV_AUTHOR "Jason Wang <jasowang@redhat.com>" +#define DRV_DESC "vDPA Device Simulator for networking device" +#define DRV_LICENSE "GPL v2" + +#define VDPASIM_NET_FEATURES (VDPASIM_FEATURES | \ + (1ULL << VIRTIO_NET_F_MAC) | \ + (1ULL << VIRTIO_NET_F_STATUS) | \ + (1ULL << VIRTIO_NET_F_MTU) | \ + (1ULL << VIRTIO_NET_F_CTRL_VQ) | \ + (1ULL << VIRTIO_NET_F_CTRL_MAC_ADDR)) + +/* 3 virtqueues, 2 address spaces, 2 virtqueue groups */ +#define VDPASIM_NET_VQ_NUM 3 +#define VDPASIM_NET_AS_NUM 2 +#define VDPASIM_NET_GROUP_NUM 2 + +struct vdpasim_dataq_stats { + struct u64_stats_sync syncp; + u64 pkts; + u64 bytes; + u64 drops; + u64 errors; + u64 overruns; +}; + +struct vdpasim_cq_stats { + struct u64_stats_sync syncp; + u64 requests; + u64 successes; + u64 errors; +}; + +struct vdpasim_net{ + struct vdpasim vdpasim; + struct vdpasim_dataq_stats tx_stats; + struct vdpasim_dataq_stats rx_stats; + struct vdpasim_cq_stats cq_stats; + void *buffer; +}; + +static struct vdpasim_net *sim_to_net(struct vdpasim *vdpasim) +{ + return container_of(vdpasim, struct vdpasim_net, vdpasim); +} + +static void vdpasim_net_complete(struct vdpasim_virtqueue *vq, size_t len) +{ + /* Make sure data is wrote before advancing index */ + smp_wmb(); + + vringh_complete_iotlb(&vq->vring, vq->head, len); + + /* Make sure used is visible before rasing the interrupt. */ + smp_wmb(); + + local_bh_disable(); + if (vringh_need_notify_iotlb(&vq->vring) > 0) + vringh_notify(&vq->vring); + local_bh_enable(); +} + +static bool receive_filter(struct vdpasim *vdpasim, size_t len) +{ + bool modern = vdpasim->features & (1ULL << VIRTIO_F_VERSION_1); + size_t hdr_len = modern ? sizeof(struct virtio_net_hdr_v1) : + sizeof(struct virtio_net_hdr); + struct virtio_net_config *vio_config = vdpasim->config; + struct vdpasim_net *net = sim_to_net(vdpasim); + + if (len < ETH_ALEN + hdr_len) + return false; + + if (is_broadcast_ether_addr(net->buffer + hdr_len) || + is_multicast_ether_addr(net->buffer + hdr_len)) + return true; + if (!strncmp(net->buffer + hdr_len, vio_config->mac, ETH_ALEN)) + return true; + + return false; +} + +static virtio_net_ctrl_ack vdpasim_handle_ctrl_mac(struct vdpasim *vdpasim, + u8 cmd) +{ + struct virtio_net_config *vio_config = vdpasim->config; + struct vdpasim_virtqueue *cvq = &vdpasim->vqs[2]; + virtio_net_ctrl_ack status = VIRTIO_NET_ERR; + size_t read; + + switch (cmd) { + case VIRTIO_NET_CTRL_MAC_ADDR_SET: + read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->in_iov, + vio_config->mac, ETH_ALEN); + if (read == ETH_ALEN) + status = VIRTIO_NET_OK; + break; + default: + break; + } + + return status; +} + +static void vdpasim_handle_cvq(struct vdpasim *vdpasim) +{ + struct vdpasim_virtqueue *cvq = &vdpasim->vqs[2]; + struct vdpasim_net *net = sim_to_net(vdpasim); + virtio_net_ctrl_ack status = VIRTIO_NET_ERR; + struct virtio_net_ctrl_hdr ctrl; + size_t read, write; + u64 requests = 0, errors = 0, successes = 0; + int err; + + if (!(vdpasim->features & (1ULL << VIRTIO_NET_F_CTRL_VQ))) + return; + + if (!cvq->ready) + return; + + while (true) { + err = vringh_getdesc_iotlb(&cvq->vring, &cvq->in_iov, + &cvq->out_iov, + &cvq->head, GFP_ATOMIC); + if (err <= 0) + break; + + ++requests; + read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->in_iov, &ctrl, + sizeof(ctrl)); + if (read != sizeof(ctrl)) { + ++errors; + break; + } + + switch (ctrl.class) { + case VIRTIO_NET_CTRL_MAC: + status = vdpasim_handle_ctrl_mac(vdpasim, ctrl.cmd); + break; + default: + break; + } + + if (status == VIRTIO_NET_OK) + ++successes; + else + ++errors; + + /* Make sure data is wrote before advancing index */ + smp_wmb(); + + write = vringh_iov_push_iotlb(&cvq->vring, &cvq->out_iov, + &status, sizeof(status)); + vringh_complete_iotlb(&cvq->vring, cvq->head, write); + vringh_kiov_cleanup(&cvq->in_iov); + vringh_kiov_cleanup(&cvq->out_iov); + + /* Make sure used is visible before rasing the interrupt. */ + smp_wmb(); + + local_bh_disable(); + if (cvq->cb) + cvq->cb(cvq->private); + local_bh_enable(); + } + + u64_stats_update_begin(&net->cq_stats.syncp); + net->cq_stats.requests += requests; + net->cq_stats.errors += errors; + net->cq_stats.successes += successes; + u64_stats_update_end(&net->cq_stats.syncp); +} + +static void vdpasim_net_work(struct vdpasim *vdpasim) +{ + struct vdpasim_virtqueue *txq = &vdpasim->vqs[1]; + struct vdpasim_virtqueue *rxq = &vdpasim->vqs[0]; + struct vdpasim_net *net = sim_to_net(vdpasim); + ssize_t read, write; + u64 tx_pkts = 0, rx_pkts = 0, tx_bytes = 0, rx_bytes = 0; + u64 rx_drops = 0, rx_overruns = 0, rx_errors = 0, tx_errors = 0; + int err; + + mutex_lock(&vdpasim->mutex); + + if (!vdpasim->running) + goto out; + + if (!(vdpasim->status & VIRTIO_CONFIG_S_DRIVER_OK)) + goto out; + + vdpasim_handle_cvq(vdpasim); + + if (!txq->ready || !rxq->ready) + goto out; + + while (true) { + err = vringh_getdesc_iotlb(&txq->vring, &txq->out_iov, NULL, + &txq->head, GFP_ATOMIC); + if (err <= 0) { + if (err) + ++tx_errors; + break; + } + + ++tx_pkts; + read = vringh_iov_pull_iotlb(&txq->vring, &txq->out_iov, + net->buffer, PAGE_SIZE); + + tx_bytes += read; + + if (!receive_filter(vdpasim, read)) { + ++rx_drops; + vdpasim_net_complete(txq, 0); + continue; + } + + err = vringh_getdesc_iotlb(&rxq->vring, NULL, &rxq->in_iov, + &rxq->head, GFP_ATOMIC); + if (err <= 0) { + ++rx_overruns; + vdpasim_net_complete(txq, 0); + break; + } + + write = vringh_iov_push_iotlb(&rxq->vring, &rxq->in_iov, + net->buffer, read); + if (write <= 0) { + ++rx_errors; + break; + } + + ++rx_pkts; + rx_bytes += write; + + vdpasim_net_complete(txq, 0); + vdpasim_net_complete(rxq, write); + + if (tx_pkts > 4) { + vdpasim_schedule_work(vdpasim); + goto out; + } + } + +out: + mutex_unlock(&vdpasim->mutex); + + u64_stats_update_begin(&net->tx_stats.syncp); + net->tx_stats.pkts += tx_pkts; + net->tx_stats.bytes += tx_bytes; + net->tx_stats.errors += tx_errors; + u64_stats_update_end(&net->tx_stats.syncp); + + u64_stats_update_begin(&net->rx_stats.syncp); + net->rx_stats.pkts += rx_pkts; + net->rx_stats.bytes += rx_bytes; + net->rx_stats.drops += rx_drops; + net->rx_stats.errors += rx_errors; + net->rx_stats.overruns += rx_overruns; + u64_stats_update_end(&net->rx_stats.syncp); +} + +static int vdpasim_net_get_stats(struct vdpasim *vdpasim, u16 idx, + struct sk_buff *msg, + struct netlink_ext_ack *extack) +{ + struct vdpasim_net *net = sim_to_net(vdpasim); + u64 rx_pkts, rx_bytes, rx_errors, rx_overruns, rx_drops; + u64 tx_pkts, tx_bytes, tx_errors, tx_drops; + u64 cq_requests, cq_successes, cq_errors; + unsigned int start; + int err = -EMSGSIZE; + + switch(idx) { + case 0: + do { + start = u64_stats_fetch_begin(&net->rx_stats.syncp); + rx_pkts = net->rx_stats.pkts; + rx_bytes = net->rx_stats.bytes; + rx_errors = net->rx_stats.errors; + rx_overruns = net->rx_stats.overruns; + rx_drops = net->rx_stats.drops; + } while (u64_stats_fetch_retry(&net->rx_stats.syncp, start)); + + if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, + "rx packets")) + break; + if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, + rx_pkts, VDPA_ATTR_PAD)) + break; + if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, + "rx bytes")) + break; + if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, + rx_bytes, VDPA_ATTR_PAD)) + break; + if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, + "rx errors")) + break; + if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, + rx_errors, VDPA_ATTR_PAD)) + break; + if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, + "rx overruns")) + break; + if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, + rx_overruns, VDPA_ATTR_PAD)) + break; + if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, + "rx drops")) + break; + if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, + rx_drops, VDPA_ATTR_PAD)) + break; + err = 0; + break; + case 1: + do { + start = u64_stats_fetch_begin(&net->tx_stats.syncp); + tx_pkts = net->tx_stats.pkts; + tx_bytes = net->tx_stats.bytes; + tx_errors = net->tx_stats.errors; + tx_drops = net->tx_stats.drops; + } while (u64_stats_fetch_retry(&net->tx_stats.syncp, start)); + + if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, + "tx packets")) + break; + if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, + tx_pkts, VDPA_ATTR_PAD)) + break; + if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, + "tx bytes")) + break; + if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, + tx_bytes, VDPA_ATTR_PAD)) + break; + if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, + "tx errors")) + break; + if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, + tx_errors, VDPA_ATTR_PAD)) + break; + if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, + "tx drops")) + break; + if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, + tx_drops, VDPA_ATTR_PAD)) + break; + err = 0; + break; + case 2: + do { + start = u64_stats_fetch_begin(&net->cq_stats.syncp); + cq_requests = net->cq_stats.requests; + cq_successes = net->cq_stats.successes; + cq_errors = net->cq_stats.errors; + } while (u64_stats_fetch_retry(&net->cq_stats.syncp, start)); + + if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, + "cvq requests")) + break; + if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, + cq_requests, VDPA_ATTR_PAD)) + break; + if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, + "cvq successes")) + break; + if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, + cq_successes, VDPA_ATTR_PAD)) + break; + if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, + "cvq errors")) + break; + if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, + cq_errors, VDPA_ATTR_PAD)) + break; + err = 0; + break; + default: + err = -EINVAL; + break; + } + + return err; +} + +static void vdpasim_net_get_config(struct vdpasim *vdpasim, void *config) +{ + struct virtio_net_config *net_config = config; + + net_config->status = cpu_to_vdpasim16(vdpasim, VIRTIO_NET_S_LINK_UP); +} + +static void vdpasim_net_setup_config(struct vdpasim *vdpasim, + const struct vdpa_dev_set_config *config) +{ + struct virtio_net_config *vio_config = vdpasim->config; + + if (config->mask & (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR)) + memcpy(vio_config->mac, config->net.mac, ETH_ALEN); + if (config->mask & (1 << VDPA_ATTR_DEV_NET_CFG_MTU)) + vio_config->mtu = cpu_to_vdpasim16(vdpasim, config->net.mtu); + else + /* Setup default MTU to be 1500 */ + vio_config->mtu = cpu_to_vdpasim16(vdpasim, 1500); +} + +static void vdpasim_net_free(struct vdpasim *vdpasim) +{ + struct vdpasim_net *net = sim_to_net(vdpasim); + + kvfree(net->buffer); +} + +static void vdpasim_net_mgmtdev_release(struct device *dev) +{ +} + +static struct device vdpasim_net_mgmtdev = { + .init_name = "vdpasim_net", + .release = vdpasim_net_mgmtdev_release, +}; + +static int vdpasim_net_dev_add(struct vdpa_mgmt_dev *mdev, const char *name, + const struct vdpa_dev_set_config *config) +{ + struct vdpasim_dev_attr dev_attr = {}; + struct vdpasim_net *net; + struct vdpasim *simdev; + int ret; + + dev_attr.mgmt_dev = mdev; + dev_attr.name = name; + dev_attr.id = VIRTIO_ID_NET; + dev_attr.supported_features = VDPASIM_NET_FEATURES; + dev_attr.nvqs = VDPASIM_NET_VQ_NUM; + dev_attr.ngroups = VDPASIM_NET_GROUP_NUM; + dev_attr.nas = VDPASIM_NET_AS_NUM; + dev_attr.alloc_size = sizeof(struct vdpasim_net); + dev_attr.config_size = sizeof(struct virtio_net_config); + dev_attr.get_config = vdpasim_net_get_config; + dev_attr.work_fn = vdpasim_net_work; + dev_attr.get_stats = vdpasim_net_get_stats; + dev_attr.free = vdpasim_net_free; + + simdev = vdpasim_create(&dev_attr, config); + if (IS_ERR(simdev)) + return PTR_ERR(simdev); + + vdpasim_net_setup_config(simdev, config); + + net = sim_to_net(simdev); + + u64_stats_init(&net->tx_stats.syncp); + u64_stats_init(&net->rx_stats.syncp); + u64_stats_init(&net->cq_stats.syncp); + + net->buffer = kvmalloc(PAGE_SIZE, GFP_KERNEL); + if (!net->buffer) { + ret = -ENOMEM; + goto reg_err; + } + + /* + * Initialization must be completed before this call, since it can + * connect the device to the vDPA bus, so requests can arrive after + * this call. + */ + ret = _vdpa_register_device(&simdev->vdpa, VDPASIM_NET_VQ_NUM); + if (ret) + goto reg_err; + + return 0; + +reg_err: + put_device(&simdev->vdpa.dev); + return ret; +} + +static void vdpasim_net_dev_del(struct vdpa_mgmt_dev *mdev, + struct vdpa_device *dev) +{ + struct vdpasim *simdev = container_of(dev, struct vdpasim, vdpa); + + _vdpa_unregister_device(&simdev->vdpa); +} + +static const struct vdpa_mgmtdev_ops vdpasim_net_mgmtdev_ops = { + .dev_add = vdpasim_net_dev_add, + .dev_del = vdpasim_net_dev_del +}; + +static struct virtio_device_id id_table[] = { + { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID }, + { 0 }, +}; + +static struct vdpa_mgmt_dev mgmt_dev = { + .device = &vdpasim_net_mgmtdev, + .id_table = id_table, + .ops = &vdpasim_net_mgmtdev_ops, + .config_attr_mask = (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR | + 1 << VDPA_ATTR_DEV_NET_CFG_MTU | + 1 << VDPA_ATTR_DEV_FEATURES), + .max_supported_vqs = VDPASIM_NET_VQ_NUM, + .supported_features = VDPASIM_NET_FEATURES, +}; + +static int __init vdpasim_net_init(void) +{ + int ret; + + ret = device_register(&vdpasim_net_mgmtdev); + if (ret) { + put_device(&vdpasim_net_mgmtdev); + return ret; + } + + ret = vdpa_mgmtdev_register(&mgmt_dev); + if (ret) + goto parent_err; + return 0; + +parent_err: + device_unregister(&vdpasim_net_mgmtdev); + return ret; +} + +static void __exit vdpasim_net_exit(void) +{ + vdpa_mgmtdev_unregister(&mgmt_dev); + device_unregister(&vdpasim_net_mgmtdev); +} + +module_init(vdpasim_net_init); +module_exit(vdpasim_net_exit); + +MODULE_VERSION(DRV_VERSION); +MODULE_LICENSE(DRV_LICENSE); +MODULE_AUTHOR(DRV_AUTHOR); +MODULE_DESCRIPTION(DRV_DESC); |