summaryrefslogtreecommitdiffstats
path: root/drivers/misc/mic/vop
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-06 01:02:30 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-06 01:02:30 +0000
commit76cb841cb886eef6b3bee341a2266c76578724ad (patch)
treef5892e5ba6cc11949952a6ce4ecbe6d516d6ce58 /drivers/misc/mic/vop
parentInitial commit. (diff)
downloadlinux-upstream.tar.xz
linux-upstream.zip
Adding upstream version 4.19.249.upstream/4.19.249upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r--drivers/misc/mic/vop/Makefile9
-rw-r--r--drivers/misc/mic/vop/vop_debugfs.c232
-rw-r--r--drivers/misc/mic/vop/vop_main.c766
-rw-r--r--drivers/misc/mic/vop/vop_main.h170
-rw-r--r--drivers/misc/mic/vop/vop_vringh.c1169
5 files changed, 2346 insertions, 0 deletions
diff --git a/drivers/misc/mic/vop/Makefile b/drivers/misc/mic/vop/Makefile
new file mode 100644
index 000000000..78819c899
--- /dev/null
+++ b/drivers/misc/mic/vop/Makefile
@@ -0,0 +1,9 @@
+#
+# Makefile - Intel MIC Linux driver.
+# Copyright(c) 2016, Intel Corporation.
+#
+obj-m := vop.o
+
+vop-objs += vop_main.o
+vop-objs += vop_debugfs.o
+vop-objs += vop_vringh.o
diff --git a/drivers/misc/mic/vop/vop_debugfs.c b/drivers/misc/mic/vop/vop_debugfs.c
new file mode 100644
index 000000000..ab43884e5
--- /dev/null
+++ b/drivers/misc/mic/vop/vop_debugfs.c
@@ -0,0 +1,232 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel Virtio Over PCIe (VOP) driver.
+ *
+ */
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#include "vop_main.h"
+
+static int vop_dp_show(struct seq_file *s, void *pos)
+{
+ struct mic_device_desc *d;
+ struct mic_device_ctrl *dc;
+ struct mic_vqconfig *vqconfig;
+ __u32 *features;
+ __u8 *config;
+ struct vop_info *vi = s->private;
+ struct vop_device *vpdev = vi->vpdev;
+ struct mic_bootparam *bootparam = vpdev->hw_ops->get_dp(vpdev);
+ int j, k;
+
+ seq_printf(s, "Bootparam: magic 0x%x\n",
+ bootparam->magic);
+ seq_printf(s, "Bootparam: h2c_config_db %d\n",
+ bootparam->h2c_config_db);
+ seq_printf(s, "Bootparam: node_id %d\n",
+ bootparam->node_id);
+ seq_printf(s, "Bootparam: c2h_scif_db %d\n",
+ bootparam->c2h_scif_db);
+ seq_printf(s, "Bootparam: h2c_scif_db %d\n",
+ bootparam->h2c_scif_db);
+ seq_printf(s, "Bootparam: scif_host_dma_addr 0x%llx\n",
+ bootparam->scif_host_dma_addr);
+ seq_printf(s, "Bootparam: scif_card_dma_addr 0x%llx\n",
+ bootparam->scif_card_dma_addr);
+
+ for (j = sizeof(*bootparam);
+ j < MIC_DP_SIZE; j += mic_total_desc_size(d)) {
+ d = (void *)bootparam + j;
+ dc = (void *)d + mic_aligned_desc_size(d);
+
+ /* end of list */
+ if (d->type == 0)
+ break;
+
+ if (d->type == -1)
+ continue;
+
+ seq_printf(s, "Type %d ", d->type);
+ seq_printf(s, "Num VQ %d ", d->num_vq);
+ seq_printf(s, "Feature Len %d\n", d->feature_len);
+ seq_printf(s, "Config Len %d ", d->config_len);
+ seq_printf(s, "Shutdown Status %d\n", d->status);
+
+ for (k = 0; k < d->num_vq; k++) {
+ vqconfig = mic_vq_config(d) + k;
+ seq_printf(s, "vqconfig[%d]: ", k);
+ seq_printf(s, "address 0x%llx ",
+ vqconfig->address);
+ seq_printf(s, "num %d ", vqconfig->num);
+ seq_printf(s, "used address 0x%llx\n",
+ vqconfig->used_address);
+ }
+
+ features = (__u32 *)mic_vq_features(d);
+ seq_printf(s, "Features: Host 0x%x ", features[0]);
+ seq_printf(s, "Guest 0x%x\n", features[1]);
+
+ config = mic_vq_configspace(d);
+ for (k = 0; k < d->config_len; k++)
+ seq_printf(s, "config[%d]=%d\n", k, config[k]);
+
+ seq_puts(s, "Device control:\n");
+ seq_printf(s, "Config Change %d ", dc->config_change);
+ seq_printf(s, "Vdev reset %d\n", dc->vdev_reset);
+ seq_printf(s, "Guest Ack %d ", dc->guest_ack);
+ seq_printf(s, "Host ack %d\n", dc->host_ack);
+ seq_printf(s, "Used address updated %d ",
+ dc->used_address_updated);
+ seq_printf(s, "Vdev 0x%llx\n", dc->vdev);
+ seq_printf(s, "c2h doorbell %d ", dc->c2h_vdev_db);
+ seq_printf(s, "h2c doorbell %d\n", dc->h2c_vdev_db);
+ }
+ schedule_work(&vi->hotplug_work);
+ return 0;
+}
+
+static int vop_dp_debug_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, vop_dp_show, inode->i_private);
+}
+
+static int vop_dp_debug_release(struct inode *inode, struct file *file)
+{
+ return single_release(inode, file);
+}
+
+static const struct file_operations dp_ops = {
+ .owner = THIS_MODULE,
+ .open = vop_dp_debug_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = vop_dp_debug_release
+};
+
+static int vop_vdev_info_show(struct seq_file *s, void *unused)
+{
+ struct vop_info *vi = s->private;
+ struct list_head *pos, *tmp;
+ struct vop_vdev *vdev;
+ int i, j;
+
+ mutex_lock(&vi->vop_mutex);
+ list_for_each_safe(pos, tmp, &vi->vdev_list) {
+ vdev = list_entry(pos, struct vop_vdev, list);
+ seq_printf(s, "VDEV type %d state %s in %ld out %ld in_dma %ld out_dma %ld\n",
+ vdev->virtio_id,
+ vop_vdevup(vdev) ? "UP" : "DOWN",
+ vdev->in_bytes,
+ vdev->out_bytes,
+ vdev->in_bytes_dma,
+ vdev->out_bytes_dma);
+ for (i = 0; i < MIC_MAX_VRINGS; i++) {
+ struct vring_desc *desc;
+ struct vring_avail *avail;
+ struct vring_used *used;
+ struct vop_vringh *vvr = &vdev->vvr[i];
+ struct vringh *vrh = &vvr->vrh;
+ int num = vrh->vring.num;
+
+ if (!num)
+ continue;
+ desc = vrh->vring.desc;
+ seq_printf(s, "vring i %d avail_idx %d",
+ i, vvr->vring.info->avail_idx & (num - 1));
+ seq_printf(s, " vring i %d avail_idx %d\n",
+ i, vvr->vring.info->avail_idx);
+ seq_printf(s, "vrh i %d weak_barriers %d",
+ i, vrh->weak_barriers);
+ seq_printf(s, " last_avail_idx %d last_used_idx %d",
+ vrh->last_avail_idx, vrh->last_used_idx);
+ seq_printf(s, " completed %d\n", vrh->completed);
+ for (j = 0; j < num; j++) {
+ seq_printf(s, "desc[%d] addr 0x%llx len %d",
+ j, desc->addr, desc->len);
+ seq_printf(s, " flags 0x%x next %d\n",
+ desc->flags, desc->next);
+ desc++;
+ }
+ avail = vrh->vring.avail;
+ seq_printf(s, "avail flags 0x%x idx %d\n",
+ vringh16_to_cpu(vrh, avail->flags),
+ vringh16_to_cpu(vrh,
+ avail->idx) & (num - 1));
+ seq_printf(s, "avail flags 0x%x idx %d\n",
+ vringh16_to_cpu(vrh, avail->flags),
+ vringh16_to_cpu(vrh, avail->idx));
+ for (j = 0; j < num; j++)
+ seq_printf(s, "avail ring[%d] %d\n",
+ j, avail->ring[j]);
+ used = vrh->vring.used;
+ seq_printf(s, "used flags 0x%x idx %d\n",
+ vringh16_to_cpu(vrh, used->flags),
+ vringh16_to_cpu(vrh, used->idx) & (num - 1));
+ seq_printf(s, "used flags 0x%x idx %d\n",
+ vringh16_to_cpu(vrh, used->flags),
+ vringh16_to_cpu(vrh, used->idx));
+ for (j = 0; j < num; j++)
+ seq_printf(s, "used ring[%d] id %d len %d\n",
+ j, vringh32_to_cpu(vrh,
+ used->ring[j].id),
+ vringh32_to_cpu(vrh,
+ used->ring[j].len));
+ }
+ }
+ mutex_unlock(&vi->vop_mutex);
+
+ return 0;
+}
+
+static int vop_vdev_info_debug_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, vop_vdev_info_show, inode->i_private);
+}
+
+static int vop_vdev_info_debug_release(struct inode *inode, struct file *file)
+{
+ return single_release(inode, file);
+}
+
+static const struct file_operations vdev_info_ops = {
+ .owner = THIS_MODULE,
+ .open = vop_vdev_info_debug_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = vop_vdev_info_debug_release
+};
+
+void vop_init_debugfs(struct vop_info *vi)
+{
+ char name[16];
+
+ snprintf(name, sizeof(name), "%s%d", KBUILD_MODNAME, vi->vpdev->dnode);
+ vi->dbg = debugfs_create_dir(name, NULL);
+ if (!vi->dbg) {
+ pr_err("can't create debugfs dir vop\n");
+ return;
+ }
+ debugfs_create_file("dp", 0444, vi->dbg, vi, &dp_ops);
+ debugfs_create_file("vdev_info", 0444, vi->dbg, vi, &vdev_info_ops);
+}
+
+void vop_exit_debugfs(struct vop_info *vi)
+{
+ debugfs_remove_recursive(vi->dbg);
+}
diff --git a/drivers/misc/mic/vop/vop_main.c b/drivers/misc/mic/vop/vop_main.c
new file mode 100644
index 000000000..f4332a97c
--- /dev/null
+++ b/drivers/misc/mic/vop/vop_main.c
@@ -0,0 +1,766 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Adapted from:
+ *
+ * virtio for kvm on s390
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * Author(s): Christian Borntraeger <borntraeger@de.ibm.com>
+ *
+ * Intel Virtio Over PCIe (VOP) driver.
+ *
+ */
+#include <linux/delay.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/dma-mapping.h>
+
+#include "vop_main.h"
+
+#define VOP_MAX_VRINGS 4
+
+/*
+ * _vop_vdev - Allocated per virtio device instance injected by the peer.
+ *
+ * @vdev: Virtio device
+ * @desc: Virtio device page descriptor
+ * @dc: Virtio device control
+ * @vpdev: VOP device which is the parent for this virtio device
+ * @vr: Buffer for accessing the VRING
+ * @used: Buffer for used
+ * @used_size: Size of the used buffer
+ * @reset_done: Track whether VOP reset is complete
+ * @virtio_cookie: Cookie returned upon requesting a interrupt
+ * @c2h_vdev_db: The doorbell used by the guest to interrupt the host
+ * @h2c_vdev_db: The doorbell used by the host to interrupt the guest
+ * @dnode: The destination node
+ */
+struct _vop_vdev {
+ struct virtio_device vdev;
+ struct mic_device_desc __iomem *desc;
+ struct mic_device_ctrl __iomem *dc;
+ struct vop_device *vpdev;
+ void __iomem *vr[VOP_MAX_VRINGS];
+ dma_addr_t used[VOP_MAX_VRINGS];
+ int used_size[VOP_MAX_VRINGS];
+ struct completion reset_done;
+ struct mic_irq *virtio_cookie;
+ int c2h_vdev_db;
+ int h2c_vdev_db;
+ int dnode;
+};
+
+#define to_vopvdev(vd) container_of(vd, struct _vop_vdev, vdev)
+
+#define _vop_aligned_desc_size(d) __mic_align(_vop_desc_size(d), 8)
+
+/* Helper API to obtain the parent of the virtio device */
+static inline struct device *_vop_dev(struct _vop_vdev *vdev)
+{
+ return vdev->vdev.dev.parent;
+}
+
+static inline unsigned _vop_desc_size(struct mic_device_desc __iomem *desc)
+{
+ return sizeof(*desc)
+ + ioread8(&desc->num_vq) * sizeof(struct mic_vqconfig)
+ + ioread8(&desc->feature_len) * 2
+ + ioread8(&desc->config_len);
+}
+
+static inline struct mic_vqconfig __iomem *
+_vop_vq_config(struct mic_device_desc __iomem *desc)
+{
+ return (struct mic_vqconfig __iomem *)(desc + 1);
+}
+
+static inline u8 __iomem *
+_vop_vq_features(struct mic_device_desc __iomem *desc)
+{
+ return (u8 __iomem *)(_vop_vq_config(desc) + ioread8(&desc->num_vq));
+}
+
+static inline u8 __iomem *
+_vop_vq_configspace(struct mic_device_desc __iomem *desc)
+{
+ return _vop_vq_features(desc) + ioread8(&desc->feature_len) * 2;
+}
+
+static inline unsigned
+_vop_total_desc_size(struct mic_device_desc __iomem *desc)
+{
+ return _vop_aligned_desc_size(desc) + sizeof(struct mic_device_ctrl);
+}
+
+/* This gets the device's feature bits. */
+static u64 vop_get_features(struct virtio_device *vdev)
+{
+ unsigned int i, bits;
+ u32 features = 0;
+ struct mic_device_desc __iomem *desc = to_vopvdev(vdev)->desc;
+ u8 __iomem *in_features = _vop_vq_features(desc);
+ int feature_len = ioread8(&desc->feature_len);
+
+ bits = min_t(unsigned, feature_len, sizeof(vdev->features)) * 8;
+ for (i = 0; i < bits; i++)
+ if (ioread8(&in_features[i / 8]) & (BIT(i % 8)))
+ features |= BIT(i);
+
+ return features;
+}
+
+static int vop_finalize_features(struct virtio_device *vdev)
+{
+ unsigned int i, bits;
+ struct mic_device_desc __iomem *desc = to_vopvdev(vdev)->desc;
+ u8 feature_len = ioread8(&desc->feature_len);
+ /* Second half of bitmap is features we accept. */
+ u8 __iomem *out_features =
+ _vop_vq_features(desc) + feature_len;
+
+ /* Give virtio_ring a chance to accept features. */
+ vring_transport_features(vdev);
+
+ memset_io(out_features, 0, feature_len);
+ bits = min_t(unsigned, feature_len,
+ sizeof(vdev->features)) * 8;
+ for (i = 0; i < bits; i++) {
+ if (__virtio_test_bit(vdev, i))
+ iowrite8(ioread8(&out_features[i / 8]) | (1 << (i % 8)),
+ &out_features[i / 8]);
+ }
+ return 0;
+}
+
+/*
+ * Reading and writing elements in config space
+ */
+static void vop_get(struct virtio_device *vdev, unsigned int offset,
+ void *buf, unsigned len)
+{
+ struct mic_device_desc __iomem *desc = to_vopvdev(vdev)->desc;
+
+ if (offset + len > ioread8(&desc->config_len))
+ return;
+ memcpy_fromio(buf, _vop_vq_configspace(desc) + offset, len);
+}
+
+static void vop_set(struct virtio_device *vdev, unsigned int offset,
+ const void *buf, unsigned len)
+{
+ struct mic_device_desc __iomem *desc = to_vopvdev(vdev)->desc;
+
+ if (offset + len > ioread8(&desc->config_len))
+ return;
+ memcpy_toio(_vop_vq_configspace(desc) + offset, buf, len);
+}
+
+/*
+ * The operations to get and set the status word just access the status
+ * field of the device descriptor. set_status also interrupts the host
+ * to tell about status changes.
+ */
+static u8 vop_get_status(struct virtio_device *vdev)
+{
+ return ioread8(&to_vopvdev(vdev)->desc->status);
+}
+
+static void vop_set_status(struct virtio_device *dev, u8 status)
+{
+ struct _vop_vdev *vdev = to_vopvdev(dev);
+ struct vop_device *vpdev = vdev->vpdev;
+
+ if (!status)
+ return;
+ iowrite8(status, &vdev->desc->status);
+ vpdev->hw_ops->send_intr(vpdev, vdev->c2h_vdev_db);
+}
+
+/* Inform host on a virtio device reset and wait for ack from host */
+static void vop_reset_inform_host(struct virtio_device *dev)
+{
+ struct _vop_vdev *vdev = to_vopvdev(dev);
+ struct mic_device_ctrl __iomem *dc = vdev->dc;
+ struct vop_device *vpdev = vdev->vpdev;
+ int retry;
+
+ iowrite8(0, &dc->host_ack);
+ iowrite8(1, &dc->vdev_reset);
+ vpdev->hw_ops->send_intr(vpdev, vdev->c2h_vdev_db);
+
+ /* Wait till host completes all card accesses and acks the reset */
+ for (retry = 100; retry--;) {
+ if (ioread8(&dc->host_ack))
+ break;
+ msleep(100);
+ };
+
+ dev_dbg(_vop_dev(vdev), "%s: retry: %d\n", __func__, retry);
+
+ /* Reset status to 0 in case we timed out */
+ iowrite8(0, &vdev->desc->status);
+}
+
+static void vop_reset(struct virtio_device *dev)
+{
+ struct _vop_vdev *vdev = to_vopvdev(dev);
+
+ dev_dbg(_vop_dev(vdev), "%s: virtio id %d\n",
+ __func__, dev->id.device);
+
+ vop_reset_inform_host(dev);
+ complete_all(&vdev->reset_done);
+}
+
+/*
+ * The virtio_ring code calls this API when it wants to notify the Host.
+ */
+static bool vop_notify(struct virtqueue *vq)
+{
+ struct _vop_vdev *vdev = vq->priv;
+ struct vop_device *vpdev = vdev->vpdev;
+
+ vpdev->hw_ops->send_intr(vpdev, vdev->c2h_vdev_db);
+ return true;
+}
+
+static void vop_del_vq(struct virtqueue *vq, int n)
+{
+ struct _vop_vdev *vdev = to_vopvdev(vq->vdev);
+ struct vring *vr = (struct vring *)(vq + 1);
+ struct vop_device *vpdev = vdev->vpdev;
+
+ dma_unmap_single(&vpdev->dev, vdev->used[n],
+ vdev->used_size[n], DMA_BIDIRECTIONAL);
+ free_pages((unsigned long)vr->used, get_order(vdev->used_size[n]));
+ vring_del_virtqueue(vq);
+ vpdev->hw_ops->iounmap(vpdev, vdev->vr[n]);
+ vdev->vr[n] = NULL;
+}
+
+static void vop_del_vqs(struct virtio_device *dev)
+{
+ struct _vop_vdev *vdev = to_vopvdev(dev);
+ struct virtqueue *vq, *n;
+ int idx = 0;
+
+ dev_dbg(_vop_dev(vdev), "%s\n", __func__);
+
+ list_for_each_entry_safe(vq, n, &dev->vqs, list)
+ vop_del_vq(vq, idx++);
+}
+
+/*
+ * This routine will assign vring's allocated in host/io memory. Code in
+ * virtio_ring.c however continues to access this io memory as if it were local
+ * memory without io accessors.
+ */
+static struct virtqueue *vop_find_vq(struct virtio_device *dev,
+ unsigned index,
+ void (*callback)(struct virtqueue *vq),
+ const char *name, bool ctx)
+{
+ struct _vop_vdev *vdev = to_vopvdev(dev);
+ struct vop_device *vpdev = vdev->vpdev;
+ struct mic_vqconfig __iomem *vqconfig;
+ struct mic_vqconfig config;
+ struct virtqueue *vq;
+ void __iomem *va;
+ struct _mic_vring_info __iomem *info;
+ void *used;
+ int vr_size, _vr_size, err, magic;
+ struct vring *vr;
+ u8 type = ioread8(&vdev->desc->type);
+
+ if (index >= ioread8(&vdev->desc->num_vq))
+ return ERR_PTR(-ENOENT);
+
+ if (!name)
+ return ERR_PTR(-ENOENT);
+
+ /* First assign the vring's allocated in host memory */
+ vqconfig = _vop_vq_config(vdev->desc) + index;
+ memcpy_fromio(&config, vqconfig, sizeof(config));
+ _vr_size = round_up(vring_size(le16_to_cpu(config.num), MIC_VIRTIO_RING_ALIGN), 4);
+ vr_size = PAGE_ALIGN(_vr_size + sizeof(struct _mic_vring_info));
+ va = vpdev->hw_ops->ioremap(vpdev, le64_to_cpu(config.address),
+ vr_size);
+ if (!va)
+ return ERR_PTR(-ENOMEM);
+ vdev->vr[index] = va;
+ memset_io(va, 0x0, _vr_size);
+ vq = vring_new_virtqueue(
+ index,
+ le16_to_cpu(config.num), MIC_VIRTIO_RING_ALIGN,
+ dev,
+ false,
+ ctx,
+ (void __force *)va, vop_notify, callback, name);
+ if (!vq) {
+ err = -ENOMEM;
+ goto unmap;
+ }
+ info = va + _vr_size;
+ magic = ioread32(&info->magic);
+
+ if (WARN(magic != MIC_MAGIC + type + index, "magic mismatch")) {
+ err = -EIO;
+ goto unmap;
+ }
+
+ /* Allocate and reassign used ring now */
+ vdev->used_size[index] = PAGE_ALIGN(sizeof(__u16) * 3 +
+ sizeof(struct vring_used_elem) *
+ le16_to_cpu(config.num));
+ used = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+ get_order(vdev->used_size[index]));
+ if (!used) {
+ err = -ENOMEM;
+ dev_err(_vop_dev(vdev), "%s %d err %d\n",
+ __func__, __LINE__, err);
+ goto del_vq;
+ }
+ vdev->used[index] = dma_map_single(&vpdev->dev, used,
+ vdev->used_size[index],
+ DMA_BIDIRECTIONAL);
+ if (dma_mapping_error(&vpdev->dev, vdev->used[index])) {
+ err = -ENOMEM;
+ dev_err(_vop_dev(vdev), "%s %d err %d\n",
+ __func__, __LINE__, err);
+ goto free_used;
+ }
+ writeq(vdev->used[index], &vqconfig->used_address);
+ /*
+ * To reassign the used ring here we are directly accessing
+ * struct vring_virtqueue which is a private data structure
+ * in virtio_ring.c. At the minimum, a BUILD_BUG_ON() in
+ * vring_new_virtqueue() would ensure that
+ * (&vq->vring == (struct vring *) (&vq->vq + 1));
+ */
+ vr = (struct vring *)(vq + 1);
+ vr->used = used;
+
+ vq->priv = vdev;
+ return vq;
+free_used:
+ free_pages((unsigned long)used,
+ get_order(vdev->used_size[index]));
+del_vq:
+ vring_del_virtqueue(vq);
+unmap:
+ vpdev->hw_ops->iounmap(vpdev, vdev->vr[index]);
+ return ERR_PTR(err);
+}
+
+static int vop_find_vqs(struct virtio_device *dev, unsigned nvqs,
+ struct virtqueue *vqs[],
+ vq_callback_t *callbacks[],
+ const char * const names[], const bool *ctx,
+ struct irq_affinity *desc)
+{
+ struct _vop_vdev *vdev = to_vopvdev(dev);
+ struct vop_device *vpdev = vdev->vpdev;
+ struct mic_device_ctrl __iomem *dc = vdev->dc;
+ int i, err, retry;
+
+ /* We must have this many virtqueues. */
+ if (nvqs > ioread8(&vdev->desc->num_vq))
+ return -ENOENT;
+
+ for (i = 0; i < nvqs; ++i) {
+ dev_dbg(_vop_dev(vdev), "%s: %d: %s\n",
+ __func__, i, names[i]);
+ vqs[i] = vop_find_vq(dev, i, callbacks[i], names[i],
+ ctx ? ctx[i] : false);
+ if (IS_ERR(vqs[i])) {
+ err = PTR_ERR(vqs[i]);
+ goto error;
+ }
+ }
+
+ iowrite8(1, &dc->used_address_updated);
+ /*
+ * Send an interrupt to the host to inform it that used
+ * rings have been re-assigned.
+ */
+ vpdev->hw_ops->send_intr(vpdev, vdev->c2h_vdev_db);
+ for (retry = 100; --retry;) {
+ if (!ioread8(&dc->used_address_updated))
+ break;
+ msleep(100);
+ };
+
+ dev_dbg(_vop_dev(vdev), "%s: retry: %d\n", __func__, retry);
+ if (!retry) {
+ err = -ENODEV;
+ goto error;
+ }
+
+ return 0;
+error:
+ vop_del_vqs(dev);
+ return err;
+}
+
+/*
+ * The config ops structure as defined by virtio config
+ */
+static struct virtio_config_ops vop_vq_config_ops = {
+ .get_features = vop_get_features,
+ .finalize_features = vop_finalize_features,
+ .get = vop_get,
+ .set = vop_set,
+ .get_status = vop_get_status,
+ .set_status = vop_set_status,
+ .reset = vop_reset,
+ .find_vqs = vop_find_vqs,
+ .del_vqs = vop_del_vqs,
+};
+
+static irqreturn_t vop_virtio_intr_handler(int irq, void *data)
+{
+ struct _vop_vdev *vdev = data;
+ struct vop_device *vpdev = vdev->vpdev;
+ struct virtqueue *vq;
+
+ vpdev->hw_ops->ack_interrupt(vpdev, vdev->h2c_vdev_db);
+ list_for_each_entry(vq, &vdev->vdev.vqs, list)
+ vring_interrupt(0, vq);
+
+ return IRQ_HANDLED;
+}
+
+static void vop_virtio_release_dev(struct device *_d)
+{
+ struct virtio_device *vdev =
+ container_of(_d, struct virtio_device, dev);
+ struct _vop_vdev *vop_vdev =
+ container_of(vdev, struct _vop_vdev, vdev);
+
+ kfree(vop_vdev);
+}
+
+/*
+ * adds a new device and register it with virtio
+ * appropriate drivers are loaded by the device model
+ */
+static int _vop_add_device(struct mic_device_desc __iomem *d,
+ unsigned int offset, struct vop_device *vpdev,
+ int dnode)
+{
+ struct _vop_vdev *vdev, *reg_dev = NULL;
+ int ret;
+ u8 type = ioread8(&d->type);
+
+ vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
+ if (!vdev)
+ return -ENOMEM;
+
+ vdev->vpdev = vpdev;
+ vdev->vdev.dev.parent = &vpdev->dev;
+ vdev->vdev.dev.release = vop_virtio_release_dev;
+ vdev->vdev.id.device = type;
+ vdev->vdev.config = &vop_vq_config_ops;
+ vdev->desc = d;
+ vdev->dc = (void __iomem *)d + _vop_aligned_desc_size(d);
+ vdev->dnode = dnode;
+ vdev->vdev.priv = (void *)(u64)dnode;
+ init_completion(&vdev->reset_done);
+
+ vdev->h2c_vdev_db = vpdev->hw_ops->next_db(vpdev);
+ vdev->virtio_cookie = vpdev->hw_ops->request_irq(vpdev,
+ vop_virtio_intr_handler, "virtio intr",
+ vdev, vdev->h2c_vdev_db);
+ if (IS_ERR(vdev->virtio_cookie)) {
+ ret = PTR_ERR(vdev->virtio_cookie);
+ goto kfree;
+ }
+ iowrite8((u8)vdev->h2c_vdev_db, &vdev->dc->h2c_vdev_db);
+ vdev->c2h_vdev_db = ioread8(&vdev->dc->c2h_vdev_db);
+
+ ret = register_virtio_device(&vdev->vdev);
+ reg_dev = vdev;
+ if (ret) {
+ dev_err(_vop_dev(vdev),
+ "Failed to register vop device %u type %u\n",
+ offset, type);
+ goto free_irq;
+ }
+ writeq((u64)vdev, &vdev->dc->vdev);
+ dev_dbg(_vop_dev(vdev), "%s: registered vop device %u type %u vdev %p\n",
+ __func__, offset, type, vdev);
+
+ return 0;
+
+free_irq:
+ vpdev->hw_ops->free_irq(vpdev, vdev->virtio_cookie, vdev);
+kfree:
+ if (reg_dev)
+ put_device(&vdev->vdev.dev);
+ else
+ kfree(vdev);
+ return ret;
+}
+
+/*
+ * match for a vop device with a specific desc pointer
+ */
+static int vop_match_desc(struct device *dev, void *data)
+{
+ struct virtio_device *_dev = dev_to_virtio(dev);
+ struct _vop_vdev *vdev = to_vopvdev(_dev);
+
+ return vdev->desc == (void __iomem *)data;
+}
+
+static void _vop_handle_config_change(struct mic_device_desc __iomem *d,
+ unsigned int offset,
+ struct vop_device *vpdev)
+{
+ struct mic_device_ctrl __iomem *dc
+ = (void __iomem *)d + _vop_aligned_desc_size(d);
+ struct _vop_vdev *vdev = (struct _vop_vdev *)readq(&dc->vdev);
+
+ if (ioread8(&dc->config_change) != MIC_VIRTIO_PARAM_CONFIG_CHANGED)
+ return;
+
+ dev_dbg(&vpdev->dev, "%s %d\n", __func__, __LINE__);
+ virtio_config_changed(&vdev->vdev);
+ iowrite8(1, &dc->guest_ack);
+}
+
+/*
+ * removes a virtio device if a hot remove event has been
+ * requested by the host.
+ */
+static int _vop_remove_device(struct mic_device_desc __iomem *d,
+ unsigned int offset, struct vop_device *vpdev)
+{
+ struct mic_device_ctrl __iomem *dc
+ = (void __iomem *)d + _vop_aligned_desc_size(d);
+ struct _vop_vdev *vdev = (struct _vop_vdev *)readq(&dc->vdev);
+ u8 status;
+ int ret = -1;
+
+ if (ioread8(&dc->config_change) == MIC_VIRTIO_PARAM_DEV_REMOVE) {
+ struct device *dev = get_device(&vdev->vdev.dev);
+
+ dev_dbg(&vpdev->dev,
+ "%s %d config_change %d type %d vdev %p\n",
+ __func__, __LINE__,
+ ioread8(&dc->config_change), ioread8(&d->type), vdev);
+ status = ioread8(&d->status);
+ reinit_completion(&vdev->reset_done);
+ unregister_virtio_device(&vdev->vdev);
+ vpdev->hw_ops->free_irq(vpdev, vdev->virtio_cookie, vdev);
+ iowrite8(-1, &dc->h2c_vdev_db);
+ if (status & VIRTIO_CONFIG_S_DRIVER_OK)
+ wait_for_completion(&vdev->reset_done);
+ put_device(dev);
+ iowrite8(1, &dc->guest_ack);
+ dev_dbg(&vpdev->dev, "%s %d guest_ack %d\n",
+ __func__, __LINE__, ioread8(&dc->guest_ack));
+ iowrite8(-1, &d->type);
+ ret = 0;
+ }
+ return ret;
+}
+
+#define REMOVE_DEVICES true
+
+static void _vop_scan_devices(void __iomem *dp, struct vop_device *vpdev,
+ bool remove, int dnode)
+{
+ s8 type;
+ unsigned int i;
+ struct mic_device_desc __iomem *d;
+ struct mic_device_ctrl __iomem *dc;
+ struct device *dev;
+ int ret;
+
+ for (i = sizeof(struct mic_bootparam);
+ i < MIC_DP_SIZE; i += _vop_total_desc_size(d)) {
+ d = dp + i;
+ dc = (void __iomem *)d + _vop_aligned_desc_size(d);
+ /*
+ * This read barrier is paired with the corresponding write
+ * barrier on the host which is inserted before adding or
+ * removing a virtio device descriptor, by updating the type.
+ */
+ rmb();
+ type = ioread8(&d->type);
+
+ /* end of list */
+ if (type == 0)
+ break;
+
+ if (type == -1)
+ continue;
+
+ /* device already exists */
+ dev = device_find_child(&vpdev->dev, (void __force *)d,
+ vop_match_desc);
+ if (dev) {
+ if (remove)
+ iowrite8(MIC_VIRTIO_PARAM_DEV_REMOVE,
+ &dc->config_change);
+ put_device(dev);
+ _vop_handle_config_change(d, i, vpdev);
+ ret = _vop_remove_device(d, i, vpdev);
+ if (remove) {
+ iowrite8(0, &dc->config_change);
+ iowrite8(0, &dc->guest_ack);
+ }
+ continue;
+ }
+
+ /* new device */
+ dev_dbg(&vpdev->dev, "%s %d Adding new virtio device %p\n",
+ __func__, __LINE__, d);
+ if (!remove)
+ _vop_add_device(d, i, vpdev, dnode);
+ }
+}
+
+static void vop_scan_devices(struct vop_info *vi,
+ struct vop_device *vpdev, bool remove)
+{
+ void __iomem *dp = vpdev->hw_ops->get_remote_dp(vpdev);
+
+ if (!dp)
+ return;
+ mutex_lock(&vi->vop_mutex);
+ _vop_scan_devices(dp, vpdev, remove, vpdev->dnode);
+ mutex_unlock(&vi->vop_mutex);
+}
+
+/*
+ * vop_hotplug_device tries to find changes in the device page.
+ */
+static void vop_hotplug_devices(struct work_struct *work)
+{
+ struct vop_info *vi = container_of(work, struct vop_info,
+ hotplug_work);
+
+ vop_scan_devices(vi, vi->vpdev, !REMOVE_DEVICES);
+}
+
+/*
+ * Interrupt handler for hot plug/config changes etc.
+ */
+static irqreturn_t vop_extint_handler(int irq, void *data)
+{
+ struct vop_info *vi = data;
+ struct mic_bootparam __iomem *bp;
+ struct vop_device *vpdev = vi->vpdev;
+
+ bp = vpdev->hw_ops->get_remote_dp(vpdev);
+ dev_dbg(&vpdev->dev, "%s %d hotplug work\n",
+ __func__, __LINE__);
+ vpdev->hw_ops->ack_interrupt(vpdev, ioread8(&bp->h2c_config_db));
+ schedule_work(&vi->hotplug_work);
+ return IRQ_HANDLED;
+}
+
+static int vop_driver_probe(struct vop_device *vpdev)
+{
+ struct vop_info *vi;
+ int rc;
+
+ vi = kzalloc(sizeof(*vi), GFP_KERNEL);
+ if (!vi) {
+ rc = -ENOMEM;
+ goto exit;
+ }
+ dev_set_drvdata(&vpdev->dev, vi);
+ vi->vpdev = vpdev;
+
+ mutex_init(&vi->vop_mutex);
+ INIT_WORK(&vi->hotplug_work, vop_hotplug_devices);
+ if (vpdev->dnode) {
+ rc = vop_host_init(vi);
+ if (rc < 0)
+ goto free;
+ } else {
+ struct mic_bootparam __iomem *bootparam;
+
+ vop_scan_devices(vi, vpdev, !REMOVE_DEVICES);
+
+ vi->h2c_config_db = vpdev->hw_ops->next_db(vpdev);
+ vi->cookie = vpdev->hw_ops->request_irq(vpdev,
+ vop_extint_handler,
+ "virtio_config_intr",
+ vi, vi->h2c_config_db);
+ if (IS_ERR(vi->cookie)) {
+ rc = PTR_ERR(vi->cookie);
+ goto free;
+ }
+ bootparam = vpdev->hw_ops->get_remote_dp(vpdev);
+ iowrite8(vi->h2c_config_db, &bootparam->h2c_config_db);
+ }
+ vop_init_debugfs(vi);
+ return 0;
+free:
+ kfree(vi);
+exit:
+ return rc;
+}
+
+static void vop_driver_remove(struct vop_device *vpdev)
+{
+ struct vop_info *vi = dev_get_drvdata(&vpdev->dev);
+
+ if (vpdev->dnode) {
+ vop_host_uninit(vi);
+ } else {
+ struct mic_bootparam __iomem *bootparam =
+ vpdev->hw_ops->get_remote_dp(vpdev);
+ if (bootparam)
+ iowrite8(-1, &bootparam->h2c_config_db);
+ vpdev->hw_ops->free_irq(vpdev, vi->cookie, vi);
+ flush_work(&vi->hotplug_work);
+ vop_scan_devices(vi, vpdev, REMOVE_DEVICES);
+ }
+ vop_exit_debugfs(vi);
+ kfree(vi);
+}
+
+static struct vop_device_id id_table[] = {
+ { VOP_DEV_TRNSP, VOP_DEV_ANY_ID },
+ { 0 },
+};
+
+static struct vop_driver vop_driver = {
+ .driver.name = KBUILD_MODNAME,
+ .driver.owner = THIS_MODULE,
+ .id_table = id_table,
+ .probe = vop_driver_probe,
+ .remove = vop_driver_remove,
+};
+
+module_vop_driver(vop_driver);
+
+MODULE_DEVICE_TABLE(mbus, id_table);
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("Intel(R) Virtio Over PCIe (VOP) driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/misc/mic/vop/vop_main.h b/drivers/misc/mic/vop/vop_main.h
new file mode 100644
index 000000000..ba47ec7a6
--- /dev/null
+++ b/drivers/misc/mic/vop/vop_main.h
@@ -0,0 +1,170 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel Virtio Over PCIe (VOP) driver.
+ *
+ */
+#ifndef _VOP_MAIN_H_
+#define _VOP_MAIN_H_
+
+#include <linux/vringh.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio.h>
+#include <linux/miscdevice.h>
+
+#include <linux/mic_common.h>
+#include "../common/mic_dev.h"
+
+#include "../bus/vop_bus.h"
+
+/*
+ * Note on endianness.
+ * 1. Host can be both BE or LE
+ * 2. Guest/card is LE. Host uses le_to_cpu to access desc/avail
+ * rings and ioreadXX/iowriteXX to access used ring.
+ * 3. Device page exposed by host to guest contains LE values. Guest
+ * accesses these using ioreadXX/iowriteXX etc. This way in general we
+ * obey the virtio spec according to which guest works with native
+ * endianness and host is aware of guest endianness and does all
+ * required endianness conversion.
+ * 4. Data provided from user space to guest (in ADD_DEVICE and
+ * CONFIG_CHANGE ioctl's) is not interpreted by the driver and should be
+ * in guest endianness.
+ */
+
+/*
+ * vop_info - Allocated per invocation of VOP probe
+ *
+ * @vpdev: VOP device
+ * @hotplug_work: Handle virtio device creation, deletion and configuration
+ * @cookie: Cookie received upon requesting a virtio configuration interrupt
+ * @h2c_config_db: The doorbell used by the peer to indicate a config change
+ * @vdev_list: List of "active" virtio devices injected in the peer node
+ * @vop_mutex: Synchronize access to the device page as well as serialize
+ * creation/deletion of virtio devices on the peer node
+ * @dp: Peer device page information
+ * @dbg: Debugfs entry
+ * @dma_ch: The DMA channel used by this transport for data transfers.
+ * @name: Name for this transport used in misc device creation.
+ * @miscdev: The misc device registered.
+ */
+struct vop_info {
+ struct vop_device *vpdev;
+ struct work_struct hotplug_work;
+ struct mic_irq *cookie;
+ int h2c_config_db;
+ struct list_head vdev_list;
+ struct mutex vop_mutex;
+ void __iomem *dp;
+ struct dentry *dbg;
+ struct dma_chan *dma_ch;
+ char name[16];
+ struct miscdevice miscdev;
+};
+
+/**
+ * struct vop_vringh - Virtio ring host information.
+ *
+ * @vring: The VOP vring used for setting up user space mappings.
+ * @vrh: The host VRINGH used for accessing the card vrings.
+ * @riov: The VRINGH read kernel IOV.
+ * @wiov: The VRINGH write kernel IOV.
+ * @head: The VRINGH head index address passed to vringh_getdesc_kern(..).
+ * @vr_mutex: Mutex for synchronizing access to the VRING.
+ * @buf: Temporary kernel buffer used to copy in/out data
+ * from/to the card via DMA.
+ * @buf_da: dma address of buf.
+ * @vdev: Back pointer to VOP virtio device for vringh_notify(..).
+ */
+struct vop_vringh {
+ struct mic_vring vring;
+ struct vringh vrh;
+ struct vringh_kiov riov;
+ struct vringh_kiov wiov;
+ u16 head;
+ struct mutex vr_mutex;
+ void *buf;
+ dma_addr_t buf_da;
+ struct vop_vdev *vdev;
+};
+
+/**
+ * struct vop_vdev - Host information for a card Virtio device.
+ *
+ * @virtio_id - Virtio device id.
+ * @waitq - Waitqueue to allow ring3 apps to poll.
+ * @vpdev - pointer to VOP bus device.
+ * @poll_wake - Used for waking up threads blocked in poll.
+ * @out_bytes - Debug stats for number of bytes copied from host to card.
+ * @in_bytes - Debug stats for number of bytes copied from card to host.
+ * @out_bytes_dma - Debug stats for number of bytes copied from host to card
+ * using DMA.
+ * @in_bytes_dma - Debug stats for number of bytes copied from card to host
+ * using DMA.
+ * @tx_len_unaligned - Debug stats for number of bytes copied to the card where
+ * the transfer length did not have the required DMA alignment.
+ * @tx_dst_unaligned - Debug stats for number of bytes copied where the
+ * destination address on the card did not have the required DMA alignment.
+ * @vvr - Store per VRING data structures.
+ * @virtio_bh_work - Work struct used to schedule virtio bottom half handling.
+ * @dd - Virtio device descriptor.
+ * @dc - Virtio device control fields.
+ * @list - List of Virtio devices.
+ * @virtio_db - The doorbell used by the card to interrupt the host.
+ * @virtio_cookie - The cookie returned while requesting interrupts.
+ * @vi: Transport information.
+ * @vdev_mutex: Mutex synchronizing virtio device injection,
+ * removal and data transfers.
+ * @destroy: Track if a virtio device is being destroyed.
+ * @deleted: The virtio device has been deleted.
+ */
+struct vop_vdev {
+ int virtio_id;
+ wait_queue_head_t waitq;
+ struct vop_device *vpdev;
+ int poll_wake;
+ unsigned long out_bytes;
+ unsigned long in_bytes;
+ unsigned long out_bytes_dma;
+ unsigned long in_bytes_dma;
+ unsigned long tx_len_unaligned;
+ unsigned long tx_dst_unaligned;
+ unsigned long rx_dst_unaligned;
+ struct vop_vringh vvr[MIC_MAX_VRINGS];
+ struct work_struct virtio_bh_work;
+ struct mic_device_desc *dd;
+ struct mic_device_ctrl *dc;
+ struct list_head list;
+ int virtio_db;
+ struct mic_irq *virtio_cookie;
+ struct vop_info *vi;
+ struct mutex vdev_mutex;
+ struct completion destroy;
+ bool deleted;
+};
+
+/* Helper API to check if a virtio device is running */
+static inline bool vop_vdevup(struct vop_vdev *vdev)
+{
+ return !!vdev->dd->status;
+}
+
+void vop_init_debugfs(struct vop_info *vi);
+void vop_exit_debugfs(struct vop_info *vi);
+int vop_host_init(struct vop_info *vi);
+void vop_host_uninit(struct vop_info *vi);
+#endif
diff --git a/drivers/misc/mic/vop/vop_vringh.c b/drivers/misc/mic/vop/vop_vringh.c
new file mode 100644
index 000000000..a252c2199
--- /dev/null
+++ b/drivers/misc/mic/vop/vop_vringh.c
@@ -0,0 +1,1169 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel Virtio Over PCIe (VOP) driver.
+ *
+ */
+#include <linux/sched.h>
+#include <linux/poll.h>
+#include <linux/dma-mapping.h>
+
+#include <linux/mic_common.h>
+#include "../common/mic_dev.h"
+
+#include <linux/mic_ioctl.h>
+#include "vop_main.h"
+
+/* Helper API to obtain the VOP PCIe device */
+static inline struct device *vop_dev(struct vop_vdev *vdev)
+{
+ return vdev->vpdev->dev.parent;
+}
+
+/* Helper API to check if a virtio device is initialized */
+static inline int vop_vdev_inited(struct vop_vdev *vdev)
+{
+ if (!vdev)
+ return -EINVAL;
+ /* Device has not been created yet */
+ if (!vdev->dd || !vdev->dd->type) {
+ dev_err(vop_dev(vdev), "%s %d err %d\n",
+ __func__, __LINE__, -EINVAL);
+ return -EINVAL;
+ }
+ /* Device has been removed/deleted */
+ if (vdev->dd->type == -1) {
+ dev_dbg(vop_dev(vdev), "%s %d err %d\n",
+ __func__, __LINE__, -ENODEV);
+ return -ENODEV;
+ }
+ return 0;
+}
+
+static void _vop_notify(struct vringh *vrh)
+{
+ struct vop_vringh *vvrh = container_of(vrh, struct vop_vringh, vrh);
+ struct vop_vdev *vdev = vvrh->vdev;
+ struct vop_device *vpdev = vdev->vpdev;
+ s8 db = vdev->dc->h2c_vdev_db;
+
+ if (db != -1)
+ vpdev->hw_ops->send_intr(vpdev, db);
+}
+
+static void vop_virtio_init_post(struct vop_vdev *vdev)
+{
+ struct mic_vqconfig *vqconfig = mic_vq_config(vdev->dd);
+ struct vop_device *vpdev = vdev->vpdev;
+ int i, used_size;
+
+ for (i = 0; i < vdev->dd->num_vq; i++) {
+ used_size = PAGE_ALIGN(sizeof(u16) * 3 +
+ sizeof(struct vring_used_elem) *
+ le16_to_cpu(vqconfig->num));
+ if (!le64_to_cpu(vqconfig[i].used_address)) {
+ dev_warn(vop_dev(vdev), "used_address zero??\n");
+ continue;
+ }
+ vdev->vvr[i].vrh.vring.used =
+ (void __force *)vpdev->hw_ops->ioremap(
+ vpdev,
+ le64_to_cpu(vqconfig[i].used_address),
+ used_size);
+ }
+
+ vdev->dc->used_address_updated = 0;
+
+ dev_info(vop_dev(vdev), "%s: device type %d LINKUP\n",
+ __func__, vdev->virtio_id);
+}
+
+static inline void vop_virtio_device_reset(struct vop_vdev *vdev)
+{
+ int i;
+
+ dev_dbg(vop_dev(vdev), "%s: status %d device type %d RESET\n",
+ __func__, vdev->dd->status, vdev->virtio_id);
+
+ for (i = 0; i < vdev->dd->num_vq; i++)
+ /*
+ * Avoid lockdep false positive. The + 1 is for the vop
+ * mutex which is held in the reset devices code path.
+ */
+ mutex_lock_nested(&vdev->vvr[i].vr_mutex, i + 1);
+
+ /* 0 status means "reset" */
+ vdev->dd->status = 0;
+ vdev->dc->vdev_reset = 0;
+ vdev->dc->host_ack = 1;
+
+ for (i = 0; i < vdev->dd->num_vq; i++) {
+ struct vringh *vrh = &vdev->vvr[i].vrh;
+
+ vdev->vvr[i].vring.info->avail_idx = 0;
+ vrh->completed = 0;
+ vrh->last_avail_idx = 0;
+ vrh->last_used_idx = 0;
+ }
+
+ for (i = 0; i < vdev->dd->num_vq; i++)
+ mutex_unlock(&vdev->vvr[i].vr_mutex);
+}
+
+static void vop_virtio_reset_devices(struct vop_info *vi)
+{
+ struct list_head *pos, *tmp;
+ struct vop_vdev *vdev;
+
+ list_for_each_safe(pos, tmp, &vi->vdev_list) {
+ vdev = list_entry(pos, struct vop_vdev, list);
+ vop_virtio_device_reset(vdev);
+ vdev->poll_wake = 1;
+ wake_up(&vdev->waitq);
+ }
+}
+
+static void vop_bh_handler(struct work_struct *work)
+{
+ struct vop_vdev *vdev = container_of(work, struct vop_vdev,
+ virtio_bh_work);
+
+ if (vdev->dc->used_address_updated)
+ vop_virtio_init_post(vdev);
+
+ if (vdev->dc->vdev_reset)
+ vop_virtio_device_reset(vdev);
+
+ vdev->poll_wake = 1;
+ wake_up(&vdev->waitq);
+}
+
+static irqreturn_t _vop_virtio_intr_handler(int irq, void *data)
+{
+ struct vop_vdev *vdev = data;
+ struct vop_device *vpdev = vdev->vpdev;
+
+ vpdev->hw_ops->ack_interrupt(vpdev, vdev->virtio_db);
+ schedule_work(&vdev->virtio_bh_work);
+ return IRQ_HANDLED;
+}
+
+static int vop_virtio_config_change(struct vop_vdev *vdev, void *argp)
+{
+ DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake);
+ int ret = 0, retry, i;
+ struct vop_device *vpdev = vdev->vpdev;
+ struct vop_info *vi = dev_get_drvdata(&vpdev->dev);
+ struct mic_bootparam *bootparam = vpdev->hw_ops->get_dp(vpdev);
+ s8 db = bootparam->h2c_config_db;
+
+ mutex_lock(&vi->vop_mutex);
+ for (i = 0; i < vdev->dd->num_vq; i++)
+ mutex_lock_nested(&vdev->vvr[i].vr_mutex, i + 1);
+
+ if (db == -1 || vdev->dd->type == -1) {
+ ret = -EIO;
+ goto exit;
+ }
+
+ memcpy(mic_vq_configspace(vdev->dd), argp, vdev->dd->config_len);
+ vdev->dc->config_change = MIC_VIRTIO_PARAM_CONFIG_CHANGED;
+ vpdev->hw_ops->send_intr(vpdev, db);
+
+ for (retry = 100; retry--;) {
+ ret = wait_event_timeout(wake, vdev->dc->guest_ack,
+ msecs_to_jiffies(100));
+ if (ret)
+ break;
+ }
+
+ dev_dbg(vop_dev(vdev),
+ "%s %d retry: %d\n", __func__, __LINE__, retry);
+ vdev->dc->config_change = 0;
+ vdev->dc->guest_ack = 0;
+exit:
+ for (i = 0; i < vdev->dd->num_vq; i++)
+ mutex_unlock(&vdev->vvr[i].vr_mutex);
+ mutex_unlock(&vi->vop_mutex);
+ return ret;
+}
+
+static int vop_copy_dp_entry(struct vop_vdev *vdev,
+ struct mic_device_desc *argp, __u8 *type,
+ struct mic_device_desc **devpage)
+{
+ struct vop_device *vpdev = vdev->vpdev;
+ struct mic_device_desc *devp;
+ struct mic_vqconfig *vqconfig;
+ int ret = 0, i;
+ bool slot_found = false;
+
+ vqconfig = mic_vq_config(argp);
+ for (i = 0; i < argp->num_vq; i++) {
+ if (le16_to_cpu(vqconfig[i].num) > MIC_MAX_VRING_ENTRIES) {
+ ret = -EINVAL;
+ dev_err(vop_dev(vdev), "%s %d err %d\n",
+ __func__, __LINE__, ret);
+ goto exit;
+ }
+ }
+
+ /* Find the first free device page entry */
+ for (i = sizeof(struct mic_bootparam);
+ i < MIC_DP_SIZE - mic_total_desc_size(argp);
+ i += mic_total_desc_size(devp)) {
+ devp = vpdev->hw_ops->get_dp(vpdev) + i;
+ if (devp->type == 0 || devp->type == -1) {
+ slot_found = true;
+ break;
+ }
+ }
+ if (!slot_found) {
+ ret = -EINVAL;
+ dev_err(vop_dev(vdev), "%s %d err %d\n",
+ __func__, __LINE__, ret);
+ goto exit;
+ }
+ /*
+ * Save off the type before doing the memcpy. Type will be set in the
+ * end after completing all initialization for the new device.
+ */
+ *type = argp->type;
+ argp->type = 0;
+ memcpy(devp, argp, mic_desc_size(argp));
+
+ *devpage = devp;
+exit:
+ return ret;
+}
+
+static void vop_init_device_ctrl(struct vop_vdev *vdev,
+ struct mic_device_desc *devpage)
+{
+ struct mic_device_ctrl *dc;
+
+ dc = (void *)devpage + mic_aligned_desc_size(devpage);
+
+ dc->config_change = 0;
+ dc->guest_ack = 0;
+ dc->vdev_reset = 0;
+ dc->host_ack = 0;
+ dc->used_address_updated = 0;
+ dc->c2h_vdev_db = -1;
+ dc->h2c_vdev_db = -1;
+ vdev->dc = dc;
+}
+
+static int vop_virtio_add_device(struct vop_vdev *vdev,
+ struct mic_device_desc *argp)
+{
+ struct vop_info *vi = vdev->vi;
+ struct vop_device *vpdev = vi->vpdev;
+ struct mic_device_desc *dd = NULL;
+ struct mic_vqconfig *vqconfig;
+ int vr_size, i, j, ret;
+ u8 type = 0;
+ s8 db = -1;
+ char irqname[16];
+ struct mic_bootparam *bootparam;
+ u16 num;
+ dma_addr_t vr_addr;
+
+ bootparam = vpdev->hw_ops->get_dp(vpdev);
+ init_waitqueue_head(&vdev->waitq);
+ INIT_LIST_HEAD(&vdev->list);
+ vdev->vpdev = vpdev;
+
+ ret = vop_copy_dp_entry(vdev, argp, &type, &dd);
+ if (ret) {
+ dev_err(vop_dev(vdev), "%s %d err %d\n",
+ __func__, __LINE__, ret);
+ return ret;
+ }
+
+ vop_init_device_ctrl(vdev, dd);
+
+ vdev->dd = dd;
+ vdev->virtio_id = type;
+ vqconfig = mic_vq_config(dd);
+ INIT_WORK(&vdev->virtio_bh_work, vop_bh_handler);
+
+ for (i = 0; i < dd->num_vq; i++) {
+ struct vop_vringh *vvr = &vdev->vvr[i];
+ struct mic_vring *vr = &vdev->vvr[i].vring;
+
+ num = le16_to_cpu(vqconfig[i].num);
+ mutex_init(&vvr->vr_mutex);
+ vr_size = PAGE_ALIGN(round_up(vring_size(num, MIC_VIRTIO_RING_ALIGN), 4) +
+ sizeof(struct _mic_vring_info));
+ vr->va = (void *)
+ __get_free_pages(GFP_KERNEL | __GFP_ZERO,
+ get_order(vr_size));
+ if (!vr->va) {
+ ret = -ENOMEM;
+ dev_err(vop_dev(vdev), "%s %d err %d\n",
+ __func__, __LINE__, ret);
+ goto err;
+ }
+ vr->len = vr_size;
+ vr->info = vr->va + round_up(vring_size(num, MIC_VIRTIO_RING_ALIGN), 4);
+ vr->info->magic = cpu_to_le32(MIC_MAGIC + vdev->virtio_id + i);
+ vr_addr = dma_map_single(&vpdev->dev, vr->va, vr_size,
+ DMA_BIDIRECTIONAL);
+ if (dma_mapping_error(&vpdev->dev, vr_addr)) {
+ free_pages((unsigned long)vr->va, get_order(vr_size));
+ ret = -ENOMEM;
+ dev_err(vop_dev(vdev), "%s %d err %d\n",
+ __func__, __LINE__, ret);
+ goto err;
+ }
+ vqconfig[i].address = cpu_to_le64(vr_addr);
+
+ vring_init(&vr->vr, num, vr->va, MIC_VIRTIO_RING_ALIGN);
+ ret = vringh_init_kern(&vvr->vrh,
+ *(u32 *)mic_vq_features(vdev->dd),
+ num, false, vr->vr.desc, vr->vr.avail,
+ vr->vr.used);
+ if (ret) {
+ dev_err(vop_dev(vdev), "%s %d err %d\n",
+ __func__, __LINE__, ret);
+ goto err;
+ }
+ vringh_kiov_init(&vvr->riov, NULL, 0);
+ vringh_kiov_init(&vvr->wiov, NULL, 0);
+ vvr->head = USHRT_MAX;
+ vvr->vdev = vdev;
+ vvr->vrh.notify = _vop_notify;
+ dev_dbg(&vpdev->dev,
+ "%s %d index %d va %p info %p vr_size 0x%x\n",
+ __func__, __LINE__, i, vr->va, vr->info, vr_size);
+ vvr->buf = (void *)__get_free_pages(GFP_KERNEL,
+ get_order(VOP_INT_DMA_BUF_SIZE));
+ vvr->buf_da = dma_map_single(&vpdev->dev,
+ vvr->buf, VOP_INT_DMA_BUF_SIZE,
+ DMA_BIDIRECTIONAL);
+ }
+
+ snprintf(irqname, sizeof(irqname), "vop%dvirtio%d", vpdev->index,
+ vdev->virtio_id);
+ vdev->virtio_db = vpdev->hw_ops->next_db(vpdev);
+ vdev->virtio_cookie = vpdev->hw_ops->request_irq(vpdev,
+ _vop_virtio_intr_handler, irqname, vdev,
+ vdev->virtio_db);
+ if (IS_ERR(vdev->virtio_cookie)) {
+ ret = PTR_ERR(vdev->virtio_cookie);
+ dev_dbg(&vpdev->dev, "request irq failed\n");
+ goto err;
+ }
+
+ vdev->dc->c2h_vdev_db = vdev->virtio_db;
+
+ /*
+ * Order the type update with previous stores. This write barrier
+ * is paired with the corresponding read barrier before the uncached
+ * system memory read of the type, on the card while scanning the
+ * device page.
+ */
+ smp_wmb();
+ dd->type = type;
+ argp->type = type;
+
+ if (bootparam) {
+ db = bootparam->h2c_config_db;
+ if (db != -1)
+ vpdev->hw_ops->send_intr(vpdev, db);
+ }
+ dev_dbg(&vpdev->dev, "Added virtio id %d db %d\n", dd->type, db);
+ return 0;
+err:
+ vqconfig = mic_vq_config(dd);
+ for (j = 0; j < i; j++) {
+ struct vop_vringh *vvr = &vdev->vvr[j];
+
+ dma_unmap_single(&vpdev->dev, le64_to_cpu(vqconfig[j].address),
+ vvr->vring.len, DMA_BIDIRECTIONAL);
+ free_pages((unsigned long)vvr->vring.va,
+ get_order(vvr->vring.len));
+ }
+ return ret;
+}
+
+static void vop_dev_remove(struct vop_info *pvi, struct mic_device_ctrl *devp,
+ struct vop_device *vpdev)
+{
+ struct mic_bootparam *bootparam = vpdev->hw_ops->get_dp(vpdev);
+ s8 db;
+ int ret, retry;
+ DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake);
+
+ devp->config_change = MIC_VIRTIO_PARAM_DEV_REMOVE;
+ db = bootparam->h2c_config_db;
+ if (db != -1)
+ vpdev->hw_ops->send_intr(vpdev, db);
+ else
+ goto done;
+ for (retry = 15; retry--;) {
+ ret = wait_event_timeout(wake, devp->guest_ack,
+ msecs_to_jiffies(1000));
+ if (ret)
+ break;
+ }
+done:
+ devp->config_change = 0;
+ devp->guest_ack = 0;
+}
+
+static void vop_virtio_del_device(struct vop_vdev *vdev)
+{
+ struct vop_info *vi = vdev->vi;
+ struct vop_device *vpdev = vdev->vpdev;
+ int i;
+ struct mic_vqconfig *vqconfig;
+ struct mic_bootparam *bootparam = vpdev->hw_ops->get_dp(vpdev);
+
+ if (!bootparam)
+ goto skip_hot_remove;
+ vop_dev_remove(vi, vdev->dc, vpdev);
+skip_hot_remove:
+ vpdev->hw_ops->free_irq(vpdev, vdev->virtio_cookie, vdev);
+ flush_work(&vdev->virtio_bh_work);
+ vqconfig = mic_vq_config(vdev->dd);
+ for (i = 0; i < vdev->dd->num_vq; i++) {
+ struct vop_vringh *vvr = &vdev->vvr[i];
+
+ dma_unmap_single(&vpdev->dev,
+ vvr->buf_da, VOP_INT_DMA_BUF_SIZE,
+ DMA_BIDIRECTIONAL);
+ free_pages((unsigned long)vvr->buf,
+ get_order(VOP_INT_DMA_BUF_SIZE));
+ vringh_kiov_cleanup(&vvr->riov);
+ vringh_kiov_cleanup(&vvr->wiov);
+ dma_unmap_single(&vpdev->dev, le64_to_cpu(vqconfig[i].address),
+ vvr->vring.len, DMA_BIDIRECTIONAL);
+ free_pages((unsigned long)vvr->vring.va,
+ get_order(vvr->vring.len));
+ }
+ /*
+ * Order the type update with previous stores. This write barrier
+ * is paired with the corresponding read barrier before the uncached
+ * system memory read of the type, on the card while scanning the
+ * device page.
+ */
+ smp_wmb();
+ vdev->dd->type = -1;
+}
+
+/*
+ * vop_sync_dma - Wrapper for synchronous DMAs.
+ *
+ * @dev - The address of the pointer to the device instance used
+ * for DMA registration.
+ * @dst - destination DMA address.
+ * @src - source DMA address.
+ * @len - size of the transfer.
+ *
+ * Return DMA_SUCCESS on success
+ */
+static int vop_sync_dma(struct vop_vdev *vdev, dma_addr_t dst, dma_addr_t src,
+ size_t len)
+{
+ int err = 0;
+ struct dma_device *ddev;
+ struct dma_async_tx_descriptor *tx;
+ struct vop_info *vi = dev_get_drvdata(&vdev->vpdev->dev);
+ struct dma_chan *vop_ch = vi->dma_ch;
+
+ if (!vop_ch) {
+ err = -EBUSY;
+ goto error;
+ }
+ ddev = vop_ch->device;
+ tx = ddev->device_prep_dma_memcpy(vop_ch, dst, src, len,
+ DMA_PREP_FENCE);
+ if (!tx) {
+ err = -ENOMEM;
+ goto error;
+ } else {
+ dma_cookie_t cookie;
+
+ cookie = tx->tx_submit(tx);
+ if (dma_submit_error(cookie)) {
+ err = -ENOMEM;
+ goto error;
+ }
+ dma_async_issue_pending(vop_ch);
+ err = dma_sync_wait(vop_ch, cookie);
+ }
+error:
+ if (err)
+ dev_err(&vi->vpdev->dev, "%s %d err %d\n",
+ __func__, __LINE__, err);
+ return err;
+}
+
+#define VOP_USE_DMA true
+
+/*
+ * Initiates the copies across the PCIe bus from card memory to a user
+ * space buffer. When transfers are done using DMA, source/destination
+ * addresses and transfer length must follow the alignment requirements of
+ * the MIC DMA engine.
+ */
+static int vop_virtio_copy_to_user(struct vop_vdev *vdev, void __user *ubuf,
+ size_t len, u64 daddr, size_t dlen,
+ int vr_idx)
+{
+ struct vop_device *vpdev = vdev->vpdev;
+ void __iomem *dbuf = vpdev->hw_ops->ioremap(vpdev, daddr, len);
+ struct vop_vringh *vvr = &vdev->vvr[vr_idx];
+ struct vop_info *vi = dev_get_drvdata(&vpdev->dev);
+ size_t dma_alignment = 1 << vi->dma_ch->device->copy_align;
+ bool x200 = is_dma_copy_aligned(vi->dma_ch->device, 1, 1, 1);
+ size_t dma_offset, partlen;
+ int err;
+
+ if (!VOP_USE_DMA) {
+ if (copy_to_user(ubuf, (void __force *)dbuf, len)) {
+ err = -EFAULT;
+ dev_err(vop_dev(vdev), "%s %d err %d\n",
+ __func__, __LINE__, err);
+ goto err;
+ }
+ vdev->in_bytes += len;
+ err = 0;
+ goto err;
+ }
+
+ dma_offset = daddr - round_down(daddr, dma_alignment);
+ daddr -= dma_offset;
+ len += dma_offset;
+ /*
+ * X100 uses DMA addresses as seen by the card so adding
+ * the aperture base is not required for DMA. However x200
+ * requires DMA addresses to be an offset into the bar so
+ * add the aperture base for x200.
+ */
+ if (x200)
+ daddr += vpdev->aper->pa;
+ while (len) {
+ partlen = min_t(size_t, len, VOP_INT_DMA_BUF_SIZE);
+ err = vop_sync_dma(vdev, vvr->buf_da, daddr,
+ ALIGN(partlen, dma_alignment));
+ if (err) {
+ dev_err(vop_dev(vdev), "%s %d err %d\n",
+ __func__, __LINE__, err);
+ goto err;
+ }
+ if (copy_to_user(ubuf, vvr->buf + dma_offset,
+ partlen - dma_offset)) {
+ err = -EFAULT;
+ dev_err(vop_dev(vdev), "%s %d err %d\n",
+ __func__, __LINE__, err);
+ goto err;
+ }
+ daddr += partlen;
+ ubuf += partlen;
+ dbuf += partlen;
+ vdev->in_bytes_dma += partlen;
+ vdev->in_bytes += partlen;
+ len -= partlen;
+ dma_offset = 0;
+ }
+ err = 0;
+err:
+ vpdev->hw_ops->iounmap(vpdev, dbuf);
+ dev_dbg(vop_dev(vdev),
+ "%s: ubuf %p dbuf %p len 0x%lx vr_idx 0x%x\n",
+ __func__, ubuf, dbuf, len, vr_idx);
+ return err;
+}
+
+/*
+ * Initiates copies across the PCIe bus from a user space buffer to card
+ * memory. When transfers are done using DMA, source/destination addresses
+ * and transfer length must follow the alignment requirements of the MIC
+ * DMA engine.
+ */
+static int vop_virtio_copy_from_user(struct vop_vdev *vdev, void __user *ubuf,
+ size_t len, u64 daddr, size_t dlen,
+ int vr_idx)
+{
+ struct vop_device *vpdev = vdev->vpdev;
+ void __iomem *dbuf = vpdev->hw_ops->ioremap(vpdev, daddr, len);
+ struct vop_vringh *vvr = &vdev->vvr[vr_idx];
+ struct vop_info *vi = dev_get_drvdata(&vdev->vpdev->dev);
+ size_t dma_alignment = 1 << vi->dma_ch->device->copy_align;
+ bool x200 = is_dma_copy_aligned(vi->dma_ch->device, 1, 1, 1);
+ size_t partlen;
+ bool dma = VOP_USE_DMA;
+ int err = 0;
+ size_t offset = 0;
+
+ if (daddr & (dma_alignment - 1)) {
+ vdev->tx_dst_unaligned += len;
+ dma = false;
+ } else if (ALIGN(len, dma_alignment) > dlen) {
+ vdev->tx_len_unaligned += len;
+ dma = false;
+ }
+
+ if (!dma)
+ goto memcpy;
+
+ /*
+ * X100 uses DMA addresses as seen by the card so adding
+ * the aperture base is not required for DMA. However x200
+ * requires DMA addresses to be an offset into the bar so
+ * add the aperture base for x200.
+ */
+ if (x200)
+ daddr += vpdev->aper->pa;
+ while (len) {
+ partlen = min_t(size_t, len, VOP_INT_DMA_BUF_SIZE);
+
+ if (copy_from_user(vvr->buf, ubuf, partlen)) {
+ err = -EFAULT;
+ dev_err(vop_dev(vdev), "%s %d err %d\n",
+ __func__, __LINE__, err);
+ goto err;
+ }
+ err = vop_sync_dma(vdev, daddr, vvr->buf_da,
+ ALIGN(partlen, dma_alignment));
+ if (err) {
+ dev_err(vop_dev(vdev), "%s %d err %d\n",
+ __func__, __LINE__, err);
+ goto err;
+ }
+ daddr += partlen;
+ ubuf += partlen;
+ dbuf += partlen;
+ vdev->out_bytes_dma += partlen;
+ vdev->out_bytes += partlen;
+ len -= partlen;
+ }
+memcpy:
+ /*
+ * We are copying to IO below and should ideally use something
+ * like copy_from_user_toio(..) if it existed.
+ */
+ while (len) {
+ partlen = min_t(size_t, len, VOP_INT_DMA_BUF_SIZE);
+
+ if (copy_from_user(vvr->buf, ubuf + offset, partlen)) {
+ err = -EFAULT;
+ dev_err(vop_dev(vdev), "%s %d err %d\n",
+ __func__, __LINE__, err);
+ goto err;
+ }
+ memcpy_toio(dbuf + offset, vvr->buf, partlen);
+ offset += partlen;
+ vdev->out_bytes += partlen;
+ len -= partlen;
+ }
+ err = 0;
+err:
+ vpdev->hw_ops->iounmap(vpdev, dbuf);
+ dev_dbg(vop_dev(vdev),
+ "%s: ubuf %p dbuf %p len 0x%lx vr_idx 0x%x\n",
+ __func__, ubuf, dbuf, len, vr_idx);
+ return err;
+}
+
+#define MIC_VRINGH_READ true
+
+/* Determine the total number of bytes consumed in a VRINGH KIOV */
+static inline u32 vop_vringh_iov_consumed(struct vringh_kiov *iov)
+{
+ int i;
+ u32 total = iov->consumed;
+
+ for (i = 0; i < iov->i; i++)
+ total += iov->iov[i].iov_len;
+ return total;
+}
+
+/*
+ * Traverse the VRINGH KIOV and issue the APIs to trigger the copies.
+ * This API is heavily based on the vringh_iov_xfer(..) implementation
+ * in vringh.c. The reason we cannot reuse vringh_iov_pull_kern(..)
+ * and vringh_iov_push_kern(..) directly is because there is no
+ * way to override the VRINGH xfer(..) routines as of v3.10.
+ */
+static int vop_vringh_copy(struct vop_vdev *vdev, struct vringh_kiov *iov,
+ void __user *ubuf, size_t len, bool read, int vr_idx,
+ size_t *out_len)
+{
+ int ret = 0;
+ size_t partlen, tot_len = 0;
+
+ while (len && iov->i < iov->used) {
+ struct kvec *kiov = &iov->iov[iov->i];
+
+ partlen = min(kiov->iov_len, len);
+ if (read)
+ ret = vop_virtio_copy_to_user(vdev, ubuf, partlen,
+ (u64)kiov->iov_base,
+ kiov->iov_len,
+ vr_idx);
+ else
+ ret = vop_virtio_copy_from_user(vdev, ubuf, partlen,
+ (u64)kiov->iov_base,
+ kiov->iov_len,
+ vr_idx);
+ if (ret) {
+ dev_err(vop_dev(vdev), "%s %d err %d\n",
+ __func__, __LINE__, ret);
+ break;
+ }
+ len -= partlen;
+ ubuf += partlen;
+ tot_len += partlen;
+ iov->consumed += partlen;
+ kiov->iov_len -= partlen;
+ kiov->iov_base += partlen;
+ if (!kiov->iov_len) {
+ /* Fix up old iov element then increment. */
+ kiov->iov_len = iov->consumed;
+ kiov->iov_base -= iov->consumed;
+
+ iov->consumed = 0;
+ iov->i++;
+ }
+ }
+ *out_len = tot_len;
+ return ret;
+}
+
+/*
+ * Use the standard VRINGH infrastructure in the kernel to fetch new
+ * descriptors, initiate the copies and update the used ring.
+ */
+static int _vop_virtio_copy(struct vop_vdev *vdev, struct mic_copy_desc *copy)
+{
+ int ret = 0;
+ u32 iovcnt = copy->iovcnt;
+ struct iovec iov;
+ struct iovec __user *u_iov = copy->iov;
+ void __user *ubuf = NULL;
+ struct vop_vringh *vvr = &vdev->vvr[copy->vr_idx];
+ struct vringh_kiov *riov = &vvr->riov;
+ struct vringh_kiov *wiov = &vvr->wiov;
+ struct vringh *vrh = &vvr->vrh;
+ u16 *head = &vvr->head;
+ struct mic_vring *vr = &vvr->vring;
+ size_t len = 0, out_len;
+
+ copy->out_len = 0;
+ /* Fetch a new IOVEC if all previous elements have been processed */
+ if (riov->i == riov->used && wiov->i == wiov->used) {
+ ret = vringh_getdesc_kern(vrh, riov, wiov,
+ head, GFP_KERNEL);
+ /* Check if there are available descriptors */
+ if (ret <= 0)
+ return ret;
+ }
+ while (iovcnt) {
+ if (!len) {
+ /* Copy over a new iovec from user space. */
+ ret = copy_from_user(&iov, u_iov, sizeof(*u_iov));
+ if (ret) {
+ ret = -EINVAL;
+ dev_err(vop_dev(vdev), "%s %d err %d\n",
+ __func__, __LINE__, ret);
+ break;
+ }
+ len = iov.iov_len;
+ ubuf = iov.iov_base;
+ }
+ /* Issue all the read descriptors first */
+ ret = vop_vringh_copy(vdev, riov, ubuf, len,
+ MIC_VRINGH_READ, copy->vr_idx, &out_len);
+ if (ret) {
+ dev_err(vop_dev(vdev), "%s %d err %d\n",
+ __func__, __LINE__, ret);
+ break;
+ }
+ len -= out_len;
+ ubuf += out_len;
+ copy->out_len += out_len;
+ /* Issue the write descriptors next */
+ ret = vop_vringh_copy(vdev, wiov, ubuf, len,
+ !MIC_VRINGH_READ, copy->vr_idx, &out_len);
+ if (ret) {
+ dev_err(vop_dev(vdev), "%s %d err %d\n",
+ __func__, __LINE__, ret);
+ break;
+ }
+ len -= out_len;
+ ubuf += out_len;
+ copy->out_len += out_len;
+ if (!len) {
+ /* One user space iovec is now completed */
+ iovcnt--;
+ u_iov++;
+ }
+ /* Exit loop if all elements in KIOVs have been processed. */
+ if (riov->i == riov->used && wiov->i == wiov->used)
+ break;
+ }
+ /*
+ * Update the used ring if a descriptor was available and some data was
+ * copied in/out and the user asked for a used ring update.
+ */
+ if (*head != USHRT_MAX && copy->out_len && copy->update_used) {
+ u32 total = 0;
+
+ /* Determine the total data consumed */
+ total += vop_vringh_iov_consumed(riov);
+ total += vop_vringh_iov_consumed(wiov);
+ vringh_complete_kern(vrh, *head, total);
+ *head = USHRT_MAX;
+ if (vringh_need_notify_kern(vrh) > 0)
+ vringh_notify(vrh);
+ vringh_kiov_cleanup(riov);
+ vringh_kiov_cleanup(wiov);
+ /* Update avail idx for user space */
+ vr->info->avail_idx = vrh->last_avail_idx;
+ }
+ return ret;
+}
+
+static inline int vop_verify_copy_args(struct vop_vdev *vdev,
+ struct mic_copy_desc *copy)
+{
+ if (!vdev || copy->vr_idx >= vdev->dd->num_vq)
+ return -EINVAL;
+ return 0;
+}
+
+/* Copy a specified number of virtio descriptors in a chain */
+static int vop_virtio_copy_desc(struct vop_vdev *vdev,
+ struct mic_copy_desc *copy)
+{
+ int err;
+ struct vop_vringh *vvr;
+
+ err = vop_verify_copy_args(vdev, copy);
+ if (err)
+ return err;
+
+ vvr = &vdev->vvr[copy->vr_idx];
+ mutex_lock(&vvr->vr_mutex);
+ if (!vop_vdevup(vdev)) {
+ err = -ENODEV;
+ dev_err(vop_dev(vdev), "%s %d err %d\n",
+ __func__, __LINE__, err);
+ goto err;
+ }
+ err = _vop_virtio_copy(vdev, copy);
+ if (err) {
+ dev_err(vop_dev(vdev), "%s %d err %d\n",
+ __func__, __LINE__, err);
+ }
+err:
+ mutex_unlock(&vvr->vr_mutex);
+ return err;
+}
+
+static int vop_open(struct inode *inode, struct file *f)
+{
+ struct vop_vdev *vdev;
+ struct vop_info *vi = container_of(f->private_data,
+ struct vop_info, miscdev);
+
+ vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
+ if (!vdev)
+ return -ENOMEM;
+ vdev->vi = vi;
+ mutex_init(&vdev->vdev_mutex);
+ f->private_data = vdev;
+ init_completion(&vdev->destroy);
+ complete(&vdev->destroy);
+ return 0;
+}
+
+static int vop_release(struct inode *inode, struct file *f)
+{
+ struct vop_vdev *vdev = f->private_data, *vdev_tmp;
+ struct vop_info *vi = vdev->vi;
+ struct list_head *pos, *tmp;
+ bool found = false;
+
+ mutex_lock(&vdev->vdev_mutex);
+ if (vdev->deleted)
+ goto unlock;
+ mutex_lock(&vi->vop_mutex);
+ list_for_each_safe(pos, tmp, &vi->vdev_list) {
+ vdev_tmp = list_entry(pos, struct vop_vdev, list);
+ if (vdev == vdev_tmp) {
+ vop_virtio_del_device(vdev);
+ list_del(pos);
+ found = true;
+ break;
+ }
+ }
+ mutex_unlock(&vi->vop_mutex);
+unlock:
+ mutex_unlock(&vdev->vdev_mutex);
+ if (!found)
+ wait_for_completion(&vdev->destroy);
+ f->private_data = NULL;
+ kfree(vdev);
+ return 0;
+}
+
+static long vop_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
+{
+ struct vop_vdev *vdev = f->private_data;
+ struct vop_info *vi = vdev->vi;
+ void __user *argp = (void __user *)arg;
+ int ret;
+
+ switch (cmd) {
+ case MIC_VIRTIO_ADD_DEVICE:
+ {
+ struct mic_device_desc dd, *dd_config;
+
+ if (copy_from_user(&dd, argp, sizeof(dd)))
+ return -EFAULT;
+
+ if (mic_aligned_desc_size(&dd) > MIC_MAX_DESC_BLK_SIZE ||
+ dd.num_vq > MIC_MAX_VRINGS)
+ return -EINVAL;
+
+ dd_config = memdup_user(argp, mic_desc_size(&dd));
+ if (IS_ERR(dd_config))
+ return PTR_ERR(dd_config);
+
+ /* Ensure desc has not changed between the two reads */
+ if (memcmp(&dd, dd_config, sizeof(dd))) {
+ ret = -EINVAL;
+ goto free_ret;
+ }
+ mutex_lock(&vdev->vdev_mutex);
+ mutex_lock(&vi->vop_mutex);
+ ret = vop_virtio_add_device(vdev, dd_config);
+ if (ret)
+ goto unlock_ret;
+ list_add_tail(&vdev->list, &vi->vdev_list);
+unlock_ret:
+ mutex_unlock(&vi->vop_mutex);
+ mutex_unlock(&vdev->vdev_mutex);
+free_ret:
+ kfree(dd_config);
+ return ret;
+ }
+ case MIC_VIRTIO_COPY_DESC:
+ {
+ struct mic_copy_desc copy;
+
+ mutex_lock(&vdev->vdev_mutex);
+ ret = vop_vdev_inited(vdev);
+ if (ret)
+ goto _unlock_ret;
+
+ if (copy_from_user(&copy, argp, sizeof(copy))) {
+ ret = -EFAULT;
+ goto _unlock_ret;
+ }
+
+ ret = vop_virtio_copy_desc(vdev, &copy);
+ if (ret < 0)
+ goto _unlock_ret;
+ if (copy_to_user(
+ &((struct mic_copy_desc __user *)argp)->out_len,
+ &copy.out_len, sizeof(copy.out_len)))
+ ret = -EFAULT;
+_unlock_ret:
+ mutex_unlock(&vdev->vdev_mutex);
+ return ret;
+ }
+ case MIC_VIRTIO_CONFIG_CHANGE:
+ {
+ void *buf;
+
+ mutex_lock(&vdev->vdev_mutex);
+ ret = vop_vdev_inited(vdev);
+ if (ret)
+ goto __unlock_ret;
+ buf = memdup_user(argp, vdev->dd->config_len);
+ if (IS_ERR(buf)) {
+ ret = PTR_ERR(buf);
+ goto __unlock_ret;
+ }
+ ret = vop_virtio_config_change(vdev, buf);
+ kfree(buf);
+__unlock_ret:
+ mutex_unlock(&vdev->vdev_mutex);
+ return ret;
+ }
+ default:
+ return -ENOIOCTLCMD;
+ };
+ return 0;
+}
+
+/*
+ * We return EPOLLIN | EPOLLOUT from poll when new buffers are enqueued, and
+ * not when previously enqueued buffers may be available. This means that
+ * in the card->host (TX) path, when userspace is unblocked by poll it
+ * must drain all available descriptors or it can stall.
+ */
+static __poll_t vop_poll(struct file *f, poll_table *wait)
+{
+ struct vop_vdev *vdev = f->private_data;
+ __poll_t mask = 0;
+
+ mutex_lock(&vdev->vdev_mutex);
+ if (vop_vdev_inited(vdev)) {
+ mask = EPOLLERR;
+ goto done;
+ }
+ poll_wait(f, &vdev->waitq, wait);
+ if (vop_vdev_inited(vdev)) {
+ mask = EPOLLERR;
+ } else if (vdev->poll_wake) {
+ vdev->poll_wake = 0;
+ mask = EPOLLIN | EPOLLOUT;
+ }
+done:
+ mutex_unlock(&vdev->vdev_mutex);
+ return mask;
+}
+
+static inline int
+vop_query_offset(struct vop_vdev *vdev, unsigned long offset,
+ unsigned long *size, unsigned long *pa)
+{
+ struct vop_device *vpdev = vdev->vpdev;
+ unsigned long start = MIC_DP_SIZE;
+ int i;
+
+ /*
+ * MMAP interface is as follows:
+ * offset region
+ * 0x0 virtio device_page
+ * 0x1000 first vring
+ * 0x1000 + size of 1st vring second vring
+ * ....
+ */
+ if (!offset) {
+ *pa = virt_to_phys(vpdev->hw_ops->get_dp(vpdev));
+ *size = MIC_DP_SIZE;
+ return 0;
+ }
+
+ for (i = 0; i < vdev->dd->num_vq; i++) {
+ struct vop_vringh *vvr = &vdev->vvr[i];
+
+ if (offset == start) {
+ *pa = virt_to_phys(vvr->vring.va);
+ *size = vvr->vring.len;
+ return 0;
+ }
+ start += vvr->vring.len;
+ }
+ return -1;
+}
+
+/*
+ * Maps the device page and virtio rings to user space for readonly access.
+ */
+static int vop_mmap(struct file *f, struct vm_area_struct *vma)
+{
+ struct vop_vdev *vdev = f->private_data;
+ unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
+ unsigned long pa, size = vma->vm_end - vma->vm_start, size_rem = size;
+ int i, err;
+
+ err = vop_vdev_inited(vdev);
+ if (err)
+ goto ret;
+ if (vma->vm_flags & VM_WRITE) {
+ err = -EACCES;
+ goto ret;
+ }
+ while (size_rem) {
+ i = vop_query_offset(vdev, offset, &size, &pa);
+ if (i < 0) {
+ err = -EINVAL;
+ goto ret;
+ }
+ err = remap_pfn_range(vma, vma->vm_start + offset,
+ pa >> PAGE_SHIFT, size,
+ vma->vm_page_prot);
+ if (err)
+ goto ret;
+ size_rem -= size;
+ offset += size;
+ }
+ret:
+ return err;
+}
+
+static const struct file_operations vop_fops = {
+ .open = vop_open,
+ .release = vop_release,
+ .unlocked_ioctl = vop_ioctl,
+ .poll = vop_poll,
+ .mmap = vop_mmap,
+ .owner = THIS_MODULE,
+};
+
+int vop_host_init(struct vop_info *vi)
+{
+ int rc;
+ struct miscdevice *mdev;
+ struct vop_device *vpdev = vi->vpdev;
+
+ INIT_LIST_HEAD(&vi->vdev_list);
+ vi->dma_ch = vpdev->dma_ch;
+ mdev = &vi->miscdev;
+ mdev->minor = MISC_DYNAMIC_MINOR;
+ snprintf(vi->name, sizeof(vi->name), "vop_virtio%d", vpdev->index);
+ mdev->name = vi->name;
+ mdev->fops = &vop_fops;
+ mdev->parent = &vpdev->dev;
+
+ rc = misc_register(mdev);
+ if (rc)
+ dev_err(&vpdev->dev, "%s failed rc %d\n", __func__, rc);
+ return rc;
+}
+
+void vop_host_uninit(struct vop_info *vi)
+{
+ struct list_head *pos, *tmp;
+ struct vop_vdev *vdev;
+
+ mutex_lock(&vi->vop_mutex);
+ vop_virtio_reset_devices(vi);
+ list_for_each_safe(pos, tmp, &vi->vdev_list) {
+ vdev = list_entry(pos, struct vop_vdev, list);
+ list_del(pos);
+ reinit_completion(&vdev->destroy);
+ mutex_unlock(&vi->vop_mutex);
+ mutex_lock(&vdev->vdev_mutex);
+ vop_virtio_del_device(vdev);
+ vdev->deleted = true;
+ mutex_unlock(&vdev->vdev_mutex);
+ complete(&vdev->destroy);
+ mutex_lock(&vi->vop_mutex);
+ }
+ mutex_unlock(&vi->vop_mutex);
+ misc_deregister(&vi->miscdev);
+}