summaryrefslogtreecommitdiffstats
path: root/samples/vfio-mdev
diff options
context:
space:
mode:
Diffstat (limited to 'samples/vfio-mdev')
-rw-r--r--samples/vfio-mdev/Makefile5
-rw-r--r--samples/vfio-mdev/README.rst100
-rw-r--r--samples/vfio-mdev/mbochs.c1475
-rw-r--r--samples/vfio-mdev/mdpy-defs.h22
-rw-r--r--samples/vfio-mdev/mdpy-fb.c232
-rw-r--r--samples/vfio-mdev/mdpy.c764
-rw-r--r--samples/vfio-mdev/mtty.c1473
7 files changed, 4071 insertions, 0 deletions
diff --git a/samples/vfio-mdev/Makefile b/samples/vfio-mdev/Makefile
new file mode 100644
index 0000000000..10d179c4fd
--- /dev/null
+++ b/samples/vfio-mdev/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0-only
+obj-$(CONFIG_SAMPLE_VFIO_MDEV_MTTY) += mtty.o
+obj-$(CONFIG_SAMPLE_VFIO_MDEV_MDPY) += mdpy.o
+obj-$(CONFIG_SAMPLE_VFIO_MDEV_MDPY_FB) += mdpy-fb.o
+obj-$(CONFIG_SAMPLE_VFIO_MDEV_MBOCHS) += mbochs.o
diff --git a/samples/vfio-mdev/README.rst b/samples/vfio-mdev/README.rst
new file mode 100644
index 0000000000..b52eb37739
--- /dev/null
+++ b/samples/vfio-mdev/README.rst
@@ -0,0 +1,100 @@
+Using the mtty vfio-mdev sample code
+====================================
+
+mtty is a sample vfio-mdev driver that demonstrates how to use the mediated
+device framework.
+
+The sample driver creates an mdev device that simulates a serial port over a PCI
+card.
+
+1. Build and load the mtty.ko module.
+
+ This step creates a dummy device, /sys/devices/virtual/mtty/mtty/
+
+ Files in this device directory in sysfs are similar to the following::
+
+ # tree /sys/devices/virtual/mtty/mtty/
+ /sys/devices/virtual/mtty/mtty/
+ |-- mdev_supported_types
+ | |-- mtty-1
+ | | |-- available_instances
+ | | |-- create
+ | | |-- device_api
+ | | |-- devices
+ | | `-- name
+ | `-- mtty-2
+ | |-- available_instances
+ | |-- create
+ | |-- device_api
+ | |-- devices
+ | `-- name
+ |-- mtty_dev
+ | `-- sample_mtty_dev
+ |-- power
+ | |-- autosuspend_delay_ms
+ | |-- control
+ | |-- runtime_active_time
+ | |-- runtime_status
+ | `-- runtime_suspended_time
+ |-- subsystem -> ../../../../class/mtty
+ `-- uevent
+
+2. Create a mediated device by using the dummy device that you created in the
+ previous step::
+
+ # echo "83b8f4f2-509f-382f-3c1e-e6bfe0fa1001" > \
+ /sys/devices/virtual/mtty/mtty/mdev_supported_types/mtty-2/create
+
+3. Add parameters to qemu-kvm::
+
+ -device vfio-pci,\
+ sysfsdev=/sys/bus/mdev/devices/83b8f4f2-509f-382f-3c1e-e6bfe0fa1001
+
+4. Boot the VM.
+
+ In the Linux guest VM, with no hardware on the host, the device appears
+ as follows::
+
+ # lspci -s 00:05.0 -xxvv
+ 00:05.0 Serial controller: Device 4348:3253 (rev 10) (prog-if 02 [16550])
+ Subsystem: Device 4348:3253
+ Physical Slot: 5
+ Control: I/O+ Mem- BusMaster- SpecCycle- MemWINV- VGASnoop- ParErr-
+ Stepping- SERR- FastB2B- DisINTx-
+ Status: Cap- 66MHz- UDF- FastB2B- ParErr- DEVSEL=medium >TAbort-
+ <TAbort- <MAbort- >SERR- <PERR- INTx-
+ Interrupt: pin A routed to IRQ 10
+ Region 0: I/O ports at c150 [size=8]
+ Region 1: I/O ports at c158 [size=8]
+ Kernel driver in use: serial
+ 00: 48 43 53 32 01 00 00 02 10 02 00 07 00 00 00 00
+ 10: 51 c1 00 00 59 c1 00 00 00 00 00 00 00 00 00 00
+ 20: 00 00 00 00 00 00 00 00 00 00 00 00 48 43 53 32
+ 30: 00 00 00 00 00 00 00 00 00 00 00 00 0a 01 00 00
+
+ In the Linux guest VM, dmesg output for the device is as follows:
+
+ serial 0000:00:05.0: PCI INT A -> Link[LNKA] -> GSI 10 (level, high) -> IRQ 10
+ 0000:00:05.0: ttyS1 at I/O 0xc150 (irq = 10) is a 16550A
+ 0000:00:05.0: ttyS2 at I/O 0xc158 (irq = 10) is a 16550A
+
+
+5. In the Linux guest VM, check the serial ports::
+
+ # setserial -g /dev/ttyS*
+ /dev/ttyS0, UART: 16550A, Port: 0x03f8, IRQ: 4
+ /dev/ttyS1, UART: 16550A, Port: 0xc150, IRQ: 10
+ /dev/ttyS2, UART: 16550A, Port: 0xc158, IRQ: 10
+
+6. Using minicom or any terminal emulation program, open port /dev/ttyS1 or
+ /dev/ttyS2 with hardware flow control disabled.
+
+7. Type data on the minicom terminal or send data to the terminal emulation
+ program and read the data.
+
+ Data is loop backed from hosts mtty driver.
+
+8. Destroy the mediated device that you created::
+
+ # echo 1 > /sys/bus/mdev/devices/83b8f4f2-509f-382f-3c1e-e6bfe0fa1001/remove
+
diff --git a/samples/vfio-mdev/mbochs.c b/samples/vfio-mdev/mbochs.c
new file mode 100644
index 0000000000..3764d1911b
--- /dev/null
+++ b/samples/vfio-mdev/mbochs.c
@@ -0,0 +1,1475 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Mediated virtual PCI display host device driver
+ *
+ * Emulate enough of qemu stdvga to make bochs-drm.ko happy. That is
+ * basically the vram memory bar and the bochs dispi interface vbe
+ * registers in the mmio register bar. Specifically it does *not*
+ * include any legacy vga stuff. Device looks a lot like "qemu -device
+ * secondary-vga".
+ *
+ * (c) Gerd Hoffmann <kraxel@redhat.com>
+ *
+ * based on mtty driver which is:
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ * Author: Neo Jia <cjia@nvidia.com>
+ * Kirti Wankhede <kwankhede@nvidia.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/cdev.h>
+#include <linux/vfio.h>
+#include <linux/iommu.h>
+#include <linux/sysfs.h>
+#include <linux/mdev.h>
+#include <linux/pci.h>
+#include <linux/dma-buf.h>
+#include <linux/highmem.h>
+#include <drm/drm_fourcc.h>
+#include <drm/drm_rect.h>
+#include <drm/drm_modeset_lock.h>
+#include <drm/drm_property.h>
+#include <drm/drm_plane.h>
+
+
+#define VBE_DISPI_INDEX_ID 0x0
+#define VBE_DISPI_INDEX_XRES 0x1
+#define VBE_DISPI_INDEX_YRES 0x2
+#define VBE_DISPI_INDEX_BPP 0x3
+#define VBE_DISPI_INDEX_ENABLE 0x4
+#define VBE_DISPI_INDEX_BANK 0x5
+#define VBE_DISPI_INDEX_VIRT_WIDTH 0x6
+#define VBE_DISPI_INDEX_VIRT_HEIGHT 0x7
+#define VBE_DISPI_INDEX_X_OFFSET 0x8
+#define VBE_DISPI_INDEX_Y_OFFSET 0x9
+#define VBE_DISPI_INDEX_VIDEO_MEMORY_64K 0xa
+#define VBE_DISPI_INDEX_COUNT 0xb
+
+#define VBE_DISPI_ID0 0xB0C0
+#define VBE_DISPI_ID1 0xB0C1
+#define VBE_DISPI_ID2 0xB0C2
+#define VBE_DISPI_ID3 0xB0C3
+#define VBE_DISPI_ID4 0xB0C4
+#define VBE_DISPI_ID5 0xB0C5
+
+#define VBE_DISPI_DISABLED 0x00
+#define VBE_DISPI_ENABLED 0x01
+#define VBE_DISPI_GETCAPS 0x02
+#define VBE_DISPI_8BIT_DAC 0x20
+#define VBE_DISPI_LFB_ENABLED 0x40
+#define VBE_DISPI_NOCLEARMEM 0x80
+
+
+#define MBOCHS_NAME "mbochs"
+#define MBOCHS_CLASS_NAME "mbochs"
+
+#define MBOCHS_EDID_REGION_INDEX VFIO_PCI_NUM_REGIONS
+#define MBOCHS_NUM_REGIONS (MBOCHS_EDID_REGION_INDEX+1)
+
+#define MBOCHS_CONFIG_SPACE_SIZE 0xff
+#define MBOCHS_MMIO_BAR_OFFSET PAGE_SIZE
+#define MBOCHS_MMIO_BAR_SIZE PAGE_SIZE
+#define MBOCHS_EDID_OFFSET (MBOCHS_MMIO_BAR_OFFSET + \
+ MBOCHS_MMIO_BAR_SIZE)
+#define MBOCHS_EDID_SIZE PAGE_SIZE
+#define MBOCHS_MEMORY_BAR_OFFSET (MBOCHS_EDID_OFFSET + \
+ MBOCHS_EDID_SIZE)
+
+#define MBOCHS_EDID_BLOB_OFFSET (MBOCHS_EDID_SIZE/2)
+
+#define STORE_LE16(addr, val) (*(u16 *)addr = val)
+#define STORE_LE32(addr, val) (*(u32 *)addr = val)
+
+
+MODULE_LICENSE("GPL v2");
+
+static int max_mbytes = 256;
+module_param_named(count, max_mbytes, int, 0444);
+MODULE_PARM_DESC(mem, "megabytes available to " MBOCHS_NAME " devices");
+
+
+#define MBOCHS_TYPE_1 "small"
+#define MBOCHS_TYPE_2 "medium"
+#define MBOCHS_TYPE_3 "large"
+
+static struct mbochs_type {
+ struct mdev_type type;
+ u32 mbytes;
+ u32 max_x;
+ u32 max_y;
+} mbochs_types[] = {
+ {
+ .type.sysfs_name = MBOCHS_TYPE_1,
+ .type.pretty_name = MBOCHS_CLASS_NAME "-" MBOCHS_TYPE_1,
+ .mbytes = 4,
+ .max_x = 800,
+ .max_y = 600,
+ }, {
+ .type.sysfs_name = MBOCHS_TYPE_2,
+ .type.pretty_name = MBOCHS_CLASS_NAME "-" MBOCHS_TYPE_2,
+ .mbytes = 16,
+ .max_x = 1920,
+ .max_y = 1440,
+ }, {
+ .type.sysfs_name = MBOCHS_TYPE_3,
+ .type.pretty_name = MBOCHS_CLASS_NAME "-" MBOCHS_TYPE_3,
+ .mbytes = 64,
+ .max_x = 0,
+ .max_y = 0,
+ },
+};
+
+static struct mdev_type *mbochs_mdev_types[] = {
+ &mbochs_types[0].type,
+ &mbochs_types[1].type,
+ &mbochs_types[2].type,
+};
+
+static dev_t mbochs_devt;
+static struct class *mbochs_class;
+static struct cdev mbochs_cdev;
+static struct device mbochs_dev;
+static struct mdev_parent mbochs_parent;
+static atomic_t mbochs_avail_mbytes;
+static const struct vfio_device_ops mbochs_dev_ops;
+
+struct vfio_region_info_ext {
+ struct vfio_region_info base;
+ struct vfio_region_info_cap_type type;
+};
+
+struct mbochs_mode {
+ u32 drm_format;
+ u32 bytepp;
+ u32 width;
+ u32 height;
+ u32 stride;
+ u32 __pad;
+ u64 offset;
+ u64 size;
+};
+
+struct mbochs_dmabuf {
+ struct mbochs_mode mode;
+ u32 id;
+ struct page **pages;
+ pgoff_t pagecount;
+ struct dma_buf *buf;
+ struct mdev_state *mdev_state;
+ struct list_head next;
+ bool unlinked;
+};
+
+/* State of each mdev device */
+struct mdev_state {
+ struct vfio_device vdev;
+ u8 *vconfig;
+ u64 bar_mask[3];
+ u32 memory_bar_mask;
+ struct mutex ops_lock;
+ struct mdev_device *mdev;
+
+ const struct mbochs_type *type;
+ u16 vbe[VBE_DISPI_INDEX_COUNT];
+ u64 memsize;
+ struct page **pages;
+ pgoff_t pagecount;
+ struct vfio_region_gfx_edid edid_regs;
+ u8 edid_blob[0x400];
+
+ struct list_head dmabufs;
+ u32 active_id;
+ u32 next_id;
+};
+
+static const char *vbe_name_list[VBE_DISPI_INDEX_COUNT] = {
+ [VBE_DISPI_INDEX_ID] = "id",
+ [VBE_DISPI_INDEX_XRES] = "xres",
+ [VBE_DISPI_INDEX_YRES] = "yres",
+ [VBE_DISPI_INDEX_BPP] = "bpp",
+ [VBE_DISPI_INDEX_ENABLE] = "enable",
+ [VBE_DISPI_INDEX_BANK] = "bank",
+ [VBE_DISPI_INDEX_VIRT_WIDTH] = "virt-width",
+ [VBE_DISPI_INDEX_VIRT_HEIGHT] = "virt-height",
+ [VBE_DISPI_INDEX_X_OFFSET] = "x-offset",
+ [VBE_DISPI_INDEX_Y_OFFSET] = "y-offset",
+ [VBE_DISPI_INDEX_VIDEO_MEMORY_64K] = "video-mem",
+};
+
+static const char *vbe_name(u32 index)
+{
+ if (index < ARRAY_SIZE(vbe_name_list))
+ return vbe_name_list[index];
+ return "(invalid)";
+}
+
+static struct page *__mbochs_get_page(struct mdev_state *mdev_state,
+ pgoff_t pgoff);
+static struct page *mbochs_get_page(struct mdev_state *mdev_state,
+ pgoff_t pgoff);
+
+static void mbochs_create_config_space(struct mdev_state *mdev_state)
+{
+ STORE_LE16((u16 *) &mdev_state->vconfig[PCI_VENDOR_ID],
+ 0x1234);
+ STORE_LE16((u16 *) &mdev_state->vconfig[PCI_DEVICE_ID],
+ 0x1111);
+ STORE_LE16((u16 *) &mdev_state->vconfig[PCI_SUBSYSTEM_VENDOR_ID],
+ PCI_SUBVENDOR_ID_REDHAT_QUMRANET);
+ STORE_LE16((u16 *) &mdev_state->vconfig[PCI_SUBSYSTEM_ID],
+ PCI_SUBDEVICE_ID_QEMU);
+
+ STORE_LE16((u16 *) &mdev_state->vconfig[PCI_COMMAND],
+ PCI_COMMAND_IO | PCI_COMMAND_MEMORY);
+ STORE_LE16((u16 *) &mdev_state->vconfig[PCI_CLASS_DEVICE],
+ PCI_CLASS_DISPLAY_OTHER);
+ mdev_state->vconfig[PCI_CLASS_REVISION] = 0x01;
+
+ STORE_LE32((u32 *) &mdev_state->vconfig[PCI_BASE_ADDRESS_0],
+ PCI_BASE_ADDRESS_SPACE_MEMORY |
+ PCI_BASE_ADDRESS_MEM_TYPE_32 |
+ PCI_BASE_ADDRESS_MEM_PREFETCH);
+ mdev_state->bar_mask[0] = ~(mdev_state->memsize) + 1;
+
+ STORE_LE32((u32 *) &mdev_state->vconfig[PCI_BASE_ADDRESS_2],
+ PCI_BASE_ADDRESS_SPACE_MEMORY |
+ PCI_BASE_ADDRESS_MEM_TYPE_32);
+ mdev_state->bar_mask[2] = ~(MBOCHS_MMIO_BAR_SIZE) + 1;
+}
+
+static int mbochs_check_framebuffer(struct mdev_state *mdev_state,
+ struct mbochs_mode *mode)
+{
+ struct device *dev = mdev_dev(mdev_state->mdev);
+ u16 *vbe = mdev_state->vbe;
+ u32 virt_width;
+
+ WARN_ON(!mutex_is_locked(&mdev_state->ops_lock));
+
+ if (!(vbe[VBE_DISPI_INDEX_ENABLE] & VBE_DISPI_ENABLED))
+ goto nofb;
+
+ memset(mode, 0, sizeof(*mode));
+ switch (vbe[VBE_DISPI_INDEX_BPP]) {
+ case 32:
+ mode->drm_format = DRM_FORMAT_XRGB8888;
+ mode->bytepp = 4;
+ break;
+ default:
+ dev_info_ratelimited(dev, "%s: bpp %d not supported\n",
+ __func__, vbe[VBE_DISPI_INDEX_BPP]);
+ goto nofb;
+ }
+
+ mode->width = vbe[VBE_DISPI_INDEX_XRES];
+ mode->height = vbe[VBE_DISPI_INDEX_YRES];
+ virt_width = vbe[VBE_DISPI_INDEX_VIRT_WIDTH];
+ if (virt_width < mode->width)
+ virt_width = mode->width;
+ mode->stride = virt_width * mode->bytepp;
+ mode->size = (u64)mode->stride * mode->height;
+ mode->offset = ((u64)vbe[VBE_DISPI_INDEX_X_OFFSET] * mode->bytepp +
+ (u64)vbe[VBE_DISPI_INDEX_Y_OFFSET] * mode->stride);
+
+ if (mode->width < 64 || mode->height < 64) {
+ dev_info_ratelimited(dev, "%s: invalid resolution %dx%d\n",
+ __func__, mode->width, mode->height);
+ goto nofb;
+ }
+ if (mode->offset + mode->size > mdev_state->memsize) {
+ dev_info_ratelimited(dev, "%s: framebuffer memory overflow\n",
+ __func__);
+ goto nofb;
+ }
+
+ return 0;
+
+nofb:
+ memset(mode, 0, sizeof(*mode));
+ return -EINVAL;
+}
+
+static bool mbochs_modes_equal(struct mbochs_mode *mode1,
+ struct mbochs_mode *mode2)
+{
+ return memcmp(mode1, mode2, sizeof(struct mbochs_mode)) == 0;
+}
+
+static void handle_pci_cfg_write(struct mdev_state *mdev_state, u16 offset,
+ char *buf, u32 count)
+{
+ struct device *dev = mdev_dev(mdev_state->mdev);
+ int index = (offset - PCI_BASE_ADDRESS_0) / 0x04;
+ u32 cfg_addr;
+
+ switch (offset) {
+ case PCI_BASE_ADDRESS_0:
+ case PCI_BASE_ADDRESS_2:
+ cfg_addr = *(u32 *)buf;
+
+ if (cfg_addr == 0xffffffff) {
+ cfg_addr = (cfg_addr & mdev_state->bar_mask[index]);
+ } else {
+ cfg_addr &= PCI_BASE_ADDRESS_MEM_MASK;
+ if (cfg_addr)
+ dev_info(dev, "BAR #%d @ 0x%x\n",
+ index, cfg_addr);
+ }
+
+ cfg_addr |= (mdev_state->vconfig[offset] &
+ ~PCI_BASE_ADDRESS_MEM_MASK);
+ STORE_LE32(&mdev_state->vconfig[offset], cfg_addr);
+ break;
+ }
+}
+
+static void handle_mmio_write(struct mdev_state *mdev_state, u16 offset,
+ char *buf, u32 count)
+{
+ struct device *dev = mdev_dev(mdev_state->mdev);
+ int index;
+ u16 reg16;
+
+ switch (offset) {
+ case 0x400 ... 0x41f: /* vga ioports remapped */
+ goto unhandled;
+ case 0x500 ... 0x515: /* bochs dispi interface */
+ if (count != 2)
+ goto unhandled;
+ index = (offset - 0x500) / 2;
+ reg16 = *(u16 *)buf;
+ if (index < ARRAY_SIZE(mdev_state->vbe))
+ mdev_state->vbe[index] = reg16;
+ dev_dbg(dev, "%s: vbe write %d = %d (%s)\n",
+ __func__, index, reg16, vbe_name(index));
+ break;
+ case 0x600 ... 0x607: /* qemu extended regs */
+ goto unhandled;
+ default:
+unhandled:
+ dev_dbg(dev, "%s: @0x%03x, count %d (unhandled)\n",
+ __func__, offset, count);
+ break;
+ }
+}
+
+static void handle_mmio_read(struct mdev_state *mdev_state, u16 offset,
+ char *buf, u32 count)
+{
+ struct device *dev = mdev_dev(mdev_state->mdev);
+ struct vfio_region_gfx_edid *edid;
+ u16 reg16 = 0;
+ int index;
+
+ switch (offset) {
+ case 0x000 ... 0x3ff: /* edid block */
+ edid = &mdev_state->edid_regs;
+ if (edid->link_state != VFIO_DEVICE_GFX_LINK_STATE_UP ||
+ offset >= edid->edid_size) {
+ memset(buf, 0, count);
+ break;
+ }
+ memcpy(buf, mdev_state->edid_blob + offset, count);
+ break;
+ case 0x500 ... 0x515: /* bochs dispi interface */
+ if (count != 2)
+ goto unhandled;
+ index = (offset - 0x500) / 2;
+ if (index < ARRAY_SIZE(mdev_state->vbe))
+ reg16 = mdev_state->vbe[index];
+ dev_dbg(dev, "%s: vbe read %d = %d (%s)\n",
+ __func__, index, reg16, vbe_name(index));
+ *(u16 *)buf = reg16;
+ break;
+ default:
+unhandled:
+ dev_dbg(dev, "%s: @0x%03x, count %d (unhandled)\n",
+ __func__, offset, count);
+ memset(buf, 0, count);
+ break;
+ }
+}
+
+static void handle_edid_regs(struct mdev_state *mdev_state, u16 offset,
+ char *buf, u32 count, bool is_write)
+{
+ char *regs = (void *)&mdev_state->edid_regs;
+
+ if (offset + count > sizeof(mdev_state->edid_regs))
+ return;
+ if (count != 4)
+ return;
+ if (offset % 4)
+ return;
+
+ if (is_write) {
+ switch (offset) {
+ case offsetof(struct vfio_region_gfx_edid, link_state):
+ case offsetof(struct vfio_region_gfx_edid, edid_size):
+ memcpy(regs + offset, buf, count);
+ break;
+ default:
+ /* read-only regs */
+ break;
+ }
+ } else {
+ memcpy(buf, regs + offset, count);
+ }
+}
+
+static void handle_edid_blob(struct mdev_state *mdev_state, u16 offset,
+ char *buf, u32 count, bool is_write)
+{
+ if (offset + count > mdev_state->edid_regs.edid_max_size)
+ return;
+ if (is_write)
+ memcpy(mdev_state->edid_blob + offset, buf, count);
+ else
+ memcpy(buf, mdev_state->edid_blob + offset, count);
+}
+
+static ssize_t mdev_access(struct mdev_state *mdev_state, char *buf,
+ size_t count, loff_t pos, bool is_write)
+{
+ struct page *pg;
+ loff_t poff;
+ char *map;
+ int ret = 0;
+
+ mutex_lock(&mdev_state->ops_lock);
+
+ if (pos < MBOCHS_CONFIG_SPACE_SIZE) {
+ if (is_write)
+ handle_pci_cfg_write(mdev_state, pos, buf, count);
+ else
+ memcpy(buf, (mdev_state->vconfig + pos), count);
+
+ } else if (pos >= MBOCHS_MMIO_BAR_OFFSET &&
+ pos + count <= (MBOCHS_MMIO_BAR_OFFSET +
+ MBOCHS_MMIO_BAR_SIZE)) {
+ pos -= MBOCHS_MMIO_BAR_OFFSET;
+ if (is_write)
+ handle_mmio_write(mdev_state, pos, buf, count);
+ else
+ handle_mmio_read(mdev_state, pos, buf, count);
+
+ } else if (pos >= MBOCHS_EDID_OFFSET &&
+ pos + count <= (MBOCHS_EDID_OFFSET +
+ MBOCHS_EDID_SIZE)) {
+ pos -= MBOCHS_EDID_OFFSET;
+ if (pos < MBOCHS_EDID_BLOB_OFFSET) {
+ handle_edid_regs(mdev_state, pos, buf, count, is_write);
+ } else {
+ pos -= MBOCHS_EDID_BLOB_OFFSET;
+ handle_edid_blob(mdev_state, pos, buf, count, is_write);
+ }
+
+ } else if (pos >= MBOCHS_MEMORY_BAR_OFFSET &&
+ pos + count <=
+ MBOCHS_MEMORY_BAR_OFFSET + mdev_state->memsize) {
+ pos -= MBOCHS_MMIO_BAR_OFFSET;
+ poff = pos & ~PAGE_MASK;
+ pg = __mbochs_get_page(mdev_state, pos >> PAGE_SHIFT);
+ map = kmap(pg);
+ if (is_write)
+ memcpy(map + poff, buf, count);
+ else
+ memcpy(buf, map + poff, count);
+ kunmap(pg);
+ put_page(pg);
+
+ } else {
+ dev_dbg(mdev_state->vdev.dev, "%s: %s @0x%llx (unhandled)\n",
+ __func__, is_write ? "WR" : "RD", pos);
+ ret = -1;
+ goto accessfailed;
+ }
+
+ ret = count;
+
+
+accessfailed:
+ mutex_unlock(&mdev_state->ops_lock);
+
+ return ret;
+}
+
+static int mbochs_reset(struct mdev_state *mdev_state)
+{
+ u32 size64k = mdev_state->memsize / (64 * 1024);
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(mdev_state->vbe); i++)
+ mdev_state->vbe[i] = 0;
+ mdev_state->vbe[VBE_DISPI_INDEX_ID] = VBE_DISPI_ID5;
+ mdev_state->vbe[VBE_DISPI_INDEX_VIDEO_MEMORY_64K] = size64k;
+ return 0;
+}
+
+static int mbochs_init_dev(struct vfio_device *vdev)
+{
+ struct mdev_state *mdev_state =
+ container_of(vdev, struct mdev_state, vdev);
+ struct mdev_device *mdev = to_mdev_device(vdev->dev);
+ struct mbochs_type *type =
+ container_of(mdev->type, struct mbochs_type, type);
+ int avail_mbytes = atomic_read(&mbochs_avail_mbytes);
+ int ret = -ENOMEM;
+
+ do {
+ if (avail_mbytes < type->mbytes)
+ return -ENOSPC;
+ } while (!atomic_try_cmpxchg(&mbochs_avail_mbytes, &avail_mbytes,
+ avail_mbytes - type->mbytes));
+
+ mdev_state->vconfig = kzalloc(MBOCHS_CONFIG_SPACE_SIZE, GFP_KERNEL);
+ if (!mdev_state->vconfig)
+ goto err_avail;
+
+ mdev_state->memsize = type->mbytes * 1024 * 1024;
+ mdev_state->pagecount = mdev_state->memsize >> PAGE_SHIFT;
+ mdev_state->pages = kcalloc(mdev_state->pagecount,
+ sizeof(struct page *),
+ GFP_KERNEL);
+ if (!mdev_state->pages)
+ goto err_vconfig;
+
+ mutex_init(&mdev_state->ops_lock);
+ mdev_state->mdev = mdev;
+ INIT_LIST_HEAD(&mdev_state->dmabufs);
+ mdev_state->next_id = 1;
+
+ mdev_state->type = type;
+ mdev_state->edid_regs.max_xres = type->max_x;
+ mdev_state->edid_regs.max_yres = type->max_y;
+ mdev_state->edid_regs.edid_offset = MBOCHS_EDID_BLOB_OFFSET;
+ mdev_state->edid_regs.edid_max_size = sizeof(mdev_state->edid_blob);
+ mbochs_create_config_space(mdev_state);
+ mbochs_reset(mdev_state);
+
+ dev_info(vdev->dev, "%s: %s, %d MB, %ld pages\n", __func__,
+ type->type.pretty_name, type->mbytes, mdev_state->pagecount);
+ return 0;
+
+err_vconfig:
+ kfree(mdev_state->vconfig);
+err_avail:
+ atomic_add(type->mbytes, &mbochs_avail_mbytes);
+ return ret;
+}
+
+static int mbochs_probe(struct mdev_device *mdev)
+{
+ struct mdev_state *mdev_state;
+ int ret = -ENOMEM;
+
+ mdev_state = vfio_alloc_device(mdev_state, vdev, &mdev->dev,
+ &mbochs_dev_ops);
+ if (IS_ERR(mdev_state))
+ return PTR_ERR(mdev_state);
+
+ ret = vfio_register_emulated_iommu_dev(&mdev_state->vdev);
+ if (ret)
+ goto err_put_vdev;
+ dev_set_drvdata(&mdev->dev, mdev_state);
+ return 0;
+
+err_put_vdev:
+ vfio_put_device(&mdev_state->vdev);
+ return ret;
+}
+
+static void mbochs_release_dev(struct vfio_device *vdev)
+{
+ struct mdev_state *mdev_state =
+ container_of(vdev, struct mdev_state, vdev);
+
+ atomic_add(mdev_state->type->mbytes, &mbochs_avail_mbytes);
+ kfree(mdev_state->pages);
+ kfree(mdev_state->vconfig);
+}
+
+static void mbochs_remove(struct mdev_device *mdev)
+{
+ struct mdev_state *mdev_state = dev_get_drvdata(&mdev->dev);
+
+ vfio_unregister_group_dev(&mdev_state->vdev);
+ vfio_put_device(&mdev_state->vdev);
+}
+
+static ssize_t mbochs_read(struct vfio_device *vdev, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct mdev_state *mdev_state =
+ container_of(vdev, struct mdev_state, vdev);
+ unsigned int done = 0;
+ int ret;
+
+ while (count) {
+ size_t filled;
+
+ if (count >= 4 && !(*ppos % 4)) {
+ u32 val;
+
+ ret = mdev_access(mdev_state, (char *)&val, sizeof(val),
+ *ppos, false);
+ if (ret <= 0)
+ goto read_err;
+
+ if (copy_to_user(buf, &val, sizeof(val)))
+ goto read_err;
+
+ filled = 4;
+ } else if (count >= 2 && !(*ppos % 2)) {
+ u16 val;
+
+ ret = mdev_access(mdev_state, (char *)&val, sizeof(val),
+ *ppos, false);
+ if (ret <= 0)
+ goto read_err;
+
+ if (copy_to_user(buf, &val, sizeof(val)))
+ goto read_err;
+
+ filled = 2;
+ } else {
+ u8 val;
+
+ ret = mdev_access(mdev_state, (char *)&val, sizeof(val),
+ *ppos, false);
+ if (ret <= 0)
+ goto read_err;
+
+ if (copy_to_user(buf, &val, sizeof(val)))
+ goto read_err;
+
+ filled = 1;
+ }
+
+ count -= filled;
+ done += filled;
+ *ppos += filled;
+ buf += filled;
+ }
+
+ return done;
+
+read_err:
+ return -EFAULT;
+}
+
+static ssize_t mbochs_write(struct vfio_device *vdev, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct mdev_state *mdev_state =
+ container_of(vdev, struct mdev_state, vdev);
+ unsigned int done = 0;
+ int ret;
+
+ while (count) {
+ size_t filled;
+
+ if (count >= 4 && !(*ppos % 4)) {
+ u32 val;
+
+ if (copy_from_user(&val, buf, sizeof(val)))
+ goto write_err;
+
+ ret = mdev_access(mdev_state, (char *)&val, sizeof(val),
+ *ppos, true);
+ if (ret <= 0)
+ goto write_err;
+
+ filled = 4;
+ } else if (count >= 2 && !(*ppos % 2)) {
+ u16 val;
+
+ if (copy_from_user(&val, buf, sizeof(val)))
+ goto write_err;
+
+ ret = mdev_access(mdev_state, (char *)&val, sizeof(val),
+ *ppos, true);
+ if (ret <= 0)
+ goto write_err;
+
+ filled = 2;
+ } else {
+ u8 val;
+
+ if (copy_from_user(&val, buf, sizeof(val)))
+ goto write_err;
+
+ ret = mdev_access(mdev_state, (char *)&val, sizeof(val),
+ *ppos, true);
+ if (ret <= 0)
+ goto write_err;
+
+ filled = 1;
+ }
+ count -= filled;
+ done += filled;
+ *ppos += filled;
+ buf += filled;
+ }
+
+ return done;
+write_err:
+ return -EFAULT;
+}
+
+static struct page *__mbochs_get_page(struct mdev_state *mdev_state,
+ pgoff_t pgoff)
+{
+ WARN_ON(!mutex_is_locked(&mdev_state->ops_lock));
+
+ if (!mdev_state->pages[pgoff]) {
+ mdev_state->pages[pgoff] =
+ alloc_pages(GFP_HIGHUSER | __GFP_ZERO, 0);
+ if (!mdev_state->pages[pgoff])
+ return NULL;
+ }
+
+ get_page(mdev_state->pages[pgoff]);
+ return mdev_state->pages[pgoff];
+}
+
+static struct page *mbochs_get_page(struct mdev_state *mdev_state,
+ pgoff_t pgoff)
+{
+ struct page *page;
+
+ if (WARN_ON(pgoff >= mdev_state->pagecount))
+ return NULL;
+
+ mutex_lock(&mdev_state->ops_lock);
+ page = __mbochs_get_page(mdev_state, pgoff);
+ mutex_unlock(&mdev_state->ops_lock);
+
+ return page;
+}
+
+static void mbochs_put_pages(struct mdev_state *mdev_state)
+{
+ struct device *dev = mdev_dev(mdev_state->mdev);
+ int i, count = 0;
+
+ WARN_ON(!mutex_is_locked(&mdev_state->ops_lock));
+
+ for (i = 0; i < mdev_state->pagecount; i++) {
+ if (!mdev_state->pages[i])
+ continue;
+ put_page(mdev_state->pages[i]);
+ mdev_state->pages[i] = NULL;
+ count++;
+ }
+ dev_dbg(dev, "%s: %d pages released\n", __func__, count);
+}
+
+static vm_fault_t mbochs_region_vm_fault(struct vm_fault *vmf)
+{
+ struct vm_area_struct *vma = vmf->vma;
+ struct mdev_state *mdev_state = vma->vm_private_data;
+ pgoff_t page_offset = (vmf->address - vma->vm_start) >> PAGE_SHIFT;
+
+ if (page_offset >= mdev_state->pagecount)
+ return VM_FAULT_SIGBUS;
+
+ vmf->page = mbochs_get_page(mdev_state, page_offset);
+ if (!vmf->page)
+ return VM_FAULT_SIGBUS;
+
+ return 0;
+}
+
+static const struct vm_operations_struct mbochs_region_vm_ops = {
+ .fault = mbochs_region_vm_fault,
+};
+
+static int mbochs_mmap(struct vfio_device *vdev, struct vm_area_struct *vma)
+{
+ struct mdev_state *mdev_state =
+ container_of(vdev, struct mdev_state, vdev);
+
+ if (vma->vm_pgoff != MBOCHS_MEMORY_BAR_OFFSET >> PAGE_SHIFT)
+ return -EINVAL;
+ if (vma->vm_end < vma->vm_start)
+ return -EINVAL;
+ if (vma->vm_end - vma->vm_start > mdev_state->memsize)
+ return -EINVAL;
+ if ((vma->vm_flags & VM_SHARED) == 0)
+ return -EINVAL;
+
+ vma->vm_ops = &mbochs_region_vm_ops;
+ vma->vm_private_data = mdev_state;
+ return 0;
+}
+
+static vm_fault_t mbochs_dmabuf_vm_fault(struct vm_fault *vmf)
+{
+ struct vm_area_struct *vma = vmf->vma;
+ struct mbochs_dmabuf *dmabuf = vma->vm_private_data;
+
+ if (WARN_ON(vmf->pgoff >= dmabuf->pagecount))
+ return VM_FAULT_SIGBUS;
+
+ vmf->page = dmabuf->pages[vmf->pgoff];
+ get_page(vmf->page);
+ return 0;
+}
+
+static const struct vm_operations_struct mbochs_dmabuf_vm_ops = {
+ .fault = mbochs_dmabuf_vm_fault,
+};
+
+static int mbochs_mmap_dmabuf(struct dma_buf *buf, struct vm_area_struct *vma)
+{
+ struct mbochs_dmabuf *dmabuf = buf->priv;
+ struct device *dev = mdev_dev(dmabuf->mdev_state->mdev);
+
+ dev_dbg(dev, "%s: %d\n", __func__, dmabuf->id);
+
+ if ((vma->vm_flags & VM_SHARED) == 0)
+ return -EINVAL;
+
+ vma->vm_ops = &mbochs_dmabuf_vm_ops;
+ vma->vm_private_data = dmabuf;
+ return 0;
+}
+
+static void mbochs_print_dmabuf(struct mbochs_dmabuf *dmabuf,
+ const char *prefix)
+{
+ struct device *dev = mdev_dev(dmabuf->mdev_state->mdev);
+ u32 fourcc = dmabuf->mode.drm_format;
+
+ dev_dbg(dev, "%s/%d: %c%c%c%c, %dx%d, stride %d, off 0x%llx, size 0x%llx, pages %ld\n",
+ prefix, dmabuf->id,
+ fourcc ? ((fourcc >> 0) & 0xff) : '-',
+ fourcc ? ((fourcc >> 8) & 0xff) : '-',
+ fourcc ? ((fourcc >> 16) & 0xff) : '-',
+ fourcc ? ((fourcc >> 24) & 0xff) : '-',
+ dmabuf->mode.width, dmabuf->mode.height, dmabuf->mode.stride,
+ dmabuf->mode.offset, dmabuf->mode.size, dmabuf->pagecount);
+}
+
+static struct sg_table *mbochs_map_dmabuf(struct dma_buf_attachment *at,
+ enum dma_data_direction direction)
+{
+ struct mbochs_dmabuf *dmabuf = at->dmabuf->priv;
+ struct device *dev = mdev_dev(dmabuf->mdev_state->mdev);
+ struct sg_table *sg;
+
+ dev_dbg(dev, "%s: %d\n", __func__, dmabuf->id);
+
+ sg = kzalloc(sizeof(*sg), GFP_KERNEL);
+ if (!sg)
+ goto err1;
+ if (sg_alloc_table_from_pages(sg, dmabuf->pages, dmabuf->pagecount,
+ 0, dmabuf->mode.size, GFP_KERNEL) < 0)
+ goto err2;
+ if (dma_map_sgtable(at->dev, sg, direction, 0))
+ goto err3;
+
+ return sg;
+
+err3:
+ sg_free_table(sg);
+err2:
+ kfree(sg);
+err1:
+ return ERR_PTR(-ENOMEM);
+}
+
+static void mbochs_unmap_dmabuf(struct dma_buf_attachment *at,
+ struct sg_table *sg,
+ enum dma_data_direction direction)
+{
+ struct mbochs_dmabuf *dmabuf = at->dmabuf->priv;
+ struct device *dev = mdev_dev(dmabuf->mdev_state->mdev);
+
+ dev_dbg(dev, "%s: %d\n", __func__, dmabuf->id);
+
+ dma_unmap_sgtable(at->dev, sg, direction, 0);
+ sg_free_table(sg);
+ kfree(sg);
+}
+
+static void mbochs_release_dmabuf(struct dma_buf *buf)
+{
+ struct mbochs_dmabuf *dmabuf = buf->priv;
+ struct mdev_state *mdev_state = dmabuf->mdev_state;
+ struct device *dev = mdev_dev(mdev_state->mdev);
+ pgoff_t pg;
+
+ dev_dbg(dev, "%s: %d\n", __func__, dmabuf->id);
+
+ for (pg = 0; pg < dmabuf->pagecount; pg++)
+ put_page(dmabuf->pages[pg]);
+
+ mutex_lock(&mdev_state->ops_lock);
+ dmabuf->buf = NULL;
+ if (dmabuf->unlinked)
+ kfree(dmabuf);
+ mutex_unlock(&mdev_state->ops_lock);
+}
+
+static struct dma_buf_ops mbochs_dmabuf_ops = {
+ .map_dma_buf = mbochs_map_dmabuf,
+ .unmap_dma_buf = mbochs_unmap_dmabuf,
+ .release = mbochs_release_dmabuf,
+ .mmap = mbochs_mmap_dmabuf,
+};
+
+static struct mbochs_dmabuf *mbochs_dmabuf_alloc(struct mdev_state *mdev_state,
+ struct mbochs_mode *mode)
+{
+ struct mbochs_dmabuf *dmabuf;
+ pgoff_t page_offset, pg;
+
+ WARN_ON(!mutex_is_locked(&mdev_state->ops_lock));
+
+ dmabuf = kzalloc(sizeof(struct mbochs_dmabuf), GFP_KERNEL);
+ if (!dmabuf)
+ return NULL;
+
+ dmabuf->mode = *mode;
+ dmabuf->id = mdev_state->next_id++;
+ dmabuf->pagecount = DIV_ROUND_UP(mode->size, PAGE_SIZE);
+ dmabuf->pages = kcalloc(dmabuf->pagecount, sizeof(struct page *),
+ GFP_KERNEL);
+ if (!dmabuf->pages)
+ goto err_free_dmabuf;
+
+ page_offset = dmabuf->mode.offset >> PAGE_SHIFT;
+ for (pg = 0; pg < dmabuf->pagecount; pg++) {
+ dmabuf->pages[pg] = __mbochs_get_page(mdev_state,
+ page_offset + pg);
+ if (!dmabuf->pages[pg])
+ goto err_free_pages;
+ }
+
+ dmabuf->mdev_state = mdev_state;
+ list_add(&dmabuf->next, &mdev_state->dmabufs);
+
+ mbochs_print_dmabuf(dmabuf, __func__);
+ return dmabuf;
+
+err_free_pages:
+ while (pg > 0)
+ put_page(dmabuf->pages[--pg]);
+ kfree(dmabuf->pages);
+err_free_dmabuf:
+ kfree(dmabuf);
+ return NULL;
+}
+
+static struct mbochs_dmabuf *
+mbochs_dmabuf_find_by_mode(struct mdev_state *mdev_state,
+ struct mbochs_mode *mode)
+{
+ struct mbochs_dmabuf *dmabuf;
+
+ WARN_ON(!mutex_is_locked(&mdev_state->ops_lock));
+
+ list_for_each_entry(dmabuf, &mdev_state->dmabufs, next)
+ if (mbochs_modes_equal(&dmabuf->mode, mode))
+ return dmabuf;
+
+ return NULL;
+}
+
+static struct mbochs_dmabuf *
+mbochs_dmabuf_find_by_id(struct mdev_state *mdev_state, u32 id)
+{
+ struct mbochs_dmabuf *dmabuf;
+
+ WARN_ON(!mutex_is_locked(&mdev_state->ops_lock));
+
+ list_for_each_entry(dmabuf, &mdev_state->dmabufs, next)
+ if (dmabuf->id == id)
+ return dmabuf;
+
+ return NULL;
+}
+
+static int mbochs_dmabuf_export(struct mbochs_dmabuf *dmabuf)
+{
+ struct mdev_state *mdev_state = dmabuf->mdev_state;
+ struct device *dev = mdev_state->vdev.dev;
+ DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
+ struct dma_buf *buf;
+
+ WARN_ON(!mutex_is_locked(&mdev_state->ops_lock));
+
+ if (!IS_ALIGNED(dmabuf->mode.offset, PAGE_SIZE)) {
+ dev_info_ratelimited(dev, "%s: framebuffer not page-aligned\n",
+ __func__);
+ return -EINVAL;
+ }
+
+ exp_info.ops = &mbochs_dmabuf_ops;
+ exp_info.size = dmabuf->mode.size;
+ exp_info.priv = dmabuf;
+
+ buf = dma_buf_export(&exp_info);
+ if (IS_ERR(buf)) {
+ dev_info_ratelimited(dev, "%s: dma_buf_export failed: %ld\n",
+ __func__, PTR_ERR(buf));
+ return PTR_ERR(buf);
+ }
+
+ dmabuf->buf = buf;
+ dev_dbg(dev, "%s: %d\n", __func__, dmabuf->id);
+ return 0;
+}
+
+static int mbochs_get_region_info(struct mdev_state *mdev_state,
+ struct vfio_region_info_ext *ext)
+{
+ struct vfio_region_info *region_info = &ext->base;
+
+ if (region_info->index >= MBOCHS_NUM_REGIONS)
+ return -EINVAL;
+
+ switch (region_info->index) {
+ case VFIO_PCI_CONFIG_REGION_INDEX:
+ region_info->offset = 0;
+ region_info->size = MBOCHS_CONFIG_SPACE_SIZE;
+ region_info->flags = (VFIO_REGION_INFO_FLAG_READ |
+ VFIO_REGION_INFO_FLAG_WRITE);
+ break;
+ case VFIO_PCI_BAR0_REGION_INDEX:
+ region_info->offset = MBOCHS_MEMORY_BAR_OFFSET;
+ region_info->size = mdev_state->memsize;
+ region_info->flags = (VFIO_REGION_INFO_FLAG_READ |
+ VFIO_REGION_INFO_FLAG_WRITE |
+ VFIO_REGION_INFO_FLAG_MMAP);
+ break;
+ case VFIO_PCI_BAR2_REGION_INDEX:
+ region_info->offset = MBOCHS_MMIO_BAR_OFFSET;
+ region_info->size = MBOCHS_MMIO_BAR_SIZE;
+ region_info->flags = (VFIO_REGION_INFO_FLAG_READ |
+ VFIO_REGION_INFO_FLAG_WRITE);
+ break;
+ case MBOCHS_EDID_REGION_INDEX:
+ ext->base.argsz = sizeof(*ext);
+ ext->base.offset = MBOCHS_EDID_OFFSET;
+ ext->base.size = MBOCHS_EDID_SIZE;
+ ext->base.flags = (VFIO_REGION_INFO_FLAG_READ |
+ VFIO_REGION_INFO_FLAG_WRITE |
+ VFIO_REGION_INFO_FLAG_CAPS);
+ ext->base.cap_offset = offsetof(typeof(*ext), type);
+ ext->type.header.id = VFIO_REGION_INFO_CAP_TYPE;
+ ext->type.header.version = 1;
+ ext->type.header.next = 0;
+ ext->type.type = VFIO_REGION_TYPE_GFX;
+ ext->type.subtype = VFIO_REGION_SUBTYPE_GFX_EDID;
+ break;
+ default:
+ region_info->size = 0;
+ region_info->offset = 0;
+ region_info->flags = 0;
+ }
+
+ return 0;
+}
+
+static int mbochs_get_irq_info(struct vfio_irq_info *irq_info)
+{
+ irq_info->count = 0;
+ return 0;
+}
+
+static int mbochs_get_device_info(struct vfio_device_info *dev_info)
+{
+ dev_info->flags = VFIO_DEVICE_FLAGS_PCI;
+ dev_info->num_regions = MBOCHS_NUM_REGIONS;
+ dev_info->num_irqs = VFIO_PCI_NUM_IRQS;
+ return 0;
+}
+
+static int mbochs_query_gfx_plane(struct mdev_state *mdev_state,
+ struct vfio_device_gfx_plane_info *plane)
+{
+ struct mbochs_dmabuf *dmabuf;
+ struct mbochs_mode mode;
+ int ret;
+
+ if (plane->flags & VFIO_GFX_PLANE_TYPE_PROBE) {
+ if (plane->flags == (VFIO_GFX_PLANE_TYPE_PROBE |
+ VFIO_GFX_PLANE_TYPE_DMABUF))
+ return 0;
+ return -EINVAL;
+ }
+
+ if (plane->flags != VFIO_GFX_PLANE_TYPE_DMABUF)
+ return -EINVAL;
+
+ plane->drm_format_mod = 0;
+ plane->x_pos = 0;
+ plane->y_pos = 0;
+ plane->x_hot = 0;
+ plane->y_hot = 0;
+
+ mutex_lock(&mdev_state->ops_lock);
+
+ ret = -EINVAL;
+ if (plane->drm_plane_type == DRM_PLANE_TYPE_PRIMARY)
+ ret = mbochs_check_framebuffer(mdev_state, &mode);
+ if (ret < 0) {
+ plane->drm_format = 0;
+ plane->width = 0;
+ plane->height = 0;
+ plane->stride = 0;
+ plane->size = 0;
+ plane->dmabuf_id = 0;
+ goto done;
+ }
+
+ dmabuf = mbochs_dmabuf_find_by_mode(mdev_state, &mode);
+ if (!dmabuf)
+ mbochs_dmabuf_alloc(mdev_state, &mode);
+ if (!dmabuf) {
+ mutex_unlock(&mdev_state->ops_lock);
+ return -ENOMEM;
+ }
+
+ plane->drm_format = dmabuf->mode.drm_format;
+ plane->width = dmabuf->mode.width;
+ plane->height = dmabuf->mode.height;
+ plane->stride = dmabuf->mode.stride;
+ plane->size = dmabuf->mode.size;
+ plane->dmabuf_id = dmabuf->id;
+
+done:
+ if (plane->drm_plane_type == DRM_PLANE_TYPE_PRIMARY &&
+ mdev_state->active_id != plane->dmabuf_id) {
+ dev_dbg(mdev_state->vdev.dev, "%s: primary: %d => %d\n",
+ __func__, mdev_state->active_id, plane->dmabuf_id);
+ mdev_state->active_id = plane->dmabuf_id;
+ }
+ mutex_unlock(&mdev_state->ops_lock);
+ return 0;
+}
+
+static int mbochs_get_gfx_dmabuf(struct mdev_state *mdev_state, u32 id)
+{
+ struct mbochs_dmabuf *dmabuf;
+
+ mutex_lock(&mdev_state->ops_lock);
+
+ dmabuf = mbochs_dmabuf_find_by_id(mdev_state, id);
+ if (!dmabuf) {
+ mutex_unlock(&mdev_state->ops_lock);
+ return -ENOENT;
+ }
+
+ if (!dmabuf->buf)
+ mbochs_dmabuf_export(dmabuf);
+
+ mutex_unlock(&mdev_state->ops_lock);
+
+ if (!dmabuf->buf)
+ return -EINVAL;
+
+ return dma_buf_fd(dmabuf->buf, 0);
+}
+
+static long mbochs_ioctl(struct vfio_device *vdev, unsigned int cmd,
+ unsigned long arg)
+{
+ struct mdev_state *mdev_state =
+ container_of(vdev, struct mdev_state, vdev);
+ int ret = 0;
+ unsigned long minsz, outsz;
+
+ switch (cmd) {
+ case VFIO_DEVICE_GET_INFO:
+ {
+ struct vfio_device_info info;
+
+ minsz = offsetofend(struct vfio_device_info, num_irqs);
+
+ if (copy_from_user(&info, (void __user *)arg, minsz))
+ return -EFAULT;
+
+ if (info.argsz < minsz)
+ return -EINVAL;
+
+ ret = mbochs_get_device_info(&info);
+ if (ret)
+ return ret;
+
+ if (copy_to_user((void __user *)arg, &info, minsz))
+ return -EFAULT;
+
+ return 0;
+ }
+ case VFIO_DEVICE_GET_REGION_INFO:
+ {
+ struct vfio_region_info_ext info;
+
+ minsz = offsetofend(typeof(info), base.offset);
+
+ if (copy_from_user(&info, (void __user *)arg, minsz))
+ return -EFAULT;
+
+ outsz = info.base.argsz;
+ if (outsz < minsz)
+ return -EINVAL;
+ if (outsz > sizeof(info))
+ return -EINVAL;
+
+ ret = mbochs_get_region_info(mdev_state, &info);
+ if (ret)
+ return ret;
+
+ if (copy_to_user((void __user *)arg, &info, outsz))
+ return -EFAULT;
+
+ return 0;
+ }
+
+ case VFIO_DEVICE_GET_IRQ_INFO:
+ {
+ struct vfio_irq_info info;
+
+ minsz = offsetofend(struct vfio_irq_info, count);
+
+ if (copy_from_user(&info, (void __user *)arg, minsz))
+ return -EFAULT;
+
+ if ((info.argsz < minsz) ||
+ (info.index >= VFIO_PCI_NUM_IRQS))
+ return -EINVAL;
+
+ ret = mbochs_get_irq_info(&info);
+ if (ret)
+ return ret;
+
+ if (copy_to_user((void __user *)arg, &info, minsz))
+ return -EFAULT;
+
+ return 0;
+ }
+
+ case VFIO_DEVICE_QUERY_GFX_PLANE:
+ {
+ struct vfio_device_gfx_plane_info plane;
+
+ minsz = offsetofend(struct vfio_device_gfx_plane_info,
+ region_index);
+
+ if (copy_from_user(&plane, (void __user *)arg, minsz))
+ return -EFAULT;
+
+ if (plane.argsz < minsz)
+ return -EINVAL;
+
+ ret = mbochs_query_gfx_plane(mdev_state, &plane);
+ if (ret)
+ return ret;
+
+ if (copy_to_user((void __user *)arg, &plane, minsz))
+ return -EFAULT;
+
+ return 0;
+ }
+
+ case VFIO_DEVICE_GET_GFX_DMABUF:
+ {
+ u32 dmabuf_id;
+
+ if (get_user(dmabuf_id, (__u32 __user *)arg))
+ return -EFAULT;
+
+ return mbochs_get_gfx_dmabuf(mdev_state, dmabuf_id);
+ }
+
+ case VFIO_DEVICE_SET_IRQS:
+ return -EINVAL;
+
+ case VFIO_DEVICE_RESET:
+ return mbochs_reset(mdev_state);
+ }
+ return -ENOTTY;
+}
+
+static void mbochs_close_device(struct vfio_device *vdev)
+{
+ struct mdev_state *mdev_state =
+ container_of(vdev, struct mdev_state, vdev);
+ struct mbochs_dmabuf *dmabuf, *tmp;
+
+ mutex_lock(&mdev_state->ops_lock);
+
+ list_for_each_entry_safe(dmabuf, tmp, &mdev_state->dmabufs, next) {
+ list_del(&dmabuf->next);
+ if (dmabuf->buf) {
+ /* free in mbochs_release_dmabuf() */
+ dmabuf->unlinked = true;
+ } else {
+ kfree(dmabuf);
+ }
+ }
+ mbochs_put_pages(mdev_state);
+
+ mutex_unlock(&mdev_state->ops_lock);
+}
+
+static ssize_t
+memory_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct mdev_state *mdev_state = dev_get_drvdata(dev);
+
+ return sprintf(buf, "%d MB\n", mdev_state->type->mbytes);
+}
+static DEVICE_ATTR_RO(memory);
+
+static struct attribute *mdev_dev_attrs[] = {
+ &dev_attr_memory.attr,
+ NULL,
+};
+
+static const struct attribute_group mdev_dev_group = {
+ .name = "vendor",
+ .attrs = mdev_dev_attrs,
+};
+
+static const struct attribute_group *mdev_dev_groups[] = {
+ &mdev_dev_group,
+ NULL,
+};
+
+static ssize_t mbochs_show_description(struct mdev_type *mtype, char *buf)
+{
+ struct mbochs_type *type =
+ container_of(mtype, struct mbochs_type, type);
+
+ return sprintf(buf, "virtual display, %d MB video memory\n",
+ type ? type->mbytes : 0);
+}
+
+static unsigned int mbochs_get_available(struct mdev_type *mtype)
+{
+ struct mbochs_type *type =
+ container_of(mtype, struct mbochs_type, type);
+
+ return atomic_read(&mbochs_avail_mbytes) / type->mbytes;
+}
+
+static const struct vfio_device_ops mbochs_dev_ops = {
+ .close_device = mbochs_close_device,
+ .init = mbochs_init_dev,
+ .release = mbochs_release_dev,
+ .read = mbochs_read,
+ .write = mbochs_write,
+ .ioctl = mbochs_ioctl,
+ .mmap = mbochs_mmap,
+ .bind_iommufd = vfio_iommufd_emulated_bind,
+ .unbind_iommufd = vfio_iommufd_emulated_unbind,
+ .attach_ioas = vfio_iommufd_emulated_attach_ioas,
+ .detach_ioas = vfio_iommufd_emulated_detach_ioas,
+};
+
+static struct mdev_driver mbochs_driver = {
+ .device_api = VFIO_DEVICE_API_PCI_STRING,
+ .driver = {
+ .name = "mbochs",
+ .owner = THIS_MODULE,
+ .mod_name = KBUILD_MODNAME,
+ .dev_groups = mdev_dev_groups,
+ },
+ .probe = mbochs_probe,
+ .remove = mbochs_remove,
+ .get_available = mbochs_get_available,
+ .show_description = mbochs_show_description,
+};
+
+static const struct file_operations vd_fops = {
+ .owner = THIS_MODULE,
+};
+
+static void mbochs_device_release(struct device *dev)
+{
+ /* nothing */
+}
+
+static int __init mbochs_dev_init(void)
+{
+ int ret = 0;
+
+ atomic_set(&mbochs_avail_mbytes, max_mbytes);
+
+ ret = alloc_chrdev_region(&mbochs_devt, 0, MINORMASK + 1, MBOCHS_NAME);
+ if (ret < 0) {
+ pr_err("Error: failed to register mbochs_dev, err: %d\n", ret);
+ return ret;
+ }
+ cdev_init(&mbochs_cdev, &vd_fops);
+ cdev_add(&mbochs_cdev, mbochs_devt, MINORMASK + 1);
+ pr_info("%s: major %d\n", __func__, MAJOR(mbochs_devt));
+
+ ret = mdev_register_driver(&mbochs_driver);
+ if (ret)
+ goto err_cdev;
+
+ mbochs_class = class_create(MBOCHS_CLASS_NAME);
+ if (IS_ERR(mbochs_class)) {
+ pr_err("Error: failed to register mbochs_dev class\n");
+ ret = PTR_ERR(mbochs_class);
+ goto err_driver;
+ }
+ mbochs_dev.class = mbochs_class;
+ mbochs_dev.release = mbochs_device_release;
+ dev_set_name(&mbochs_dev, "%s", MBOCHS_NAME);
+
+ ret = device_register(&mbochs_dev);
+ if (ret)
+ goto err_put;
+
+ ret = mdev_register_parent(&mbochs_parent, &mbochs_dev, &mbochs_driver,
+ mbochs_mdev_types,
+ ARRAY_SIZE(mbochs_mdev_types));
+ if (ret)
+ goto err_device;
+
+ return 0;
+
+err_device:
+ device_del(&mbochs_dev);
+err_put:
+ put_device(&mbochs_dev);
+ class_destroy(mbochs_class);
+err_driver:
+ mdev_unregister_driver(&mbochs_driver);
+err_cdev:
+ cdev_del(&mbochs_cdev);
+ unregister_chrdev_region(mbochs_devt, MINORMASK + 1);
+ return ret;
+}
+
+static void __exit mbochs_dev_exit(void)
+{
+ mbochs_dev.bus = NULL;
+ mdev_unregister_parent(&mbochs_parent);
+
+ device_unregister(&mbochs_dev);
+ mdev_unregister_driver(&mbochs_driver);
+ cdev_del(&mbochs_cdev);
+ unregister_chrdev_region(mbochs_devt, MINORMASK + 1);
+ class_destroy(mbochs_class);
+ mbochs_class = NULL;
+}
+
+MODULE_IMPORT_NS(DMA_BUF);
+module_init(mbochs_dev_init)
+module_exit(mbochs_dev_exit)
diff --git a/samples/vfio-mdev/mdpy-defs.h b/samples/vfio-mdev/mdpy-defs.h
new file mode 100644
index 0000000000..961c55ec3f
--- /dev/null
+++ b/samples/vfio-mdev/mdpy-defs.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Simple pci display device.
+ *
+ * Framebuffer memory is pci bar 0.
+ * Configuration (read-only) is in pci config space.
+ * Format field uses drm fourcc codes.
+ * ATM only DRM_FORMAT_XRGB8888 is supported.
+ */
+
+/* pci ids */
+#define MDPY_PCI_VENDOR_ID PCI_VENDOR_ID_REDHAT
+#define MDPY_PCI_DEVICE_ID 0x000f
+#define MDPY_PCI_SUBVENDOR_ID PCI_SUBVENDOR_ID_REDHAT_QUMRANET
+#define MDPY_PCI_SUBDEVICE_ID PCI_SUBDEVICE_ID_QEMU
+
+/* pci cfg space offsets for fb config (dword) */
+#define MDPY_VENDORCAP_OFFSET 0x40
+#define MDPY_VENDORCAP_SIZE 0x10
+#define MDPY_FORMAT_OFFSET (MDPY_VENDORCAP_OFFSET + 0x04)
+#define MDPY_WIDTH_OFFSET (MDPY_VENDORCAP_OFFSET + 0x08)
+#define MDPY_HEIGHT_OFFSET (MDPY_VENDORCAP_OFFSET + 0x0c)
diff --git a/samples/vfio-mdev/mdpy-fb.c b/samples/vfio-mdev/mdpy-fb.c
new file mode 100644
index 0000000000..4598bc28ac
--- /dev/null
+++ b/samples/vfio-mdev/mdpy-fb.c
@@ -0,0 +1,232 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Framebuffer driver for mdpy (mediated virtual pci display device).
+ *
+ * See mdpy-defs.h for device specs
+ *
+ * (c) Gerd Hoffmann <kraxel@redhat.com>
+ *
+ * Using some code snippets from simplefb and cirrusfb.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+#include <linux/errno.h>
+#include <linux/fb.h>
+#include <linux/io.h>
+#include <linux/pci.h>
+#include <linux/module.h>
+#include <drm/drm_fourcc.h>
+#include "mdpy-defs.h"
+
+static const struct fb_fix_screeninfo mdpy_fb_fix = {
+ .id = "mdpy-fb",
+ .type = FB_TYPE_PACKED_PIXELS,
+ .visual = FB_VISUAL_TRUECOLOR,
+ .accel = FB_ACCEL_NONE,
+};
+
+static const struct fb_var_screeninfo mdpy_fb_var = {
+ .height = -1,
+ .width = -1,
+ .activate = FB_ACTIVATE_NOW,
+ .vmode = FB_VMODE_NONINTERLACED,
+
+ .bits_per_pixel = 32,
+ .transp.offset = 24,
+ .red.offset = 16,
+ .green.offset = 8,
+ .blue.offset = 0,
+ .transp.length = 8,
+ .red.length = 8,
+ .green.length = 8,
+ .blue.length = 8,
+};
+
+#define PSEUDO_PALETTE_SIZE 16
+
+struct mdpy_fb_par {
+ u32 palette[PSEUDO_PALETTE_SIZE];
+};
+
+static int mdpy_fb_setcolreg(u_int regno, u_int red, u_int green, u_int blue,
+ u_int transp, struct fb_info *info)
+{
+ u32 *pal = info->pseudo_palette;
+ u32 cr = red >> (16 - info->var.red.length);
+ u32 cg = green >> (16 - info->var.green.length);
+ u32 cb = blue >> (16 - info->var.blue.length);
+ u32 value, mask;
+
+ if (regno >= PSEUDO_PALETTE_SIZE)
+ return -EINVAL;
+
+ value = (cr << info->var.red.offset) |
+ (cg << info->var.green.offset) |
+ (cb << info->var.blue.offset);
+ if (info->var.transp.length > 0) {
+ mask = (1 << info->var.transp.length) - 1;
+ mask <<= info->var.transp.offset;
+ value |= mask;
+ }
+ pal[regno] = value;
+
+ return 0;
+}
+
+static void mdpy_fb_destroy(struct fb_info *info)
+{
+ if (info->screen_base)
+ iounmap(info->screen_base);
+}
+
+static const struct fb_ops mdpy_fb_ops = {
+ .owner = THIS_MODULE,
+ FB_DEFAULT_IOMEM_OPS,
+ .fb_destroy = mdpy_fb_destroy,
+ .fb_setcolreg = mdpy_fb_setcolreg,
+};
+
+static int mdpy_fb_probe(struct pci_dev *pdev,
+ const struct pci_device_id *ent)
+{
+ struct fb_info *info;
+ struct mdpy_fb_par *par;
+ u32 format, width, height;
+ int ret;
+
+ ret = pci_enable_device(pdev);
+ if (ret < 0)
+ return ret;
+
+ ret = pci_request_regions(pdev, "mdpy-fb");
+ if (ret < 0)
+ goto err_disable_dev;
+
+ pci_read_config_dword(pdev, MDPY_FORMAT_OFFSET, &format);
+ pci_read_config_dword(pdev, MDPY_WIDTH_OFFSET, &width);
+ pci_read_config_dword(pdev, MDPY_HEIGHT_OFFSET, &height);
+ if (format != DRM_FORMAT_XRGB8888) {
+ pci_err(pdev, "format mismatch (0x%x != 0x%x)\n",
+ format, DRM_FORMAT_XRGB8888);
+ ret = -EINVAL;
+ goto err_release_regions;
+ }
+ if (width < 100 || width > 10000) {
+ pci_err(pdev, "width (%d) out of range\n", width);
+ ret = -EINVAL;
+ goto err_release_regions;
+ }
+ if (height < 100 || height > 10000) {
+ pci_err(pdev, "height (%d) out of range\n", height);
+ ret = -EINVAL;
+ goto err_release_regions;
+ }
+ pci_info(pdev, "mdpy found: %dx%d framebuffer\n",
+ width, height);
+
+ info = framebuffer_alloc(sizeof(struct mdpy_fb_par), &pdev->dev);
+ if (!info) {
+ ret = -ENOMEM;
+ goto err_release_regions;
+ }
+ pci_set_drvdata(pdev, info);
+ par = info->par;
+
+ info->fix = mdpy_fb_fix;
+ info->fix.smem_start = pci_resource_start(pdev, 0);
+ info->fix.smem_len = pci_resource_len(pdev, 0);
+ info->fix.line_length = width * 4;
+
+ info->var = mdpy_fb_var;
+ info->var.xres = width;
+ info->var.yres = height;
+ info->var.xres_virtual = width;
+ info->var.yres_virtual = height;
+
+ info->screen_size = info->fix.smem_len;
+ info->screen_base = ioremap(info->fix.smem_start,
+ info->screen_size);
+ if (!info->screen_base) {
+ pci_err(pdev, "ioremap(pcibar) failed\n");
+ ret = -EIO;
+ goto err_release_fb;
+ }
+
+ info->fbops = &mdpy_fb_ops;
+ info->pseudo_palette = par->palette;
+
+ ret = register_framebuffer(info);
+ if (ret < 0) {
+ pci_err(pdev, "mdpy-fb device register failed: %d\n", ret);
+ goto err_unmap;
+ }
+
+ pci_info(pdev, "fb%d registered\n", info->node);
+ return 0;
+
+err_unmap:
+ iounmap(info->screen_base);
+
+err_release_fb:
+ framebuffer_release(info);
+
+err_release_regions:
+ pci_release_regions(pdev);
+
+err_disable_dev:
+ pci_disable_device(pdev);
+
+ return ret;
+}
+
+static void mdpy_fb_remove(struct pci_dev *pdev)
+{
+ struct fb_info *info = pci_get_drvdata(pdev);
+
+ unregister_framebuffer(info);
+ iounmap(info->screen_base);
+ framebuffer_release(info);
+ pci_release_regions(pdev);
+ pci_disable_device(pdev);
+}
+
+static struct pci_device_id mdpy_fb_pci_table[] = {
+ {
+ .vendor = MDPY_PCI_VENDOR_ID,
+ .device = MDPY_PCI_DEVICE_ID,
+ .subvendor = MDPY_PCI_SUBVENDOR_ID,
+ .subdevice = MDPY_PCI_SUBDEVICE_ID,
+ }, {
+ /* end of list */
+ }
+};
+
+static struct pci_driver mdpy_fb_pci_driver = {
+ .name = "mdpy-fb",
+ .id_table = mdpy_fb_pci_table,
+ .probe = mdpy_fb_probe,
+ .remove = mdpy_fb_remove,
+};
+
+static int __init mdpy_fb_init(void)
+{
+ int ret;
+
+ ret = pci_register_driver(&mdpy_fb_pci_driver);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+module_init(mdpy_fb_init);
+
+MODULE_DEVICE_TABLE(pci, mdpy_fb_pci_table);
+MODULE_LICENSE("GPL v2");
diff --git a/samples/vfio-mdev/mdpy.c b/samples/vfio-mdev/mdpy.c
new file mode 100644
index 0000000000..064e1c0a7a
--- /dev/null
+++ b/samples/vfio-mdev/mdpy.c
@@ -0,0 +1,764 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Mediated virtual PCI display host device driver
+ *
+ * See mdpy-defs.h for device specs
+ *
+ * (c) Gerd Hoffmann <kraxel@redhat.com>
+ *
+ * based on mtty driver which is:
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ * Author: Neo Jia <cjia@nvidia.com>
+ * Kirti Wankhede <kwankhede@nvidia.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/cdev.h>
+#include <linux/vfio.h>
+#include <linux/iommu.h>
+#include <linux/sysfs.h>
+#include <linux/mdev.h>
+#include <linux/pci.h>
+#include <drm/drm_fourcc.h>
+#include "mdpy-defs.h"
+
+#define MDPY_NAME "mdpy"
+#define MDPY_CLASS_NAME "mdpy"
+
+#define MDPY_CONFIG_SPACE_SIZE 0xff
+#define MDPY_MEMORY_BAR_OFFSET PAGE_SIZE
+#define MDPY_DISPLAY_REGION 16
+
+#define STORE_LE16(addr, val) (*(u16 *)addr = val)
+#define STORE_LE32(addr, val) (*(u32 *)addr = val)
+
+
+MODULE_LICENSE("GPL v2");
+
+#define MDPY_TYPE_1 "vga"
+#define MDPY_TYPE_2 "xga"
+#define MDPY_TYPE_3 "hd"
+
+static struct mdpy_type {
+ struct mdev_type type;
+ u32 format;
+ u32 bytepp;
+ u32 width;
+ u32 height;
+} mdpy_types[] = {
+ {
+ .type.sysfs_name = MDPY_TYPE_1,
+ .type.pretty_name = MDPY_CLASS_NAME "-" MDPY_TYPE_1,
+ .format = DRM_FORMAT_XRGB8888,
+ .bytepp = 4,
+ .width = 640,
+ .height = 480,
+ }, {
+ .type.sysfs_name = MDPY_TYPE_2,
+ .type.pretty_name = MDPY_CLASS_NAME "-" MDPY_TYPE_2,
+ .format = DRM_FORMAT_XRGB8888,
+ .bytepp = 4,
+ .width = 1024,
+ .height = 768,
+ }, {
+ .type.sysfs_name = MDPY_TYPE_3,
+ .type.pretty_name = MDPY_CLASS_NAME "-" MDPY_TYPE_3,
+ .format = DRM_FORMAT_XRGB8888,
+ .bytepp = 4,
+ .width = 1920,
+ .height = 1080,
+ },
+};
+
+static struct mdev_type *mdpy_mdev_types[] = {
+ &mdpy_types[0].type,
+ &mdpy_types[1].type,
+ &mdpy_types[2].type,
+};
+
+static dev_t mdpy_devt;
+static struct class *mdpy_class;
+static struct cdev mdpy_cdev;
+static struct device mdpy_dev;
+static struct mdev_parent mdpy_parent;
+static const struct vfio_device_ops mdpy_dev_ops;
+
+/* State of each mdev device */
+struct mdev_state {
+ struct vfio_device vdev;
+ u8 *vconfig;
+ u32 bar_mask;
+ struct mutex ops_lock;
+ struct mdev_device *mdev;
+ struct vfio_device_info dev_info;
+
+ const struct mdpy_type *type;
+ u32 memsize;
+ void *memblk;
+};
+
+static void mdpy_create_config_space(struct mdev_state *mdev_state)
+{
+ STORE_LE16((u16 *) &mdev_state->vconfig[PCI_VENDOR_ID],
+ MDPY_PCI_VENDOR_ID);
+ STORE_LE16((u16 *) &mdev_state->vconfig[PCI_DEVICE_ID],
+ MDPY_PCI_DEVICE_ID);
+ STORE_LE16((u16 *) &mdev_state->vconfig[PCI_SUBSYSTEM_VENDOR_ID],
+ MDPY_PCI_SUBVENDOR_ID);
+ STORE_LE16((u16 *) &mdev_state->vconfig[PCI_SUBSYSTEM_ID],
+ MDPY_PCI_SUBDEVICE_ID);
+
+ STORE_LE16((u16 *) &mdev_state->vconfig[PCI_COMMAND],
+ PCI_COMMAND_IO | PCI_COMMAND_MEMORY);
+ STORE_LE16((u16 *) &mdev_state->vconfig[PCI_STATUS],
+ PCI_STATUS_CAP_LIST);
+ STORE_LE16((u16 *) &mdev_state->vconfig[PCI_CLASS_DEVICE],
+ PCI_CLASS_DISPLAY_OTHER);
+ mdev_state->vconfig[PCI_CLASS_REVISION] = 0x01;
+
+ STORE_LE32((u32 *) &mdev_state->vconfig[PCI_BASE_ADDRESS_0],
+ PCI_BASE_ADDRESS_SPACE_MEMORY |
+ PCI_BASE_ADDRESS_MEM_TYPE_32 |
+ PCI_BASE_ADDRESS_MEM_PREFETCH);
+ mdev_state->bar_mask = ~(mdev_state->memsize) + 1;
+
+ /* vendor specific capability for the config registers */
+ mdev_state->vconfig[PCI_CAPABILITY_LIST] = MDPY_VENDORCAP_OFFSET;
+ mdev_state->vconfig[MDPY_VENDORCAP_OFFSET + 0] = 0x09; /* vendor cap */
+ mdev_state->vconfig[MDPY_VENDORCAP_OFFSET + 1] = 0x00; /* next ptr */
+ mdev_state->vconfig[MDPY_VENDORCAP_OFFSET + 2] = MDPY_VENDORCAP_SIZE;
+ STORE_LE32((u32 *) &mdev_state->vconfig[MDPY_FORMAT_OFFSET],
+ mdev_state->type->format);
+ STORE_LE32((u32 *) &mdev_state->vconfig[MDPY_WIDTH_OFFSET],
+ mdev_state->type->width);
+ STORE_LE32((u32 *) &mdev_state->vconfig[MDPY_HEIGHT_OFFSET],
+ mdev_state->type->height);
+}
+
+static void handle_pci_cfg_write(struct mdev_state *mdev_state, u16 offset,
+ char *buf, u32 count)
+{
+ struct device *dev = mdev_dev(mdev_state->mdev);
+ u32 cfg_addr;
+
+ switch (offset) {
+ case PCI_BASE_ADDRESS_0:
+ cfg_addr = *(u32 *)buf;
+
+ if (cfg_addr == 0xffffffff) {
+ cfg_addr = (cfg_addr & mdev_state->bar_mask);
+ } else {
+ cfg_addr &= PCI_BASE_ADDRESS_MEM_MASK;
+ if (cfg_addr)
+ dev_info(dev, "BAR0 @ 0x%x\n", cfg_addr);
+ }
+
+ cfg_addr |= (mdev_state->vconfig[offset] &
+ ~PCI_BASE_ADDRESS_MEM_MASK);
+ STORE_LE32(&mdev_state->vconfig[offset], cfg_addr);
+ break;
+ }
+}
+
+static ssize_t mdev_access(struct mdev_state *mdev_state, char *buf,
+ size_t count, loff_t pos, bool is_write)
+{
+ int ret = 0;
+
+ mutex_lock(&mdev_state->ops_lock);
+
+ if (pos < MDPY_CONFIG_SPACE_SIZE) {
+ if (is_write)
+ handle_pci_cfg_write(mdev_state, pos, buf, count);
+ else
+ memcpy(buf, (mdev_state->vconfig + pos), count);
+
+ } else if ((pos >= MDPY_MEMORY_BAR_OFFSET) &&
+ (pos + count <=
+ MDPY_MEMORY_BAR_OFFSET + mdev_state->memsize)) {
+ pos -= MDPY_MEMORY_BAR_OFFSET;
+ if (is_write)
+ memcpy(mdev_state->memblk, buf, count);
+ else
+ memcpy(buf, mdev_state->memblk, count);
+
+ } else {
+ dev_info(mdev_state->vdev.dev,
+ "%s: %s @0x%llx (unhandled)\n", __func__,
+ is_write ? "WR" : "RD", pos);
+ ret = -1;
+ goto accessfailed;
+ }
+
+ ret = count;
+
+
+accessfailed:
+ mutex_unlock(&mdev_state->ops_lock);
+
+ return ret;
+}
+
+static int mdpy_reset(struct mdev_state *mdev_state)
+{
+ u32 stride, i;
+
+ /* initialize with gray gradient */
+ stride = mdev_state->type->width * mdev_state->type->bytepp;
+ for (i = 0; i < mdev_state->type->height; i++)
+ memset(mdev_state->memblk + i * stride,
+ i * 255 / mdev_state->type->height,
+ stride);
+ return 0;
+}
+
+static int mdpy_init_dev(struct vfio_device *vdev)
+{
+ struct mdev_state *mdev_state =
+ container_of(vdev, struct mdev_state, vdev);
+ struct mdev_device *mdev = to_mdev_device(vdev->dev);
+ const struct mdpy_type *type =
+ container_of(mdev->type, struct mdpy_type, type);
+ u32 fbsize;
+ int ret = -ENOMEM;
+
+ mdev_state->vconfig = kzalloc(MDPY_CONFIG_SPACE_SIZE, GFP_KERNEL);
+ if (!mdev_state->vconfig)
+ return ret;
+
+ fbsize = roundup_pow_of_two(type->width * type->height * type->bytepp);
+
+ mdev_state->memblk = vmalloc_user(fbsize);
+ if (!mdev_state->memblk)
+ goto out_vconfig;
+
+ mutex_init(&mdev_state->ops_lock);
+ mdev_state->mdev = mdev;
+ mdev_state->type = type;
+ mdev_state->memsize = fbsize;
+ mdpy_create_config_space(mdev_state);
+ mdpy_reset(mdev_state);
+
+ dev_info(vdev->dev, "%s: %s (%dx%d)\n", __func__, type->type.pretty_name,
+ type->width, type->height);
+ return 0;
+
+out_vconfig:
+ kfree(mdev_state->vconfig);
+ return ret;
+}
+
+static int mdpy_probe(struct mdev_device *mdev)
+{
+ struct mdev_state *mdev_state;
+ int ret;
+
+ mdev_state = vfio_alloc_device(mdev_state, vdev, &mdev->dev,
+ &mdpy_dev_ops);
+ if (IS_ERR(mdev_state))
+ return PTR_ERR(mdev_state);
+
+ ret = vfio_register_emulated_iommu_dev(&mdev_state->vdev);
+ if (ret)
+ goto err_put_vdev;
+ dev_set_drvdata(&mdev->dev, mdev_state);
+ return 0;
+
+err_put_vdev:
+ vfio_put_device(&mdev_state->vdev);
+ return ret;
+}
+
+static void mdpy_release_dev(struct vfio_device *vdev)
+{
+ struct mdev_state *mdev_state =
+ container_of(vdev, struct mdev_state, vdev);
+
+ vfree(mdev_state->memblk);
+ kfree(mdev_state->vconfig);
+}
+
+static void mdpy_remove(struct mdev_device *mdev)
+{
+ struct mdev_state *mdev_state = dev_get_drvdata(&mdev->dev);
+
+ dev_info(&mdev->dev, "%s\n", __func__);
+
+ vfio_unregister_group_dev(&mdev_state->vdev);
+ vfio_put_device(&mdev_state->vdev);
+}
+
+static ssize_t mdpy_read(struct vfio_device *vdev, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct mdev_state *mdev_state =
+ container_of(vdev, struct mdev_state, vdev);
+ unsigned int done = 0;
+ int ret;
+
+ while (count) {
+ size_t filled;
+
+ if (count >= 4 && !(*ppos % 4)) {
+ u32 val;
+
+ ret = mdev_access(mdev_state, (char *)&val, sizeof(val),
+ *ppos, false);
+ if (ret <= 0)
+ goto read_err;
+
+ if (copy_to_user(buf, &val, sizeof(val)))
+ goto read_err;
+
+ filled = 4;
+ } else if (count >= 2 && !(*ppos % 2)) {
+ u16 val;
+
+ ret = mdev_access(mdev_state, (char *)&val, sizeof(val),
+ *ppos, false);
+ if (ret <= 0)
+ goto read_err;
+
+ if (copy_to_user(buf, &val, sizeof(val)))
+ goto read_err;
+
+ filled = 2;
+ } else {
+ u8 val;
+
+ ret = mdev_access(mdev_state, (char *)&val, sizeof(val),
+ *ppos, false);
+ if (ret <= 0)
+ goto read_err;
+
+ if (copy_to_user(buf, &val, sizeof(val)))
+ goto read_err;
+
+ filled = 1;
+ }
+
+ count -= filled;
+ done += filled;
+ *ppos += filled;
+ buf += filled;
+ }
+
+ return done;
+
+read_err:
+ return -EFAULT;
+}
+
+static ssize_t mdpy_write(struct vfio_device *vdev, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct mdev_state *mdev_state =
+ container_of(vdev, struct mdev_state, vdev);
+ unsigned int done = 0;
+ int ret;
+
+ while (count) {
+ size_t filled;
+
+ if (count >= 4 && !(*ppos % 4)) {
+ u32 val;
+
+ if (copy_from_user(&val, buf, sizeof(val)))
+ goto write_err;
+
+ ret = mdev_access(mdev_state, (char *)&val, sizeof(val),
+ *ppos, true);
+ if (ret <= 0)
+ goto write_err;
+
+ filled = 4;
+ } else if (count >= 2 && !(*ppos % 2)) {
+ u16 val;
+
+ if (copy_from_user(&val, buf, sizeof(val)))
+ goto write_err;
+
+ ret = mdev_access(mdev_state, (char *)&val, sizeof(val),
+ *ppos, true);
+ if (ret <= 0)
+ goto write_err;
+
+ filled = 2;
+ } else {
+ u8 val;
+
+ if (copy_from_user(&val, buf, sizeof(val)))
+ goto write_err;
+
+ ret = mdev_access(mdev_state, (char *)&val, sizeof(val),
+ *ppos, true);
+ if (ret <= 0)
+ goto write_err;
+
+ filled = 1;
+ }
+ count -= filled;
+ done += filled;
+ *ppos += filled;
+ buf += filled;
+ }
+
+ return done;
+write_err:
+ return -EFAULT;
+}
+
+static int mdpy_mmap(struct vfio_device *vdev, struct vm_area_struct *vma)
+{
+ struct mdev_state *mdev_state =
+ container_of(vdev, struct mdev_state, vdev);
+
+ if (vma->vm_pgoff != MDPY_MEMORY_BAR_OFFSET >> PAGE_SHIFT)
+ return -EINVAL;
+ if (vma->vm_end < vma->vm_start)
+ return -EINVAL;
+ if (vma->vm_end - vma->vm_start > mdev_state->memsize)
+ return -EINVAL;
+ if ((vma->vm_flags & VM_SHARED) == 0)
+ return -EINVAL;
+
+ return remap_vmalloc_range(vma, mdev_state->memblk, 0);
+}
+
+static int mdpy_get_region_info(struct mdev_state *mdev_state,
+ struct vfio_region_info *region_info,
+ u16 *cap_type_id, void **cap_type)
+{
+ if (region_info->index >= VFIO_PCI_NUM_REGIONS &&
+ region_info->index != MDPY_DISPLAY_REGION)
+ return -EINVAL;
+
+ switch (region_info->index) {
+ case VFIO_PCI_CONFIG_REGION_INDEX:
+ region_info->offset = 0;
+ region_info->size = MDPY_CONFIG_SPACE_SIZE;
+ region_info->flags = (VFIO_REGION_INFO_FLAG_READ |
+ VFIO_REGION_INFO_FLAG_WRITE);
+ break;
+ case VFIO_PCI_BAR0_REGION_INDEX:
+ case MDPY_DISPLAY_REGION:
+ region_info->offset = MDPY_MEMORY_BAR_OFFSET;
+ region_info->size = mdev_state->memsize;
+ region_info->flags = (VFIO_REGION_INFO_FLAG_READ |
+ VFIO_REGION_INFO_FLAG_WRITE |
+ VFIO_REGION_INFO_FLAG_MMAP);
+ break;
+ default:
+ region_info->size = 0;
+ region_info->offset = 0;
+ region_info->flags = 0;
+ }
+
+ return 0;
+}
+
+static int mdpy_get_irq_info(struct vfio_irq_info *irq_info)
+{
+ irq_info->count = 0;
+ return 0;
+}
+
+static int mdpy_get_device_info(struct vfio_device_info *dev_info)
+{
+ dev_info->flags = VFIO_DEVICE_FLAGS_PCI;
+ dev_info->num_regions = VFIO_PCI_NUM_REGIONS;
+ dev_info->num_irqs = VFIO_PCI_NUM_IRQS;
+ return 0;
+}
+
+static int mdpy_query_gfx_plane(struct mdev_state *mdev_state,
+ struct vfio_device_gfx_plane_info *plane)
+{
+ if (plane->flags & VFIO_GFX_PLANE_TYPE_PROBE) {
+ if (plane->flags == (VFIO_GFX_PLANE_TYPE_PROBE |
+ VFIO_GFX_PLANE_TYPE_REGION))
+ return 0;
+ return -EINVAL;
+ }
+
+ if (plane->flags != VFIO_GFX_PLANE_TYPE_REGION)
+ return -EINVAL;
+
+ plane->drm_format = mdev_state->type->format;
+ plane->width = mdev_state->type->width;
+ plane->height = mdev_state->type->height;
+ plane->stride = (mdev_state->type->width *
+ mdev_state->type->bytepp);
+ plane->size = mdev_state->memsize;
+ plane->region_index = MDPY_DISPLAY_REGION;
+
+ /* unused */
+ plane->drm_format_mod = 0;
+ plane->x_pos = 0;
+ plane->y_pos = 0;
+ plane->x_hot = 0;
+ plane->y_hot = 0;
+
+ return 0;
+}
+
+static long mdpy_ioctl(struct vfio_device *vdev, unsigned int cmd,
+ unsigned long arg)
+{
+ int ret = 0;
+ unsigned long minsz;
+ struct mdev_state *mdev_state =
+ container_of(vdev, struct mdev_state, vdev);
+
+ switch (cmd) {
+ case VFIO_DEVICE_GET_INFO:
+ {
+ struct vfio_device_info info;
+
+ minsz = offsetofend(struct vfio_device_info, num_irqs);
+
+ if (copy_from_user(&info, (void __user *)arg, minsz))
+ return -EFAULT;
+
+ if (info.argsz < minsz)
+ return -EINVAL;
+
+ ret = mdpy_get_device_info(&info);
+ if (ret)
+ return ret;
+
+ memcpy(&mdev_state->dev_info, &info, sizeof(info));
+
+ if (copy_to_user((void __user *)arg, &info, minsz))
+ return -EFAULT;
+
+ return 0;
+ }
+ case VFIO_DEVICE_GET_REGION_INFO:
+ {
+ struct vfio_region_info info;
+ u16 cap_type_id = 0;
+ void *cap_type = NULL;
+
+ minsz = offsetofend(struct vfio_region_info, offset);
+
+ if (copy_from_user(&info, (void __user *)arg, minsz))
+ return -EFAULT;
+
+ if (info.argsz < minsz)
+ return -EINVAL;
+
+ ret = mdpy_get_region_info(mdev_state, &info, &cap_type_id,
+ &cap_type);
+ if (ret)
+ return ret;
+
+ if (copy_to_user((void __user *)arg, &info, minsz))
+ return -EFAULT;
+
+ return 0;
+ }
+
+ case VFIO_DEVICE_GET_IRQ_INFO:
+ {
+ struct vfio_irq_info info;
+
+ minsz = offsetofend(struct vfio_irq_info, count);
+
+ if (copy_from_user(&info, (void __user *)arg, minsz))
+ return -EFAULT;
+
+ if ((info.argsz < minsz) ||
+ (info.index >= mdev_state->dev_info.num_irqs))
+ return -EINVAL;
+
+ ret = mdpy_get_irq_info(&info);
+ if (ret)
+ return ret;
+
+ if (copy_to_user((void __user *)arg, &info, minsz))
+ return -EFAULT;
+
+ return 0;
+ }
+
+ case VFIO_DEVICE_QUERY_GFX_PLANE:
+ {
+ struct vfio_device_gfx_plane_info plane;
+
+ minsz = offsetofend(struct vfio_device_gfx_plane_info,
+ region_index);
+
+ if (copy_from_user(&plane, (void __user *)arg, minsz))
+ return -EFAULT;
+
+ if (plane.argsz < minsz)
+ return -EINVAL;
+
+ ret = mdpy_query_gfx_plane(mdev_state, &plane);
+ if (ret)
+ return ret;
+
+ if (copy_to_user((void __user *)arg, &plane, minsz))
+ return -EFAULT;
+
+ return 0;
+ }
+
+ case VFIO_DEVICE_SET_IRQS:
+ return -EINVAL;
+
+ case VFIO_DEVICE_RESET:
+ return mdpy_reset(mdev_state);
+ }
+ return -ENOTTY;
+}
+
+static ssize_t
+resolution_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct mdev_state *mdev_state = dev_get_drvdata(dev);
+
+ return sprintf(buf, "%dx%d\n",
+ mdev_state->type->width,
+ mdev_state->type->height);
+}
+static DEVICE_ATTR_RO(resolution);
+
+static struct attribute *mdev_dev_attrs[] = {
+ &dev_attr_resolution.attr,
+ NULL,
+};
+
+static const struct attribute_group mdev_dev_group = {
+ .name = "vendor",
+ .attrs = mdev_dev_attrs,
+};
+
+static const struct attribute_group *mdev_dev_groups[] = {
+ &mdev_dev_group,
+ NULL,
+};
+
+static ssize_t mdpy_show_description(struct mdev_type *mtype, char *buf)
+{
+ struct mdpy_type *type = container_of(mtype, struct mdpy_type, type);
+
+ return sprintf(buf, "virtual display, %dx%d framebuffer\n",
+ type->width, type->height);
+}
+
+static const struct vfio_device_ops mdpy_dev_ops = {
+ .init = mdpy_init_dev,
+ .release = mdpy_release_dev,
+ .read = mdpy_read,
+ .write = mdpy_write,
+ .ioctl = mdpy_ioctl,
+ .mmap = mdpy_mmap,
+ .bind_iommufd = vfio_iommufd_emulated_bind,
+ .unbind_iommufd = vfio_iommufd_emulated_unbind,
+ .attach_ioas = vfio_iommufd_emulated_attach_ioas,
+ .detach_ioas = vfio_iommufd_emulated_detach_ioas,
+};
+
+static struct mdev_driver mdpy_driver = {
+ .device_api = VFIO_DEVICE_API_PCI_STRING,
+ .max_instances = 4,
+ .driver = {
+ .name = "mdpy",
+ .owner = THIS_MODULE,
+ .mod_name = KBUILD_MODNAME,
+ .dev_groups = mdev_dev_groups,
+ },
+ .probe = mdpy_probe,
+ .remove = mdpy_remove,
+ .show_description = mdpy_show_description,
+};
+
+static const struct file_operations vd_fops = {
+ .owner = THIS_MODULE,
+};
+
+static void mdpy_device_release(struct device *dev)
+{
+ /* nothing */
+}
+
+static int __init mdpy_dev_init(void)
+{
+ int ret = 0;
+
+ ret = alloc_chrdev_region(&mdpy_devt, 0, MINORMASK + 1, MDPY_NAME);
+ if (ret < 0) {
+ pr_err("Error: failed to register mdpy_dev, err: %d\n", ret);
+ return ret;
+ }
+ cdev_init(&mdpy_cdev, &vd_fops);
+ cdev_add(&mdpy_cdev, mdpy_devt, MINORMASK + 1);
+ pr_info("%s: major %d\n", __func__, MAJOR(mdpy_devt));
+
+ ret = mdev_register_driver(&mdpy_driver);
+ if (ret)
+ goto err_cdev;
+
+ mdpy_class = class_create(MDPY_CLASS_NAME);
+ if (IS_ERR(mdpy_class)) {
+ pr_err("Error: failed to register mdpy_dev class\n");
+ ret = PTR_ERR(mdpy_class);
+ goto err_driver;
+ }
+ mdpy_dev.class = mdpy_class;
+ mdpy_dev.release = mdpy_device_release;
+ dev_set_name(&mdpy_dev, "%s", MDPY_NAME);
+
+ ret = device_register(&mdpy_dev);
+ if (ret)
+ goto err_put;
+
+ ret = mdev_register_parent(&mdpy_parent, &mdpy_dev, &mdpy_driver,
+ mdpy_mdev_types,
+ ARRAY_SIZE(mdpy_mdev_types));
+ if (ret)
+ goto err_device;
+
+ return 0;
+
+err_device:
+ device_del(&mdpy_dev);
+err_put:
+ put_device(&mdpy_dev);
+ class_destroy(mdpy_class);
+err_driver:
+ mdev_unregister_driver(&mdpy_driver);
+err_cdev:
+ cdev_del(&mdpy_cdev);
+ unregister_chrdev_region(mdpy_devt, MINORMASK + 1);
+ return ret;
+}
+
+static void __exit mdpy_dev_exit(void)
+{
+ mdpy_dev.bus = NULL;
+ mdev_unregister_parent(&mdpy_parent);
+
+ device_unregister(&mdpy_dev);
+ mdev_unregister_driver(&mdpy_driver);
+ cdev_del(&mdpy_cdev);
+ unregister_chrdev_region(mdpy_devt, MINORMASK + 1);
+ class_destroy(mdpy_class);
+ mdpy_class = NULL;
+}
+
+module_param_named(count, mdpy_driver.max_instances, int, 0444);
+MODULE_PARM_DESC(count, "number of " MDPY_NAME " devices");
+
+module_init(mdpy_dev_init)
+module_exit(mdpy_dev_exit)
diff --git a/samples/vfio-mdev/mtty.c b/samples/vfio-mdev/mtty.c
new file mode 100644
index 0000000000..245db52bed
--- /dev/null
+++ b/samples/vfio-mdev/mtty.c
@@ -0,0 +1,1473 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Mediated virtual PCI serial host device driver
+ *
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ * Author: Neo Jia <cjia@nvidia.com>
+ * Kirti Wankhede <kwankhede@nvidia.com>
+ *
+ * Sample driver that creates mdev device that simulates serial port over PCI
+ * card.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/poll.h>
+#include <linux/slab.h>
+#include <linux/cdev.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/vfio.h>
+#include <linux/iommu.h>
+#include <linux/sysfs.h>
+#include <linux/ctype.h>
+#include <linux/file.h>
+#include <linux/mdev.h>
+#include <linux/pci.h>
+#include <linux/serial.h>
+#include <uapi/linux/serial_reg.h>
+#include <linux/eventfd.h>
+/*
+ * #defines
+ */
+
+#define VERSION_STRING "0.1"
+#define DRIVER_AUTHOR "NVIDIA Corporation"
+
+#define MTTY_CLASS_NAME "mtty"
+
+#define MTTY_NAME "mtty"
+
+#define MTTY_STRING_LEN 16
+
+#define MTTY_CONFIG_SPACE_SIZE 0xff
+#define MTTY_IO_BAR_SIZE 0x8
+#define MTTY_MMIO_BAR_SIZE 0x100000
+
+#define STORE_LE16(addr, val) (*(u16 *)addr = val)
+#define STORE_LE32(addr, val) (*(u32 *)addr = val)
+
+#define MAX_FIFO_SIZE 16
+
+#define CIRCULAR_BUF_INC_IDX(idx) (idx = (idx + 1) & (MAX_FIFO_SIZE - 1))
+
+#define MTTY_VFIO_PCI_OFFSET_SHIFT 40
+
+#define MTTY_VFIO_PCI_OFFSET_TO_INDEX(off) (off >> MTTY_VFIO_PCI_OFFSET_SHIFT)
+#define MTTY_VFIO_PCI_INDEX_TO_OFFSET(index) \
+ ((u64)(index) << MTTY_VFIO_PCI_OFFSET_SHIFT)
+#define MTTY_VFIO_PCI_OFFSET_MASK \
+ (((u64)(1) << MTTY_VFIO_PCI_OFFSET_SHIFT) - 1)
+#define MAX_MTTYS 24
+
+/*
+ * Global Structures
+ */
+
+static struct mtty_dev {
+ dev_t vd_devt;
+ struct class *vd_class;
+ struct cdev vd_cdev;
+ struct idr vd_idr;
+ struct device dev;
+ struct mdev_parent parent;
+} mtty_dev;
+
+struct mdev_region_info {
+ u64 start;
+ u64 phys_start;
+ u32 size;
+ u64 vfio_offset;
+};
+
+#if defined(DEBUG_REGS)
+static const char *wr_reg[] = {
+ "TX",
+ "IER",
+ "FCR",
+ "LCR",
+ "MCR",
+ "LSR",
+ "MSR",
+ "SCR"
+};
+
+static const char *rd_reg[] = {
+ "RX",
+ "IER",
+ "IIR",
+ "LCR",
+ "MCR",
+ "LSR",
+ "MSR",
+ "SCR"
+};
+#endif
+
+/* loop back buffer */
+struct rxtx {
+ u8 fifo[MAX_FIFO_SIZE];
+ u8 head, tail;
+ u8 count;
+};
+
+struct serial_port {
+ u8 uart_reg[8]; /* 8 registers */
+ struct rxtx rxtx; /* loop back buffer */
+ bool dlab;
+ bool overrun;
+ u16 divisor;
+ u8 fcr; /* FIFO control register */
+ u8 max_fifo_size;
+ u8 intr_trigger_level; /* interrupt trigger level */
+};
+
+/* State of each mdev device */
+struct mdev_state {
+ struct vfio_device vdev;
+ struct eventfd_ctx *intx_evtfd;
+ struct eventfd_ctx *msi_evtfd;
+ int irq_index;
+ u8 *vconfig;
+ struct mutex ops_lock;
+ struct mdev_device *mdev;
+ struct mdev_region_info region_info[VFIO_PCI_NUM_REGIONS];
+ u32 bar_mask[VFIO_PCI_NUM_REGIONS];
+ struct list_head next;
+ struct serial_port s[2];
+ struct mutex rxtx_lock;
+ struct vfio_device_info dev_info;
+ int nr_ports;
+ u8 intx_mask:1;
+};
+
+static struct mtty_type {
+ struct mdev_type type;
+ int nr_ports;
+} mtty_types[2] = {
+ { .nr_ports = 1, .type.sysfs_name = "1",
+ .type.pretty_name = "Single port serial" },
+ { .nr_ports = 2, .type.sysfs_name = "2",
+ .type.pretty_name = "Dual port serial" },
+};
+
+static struct mdev_type *mtty_mdev_types[] = {
+ &mtty_types[0].type,
+ &mtty_types[1].type,
+};
+
+static atomic_t mdev_avail_ports = ATOMIC_INIT(MAX_MTTYS);
+
+static const struct file_operations vd_fops = {
+ .owner = THIS_MODULE,
+};
+
+static const struct vfio_device_ops mtty_dev_ops;
+
+/* Helper functions */
+
+static void dump_buffer(u8 *buf, uint32_t count)
+{
+#if defined(DEBUG)
+ int i;
+
+ pr_info("Buffer:\n");
+ for (i = 0; i < count; i++) {
+ pr_info("%2x ", *(buf + i));
+ if ((i + 1) % 16 == 0)
+ pr_info("\n");
+ }
+#endif
+}
+
+static bool is_intx(struct mdev_state *mdev_state)
+{
+ return mdev_state->irq_index == VFIO_PCI_INTX_IRQ_INDEX;
+}
+
+static bool is_msi(struct mdev_state *mdev_state)
+{
+ return mdev_state->irq_index == VFIO_PCI_MSI_IRQ_INDEX;
+}
+
+static bool is_noirq(struct mdev_state *mdev_state)
+{
+ return !is_intx(mdev_state) && !is_msi(mdev_state);
+}
+
+static void mtty_trigger_interrupt(struct mdev_state *mdev_state)
+{
+ lockdep_assert_held(&mdev_state->ops_lock);
+
+ if (is_msi(mdev_state)) {
+ if (mdev_state->msi_evtfd)
+ eventfd_signal(mdev_state->msi_evtfd, 1);
+ } else if (is_intx(mdev_state)) {
+ if (mdev_state->intx_evtfd && !mdev_state->intx_mask) {
+ eventfd_signal(mdev_state->intx_evtfd, 1);
+ mdev_state->intx_mask = true;
+ }
+ }
+}
+
+static void mtty_create_config_space(struct mdev_state *mdev_state)
+{
+ /* PCI dev ID */
+ STORE_LE32((u32 *) &mdev_state->vconfig[0x0], 0x32534348);
+
+ /* Control: I/O+, Mem-, BusMaster- */
+ STORE_LE16((u16 *) &mdev_state->vconfig[0x4], 0x0001);
+
+ /* Status: capabilities list absent */
+ STORE_LE16((u16 *) &mdev_state->vconfig[0x6], 0x0200);
+
+ /* Rev ID */
+ mdev_state->vconfig[0x8] = 0x10;
+
+ /* programming interface class : 16550-compatible serial controller */
+ mdev_state->vconfig[0x9] = 0x02;
+
+ /* Sub class : 00 */
+ mdev_state->vconfig[0xa] = 0x00;
+
+ /* Base class : Simple Communication controllers */
+ mdev_state->vconfig[0xb] = 0x07;
+
+ /* base address registers */
+ /* BAR0: IO space */
+ STORE_LE32((u32 *) &mdev_state->vconfig[0x10], 0x000001);
+ mdev_state->bar_mask[0] = ~(MTTY_IO_BAR_SIZE) + 1;
+
+ if (mdev_state->nr_ports == 2) {
+ /* BAR1: IO space */
+ STORE_LE32((u32 *) &mdev_state->vconfig[0x14], 0x000001);
+ mdev_state->bar_mask[1] = ~(MTTY_IO_BAR_SIZE) + 1;
+ }
+
+ /* Subsystem ID */
+ STORE_LE32((u32 *) &mdev_state->vconfig[0x2c], 0x32534348);
+
+ mdev_state->vconfig[0x34] = 0x00; /* Cap Ptr */
+ mdev_state->vconfig[0x3d] = 0x01; /* interrupt pin (INTA#) */
+
+ /* Vendor specific data */
+ mdev_state->vconfig[0x40] = 0x23;
+ mdev_state->vconfig[0x43] = 0x80;
+ mdev_state->vconfig[0x44] = 0x23;
+ mdev_state->vconfig[0x48] = 0x23;
+ mdev_state->vconfig[0x4c] = 0x23;
+
+ mdev_state->vconfig[0x60] = 0x50;
+ mdev_state->vconfig[0x61] = 0x43;
+ mdev_state->vconfig[0x62] = 0x49;
+ mdev_state->vconfig[0x63] = 0x20;
+ mdev_state->vconfig[0x64] = 0x53;
+ mdev_state->vconfig[0x65] = 0x65;
+ mdev_state->vconfig[0x66] = 0x72;
+ mdev_state->vconfig[0x67] = 0x69;
+ mdev_state->vconfig[0x68] = 0x61;
+ mdev_state->vconfig[0x69] = 0x6c;
+ mdev_state->vconfig[0x6a] = 0x2f;
+ mdev_state->vconfig[0x6b] = 0x55;
+ mdev_state->vconfig[0x6c] = 0x41;
+ mdev_state->vconfig[0x6d] = 0x52;
+ mdev_state->vconfig[0x6e] = 0x54;
+}
+
+static void handle_pci_cfg_write(struct mdev_state *mdev_state, u16 offset,
+ u8 *buf, u32 count)
+{
+ u32 cfg_addr, bar_mask, bar_index = 0;
+
+ switch (offset) {
+ case 0x04: /* device control */
+ case 0x06: /* device status */
+ /* do nothing */
+ break;
+ case 0x3c: /* interrupt line */
+ mdev_state->vconfig[0x3c] = buf[0];
+ break;
+ case 0x3d:
+ /*
+ * Interrupt Pin is hardwired to INTA.
+ * This field is write protected by hardware
+ */
+ break;
+ case 0x10: /* BAR0 */
+ case 0x14: /* BAR1 */
+ if (offset == 0x10)
+ bar_index = 0;
+ else if (offset == 0x14)
+ bar_index = 1;
+
+ if ((mdev_state->nr_ports == 1) && (bar_index == 1)) {
+ STORE_LE32(&mdev_state->vconfig[offset], 0);
+ break;
+ }
+
+ cfg_addr = *(u32 *)buf;
+ pr_info("BAR%d addr 0x%x\n", bar_index, cfg_addr);
+
+ if (cfg_addr == 0xffffffff) {
+ bar_mask = mdev_state->bar_mask[bar_index];
+ cfg_addr = (cfg_addr & bar_mask);
+ }
+
+ cfg_addr |= (mdev_state->vconfig[offset] & 0x3ul);
+ STORE_LE32(&mdev_state->vconfig[offset], cfg_addr);
+ break;
+ case 0x18: /* BAR2 */
+ case 0x1c: /* BAR3 */
+ case 0x20: /* BAR4 */
+ STORE_LE32(&mdev_state->vconfig[offset], 0);
+ break;
+ default:
+ pr_info("PCI config write @0x%x of %d bytes not handled\n",
+ offset, count);
+ break;
+ }
+}
+
+static void handle_bar_write(unsigned int index, struct mdev_state *mdev_state,
+ u16 offset, u8 *buf, u32 count)
+{
+ u8 data = *buf;
+
+ /* Handle data written by guest */
+ switch (offset) {
+ case UART_TX:
+ /* if DLAB set, data is LSB of divisor */
+ if (mdev_state->s[index].dlab) {
+ mdev_state->s[index].divisor |= data;
+ break;
+ }
+
+ mutex_lock(&mdev_state->rxtx_lock);
+
+ /* save in TX buffer */
+ if (mdev_state->s[index].rxtx.count <
+ mdev_state->s[index].max_fifo_size) {
+ mdev_state->s[index].rxtx.fifo[
+ mdev_state->s[index].rxtx.head] = data;
+ mdev_state->s[index].rxtx.count++;
+ CIRCULAR_BUF_INC_IDX(mdev_state->s[index].rxtx.head);
+ mdev_state->s[index].overrun = false;
+
+ /*
+ * Trigger interrupt if receive data interrupt is
+ * enabled and fifo reached trigger level
+ */
+ if ((mdev_state->s[index].uart_reg[UART_IER] &
+ UART_IER_RDI) &&
+ (mdev_state->s[index].rxtx.count ==
+ mdev_state->s[index].intr_trigger_level)) {
+ /* trigger interrupt */
+#if defined(DEBUG_INTR)
+ pr_err("Serial port %d: Fifo level trigger\n",
+ index);
+#endif
+ mtty_trigger_interrupt(mdev_state);
+ }
+ } else {
+#if defined(DEBUG_INTR)
+ pr_err("Serial port %d: Buffer Overflow\n", index);
+#endif
+ mdev_state->s[index].overrun = true;
+
+ /*
+ * Trigger interrupt if receiver line status interrupt
+ * is enabled
+ */
+ if (mdev_state->s[index].uart_reg[UART_IER] &
+ UART_IER_RLSI)
+ mtty_trigger_interrupt(mdev_state);
+ }
+ mutex_unlock(&mdev_state->rxtx_lock);
+ break;
+
+ case UART_IER:
+ /* if DLAB set, data is MSB of divisor */
+ if (mdev_state->s[index].dlab)
+ mdev_state->s[index].divisor |= (u16)data << 8;
+ else {
+ mdev_state->s[index].uart_reg[offset] = data;
+ mutex_lock(&mdev_state->rxtx_lock);
+ if ((data & UART_IER_THRI) &&
+ (mdev_state->s[index].rxtx.head ==
+ mdev_state->s[index].rxtx.tail)) {
+#if defined(DEBUG_INTR)
+ pr_err("Serial port %d: IER_THRI write\n",
+ index);
+#endif
+ mtty_trigger_interrupt(mdev_state);
+ }
+
+ mutex_unlock(&mdev_state->rxtx_lock);
+ }
+
+ break;
+
+ case UART_FCR:
+ mdev_state->s[index].fcr = data;
+
+ mutex_lock(&mdev_state->rxtx_lock);
+ if (data & (UART_FCR_CLEAR_RCVR | UART_FCR_CLEAR_XMIT)) {
+ /* clear loop back FIFO */
+ mdev_state->s[index].rxtx.count = 0;
+ mdev_state->s[index].rxtx.head = 0;
+ mdev_state->s[index].rxtx.tail = 0;
+ }
+ mutex_unlock(&mdev_state->rxtx_lock);
+
+ switch (data & UART_FCR_TRIGGER_MASK) {
+ case UART_FCR_TRIGGER_1:
+ mdev_state->s[index].intr_trigger_level = 1;
+ break;
+
+ case UART_FCR_TRIGGER_4:
+ mdev_state->s[index].intr_trigger_level = 4;
+ break;
+
+ case UART_FCR_TRIGGER_8:
+ mdev_state->s[index].intr_trigger_level = 8;
+ break;
+
+ case UART_FCR_TRIGGER_14:
+ mdev_state->s[index].intr_trigger_level = 14;
+ break;
+ }
+
+ /*
+ * Set trigger level to 1 otherwise or implement timer with
+ * timeout of 4 characters and on expiring that timer set
+ * Recevice data timeout in IIR register
+ */
+ mdev_state->s[index].intr_trigger_level = 1;
+ if (data & UART_FCR_ENABLE_FIFO)
+ mdev_state->s[index].max_fifo_size = MAX_FIFO_SIZE;
+ else {
+ mdev_state->s[index].max_fifo_size = 1;
+ mdev_state->s[index].intr_trigger_level = 1;
+ }
+
+ break;
+
+ case UART_LCR:
+ if (data & UART_LCR_DLAB) {
+ mdev_state->s[index].dlab = true;
+ mdev_state->s[index].divisor = 0;
+ } else
+ mdev_state->s[index].dlab = false;
+
+ mdev_state->s[index].uart_reg[offset] = data;
+ break;
+
+ case UART_MCR:
+ mdev_state->s[index].uart_reg[offset] = data;
+
+ if ((mdev_state->s[index].uart_reg[UART_IER] & UART_IER_MSI) &&
+ (data & UART_MCR_OUT2)) {
+#if defined(DEBUG_INTR)
+ pr_err("Serial port %d: MCR_OUT2 write\n", index);
+#endif
+ mtty_trigger_interrupt(mdev_state);
+ }
+
+ if ((mdev_state->s[index].uart_reg[UART_IER] & UART_IER_MSI) &&
+ (data & (UART_MCR_RTS | UART_MCR_DTR))) {
+#if defined(DEBUG_INTR)
+ pr_err("Serial port %d: MCR RTS/DTR write\n", index);
+#endif
+ mtty_trigger_interrupt(mdev_state);
+ }
+ break;
+
+ case UART_LSR:
+ case UART_MSR:
+ /* do nothing */
+ break;
+
+ case UART_SCR:
+ mdev_state->s[index].uart_reg[offset] = data;
+ break;
+
+ default:
+ break;
+ }
+}
+
+static void handle_bar_read(unsigned int index, struct mdev_state *mdev_state,
+ u16 offset, u8 *buf, u32 count)
+{
+ /* Handle read requests by guest */
+ switch (offset) {
+ case UART_RX:
+ /* if DLAB set, data is LSB of divisor */
+ if (mdev_state->s[index].dlab) {
+ *buf = (u8)mdev_state->s[index].divisor;
+ break;
+ }
+
+ mutex_lock(&mdev_state->rxtx_lock);
+ /* return data in tx buffer */
+ if (mdev_state->s[index].rxtx.head !=
+ mdev_state->s[index].rxtx.tail) {
+ *buf = mdev_state->s[index].rxtx.fifo[
+ mdev_state->s[index].rxtx.tail];
+ mdev_state->s[index].rxtx.count--;
+ CIRCULAR_BUF_INC_IDX(mdev_state->s[index].rxtx.tail);
+ }
+
+ if (mdev_state->s[index].rxtx.head ==
+ mdev_state->s[index].rxtx.tail) {
+ /*
+ * Trigger interrupt if tx buffer empty interrupt is
+ * enabled and fifo is empty
+ */
+#if defined(DEBUG_INTR)
+ pr_err("Serial port %d: Buffer Empty\n", index);
+#endif
+ if (mdev_state->s[index].uart_reg[UART_IER] &
+ UART_IER_THRI)
+ mtty_trigger_interrupt(mdev_state);
+ }
+ mutex_unlock(&mdev_state->rxtx_lock);
+
+ break;
+
+ case UART_IER:
+ if (mdev_state->s[index].dlab) {
+ *buf = (u8)(mdev_state->s[index].divisor >> 8);
+ break;
+ }
+ *buf = mdev_state->s[index].uart_reg[offset] & 0x0f;
+ break;
+
+ case UART_IIR:
+ {
+ u8 ier = mdev_state->s[index].uart_reg[UART_IER];
+ *buf = 0;
+
+ mutex_lock(&mdev_state->rxtx_lock);
+ /* Interrupt priority 1: Parity, overrun, framing or break */
+ if ((ier & UART_IER_RLSI) && mdev_state->s[index].overrun)
+ *buf |= UART_IIR_RLSI;
+
+ /* Interrupt priority 2: Fifo trigger level reached */
+ if ((ier & UART_IER_RDI) &&
+ (mdev_state->s[index].rxtx.count >=
+ mdev_state->s[index].intr_trigger_level))
+ *buf |= UART_IIR_RDI;
+
+ /* Interrupt priotiry 3: transmitter holding register empty */
+ if ((ier & UART_IER_THRI) &&
+ (mdev_state->s[index].rxtx.head ==
+ mdev_state->s[index].rxtx.tail))
+ *buf |= UART_IIR_THRI;
+
+ /* Interrupt priotiry 4: Modem status: CTS, DSR, RI or DCD */
+ if ((ier & UART_IER_MSI) &&
+ (mdev_state->s[index].uart_reg[UART_MCR] &
+ (UART_MCR_RTS | UART_MCR_DTR)))
+ *buf |= UART_IIR_MSI;
+
+ /* bit0: 0=> interrupt pending, 1=> no interrupt is pending */
+ if (*buf == 0)
+ *buf = UART_IIR_NO_INT;
+
+ /* set bit 6 & 7 to be 16550 compatible */
+ *buf |= 0xC0;
+ mutex_unlock(&mdev_state->rxtx_lock);
+ }
+ break;
+
+ case UART_LCR:
+ case UART_MCR:
+ *buf = mdev_state->s[index].uart_reg[offset];
+ break;
+
+ case UART_LSR:
+ {
+ u8 lsr = 0;
+
+ mutex_lock(&mdev_state->rxtx_lock);
+ /* atleast one char in FIFO */
+ if (mdev_state->s[index].rxtx.head !=
+ mdev_state->s[index].rxtx.tail)
+ lsr |= UART_LSR_DR;
+
+ /* if FIFO overrun */
+ if (mdev_state->s[index].overrun)
+ lsr |= UART_LSR_OE;
+
+ /* transmit FIFO empty and tramsitter empty */
+ if (mdev_state->s[index].rxtx.head ==
+ mdev_state->s[index].rxtx.tail)
+ lsr |= UART_LSR_TEMT | UART_LSR_THRE;
+
+ mutex_unlock(&mdev_state->rxtx_lock);
+ *buf = lsr;
+ break;
+ }
+ case UART_MSR:
+ *buf = UART_MSR_DSR | UART_MSR_DDSR | UART_MSR_DCD;
+
+ mutex_lock(&mdev_state->rxtx_lock);
+ /* if AFE is 1 and FIFO have space, set CTS bit */
+ if (mdev_state->s[index].uart_reg[UART_MCR] &
+ UART_MCR_AFE) {
+ if (mdev_state->s[index].rxtx.count <
+ mdev_state->s[index].max_fifo_size)
+ *buf |= UART_MSR_CTS | UART_MSR_DCTS;
+ } else
+ *buf |= UART_MSR_CTS | UART_MSR_DCTS;
+ mutex_unlock(&mdev_state->rxtx_lock);
+
+ break;
+
+ case UART_SCR:
+ *buf = mdev_state->s[index].uart_reg[offset];
+ break;
+
+ default:
+ break;
+ }
+}
+
+static void mdev_read_base(struct mdev_state *mdev_state)
+{
+ int index, pos;
+ u32 start_lo, start_hi;
+ u32 mem_type;
+
+ pos = PCI_BASE_ADDRESS_0;
+
+ for (index = 0; index <= VFIO_PCI_BAR5_REGION_INDEX; index++) {
+
+ if (!mdev_state->region_info[index].size)
+ continue;
+
+ start_lo = (*(u32 *)(mdev_state->vconfig + pos)) &
+ PCI_BASE_ADDRESS_MEM_MASK;
+ mem_type = (*(u32 *)(mdev_state->vconfig + pos)) &
+ PCI_BASE_ADDRESS_MEM_TYPE_MASK;
+
+ switch (mem_type) {
+ case PCI_BASE_ADDRESS_MEM_TYPE_64:
+ start_hi = (*(u32 *)(mdev_state->vconfig + pos + 4));
+ pos += 4;
+ break;
+ case PCI_BASE_ADDRESS_MEM_TYPE_32:
+ case PCI_BASE_ADDRESS_MEM_TYPE_1M:
+ /* 1M mem BAR treated as 32-bit BAR */
+ default:
+ /* mem unknown type treated as 32-bit BAR */
+ start_hi = 0;
+ break;
+ }
+ pos += 4;
+ mdev_state->region_info[index].start = ((u64)start_hi << 32) |
+ start_lo;
+ }
+}
+
+static ssize_t mdev_access(struct mdev_state *mdev_state, u8 *buf, size_t count,
+ loff_t pos, bool is_write)
+{
+ unsigned int index;
+ loff_t offset;
+ int ret = 0;
+
+ if (!buf)
+ return -EINVAL;
+
+ mutex_lock(&mdev_state->ops_lock);
+
+ index = MTTY_VFIO_PCI_OFFSET_TO_INDEX(pos);
+ offset = pos & MTTY_VFIO_PCI_OFFSET_MASK;
+ switch (index) {
+ case VFIO_PCI_CONFIG_REGION_INDEX:
+
+#if defined(DEBUG)
+ pr_info("%s: PCI config space %s at offset 0x%llx\n",
+ __func__, is_write ? "write" : "read", offset);
+#endif
+ if (is_write) {
+ dump_buffer(buf, count);
+ handle_pci_cfg_write(mdev_state, offset, buf, count);
+ } else {
+ memcpy(buf, (mdev_state->vconfig + offset), count);
+ dump_buffer(buf, count);
+ }
+
+ break;
+
+ case VFIO_PCI_BAR0_REGION_INDEX ... VFIO_PCI_BAR5_REGION_INDEX:
+ if (!mdev_state->region_info[index].start)
+ mdev_read_base(mdev_state);
+
+ if (is_write) {
+ dump_buffer(buf, count);
+
+#if defined(DEBUG_REGS)
+ pr_info("%s: BAR%d WR @0x%llx %s val:0x%02x dlab:%d\n",
+ __func__, index, offset, wr_reg[offset],
+ *buf, mdev_state->s[index].dlab);
+#endif
+ handle_bar_write(index, mdev_state, offset, buf, count);
+ } else {
+ handle_bar_read(index, mdev_state, offset, buf, count);
+ dump_buffer(buf, count);
+
+#if defined(DEBUG_REGS)
+ pr_info("%s: BAR%d RD @0x%llx %s val:0x%02x dlab:%d\n",
+ __func__, index, offset, rd_reg[offset],
+ *buf, mdev_state->s[index].dlab);
+#endif
+ }
+ break;
+
+ default:
+ ret = -1;
+ goto accessfailed;
+ }
+
+ ret = count;
+
+
+accessfailed:
+ mutex_unlock(&mdev_state->ops_lock);
+
+ return ret;
+}
+
+static int mtty_init_dev(struct vfio_device *vdev)
+{
+ struct mdev_state *mdev_state =
+ container_of(vdev, struct mdev_state, vdev);
+ struct mdev_device *mdev = to_mdev_device(vdev->dev);
+ struct mtty_type *type =
+ container_of(mdev->type, struct mtty_type, type);
+ int avail_ports = atomic_read(&mdev_avail_ports);
+ int ret;
+
+ do {
+ if (avail_ports < type->nr_ports)
+ return -ENOSPC;
+ } while (!atomic_try_cmpxchg(&mdev_avail_ports,
+ &avail_ports,
+ avail_ports - type->nr_ports));
+
+ mdev_state->nr_ports = type->nr_ports;
+ mdev_state->irq_index = -1;
+ mdev_state->s[0].max_fifo_size = MAX_FIFO_SIZE;
+ mdev_state->s[1].max_fifo_size = MAX_FIFO_SIZE;
+ mutex_init(&mdev_state->rxtx_lock);
+
+ mdev_state->vconfig = kzalloc(MTTY_CONFIG_SPACE_SIZE, GFP_KERNEL);
+ if (!mdev_state->vconfig) {
+ ret = -ENOMEM;
+ goto err_nr_ports;
+ }
+
+ mutex_init(&mdev_state->ops_lock);
+ mdev_state->mdev = mdev;
+ mtty_create_config_space(mdev_state);
+ return 0;
+
+err_nr_ports:
+ atomic_add(type->nr_ports, &mdev_avail_ports);
+ return ret;
+}
+
+static int mtty_probe(struct mdev_device *mdev)
+{
+ struct mdev_state *mdev_state;
+ int ret;
+
+ mdev_state = vfio_alloc_device(mdev_state, vdev, &mdev->dev,
+ &mtty_dev_ops);
+ if (IS_ERR(mdev_state))
+ return PTR_ERR(mdev_state);
+
+ ret = vfio_register_emulated_iommu_dev(&mdev_state->vdev);
+ if (ret)
+ goto err_put_vdev;
+ dev_set_drvdata(&mdev->dev, mdev_state);
+ return 0;
+
+err_put_vdev:
+ vfio_put_device(&mdev_state->vdev);
+ return ret;
+}
+
+static void mtty_release_dev(struct vfio_device *vdev)
+{
+ struct mdev_state *mdev_state =
+ container_of(vdev, struct mdev_state, vdev);
+
+ atomic_add(mdev_state->nr_ports, &mdev_avail_ports);
+ kfree(mdev_state->vconfig);
+}
+
+static void mtty_remove(struct mdev_device *mdev)
+{
+ struct mdev_state *mdev_state = dev_get_drvdata(&mdev->dev);
+
+ vfio_unregister_group_dev(&mdev_state->vdev);
+ vfio_put_device(&mdev_state->vdev);
+}
+
+static int mtty_reset(struct mdev_state *mdev_state)
+{
+ pr_info("%s: called\n", __func__);
+
+ return 0;
+}
+
+static ssize_t mtty_read(struct vfio_device *vdev, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct mdev_state *mdev_state =
+ container_of(vdev, struct mdev_state, vdev);
+ unsigned int done = 0;
+ int ret;
+
+ while (count) {
+ size_t filled;
+
+ if (count >= 4 && !(*ppos % 4)) {
+ u32 val;
+
+ ret = mdev_access(mdev_state, (u8 *)&val, sizeof(val),
+ *ppos, false);
+ if (ret <= 0)
+ goto read_err;
+
+ if (copy_to_user(buf, &val, sizeof(val)))
+ goto read_err;
+
+ filled = 4;
+ } else if (count >= 2 && !(*ppos % 2)) {
+ u16 val;
+
+ ret = mdev_access(mdev_state, (u8 *)&val, sizeof(val),
+ *ppos, false);
+ if (ret <= 0)
+ goto read_err;
+
+ if (copy_to_user(buf, &val, sizeof(val)))
+ goto read_err;
+
+ filled = 2;
+ } else {
+ u8 val;
+
+ ret = mdev_access(mdev_state, (u8 *)&val, sizeof(val),
+ *ppos, false);
+ if (ret <= 0)
+ goto read_err;
+
+ if (copy_to_user(buf, &val, sizeof(val)))
+ goto read_err;
+
+ filled = 1;
+ }
+
+ count -= filled;
+ done += filled;
+ *ppos += filled;
+ buf += filled;
+ }
+
+ return done;
+
+read_err:
+ return -EFAULT;
+}
+
+static ssize_t mtty_write(struct vfio_device *vdev, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct mdev_state *mdev_state =
+ container_of(vdev, struct mdev_state, vdev);
+ unsigned int done = 0;
+ int ret;
+
+ while (count) {
+ size_t filled;
+
+ if (count >= 4 && !(*ppos % 4)) {
+ u32 val;
+
+ if (copy_from_user(&val, buf, sizeof(val)))
+ goto write_err;
+
+ ret = mdev_access(mdev_state, (u8 *)&val, sizeof(val),
+ *ppos, true);
+ if (ret <= 0)
+ goto write_err;
+
+ filled = 4;
+ } else if (count >= 2 && !(*ppos % 2)) {
+ u16 val;
+
+ if (copy_from_user(&val, buf, sizeof(val)))
+ goto write_err;
+
+ ret = mdev_access(mdev_state, (u8 *)&val, sizeof(val),
+ *ppos, true);
+ if (ret <= 0)
+ goto write_err;
+
+ filled = 2;
+ } else {
+ u8 val;
+
+ if (copy_from_user(&val, buf, sizeof(val)))
+ goto write_err;
+
+ ret = mdev_access(mdev_state, (u8 *)&val, sizeof(val),
+ *ppos, true);
+ if (ret <= 0)
+ goto write_err;
+
+ filled = 1;
+ }
+ count -= filled;
+ done += filled;
+ *ppos += filled;
+ buf += filled;
+ }
+
+ return done;
+write_err:
+ return -EFAULT;
+}
+
+static void mtty_disable_intx(struct mdev_state *mdev_state)
+{
+ if (mdev_state->intx_evtfd) {
+ eventfd_ctx_put(mdev_state->intx_evtfd);
+ mdev_state->intx_evtfd = NULL;
+ mdev_state->intx_mask = false;
+ mdev_state->irq_index = -1;
+ }
+}
+
+static void mtty_disable_msi(struct mdev_state *mdev_state)
+{
+ if (mdev_state->msi_evtfd) {
+ eventfd_ctx_put(mdev_state->msi_evtfd);
+ mdev_state->msi_evtfd = NULL;
+ mdev_state->irq_index = -1;
+ }
+}
+
+static int mtty_set_irqs(struct mdev_state *mdev_state, uint32_t flags,
+ unsigned int index, unsigned int start,
+ unsigned int count, void *data)
+{
+ int ret = 0;
+
+ mutex_lock(&mdev_state->ops_lock);
+ switch (index) {
+ case VFIO_PCI_INTX_IRQ_INDEX:
+ switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
+ case VFIO_IRQ_SET_ACTION_MASK:
+ if (!is_intx(mdev_state) || start != 0 || count != 1) {
+ ret = -EINVAL;
+ break;
+ }
+
+ if (flags & VFIO_IRQ_SET_DATA_NONE) {
+ mdev_state->intx_mask = true;
+ } else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
+ uint8_t mask = *(uint8_t *)data;
+
+ if (mask)
+ mdev_state->intx_mask = true;
+ } else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
+ ret = -ENOTTY; /* No support for mask fd */
+ }
+ break;
+ case VFIO_IRQ_SET_ACTION_UNMASK:
+ if (!is_intx(mdev_state) || start != 0 || count != 1) {
+ ret = -EINVAL;
+ break;
+ }
+
+ if (flags & VFIO_IRQ_SET_DATA_NONE) {
+ mdev_state->intx_mask = false;
+ } else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
+ uint8_t mask = *(uint8_t *)data;
+
+ if (mask)
+ mdev_state->intx_mask = false;
+ } else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
+ ret = -ENOTTY; /* No support for unmask fd */
+ }
+ break;
+ case VFIO_IRQ_SET_ACTION_TRIGGER:
+ if (is_intx(mdev_state) && !count &&
+ (flags & VFIO_IRQ_SET_DATA_NONE)) {
+ mtty_disable_intx(mdev_state);
+ break;
+ }
+
+ if (!(is_intx(mdev_state) || is_noirq(mdev_state)) ||
+ start != 0 || count != 1) {
+ ret = -EINVAL;
+ break;
+ }
+
+ if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
+ int fd = *(int *)data;
+ struct eventfd_ctx *evt;
+
+ mtty_disable_intx(mdev_state);
+
+ if (fd < 0)
+ break;
+
+ evt = eventfd_ctx_fdget(fd);
+ if (IS_ERR(evt)) {
+ ret = PTR_ERR(evt);
+ break;
+ }
+ mdev_state->intx_evtfd = evt;
+ mdev_state->irq_index = index;
+ break;
+ }
+
+ if (!is_intx(mdev_state)) {
+ ret = -EINVAL;
+ break;
+ }
+
+ if (flags & VFIO_IRQ_SET_DATA_NONE) {
+ mtty_trigger_interrupt(mdev_state);
+ } else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
+ uint8_t trigger = *(uint8_t *)data;
+
+ if (trigger)
+ mtty_trigger_interrupt(mdev_state);
+ }
+ break;
+ }
+ break;
+ case VFIO_PCI_MSI_IRQ_INDEX:
+ switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
+ case VFIO_IRQ_SET_ACTION_MASK:
+ case VFIO_IRQ_SET_ACTION_UNMASK:
+ ret = -ENOTTY;
+ break;
+ case VFIO_IRQ_SET_ACTION_TRIGGER:
+ if (is_msi(mdev_state) && !count &&
+ (flags & VFIO_IRQ_SET_DATA_NONE)) {
+ mtty_disable_msi(mdev_state);
+ break;
+ }
+
+ if (!(is_msi(mdev_state) || is_noirq(mdev_state)) ||
+ start != 0 || count != 1) {
+ ret = -EINVAL;
+ break;
+ }
+
+ if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
+ int fd = *(int *)data;
+ struct eventfd_ctx *evt;
+
+ mtty_disable_msi(mdev_state);
+
+ if (fd < 0)
+ break;
+
+ evt = eventfd_ctx_fdget(fd);
+ if (IS_ERR(evt)) {
+ ret = PTR_ERR(evt);
+ break;
+ }
+ mdev_state->msi_evtfd = evt;
+ mdev_state->irq_index = index;
+ break;
+ }
+
+ if (!is_msi(mdev_state)) {
+ ret = -EINVAL;
+ break;
+ }
+
+ if (flags & VFIO_IRQ_SET_DATA_NONE) {
+ mtty_trigger_interrupt(mdev_state);
+ } else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
+ uint8_t trigger = *(uint8_t *)data;
+
+ if (trigger)
+ mtty_trigger_interrupt(mdev_state);
+ }
+ break;
+ }
+ break;
+ case VFIO_PCI_MSIX_IRQ_INDEX:
+ dev_dbg(mdev_state->vdev.dev, "%s: MSIX_IRQ\n", __func__);
+ ret = -ENOTTY;
+ break;
+ case VFIO_PCI_ERR_IRQ_INDEX:
+ dev_dbg(mdev_state->vdev.dev, "%s: ERR_IRQ\n", __func__);
+ ret = -ENOTTY;
+ break;
+ case VFIO_PCI_REQ_IRQ_INDEX:
+ dev_dbg(mdev_state->vdev.dev, "%s: REQ_IRQ\n", __func__);
+ ret = -ENOTTY;
+ break;
+ }
+
+ mutex_unlock(&mdev_state->ops_lock);
+ return ret;
+}
+
+static int mtty_get_region_info(struct mdev_state *mdev_state,
+ struct vfio_region_info *region_info,
+ u16 *cap_type_id, void **cap_type)
+{
+ unsigned int size = 0;
+ u32 bar_index;
+
+ bar_index = region_info->index;
+ if (bar_index >= VFIO_PCI_NUM_REGIONS)
+ return -EINVAL;
+
+ mutex_lock(&mdev_state->ops_lock);
+
+ switch (bar_index) {
+ case VFIO_PCI_CONFIG_REGION_INDEX:
+ size = MTTY_CONFIG_SPACE_SIZE;
+ break;
+ case VFIO_PCI_BAR0_REGION_INDEX:
+ size = MTTY_IO_BAR_SIZE;
+ break;
+ case VFIO_PCI_BAR1_REGION_INDEX:
+ if (mdev_state->nr_ports == 2)
+ size = MTTY_IO_BAR_SIZE;
+ break;
+ default:
+ size = 0;
+ break;
+ }
+
+ mdev_state->region_info[bar_index].size = size;
+ mdev_state->region_info[bar_index].vfio_offset =
+ MTTY_VFIO_PCI_INDEX_TO_OFFSET(bar_index);
+
+ region_info->size = size;
+ region_info->offset = MTTY_VFIO_PCI_INDEX_TO_OFFSET(bar_index);
+ region_info->flags = VFIO_REGION_INFO_FLAG_READ |
+ VFIO_REGION_INFO_FLAG_WRITE;
+ mutex_unlock(&mdev_state->ops_lock);
+ return 0;
+}
+
+static int mtty_get_irq_info(struct vfio_irq_info *irq_info)
+{
+ if (irq_info->index != VFIO_PCI_INTX_IRQ_INDEX &&
+ irq_info->index != VFIO_PCI_MSI_IRQ_INDEX)
+ return -EINVAL;
+
+ irq_info->flags = VFIO_IRQ_INFO_EVENTFD;
+ irq_info->count = 1;
+
+ if (irq_info->index == VFIO_PCI_INTX_IRQ_INDEX)
+ irq_info->flags |= VFIO_IRQ_INFO_MASKABLE |
+ VFIO_IRQ_INFO_AUTOMASKED;
+ else
+ irq_info->flags |= VFIO_IRQ_INFO_NORESIZE;
+
+ return 0;
+}
+
+static int mtty_get_device_info(struct vfio_device_info *dev_info)
+{
+ dev_info->flags = VFIO_DEVICE_FLAGS_PCI;
+ dev_info->num_regions = VFIO_PCI_NUM_REGIONS;
+ dev_info->num_irqs = VFIO_PCI_NUM_IRQS;
+
+ return 0;
+}
+
+static long mtty_ioctl(struct vfio_device *vdev, unsigned int cmd,
+ unsigned long arg)
+{
+ struct mdev_state *mdev_state =
+ container_of(vdev, struct mdev_state, vdev);
+ int ret = 0;
+ unsigned long minsz;
+
+ switch (cmd) {
+ case VFIO_DEVICE_GET_INFO:
+ {
+ struct vfio_device_info info;
+
+ minsz = offsetofend(struct vfio_device_info, num_irqs);
+
+ if (copy_from_user(&info, (void __user *)arg, minsz))
+ return -EFAULT;
+
+ if (info.argsz < minsz)
+ return -EINVAL;
+
+ ret = mtty_get_device_info(&info);
+ if (ret)
+ return ret;
+
+ memcpy(&mdev_state->dev_info, &info, sizeof(info));
+
+ if (copy_to_user((void __user *)arg, &info, minsz))
+ return -EFAULT;
+
+ return 0;
+ }
+ case VFIO_DEVICE_GET_REGION_INFO:
+ {
+ struct vfio_region_info info;
+ u16 cap_type_id = 0;
+ void *cap_type = NULL;
+
+ minsz = offsetofend(struct vfio_region_info, offset);
+
+ if (copy_from_user(&info, (void __user *)arg, minsz))
+ return -EFAULT;
+
+ if (info.argsz < minsz)
+ return -EINVAL;
+
+ ret = mtty_get_region_info(mdev_state, &info, &cap_type_id,
+ &cap_type);
+ if (ret)
+ return ret;
+
+ if (copy_to_user((void __user *)arg, &info, minsz))
+ return -EFAULT;
+
+ return 0;
+ }
+
+ case VFIO_DEVICE_GET_IRQ_INFO:
+ {
+ struct vfio_irq_info info;
+
+ minsz = offsetofend(struct vfio_irq_info, count);
+
+ if (copy_from_user(&info, (void __user *)arg, minsz))
+ return -EFAULT;
+
+ if ((info.argsz < minsz) ||
+ (info.index >= mdev_state->dev_info.num_irqs))
+ return -EINVAL;
+
+ ret = mtty_get_irq_info(&info);
+ if (ret)
+ return ret;
+
+ if (copy_to_user((void __user *)arg, &info, minsz))
+ return -EFAULT;
+
+ return 0;
+ }
+ case VFIO_DEVICE_SET_IRQS:
+ {
+ struct vfio_irq_set hdr;
+ u8 *data = NULL, *ptr = NULL;
+ size_t data_size = 0;
+
+ minsz = offsetofend(struct vfio_irq_set, count);
+
+ if (copy_from_user(&hdr, (void __user *)arg, minsz))
+ return -EFAULT;
+
+ ret = vfio_set_irqs_validate_and_prepare(&hdr,
+ mdev_state->dev_info.num_irqs,
+ VFIO_PCI_NUM_IRQS,
+ &data_size);
+ if (ret)
+ return ret;
+
+ if (data_size) {
+ ptr = data = memdup_user((void __user *)(arg + minsz),
+ data_size);
+ if (IS_ERR(data))
+ return PTR_ERR(data);
+ }
+
+ ret = mtty_set_irqs(mdev_state, hdr.flags, hdr.index, hdr.start,
+ hdr.count, data);
+
+ kfree(ptr);
+ return ret;
+ }
+ case VFIO_DEVICE_RESET:
+ return mtty_reset(mdev_state);
+ }
+ return -ENOTTY;
+}
+
+static ssize_t
+sample_mdev_dev_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ return sprintf(buf, "This is MDEV %s\n", dev_name(dev));
+}
+
+static DEVICE_ATTR_RO(sample_mdev_dev);
+
+static struct attribute *mdev_dev_attrs[] = {
+ &dev_attr_sample_mdev_dev.attr,
+ NULL,
+};
+
+static const struct attribute_group mdev_dev_group = {
+ .name = "vendor",
+ .attrs = mdev_dev_attrs,
+};
+
+static const struct attribute_group *mdev_dev_groups[] = {
+ &mdev_dev_group,
+ NULL,
+};
+
+static unsigned int mtty_get_available(struct mdev_type *mtype)
+{
+ struct mtty_type *type = container_of(mtype, struct mtty_type, type);
+
+ return atomic_read(&mdev_avail_ports) / type->nr_ports;
+}
+
+static void mtty_close(struct vfio_device *vdev)
+{
+ struct mdev_state *mdev_state =
+ container_of(vdev, struct mdev_state, vdev);
+
+ mtty_disable_intx(mdev_state);
+ mtty_disable_msi(mdev_state);
+}
+
+static const struct vfio_device_ops mtty_dev_ops = {
+ .name = "vfio-mtty",
+ .init = mtty_init_dev,
+ .release = mtty_release_dev,
+ .read = mtty_read,
+ .write = mtty_write,
+ .ioctl = mtty_ioctl,
+ .bind_iommufd = vfio_iommufd_emulated_bind,
+ .unbind_iommufd = vfio_iommufd_emulated_unbind,
+ .attach_ioas = vfio_iommufd_emulated_attach_ioas,
+ .detach_ioas = vfio_iommufd_emulated_detach_ioas,
+ .close_device = mtty_close,
+};
+
+static struct mdev_driver mtty_driver = {
+ .device_api = VFIO_DEVICE_API_PCI_STRING,
+ .driver = {
+ .name = "mtty",
+ .owner = THIS_MODULE,
+ .mod_name = KBUILD_MODNAME,
+ .dev_groups = mdev_dev_groups,
+ },
+ .probe = mtty_probe,
+ .remove = mtty_remove,
+ .get_available = mtty_get_available,
+};
+
+static void mtty_device_release(struct device *dev)
+{
+ dev_dbg(dev, "mtty: released\n");
+}
+
+static int __init mtty_dev_init(void)
+{
+ int ret = 0;
+
+ pr_info("mtty_dev: %s\n", __func__);
+
+ memset(&mtty_dev, 0, sizeof(mtty_dev));
+
+ idr_init(&mtty_dev.vd_idr);
+
+ ret = alloc_chrdev_region(&mtty_dev.vd_devt, 0, MINORMASK + 1,
+ MTTY_NAME);
+
+ if (ret < 0) {
+ pr_err("Error: failed to register mtty_dev, err:%d\n", ret);
+ return ret;
+ }
+
+ cdev_init(&mtty_dev.vd_cdev, &vd_fops);
+ cdev_add(&mtty_dev.vd_cdev, mtty_dev.vd_devt, MINORMASK + 1);
+
+ pr_info("major_number:%d\n", MAJOR(mtty_dev.vd_devt));
+
+ ret = mdev_register_driver(&mtty_driver);
+ if (ret)
+ goto err_cdev;
+
+ mtty_dev.vd_class = class_create(MTTY_CLASS_NAME);
+
+ if (IS_ERR(mtty_dev.vd_class)) {
+ pr_err("Error: failed to register mtty_dev class\n");
+ ret = PTR_ERR(mtty_dev.vd_class);
+ goto err_driver;
+ }
+
+ mtty_dev.dev.class = mtty_dev.vd_class;
+ mtty_dev.dev.release = mtty_device_release;
+ dev_set_name(&mtty_dev.dev, "%s", MTTY_NAME);
+
+ ret = device_register(&mtty_dev.dev);
+ if (ret)
+ goto err_put;
+
+ ret = mdev_register_parent(&mtty_dev.parent, &mtty_dev.dev,
+ &mtty_driver, mtty_mdev_types,
+ ARRAY_SIZE(mtty_mdev_types));
+ if (ret)
+ goto err_device;
+ return 0;
+
+err_device:
+ device_del(&mtty_dev.dev);
+err_put:
+ put_device(&mtty_dev.dev);
+ class_destroy(mtty_dev.vd_class);
+err_driver:
+ mdev_unregister_driver(&mtty_driver);
+err_cdev:
+ cdev_del(&mtty_dev.vd_cdev);
+ unregister_chrdev_region(mtty_dev.vd_devt, MINORMASK + 1);
+ return ret;
+}
+
+static void __exit mtty_dev_exit(void)
+{
+ mtty_dev.dev.bus = NULL;
+ mdev_unregister_parent(&mtty_dev.parent);
+
+ device_unregister(&mtty_dev.dev);
+ idr_destroy(&mtty_dev.vd_idr);
+ mdev_unregister_driver(&mtty_driver);
+ cdev_del(&mtty_dev.vd_cdev);
+ unregister_chrdev_region(mtty_dev.vd_devt, MINORMASK + 1);
+ class_destroy(mtty_dev.vd_class);
+ mtty_dev.vd_class = NULL;
+ pr_info("mtty_dev: Unloaded!\n");
+}
+
+module_init(mtty_dev_init)
+module_exit(mtty_dev_exit)
+
+MODULE_LICENSE("GPL v2");
+MODULE_INFO(supported, "Test driver that simulate serial port over PCI");
+MODULE_VERSION(VERSION_STRING);
+MODULE_AUTHOR(DRIVER_AUTHOR);