author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 18:49:45 +0000
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 18:49:45 +0000
commit | 2c3c1048746a4622d8c89a29670120dc8fab93c4 (patch)
tree | 848558de17fb3008cdf4d861b01ac7781903ce39 /drivers/misc/vmw_vmci/vmci_guest.c
parent | Initial commit. (diff)
download | linux-2c3c1048746a4622d8c89a29670120dc8fab93c4.tar.xz, linux-2c3c1048746a4622d8c89a29670120dc8fab93c4.zip
Adding upstream version 6.1.76.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'drivers/misc/vmw_vmci/vmci_guest.c')
-rw-r--r-- | drivers/misc/vmw_vmci/vmci_guest.c | 978
1 file changed, 978 insertions(+), 0 deletions(-)
diff --git a/drivers/misc/vmw_vmci/vmci_guest.c b/drivers/misc/vmw_vmci/vmci_guest.c
new file mode 100644
index 000000000..4f8d962bb
--- /dev/null
+++ b/drivers/misc/vmw_vmci/vmci_guest.c
@@ -0,0 +1,978 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * VMware VMCI Driver
+ *
+ * Copyright (C) 2012 VMware, Inc. All rights reserved.
+ */
+
+#include <linux/vmw_vmci_defs.h>
+#include <linux/vmw_vmci_api.h>
+#include <linux/moduleparam.h>
+#include <linux/interrupt.h>
+#include <linux/highmem.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/processor.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/smp.h>
+#include <linux/io.h>
+#include <linux/vmalloc.h>
+
+#include "vmci_datagram.h"
+#include "vmci_doorbell.h"
+#include "vmci_context.h"
+#include "vmci_driver.h"
+#include "vmci_event.h"
+
+#define PCI_DEVICE_ID_VMWARE_VMCI	0x0740
+
+#define VMCI_UTIL_NUM_RESOURCES 1
+
+/*
+ * Datagram buffers for DMA send/receive must accommodate at least
+ * a maximum sized datagram and the header.
+ */
+#define VMCI_DMA_DG_BUFFER_SIZE (VMCI_MAX_DG_SIZE + PAGE_SIZE)
+
+static bool vmci_disable_msi;
+module_param_named(disable_msi, vmci_disable_msi, bool, 0);
+MODULE_PARM_DESC(disable_msi, "Disable MSI use in driver - (default=0)");
+
+static bool vmci_disable_msix;
+module_param_named(disable_msix, vmci_disable_msix, bool, 0);
+MODULE_PARM_DESC(disable_msix, "Disable MSI-X use in driver - (default=0)");
+
+static u32 ctx_update_sub_id = VMCI_INVALID_ID;
+static u32 vm_context_id = VMCI_INVALID_ID;
+
+struct vmci_guest_device {
+	struct device *dev;	/* PCI device we are attached to */
+	void __iomem *iobase;
+	void __iomem *mmio_base;
+
+	bool exclusive_vectors;
+
+	struct wait_queue_head inout_wq;
+
+	void *data_buffer;
+	dma_addr_t data_buffer_base;
+	void *tx_buffer;
+	dma_addr_t tx_buffer_base;
+	void *notification_bitmap;
+	dma_addr_t notification_base;
+};
+
+static bool use_ppn64;
+
+bool vmci_use_ppn64(void)
+{
+	return use_ppn64;
+}
+
+/* vmci_dev singleton device and supporting data */
+struct pci_dev *vmci_pdev;
+static struct vmci_guest_device *vmci_dev_g;
+static DEFINE_SPINLOCK(vmci_dev_spinlock);
+
+static atomic_t vmci_num_guest_devices = ATOMIC_INIT(0);
+
+bool vmci_guest_code_active(void)
+{
+	return atomic_read(&vmci_num_guest_devices) != 0;
+}
+
+u32 vmci_get_vm_context_id(void)
+{
+	if (vm_context_id == VMCI_INVALID_ID) {
+		struct vmci_datagram get_cid_msg;
+		get_cid_msg.dst =
+		    vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
+				     VMCI_GET_CONTEXT_ID);
+		get_cid_msg.src = VMCI_ANON_SRC_HANDLE;
+		get_cid_msg.payload_size = 0;
+		vm_context_id = vmci_send_datagram(&get_cid_msg);
+	}
+	return vm_context_id;
+}
+
+static unsigned int vmci_read_reg(struct vmci_guest_device *dev, u32 reg)
+{
+	if (dev->mmio_base != NULL)
+		return readl(dev->mmio_base + reg);
+	return ioread32(dev->iobase + reg);
+}
+
+static void vmci_write_reg(struct vmci_guest_device *dev, u32 val, u32 reg)
+{
+	if (dev->mmio_base != NULL)
+		writel(val, dev->mmio_base + reg);
+	else
+		iowrite32(val, dev->iobase + reg);
+}
+
+static void vmci_read_data(struct vmci_guest_device *vmci_dev,
+			   void *dest, size_t size)
+{
+	if (vmci_dev->mmio_base == NULL)
+		ioread8_rep(vmci_dev->iobase + VMCI_DATA_IN_ADDR,
+			    dest, size);
+	else {
+		/*
+		 * For DMA datagrams, the data_buffer will contain the header on the
+		 * first page, followed by the incoming datagram(s) on the following
+		 * pages. The header uses an S/G element immediately following the
+		 * header on the first page to point to the data area.
+		 */
+		struct vmci_data_in_out_header *buffer_header = vmci_dev->data_buffer;
+		struct vmci_sg_elem *sg_array = (struct vmci_sg_elem *)(buffer_header + 1);
+		size_t buffer_offset = dest - vmci_dev->data_buffer;
+
+		buffer_header->opcode = 1;
+		buffer_header->size = 1;
+		buffer_header->busy = 0;
+		sg_array[0].addr = vmci_dev->data_buffer_base + buffer_offset;
+		sg_array[0].size = size;
+
+		vmci_write_reg(vmci_dev, lower_32_bits(vmci_dev->data_buffer_base),
+			       VMCI_DATA_IN_LOW_ADDR);
+
+		wait_event(vmci_dev->inout_wq, buffer_header->busy == 1);
+	}
+}
+
+static int vmci_write_data(struct vmci_guest_device *dev,
+			   struct vmci_datagram *dg)
+{
+	int result;
+
+	if (dev->mmio_base != NULL) {
+		struct vmci_data_in_out_header *buffer_header = dev->tx_buffer;
+		u8 *dg_out_buffer = (u8 *)(buffer_header + 1);
+
+		if (VMCI_DG_SIZE(dg) > VMCI_MAX_DG_SIZE)
+			return VMCI_ERROR_INVALID_ARGS;
+
+		/*
+		 * Initialize send buffer with outgoing datagram
+		 * and set up header for inline data. Device will
+		 * not access buffer asynchronously - only after
+		 * the write to VMCI_DATA_OUT_LOW_ADDR.
+		 */
+		memcpy(dg_out_buffer, dg, VMCI_DG_SIZE(dg));
+		buffer_header->opcode = 0;
+		buffer_header->size = VMCI_DG_SIZE(dg);
+		buffer_header->busy = 1;
+
+		vmci_write_reg(dev, lower_32_bits(dev->tx_buffer_base),
+			       VMCI_DATA_OUT_LOW_ADDR);
+
+		/* Caller holds a spinlock, so cannot block. */
+		spin_until_cond(buffer_header->busy == 0);
+
+		result = vmci_read_reg(vmci_dev_g, VMCI_RESULT_LOW_ADDR);
+		if (result == VMCI_SUCCESS)
+			result = (int)buffer_header->result;
+	} else {
+		iowrite8_rep(dev->iobase + VMCI_DATA_OUT_ADDR,
+			     dg, VMCI_DG_SIZE(dg));
+		result = vmci_read_reg(vmci_dev_g, VMCI_RESULT_LOW_ADDR);
+	}
+
+	return result;
+}
+
+/*
+ * VM to hypervisor call mechanism. We use the standard VMware naming
+ * convention since shared code is calling this function as well.
+ */
+int vmci_send_datagram(struct vmci_datagram *dg)
+{
+	unsigned long flags;
+	int result;
+
+	/* Check args. */
+	if (dg == NULL)
+		return VMCI_ERROR_INVALID_ARGS;
+
+	/*
+	 * Need to acquire spinlock on the device because the datagram
+	 * data may be spread over multiple pages and the monitor may
+	 * interleave device user rpc calls from multiple
+	 * VCPUs. Acquiring the spinlock precludes that
+	 * possibility. Interrupts are disabled to avoid incoming
+	 * datagrams during a "rep out" and possibly ending up in
+	 * this function.
+	 */
+	spin_lock_irqsave(&vmci_dev_spinlock, flags);
+
+	if (vmci_dev_g)
+		result = vmci_write_data(vmci_dev_g, dg);
+	else
+		result = VMCI_ERROR_UNAVAILABLE;
+
+	spin_unlock_irqrestore(&vmci_dev_spinlock, flags);
+
+	return result;
+}
+EXPORT_SYMBOL_GPL(vmci_send_datagram);
+
+/*
+ * Gets called with the new context id if the context is updated or
+ * resumed.
+ */
+static void vmci_guest_cid_update(u32 sub_id,
+				  const struct vmci_event_data *event_data,
+				  void *client_data)
+{
+	const struct vmci_event_payld_ctx *ev_payload =
+				vmci_event_data_const_payload(event_data);
+
+	if (sub_id != ctx_update_sub_id) {
+		pr_devel("Invalid subscriber (ID=0x%x)\n", sub_id);
+		return;
+	}
+
+	if (!event_data || ev_payload->context_id == VMCI_INVALID_ID) {
+		pr_devel("Invalid event data\n");
+		return;
+	}
+
+	pr_devel("Updating context from (ID=0x%x) to (ID=0x%x) on event (type=%d)\n",
+		 vm_context_id, ev_payload->context_id, event_data->event);
+
+	vm_context_id = ev_payload->context_id;
+}
+
+/*
+ * Verify that the host supports the hypercalls we need. If it does not,
+ * try to find fallback hypercalls and use those instead. Returns 0 if
+ * required hypercalls (or fallback hypercalls) are supported by the host,
+ * an error code otherwise.
+ */
+static int vmci_check_host_caps(struct pci_dev *pdev)
+{
+	bool result;
+	struct vmci_resource_query_msg *msg;
+	u32 msg_size = sizeof(struct vmci_resource_query_hdr) +
+				VMCI_UTIL_NUM_RESOURCES * sizeof(u32);
+	struct vmci_datagram *check_msg;
+
+	check_msg = kzalloc(msg_size, GFP_KERNEL);
+	if (!check_msg) {
+		dev_err(&pdev->dev, "%s: Insufficient memory\n", __func__);
+		return -ENOMEM;
+	}
+
+	check_msg->dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
+					  VMCI_RESOURCES_QUERY);
+	check_msg->src = VMCI_ANON_SRC_HANDLE;
+	check_msg->payload_size = msg_size - VMCI_DG_HEADERSIZE;
+	msg = (struct vmci_resource_query_msg *)VMCI_DG_PAYLOAD(check_msg);
+
+	msg->num_resources = VMCI_UTIL_NUM_RESOURCES;
+	msg->resources[0] = VMCI_GET_CONTEXT_ID;
+
+	/* Checks that hypercalls are supported */
+	result = vmci_send_datagram(check_msg) == 0x01;
+	kfree(check_msg);
+
+	dev_dbg(&pdev->dev, "%s: Host capability check: %s\n",
+		__func__, result ? "PASSED" : "FAILED");
+
+	/* We need the vector. There are no fallbacks. */
+	return result ? 0 : -ENXIO;
+}
+
+/*
+ * Reads datagrams from the device and dispatches them. For IO port
+ * based access to the device, we always start reading datagrams into
+ * only the first page of the datagram buffer. If the datagrams don't
+ * fit into one page, we use the maximum datagram buffer size for the
+ * remainder of the invocation. This is a simple heuristic for not
+ * penalizing small datagrams. For DMA-based datagrams, we always
+ * use the maximum datagram buffer size, since there is no performance
+ * penalty for doing so.
+ *
+ * This function assumes that it has exclusive access to the data
+ * in register(s) for the duration of the call.
+ */
+static void vmci_dispatch_dgs(struct vmci_guest_device *vmci_dev)
+{
+	u8 *dg_in_buffer = vmci_dev->data_buffer;
+	struct vmci_datagram *dg;
+	size_t dg_in_buffer_size = VMCI_MAX_DG_SIZE;
+	size_t current_dg_in_buffer_size;
+	size_t remaining_bytes;
+	bool is_io_port = vmci_dev->mmio_base == NULL;
+
+	BUILD_BUG_ON(VMCI_MAX_DG_SIZE < PAGE_SIZE);
+
+	if (!is_io_port) {
+		/* For mmio, the first page is used for the header. */
+		dg_in_buffer += PAGE_SIZE;
+
+		/*
+		 * For DMA-based datagram operations, there is no performance
+		 * penalty for reading the maximum buffer size.
+		 */
+		current_dg_in_buffer_size = VMCI_MAX_DG_SIZE;
+	} else {
+		current_dg_in_buffer_size = PAGE_SIZE;
+	}
+	vmci_read_data(vmci_dev, dg_in_buffer, current_dg_in_buffer_size);
+	dg = (struct vmci_datagram *)dg_in_buffer;
+	remaining_bytes = current_dg_in_buffer_size;
+
+	/*
+	 * Read through the buffer until an invalid datagram header is
+	 * encountered. The exit condition for datagrams read through
+	 * VMCI_DATA_IN_ADDR is a bit more complicated, since a datagram
+	 * can start on any page boundary in the buffer.
+	 */
+	while (dg->dst.resource != VMCI_INVALID_ID ||
+	       (is_io_port && remaining_bytes > PAGE_SIZE)) {
+		unsigned dg_in_size;
+
+		/*
+		 * If using VMCI_DATA_IN_ADDR, skip to the next page
+		 * as a datagram can start on any page boundary.
+		 */
+		if (dg->dst.resource == VMCI_INVALID_ID) {
+			dg = (struct vmci_datagram *)roundup(
+				(uintptr_t)dg + 1, PAGE_SIZE);
+			remaining_bytes =
+				(size_t)(dg_in_buffer +
+					 current_dg_in_buffer_size -
+					 (u8 *)dg);
+			continue;
+		}
+
+		dg_in_size = VMCI_DG_SIZE_ALIGNED(dg);
+
+		if (dg_in_size <= dg_in_buffer_size) {
+			int result;
+
+			/*
+			 * If the remaining bytes in the datagram
+			 * buffer don't contain the complete
+			 * datagram, we first make sure we have enough
+			 * room for it and then we read the remainder
+			 * of the datagram and possibly any following
+			 * datagrams.
+			 */
+			if (dg_in_size > remaining_bytes) {
+				if (remaining_bytes !=
+				    current_dg_in_buffer_size) {
+
+					/*
+					 * We move the partial
+					 * datagram to the front and
+					 * read the remainder of the
+					 * datagram and possibly
+					 * following calls into the
+					 * following bytes.
+					 */
+					memmove(dg_in_buffer, dg_in_buffer +
+						current_dg_in_buffer_size -
+						remaining_bytes,
+						remaining_bytes);
+					dg = (struct vmci_datagram *)
+					    dg_in_buffer;
+				}
+
+				if (current_dg_in_buffer_size !=
+				    dg_in_buffer_size)
+					current_dg_in_buffer_size =
+					    dg_in_buffer_size;
+
+				vmci_read_data(vmci_dev,
+					       dg_in_buffer +
+						remaining_bytes,
+					       current_dg_in_buffer_size -
+						remaining_bytes);
+			}
+
+			/*
+			 * We special case event datagrams from the
+			 * hypervisor.
+			 */
+			if (dg->src.context == VMCI_HYPERVISOR_CONTEXT_ID &&
+			    dg->dst.resource == VMCI_EVENT_HANDLER) {
+				result = vmci_event_dispatch(dg);
+			} else {
+				result = vmci_datagram_invoke_guest_handler(dg);
+			}
+			if (result < VMCI_SUCCESS)
+				dev_dbg(vmci_dev->dev,
+					"Datagram with resource (ID=0x%x) failed (err=%d)\n",
+					dg->dst.resource, result);
+
+			/* On to the next datagram. */
+			dg = (struct vmci_datagram *)((u8 *)dg +
+						      dg_in_size);
+		} else {
+			size_t bytes_to_skip;
+
+			/*
+			 * Datagram doesn't fit in datagram buffer of maximal
+			 * size. We drop it.
+			 */
+			dev_dbg(vmci_dev->dev,
+				"Failed to receive datagram (size=%u bytes)\n",
+				dg_in_size);
+
+			bytes_to_skip = dg_in_size - remaining_bytes;
+			if (current_dg_in_buffer_size != dg_in_buffer_size)
+				current_dg_in_buffer_size = dg_in_buffer_size;
+
+			for (;;) {
+				vmci_read_data(vmci_dev, dg_in_buffer,
+					       current_dg_in_buffer_size);
+				if (bytes_to_skip <= current_dg_in_buffer_size)
+					break;
+
+				bytes_to_skip -= current_dg_in_buffer_size;
+			}
+			dg = (struct vmci_datagram *)(dg_in_buffer +
+						      bytes_to_skip);
+		}
+
+		remaining_bytes =
+		    (size_t) (dg_in_buffer + current_dg_in_buffer_size -
+			      (u8 *)dg);
+
+		if (remaining_bytes < VMCI_DG_HEADERSIZE) {
+			/* Get the next batch of datagrams. */
+
+			vmci_read_data(vmci_dev, dg_in_buffer,
+				       current_dg_in_buffer_size);
+			dg = (struct vmci_datagram *)dg_in_buffer;
+			remaining_bytes = current_dg_in_buffer_size;
+		}
+	}
+}
+
+/*
+ * Scans the notification bitmap for raised flags, clears them
+ * and handles the notifications.
+ */
+static void vmci_process_bitmap(struct vmci_guest_device *dev)
+{
+	if (!dev->notification_bitmap) {
+		dev_dbg(dev->dev, "No bitmap present in %s\n", __func__);
+		return;
+	}
+
+	vmci_dbell_scan_notification_entries(dev->notification_bitmap);
+}
+
+/*
+ * Interrupt handler for legacy or MSI interrupt, or for first MSI-X
+ * interrupt (vector VMCI_INTR_DATAGRAM).
+ */
+static irqreturn_t vmci_interrupt(int irq, void *_dev)
+{
+	struct vmci_guest_device *dev = _dev;
+
+	/*
+	 * If we are using MSI-X with exclusive vectors then we simply call
+	 * vmci_dispatch_dgs(), since we know the interrupt was meant for us.
+	 * Otherwise we must read the ICR to determine what to do.
+	 */
+
+	if (dev->exclusive_vectors) {
+		vmci_dispatch_dgs(dev);
+	} else {
+		unsigned int icr;
+
+		/* Acknowledge interrupt and determine what needs doing. */
+		icr = vmci_read_reg(dev, VMCI_ICR_ADDR);
+		if (icr == 0 || icr == ~0)
+			return IRQ_NONE;
+
+		if (icr & VMCI_ICR_DATAGRAM) {
+			vmci_dispatch_dgs(dev);
+			icr &= ~VMCI_ICR_DATAGRAM;
+		}
+
+		if (icr & VMCI_ICR_NOTIFICATION) {
+			vmci_process_bitmap(dev);
+			icr &= ~VMCI_ICR_NOTIFICATION;
+		}
+
+		if (icr & VMCI_ICR_DMA_DATAGRAM) {
+			wake_up_all(&dev->inout_wq);
+			icr &= ~VMCI_ICR_DMA_DATAGRAM;
+		}
+
+		if (icr != 0)
+			dev_warn(dev->dev,
+				 "Ignoring unknown interrupt cause (%d)\n",
+				 icr);
+	}
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Interrupt handler for MSI-X interrupt vector VMCI_INTR_NOTIFICATION,
+ * which is for the notification bitmap. Will only get called if we are
+ * using MSI-X with exclusive vectors.
+ */
+static irqreturn_t vmci_interrupt_bm(int irq, void *_dev)
+{
+	struct vmci_guest_device *dev = _dev;
+
+	/* For MSI-X we can just assume it was meant for us. */
+	vmci_process_bitmap(dev);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Interrupt handler for MSI-X interrupt vector VMCI_INTR_DMA_DATAGRAM,
+ * which is for the completion of a DMA datagram send or receive operation.
+ * Will only get called if we are using MSI-X with exclusive vectors.
+ */
+static irqreturn_t vmci_interrupt_dma_datagram(int irq, void *_dev)
+{
+	struct vmci_guest_device *dev = _dev;
+
+	wake_up_all(&dev->inout_wq);
+
+	return IRQ_HANDLED;
+}
+
+static void vmci_free_dg_buffers(struct vmci_guest_device *vmci_dev)
+{
+	if (vmci_dev->mmio_base != NULL) {
+		if (vmci_dev->tx_buffer != NULL)
+			dma_free_coherent(vmci_dev->dev,
+					  VMCI_DMA_DG_BUFFER_SIZE,
+					  vmci_dev->tx_buffer,
+					  vmci_dev->tx_buffer_base);
+		if (vmci_dev->data_buffer != NULL)
+			dma_free_coherent(vmci_dev->dev,
+					  VMCI_DMA_DG_BUFFER_SIZE,
+					  vmci_dev->data_buffer,
+					  vmci_dev->data_buffer_base);
+	} else {
+		vfree(vmci_dev->data_buffer);
+	}
+}
+
+/*
+ * Most of the initialization at module load time is done here.
+ */
+static int vmci_guest_probe_device(struct pci_dev *pdev,
+				   const struct pci_device_id *id)
+{
+	struct vmci_guest_device *vmci_dev;
+	void __iomem *iobase = NULL;
+	void __iomem *mmio_base = NULL;
+	unsigned int num_irq_vectors;
+	unsigned int capabilities;
+	unsigned int caps_in_use;
+	unsigned long cmd;
+	int vmci_err;
+	int error;
+
+	dev_dbg(&pdev->dev, "Probing for vmci/PCI guest device\n");
+
+	error = pcim_enable_device(pdev);
+	if (error) {
+		dev_err(&pdev->dev,
+			"Failed to enable VMCI device: %d\n", error);
+		return error;
+	}
+
+	/*
+	 * The VMCI device with mmio access to registers requests 256KB
+	 * for BAR1. If present, the driver will use the new VMCI device
+	 * functionality for register access and datagram send/recv.
+	 */
+
+	if (pci_resource_len(pdev, 1) == VMCI_WITH_MMIO_ACCESS_BAR_SIZE) {
+		dev_info(&pdev->dev, "MMIO register access is available\n");
+		mmio_base = pci_iomap_range(pdev, 1, VMCI_MMIO_ACCESS_OFFSET,
+					    VMCI_MMIO_ACCESS_SIZE);
+		/* If the map fails, we fall back to IOIO access. */
+		if (!mmio_base)
+			dev_warn(&pdev->dev, "Failed to map MMIO register access\n");
+	}
+
+	if (!mmio_base) {
+		if (IS_ENABLED(CONFIG_ARM64)) {
+			dev_err(&pdev->dev, "MMIO base is invalid\n");
+			return -ENXIO;
+		}
+		error = pcim_iomap_regions(pdev, BIT(0), KBUILD_MODNAME);
+		if (error) {
+			dev_err(&pdev->dev, "Failed to reserve/map IO regions\n");
+			return error;
+		}
+		iobase = pcim_iomap_table(pdev)[0];
+	}
+
+	vmci_dev = devm_kzalloc(&pdev->dev, sizeof(*vmci_dev), GFP_KERNEL);
+	if (!vmci_dev) {
+		dev_err(&pdev->dev,
+			"Can't allocate memory for VMCI device\n");
+		return -ENOMEM;
+	}
+
+	vmci_dev->dev = &pdev->dev;
+	vmci_dev->exclusive_vectors = false;
+	vmci_dev->iobase = iobase;
+	vmci_dev->mmio_base = mmio_base;
+
+	init_waitqueue_head(&vmci_dev->inout_wq);
+
+	if (mmio_base != NULL) {
+		vmci_dev->tx_buffer = dma_alloc_coherent(&pdev->dev, VMCI_DMA_DG_BUFFER_SIZE,
+							 &vmci_dev->tx_buffer_base,
+							 GFP_KERNEL);
+		if (!vmci_dev->tx_buffer) {
+			dev_err(&pdev->dev,
+				"Can't allocate memory for datagram tx buffer\n");
+			return -ENOMEM;
+		}
+
+		vmci_dev->data_buffer = dma_alloc_coherent(&pdev->dev, VMCI_DMA_DG_BUFFER_SIZE,
+							   &vmci_dev->data_buffer_base,
+							   GFP_KERNEL);
+	} else {
+		vmci_dev->data_buffer = vmalloc(VMCI_MAX_DG_SIZE);
+	}
+	if (!vmci_dev->data_buffer) {
+		dev_err(&pdev->dev,
+			"Can't allocate memory for datagram buffer\n");
+		error = -ENOMEM;
+		goto err_free_data_buffers;
+	}
+
+	pci_set_master(pdev);	/* To enable queue_pair functionality. */
+
+	/*
+	 * Verify that the VMCI Device supports the capabilities that
+	 * we need. If the device is missing capabilities that we would
+	 * like to use, check for fallback capabilities and use those
+	 * instead (so we can run a new VM on old hosts). Fail the load if
+	 * a required capability is missing and there is no fallback.
+	 *
+	 * Right now, we need datagrams. There are no fallbacks.
+	 */
+	capabilities = vmci_read_reg(vmci_dev, VMCI_CAPS_ADDR);
+	if (!(capabilities & VMCI_CAPS_DATAGRAM)) {
+		dev_err(&pdev->dev, "Device does not support datagrams\n");
+		error = -ENXIO;
+		goto err_free_data_buffers;
+	}
+	caps_in_use = VMCI_CAPS_DATAGRAM;
+
+	/*
+	 * Use 64-bit PPNs if the device supports them.
+	 *
+	 * There is no check for the return value of dma_set_mask_and_coherent
+	 * since this driver can handle the default mask values if
+	 * dma_set_mask_and_coherent fails.
+	 */
+	if (capabilities & VMCI_CAPS_PPN64) {
+		dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+		use_ppn64 = true;
+		caps_in_use |= VMCI_CAPS_PPN64;
+	} else {
+		dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(44));
+		use_ppn64 = false;
+	}
+
+	/*
+	 * If the hardware supports notifications, we will use them as
+	 * well.
+	 */
+	if (capabilities & VMCI_CAPS_NOTIFICATIONS) {
+		vmci_dev->notification_bitmap = dma_alloc_coherent(
+			&pdev->dev, PAGE_SIZE, &vmci_dev->notification_base,
+			GFP_KERNEL);
+		if (!vmci_dev->notification_bitmap)
+			dev_warn(&pdev->dev,
+				 "Unable to allocate notification bitmap\n");
+		else
+			caps_in_use |= VMCI_CAPS_NOTIFICATIONS;
+	}
+
+	if (mmio_base != NULL) {
+		if (capabilities & VMCI_CAPS_DMA_DATAGRAM) {
+			caps_in_use |= VMCI_CAPS_DMA_DATAGRAM;
+		} else {
+			dev_err(&pdev->dev,
+				"Missing capability: VMCI_CAPS_DMA_DATAGRAM\n");
+			error = -ENXIO;
+			goto err_free_notification_bitmap;
+		}
+	}
+
+	dev_info(&pdev->dev, "Using capabilities 0x%x\n", caps_in_use);
+
+	/* Let the host know which capabilities we intend to use. */
+	vmci_write_reg(vmci_dev, caps_in_use, VMCI_CAPS_ADDR);
+
+	if (caps_in_use & VMCI_CAPS_DMA_DATAGRAM) {
+		/* Let the device know the size for pages passed down. */
+		vmci_write_reg(vmci_dev, PAGE_SHIFT, VMCI_GUEST_PAGE_SHIFT);
+
+		/* Configure the high order parts of the data in/out buffers. */
+		vmci_write_reg(vmci_dev, upper_32_bits(vmci_dev->data_buffer_base),
+			       VMCI_DATA_IN_HIGH_ADDR);
+		vmci_write_reg(vmci_dev, upper_32_bits(vmci_dev->tx_buffer_base),
+			       VMCI_DATA_OUT_HIGH_ADDR);
+	}
+
+	/* Set up global device so that we can start sending datagrams */
+	spin_lock_irq(&vmci_dev_spinlock);
+	vmci_dev_g = vmci_dev;
+	vmci_pdev = pdev;
+	spin_unlock_irq(&vmci_dev_spinlock);
+
+	/*
+	 * Register notification bitmap with device if that capability is
+	 * used.
+	 */
+	if (caps_in_use & VMCI_CAPS_NOTIFICATIONS) {
+		unsigned long bitmap_ppn =
+			vmci_dev->notification_base >> PAGE_SHIFT;
+		if (!vmci_dbell_register_notification_bitmap(bitmap_ppn)) {
+			dev_warn(&pdev->dev,
+				 "VMCI device unable to register notification bitmap with PPN 0x%lx\n",
+				 bitmap_ppn);
+			error = -ENXIO;
+			goto err_remove_vmci_dev_g;
+		}
+	}
+
+	/* Check host capabilities. */
+	error = vmci_check_host_caps(pdev);
+	if (error)
+		goto err_remove_vmci_dev_g;
+
+	/* Enable device. */
+
+	/*
+	 * We subscribe to the VMCI_EVENT_CTX_ID_UPDATE here so we can
+	 * update the internal context id when needed.
+	 */
+	vmci_err = vmci_event_subscribe(VMCI_EVENT_CTX_ID_UPDATE,
+					vmci_guest_cid_update, NULL,
+					&ctx_update_sub_id);
+	if (vmci_err < VMCI_SUCCESS)
+		dev_warn(&pdev->dev,
+			 "Failed to subscribe to event (type=%d): %d\n",
+			 VMCI_EVENT_CTX_ID_UPDATE, vmci_err);
+
+	/*
+	 * Enable interrupts. Try MSI-X first, then MSI, and then fall back
+	 * on legacy interrupts.
+	 */
+	if (vmci_dev->mmio_base != NULL)
+		num_irq_vectors = VMCI_MAX_INTRS;
+	else
+		num_irq_vectors = VMCI_MAX_INTRS_NOTIFICATION;
+	error = pci_alloc_irq_vectors(pdev, num_irq_vectors, num_irq_vectors,
+				      PCI_IRQ_MSIX);
+	if (error < 0) {
+		error = pci_alloc_irq_vectors(pdev, 1, 1,
+				PCI_IRQ_MSIX | PCI_IRQ_MSI | PCI_IRQ_LEGACY);
+		if (error < 0)
+			goto err_unsubscribe_event;
+	} else {
+		vmci_dev->exclusive_vectors = true;
+	}
+
+	/*
+	 * Request IRQ for legacy or MSI interrupts, or for first
+	 * MSI-X vector.
+	 */
+	error = request_threaded_irq(pci_irq_vector(pdev, 0), NULL,
+				     vmci_interrupt, IRQF_SHARED,
+				     KBUILD_MODNAME, vmci_dev);
+	if (error) {
+		dev_err(&pdev->dev, "Irq %u in use: %d\n",
+			pci_irq_vector(pdev, 0), error);
+		goto err_disable_msi;
+	}
+
+	/*
+	 * For MSI-X with exclusive vectors we need to request an
+	 * interrupt for each vector so that we get a separate
+	 * interrupt handler routine. This allows us to distinguish
+	 * between the vectors.
+	 */
+	if (vmci_dev->exclusive_vectors) {
+		error = request_threaded_irq(pci_irq_vector(pdev, 1), NULL,
+					     vmci_interrupt_bm, 0,
+					     KBUILD_MODNAME, vmci_dev);
+		if (error) {
+			dev_err(&pdev->dev,
+				"Failed to allocate irq %u: %d\n",
+				pci_irq_vector(pdev, 1), error);
+			goto err_free_irq;
+		}
+		if (caps_in_use & VMCI_CAPS_DMA_DATAGRAM) {
+			error = request_threaded_irq(pci_irq_vector(pdev, 2),
+						     NULL,
+						     vmci_interrupt_dma_datagram,
+						     0, KBUILD_MODNAME,
+						     vmci_dev);
+			if (error) {
+				dev_err(&pdev->dev,
+					"Failed to allocate irq %u: %d\n",
+					pci_irq_vector(pdev, 2), error);
+				goto err_free_bm_irq;
+			}
+		}
+	}
+
+	dev_dbg(&pdev->dev, "Registered device\n");
+
+	atomic_inc(&vmci_num_guest_devices);
+
+	/* Enable specific interrupt bits. */
+	cmd = VMCI_IMR_DATAGRAM;
+	if (caps_in_use & VMCI_CAPS_NOTIFICATIONS)
+		cmd |= VMCI_IMR_NOTIFICATION;
+	if (caps_in_use & VMCI_CAPS_DMA_DATAGRAM)
+		cmd |= VMCI_IMR_DMA_DATAGRAM;
+	vmci_write_reg(vmci_dev, cmd, VMCI_IMR_ADDR);
+
+	/* Enable interrupts. */
+	vmci_write_reg(vmci_dev, VMCI_CONTROL_INT_ENABLE, VMCI_CONTROL_ADDR);
+
+	pci_set_drvdata(pdev, vmci_dev);
+
+	vmci_call_vsock_callback(false);
+	return 0;
+
+err_free_bm_irq:
+	if (vmci_dev->exclusive_vectors)
+		free_irq(pci_irq_vector(pdev, 1), vmci_dev);
+
+err_free_irq:
+	free_irq(pci_irq_vector(pdev, 0), vmci_dev);
+
+err_disable_msi:
+	pci_free_irq_vectors(pdev);
+
+err_unsubscribe_event:
+	vmci_err = vmci_event_unsubscribe(ctx_update_sub_id);
+	if (vmci_err < VMCI_SUCCESS)
+		dev_warn(&pdev->dev,
+			 "Failed to unsubscribe from event (type=%d) with subscriber (ID=0x%x): %d\n",
+			 VMCI_EVENT_CTX_ID_UPDATE, ctx_update_sub_id, vmci_err);
+
+err_remove_vmci_dev_g:
+	spin_lock_irq(&vmci_dev_spinlock);
+	vmci_pdev = NULL;
+	vmci_dev_g = NULL;
+	spin_unlock_irq(&vmci_dev_spinlock);
+
+err_free_notification_bitmap:
+	if (vmci_dev->notification_bitmap) {
+		vmci_write_reg(vmci_dev, VMCI_CONTROL_RESET, VMCI_CONTROL_ADDR);
+		dma_free_coherent(&pdev->dev, PAGE_SIZE,
+				  vmci_dev->notification_bitmap,
+				  vmci_dev->notification_base);
+	}
+
+err_free_data_buffers:
+	vmci_free_dg_buffers(vmci_dev);
+
+	/* The rest are managed resources and will be freed by PCI core */
+	return error;
+}
+
+static void vmci_guest_remove_device(struct pci_dev *pdev)
+{
+	struct vmci_guest_device *vmci_dev = pci_get_drvdata(pdev);
+	int vmci_err;
+
+	dev_dbg(&pdev->dev, "Removing device\n");
+
+	atomic_dec(&vmci_num_guest_devices);
+
+	vmci_qp_guest_endpoints_exit();
+
+	vmci_err = vmci_event_unsubscribe(ctx_update_sub_id);
+	if (vmci_err < VMCI_SUCCESS)
+		dev_warn(&pdev->dev,
+			 "Failed to unsubscribe from event (type=%d) with subscriber (ID=0x%x): %d\n",
+			 VMCI_EVENT_CTX_ID_UPDATE, ctx_update_sub_id, vmci_err);
+
+	spin_lock_irq(&vmci_dev_spinlock);
+	vmci_dev_g = NULL;
+	vmci_pdev = NULL;
+	spin_unlock_irq(&vmci_dev_spinlock);
+
+	dev_dbg(&pdev->dev, "Resetting vmci device\n");
+	vmci_write_reg(vmci_dev, VMCI_CONTROL_RESET, VMCI_CONTROL_ADDR);
+
+	/*
+	 * Free IRQ and then disable MSI/MSI-X as appropriate. For
+	 * MSI-X, we might have multiple vectors, each with their own
+	 * IRQ, which we must free too.
+	 */
+	if (vmci_dev->exclusive_vectors) {
+		free_irq(pci_irq_vector(pdev, 1), vmci_dev);
+		if (vmci_dev->mmio_base != NULL)
+			free_irq(pci_irq_vector(pdev, 2), vmci_dev);
+	}
+	free_irq(pci_irq_vector(pdev, 0), vmci_dev);
+	pci_free_irq_vectors(pdev);
+
+	if (vmci_dev->notification_bitmap) {
+		/*
+		 * The device reset above cleared the bitmap state of the
+		 * device, so we can safely free it here.
+		 */
+
+		dma_free_coherent(&pdev->dev, PAGE_SIZE,
+				  vmci_dev->notification_bitmap,
+				  vmci_dev->notification_base);
+	}
+
+	vmci_free_dg_buffers(vmci_dev);
+
+	if (vmci_dev->mmio_base != NULL)
+		pci_iounmap(pdev, vmci_dev->mmio_base);
+
+	/* The rest are managed resources and will be freed by PCI core */
+}
+
+static const struct pci_device_id vmci_ids[] = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_VMWARE, PCI_DEVICE_ID_VMWARE_VMCI), },
+	{ 0 },
+};
+MODULE_DEVICE_TABLE(pci, vmci_ids);
+
+static struct pci_driver vmci_guest_driver = {
+	.name		= KBUILD_MODNAME,
+	.id_table	= vmci_ids,
+	.probe		= vmci_guest_probe_device,
+	.remove		= vmci_guest_remove_device,
+};
+
+int __init vmci_guest_init(void)
+{
+	return pci_register_driver(&vmci_guest_driver);
+}
+
+void __exit vmci_guest_exit(void)
+{
+	pci_unregister_driver(&vmci_guest_driver);
+}
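For reference, the only entry point this file exports to the rest of the kernel is vmci_send_datagram(). Below is a minimal sketch of a guest-side caller, modeled directly on the vmci_get_vm_context_id() pattern in the patch above; it is not part of the commit. It assumes the vmw_vmci headers are available, and the only handle values used (VMCI_HYPERVISOR_CONTEXT_ID, VMCI_GET_CONTEXT_ID, VMCI_ANON_SRC_HANDLE) are the ones this file itself uses.

/*
 * Editorial sketch, not part of the patch: query the guest's context
 * id by sending an empty datagram to the hypervisor, mirroring
 * vmci_get_vm_context_id() above.
 */
#include <linux/vmw_vmci_defs.h>
#include <linux/vmw_vmci_api.h>

static int example_query_context_id(void)
{
	struct vmci_datagram msg;

	/* Destination: the hypervisor's GET_CONTEXT_ID resource. */
	msg.dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
				   VMCI_GET_CONTEXT_ID);
	msg.src = VMCI_ANON_SRC_HANDLE;
	msg.payload_size = 0;

	/*
	 * Returns the context id on success, or a negative
	 * VMCI_ERROR_* code. Safe to call from atomic context, since
	 * vmci_send_datagram() only spins under a spinlock with
	 * interrupts disabled; the datagram must be fully assembled
	 * before the call.
	 */
	return vmci_send_datagram(&msg);
}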