diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 18:49:45 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 18:49:45 +0000 |
commit | 2c3c1048746a4622d8c89a29670120dc8fab93c4 (patch) | |
tree | 848558de17fb3008cdf4d861b01ac7781903ce39 /arch/um/drivers/virtio_uml.c | |
parent | Initial commit. (diff) | |
download | linux-2c3c1048746a4622d8c89a29670120dc8fab93c4.tar.xz linux-2c3c1048746a4622d8c89a29670120dc8fab93c4.zip |
Adding upstream version 6.1.76.upstream/6.1.76
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'arch/um/drivers/virtio_uml.c')
-rw-r--r-- | arch/um/drivers/virtio_uml.c | 1473 |
1 files changed, 1473 insertions, 0 deletions
diff --git a/arch/um/drivers/virtio_uml.c b/arch/um/drivers/virtio_uml.c new file mode 100644 index 000000000..ddd080f6d --- /dev/null +++ b/arch/um/drivers/virtio_uml.c @@ -0,0 +1,1473 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Virtio vhost-user driver + * + * Copyright(c) 2019 Intel Corporation + * + * This driver allows virtio devices to be used over a vhost-user socket. + * + * Guest devices can be instantiated by kernel module or command line + * parameters. One device will be created for each parameter. Syntax: + * + * virtio_uml.device=<socket>:<virtio_id>[:<platform_id>] + * where: + * <socket> := vhost-user socket path to connect + * <virtio_id> := virtio device id (as in virtio_ids.h) + * <platform_id> := (optional) platform device id + * + * example: + * virtio_uml.device=/var/uml.socket:1 + * + * Based on Virtio MMIO driver by Pawel Moll, copyright 2011-2014, ARM Ltd. + */ +#include <linux/module.h> +#include <linux/of.h> +#include <linux/platform_device.h> +#include <linux/slab.h> +#include <linux/virtio.h> +#include <linux/virtio_config.h> +#include <linux/virtio_ring.h> +#include <linux/time-internal.h> +#include <linux/virtio-uml.h> +#include <shared/as-layout.h> +#include <irq_kern.h> +#include <init.h> +#include <os.h> +#include "vhost_user.h" + +#define MAX_SUPPORTED_QUEUE_SIZE 256 + +#define to_virtio_uml_device(_vdev) \ + container_of(_vdev, struct virtio_uml_device, vdev) + +struct virtio_uml_platform_data { + u32 virtio_device_id; + const char *socket_path; + struct work_struct conn_broken_wk; + struct platform_device *pdev; +}; + +struct virtio_uml_device { + struct virtio_device vdev; + struct platform_device *pdev; + struct virtio_uml_platform_data *pdata; + + spinlock_t sock_lock; + int sock, req_fd, irq; + u64 features; + u64 protocol_features; + u8 status; + u8 registered:1; + u8 suspended:1; + u8 no_vq_suspend:1; + + u8 config_changed_irq:1; + uint64_t vq_irq_vq_map; + int recv_rc; +}; + +struct virtio_uml_vq_info { + int kick_fd, call_fd; + char name[32]; + bool suspended; +}; + +extern unsigned long long physmem_size, highmem; + +#define vu_err(vu_dev, ...) dev_err(&(vu_dev)->pdev->dev, ##__VA_ARGS__) + +/* Vhost-user protocol */ + +static int full_sendmsg_fds(int fd, const void *buf, unsigned int len, + const int *fds, unsigned int fds_num) +{ + int rc; + + do { + rc = os_sendmsg_fds(fd, buf, len, fds, fds_num); + if (rc > 0) { + buf += rc; + len -= rc; + fds = NULL; + fds_num = 0; + } + } while (len && (rc >= 0 || rc == -EINTR)); + + if (rc < 0) + return rc; + return 0; +} + +static int full_read(int fd, void *buf, int len, bool abortable) +{ + int rc; + + if (!len) + return 0; + + do { + rc = os_read_file(fd, buf, len); + if (rc > 0) { + buf += rc; + len -= rc; + } + } while (len && (rc > 0 || rc == -EINTR || (!abortable && rc == -EAGAIN))); + + if (rc < 0) + return rc; + if (rc == 0) + return -ECONNRESET; + return 0; +} + +static int vhost_user_recv_header(int fd, struct vhost_user_msg *msg) +{ + return full_read(fd, msg, sizeof(msg->header), true); +} + +static int vhost_user_recv(struct virtio_uml_device *vu_dev, + int fd, struct vhost_user_msg *msg, + size_t max_payload_size, bool wait) +{ + size_t size; + int rc; + + /* + * In virtio time-travel mode, we're handling all the vhost-user + * FDs by polling them whenever appropriate. However, we may get + * into a situation where we're sending out an interrupt message + * to a device (e.g. a net device) and need to handle a simulation + * time message while doing so, e.g. one that tells us to update + * our idea of how long we can run without scheduling. + * + * Thus, we need to not just read() from the given fd, but need + * to also handle messages for the simulation time - this function + * does that for us while waiting for the given fd to be readable. + */ + if (wait) + time_travel_wait_readable(fd); + + rc = vhost_user_recv_header(fd, msg); + + if (rc) + return rc; + size = msg->header.size; + if (size > max_payload_size) + return -EPROTO; + return full_read(fd, &msg->payload, size, false); +} + +static void vhost_user_check_reset(struct virtio_uml_device *vu_dev, + int rc) +{ + struct virtio_uml_platform_data *pdata = vu_dev->pdata; + + if (rc != -ECONNRESET) + return; + + if (!vu_dev->registered) + return; + + vu_dev->registered = 0; + + schedule_work(&pdata->conn_broken_wk); +} + +static int vhost_user_recv_resp(struct virtio_uml_device *vu_dev, + struct vhost_user_msg *msg, + size_t max_payload_size) +{ + int rc = vhost_user_recv(vu_dev, vu_dev->sock, msg, + max_payload_size, true); + + if (rc) { + vhost_user_check_reset(vu_dev, rc); + return rc; + } + + if (msg->header.flags != (VHOST_USER_FLAG_REPLY | VHOST_USER_VERSION)) + return -EPROTO; + + return 0; +} + +static int vhost_user_recv_u64(struct virtio_uml_device *vu_dev, + u64 *value) +{ + struct vhost_user_msg msg; + int rc = vhost_user_recv_resp(vu_dev, &msg, + sizeof(msg.payload.integer)); + + if (rc) + return rc; + if (msg.header.size != sizeof(msg.payload.integer)) + return -EPROTO; + *value = msg.payload.integer; + return 0; +} + +static int vhost_user_recv_req(struct virtio_uml_device *vu_dev, + struct vhost_user_msg *msg, + size_t max_payload_size) +{ + int rc = vhost_user_recv(vu_dev, vu_dev->req_fd, msg, + max_payload_size, false); + + if (rc) + return rc; + + if ((msg->header.flags & ~VHOST_USER_FLAG_NEED_REPLY) != + VHOST_USER_VERSION) + return -EPROTO; + + return 0; +} + +static int vhost_user_send(struct virtio_uml_device *vu_dev, + bool need_response, struct vhost_user_msg *msg, + int *fds, size_t num_fds) +{ + size_t size = sizeof(msg->header) + msg->header.size; + unsigned long flags; + bool request_ack; + int rc; + + msg->header.flags |= VHOST_USER_VERSION; + + /* + * The need_response flag indicates that we already need a response, + * e.g. to read the features. In these cases, don't request an ACK as + * it is meaningless. Also request an ACK only if supported. + */ + request_ack = !need_response; + if (!(vu_dev->protocol_features & + BIT_ULL(VHOST_USER_PROTOCOL_F_REPLY_ACK))) + request_ack = false; + + if (request_ack) + msg->header.flags |= VHOST_USER_FLAG_NEED_REPLY; + + spin_lock_irqsave(&vu_dev->sock_lock, flags); + rc = full_sendmsg_fds(vu_dev->sock, msg, size, fds, num_fds); + if (rc < 0) + goto out; + + if (request_ack) { + uint64_t status; + + rc = vhost_user_recv_u64(vu_dev, &status); + if (rc) + goto out; + + if (status) { + vu_err(vu_dev, "slave reports error: %llu\n", status); + rc = -EIO; + goto out; + } + } + +out: + spin_unlock_irqrestore(&vu_dev->sock_lock, flags); + return rc; +} + +static int vhost_user_send_no_payload(struct virtio_uml_device *vu_dev, + bool need_response, u32 request) +{ + struct vhost_user_msg msg = { + .header.request = request, + }; + + return vhost_user_send(vu_dev, need_response, &msg, NULL, 0); +} + +static int vhost_user_send_no_payload_fd(struct virtio_uml_device *vu_dev, + u32 request, int fd) +{ + struct vhost_user_msg msg = { + .header.request = request, + }; + + return vhost_user_send(vu_dev, false, &msg, &fd, 1); +} + +static int vhost_user_send_u64(struct virtio_uml_device *vu_dev, + u32 request, u64 value) +{ + struct vhost_user_msg msg = { + .header.request = request, + .header.size = sizeof(msg.payload.integer), + .payload.integer = value, + }; + + return vhost_user_send(vu_dev, false, &msg, NULL, 0); +} + +static int vhost_user_set_owner(struct virtio_uml_device *vu_dev) +{ + return vhost_user_send_no_payload(vu_dev, false, VHOST_USER_SET_OWNER); +} + +static int vhost_user_get_features(struct virtio_uml_device *vu_dev, + u64 *features) +{ + int rc = vhost_user_send_no_payload(vu_dev, true, + VHOST_USER_GET_FEATURES); + + if (rc) + return rc; + return vhost_user_recv_u64(vu_dev, features); +} + +static int vhost_user_set_features(struct virtio_uml_device *vu_dev, + u64 features) +{ + return vhost_user_send_u64(vu_dev, VHOST_USER_SET_FEATURES, features); +} + +static int vhost_user_get_protocol_features(struct virtio_uml_device *vu_dev, + u64 *protocol_features) +{ + int rc = vhost_user_send_no_payload(vu_dev, true, + VHOST_USER_GET_PROTOCOL_FEATURES); + + if (rc) + return rc; + return vhost_user_recv_u64(vu_dev, protocol_features); +} + +static int vhost_user_set_protocol_features(struct virtio_uml_device *vu_dev, + u64 protocol_features) +{ + return vhost_user_send_u64(vu_dev, VHOST_USER_SET_PROTOCOL_FEATURES, + protocol_features); +} + +static void vhost_user_reply(struct virtio_uml_device *vu_dev, + struct vhost_user_msg *msg, int response) +{ + struct vhost_user_msg reply = { + .payload.integer = response, + }; + size_t size = sizeof(reply.header) + sizeof(reply.payload.integer); + int rc; + + reply.header = msg->header; + reply.header.flags &= ~VHOST_USER_FLAG_NEED_REPLY; + reply.header.flags |= VHOST_USER_FLAG_REPLY; + reply.header.size = sizeof(reply.payload.integer); + + rc = full_sendmsg_fds(vu_dev->req_fd, &reply, size, NULL, 0); + + if (rc) + vu_err(vu_dev, + "sending reply to slave request failed: %d (size %zu)\n", + rc, size); +} + +static irqreturn_t vu_req_read_message(struct virtio_uml_device *vu_dev, + struct time_travel_event *ev) +{ + struct virtqueue *vq; + int response = 1; + struct { + struct vhost_user_msg msg; + u8 extra_payload[512]; + } msg; + int rc; + irqreturn_t irq_rc = IRQ_NONE; + + while (1) { + rc = vhost_user_recv_req(vu_dev, &msg.msg, + sizeof(msg.msg.payload) + + sizeof(msg.extra_payload)); + if (rc) + break; + + switch (msg.msg.header.request) { + case VHOST_USER_SLAVE_CONFIG_CHANGE_MSG: + vu_dev->config_changed_irq = true; + response = 0; + break; + case VHOST_USER_SLAVE_VRING_CALL: + virtio_device_for_each_vq((&vu_dev->vdev), vq) { + if (vq->index == msg.msg.payload.vring_state.index) { + response = 0; + vu_dev->vq_irq_vq_map |= BIT_ULL(vq->index); + break; + } + } + break; + case VHOST_USER_SLAVE_IOTLB_MSG: + /* not supported - VIRTIO_F_ACCESS_PLATFORM */ + case VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG: + /* not supported - VHOST_USER_PROTOCOL_F_HOST_NOTIFIER */ + default: + vu_err(vu_dev, "unexpected slave request %d\n", + msg.msg.header.request); + } + + if (ev && !vu_dev->suspended) + time_travel_add_irq_event(ev); + + if (msg.msg.header.flags & VHOST_USER_FLAG_NEED_REPLY) + vhost_user_reply(vu_dev, &msg.msg, response); + irq_rc = IRQ_HANDLED; + }; + /* mask EAGAIN as we try non-blocking read until socket is empty */ + vu_dev->recv_rc = (rc == -EAGAIN) ? 0 : rc; + return irq_rc; +} + +static irqreturn_t vu_req_interrupt(int irq, void *data) +{ + struct virtio_uml_device *vu_dev = data; + irqreturn_t ret = IRQ_HANDLED; + + if (!um_irq_timetravel_handler_used()) + ret = vu_req_read_message(vu_dev, NULL); + + if (vu_dev->recv_rc) { + vhost_user_check_reset(vu_dev, vu_dev->recv_rc); + } else if (vu_dev->vq_irq_vq_map) { + struct virtqueue *vq; + + virtio_device_for_each_vq((&vu_dev->vdev), vq) { + if (vu_dev->vq_irq_vq_map & BIT_ULL(vq->index)) + vring_interrupt(0 /* ignored */, vq); + } + vu_dev->vq_irq_vq_map = 0; + } else if (vu_dev->config_changed_irq) { + virtio_config_changed(&vu_dev->vdev); + vu_dev->config_changed_irq = false; + } + + return ret; +} + +static void vu_req_interrupt_comm_handler(int irq, int fd, void *data, + struct time_travel_event *ev) +{ + vu_req_read_message(data, ev); +} + +static int vhost_user_init_slave_req(struct virtio_uml_device *vu_dev) +{ + int rc, req_fds[2]; + + /* Use a pipe for slave req fd, SIGIO is not supported for eventfd */ + rc = os_pipe(req_fds, true, true); + if (rc < 0) + return rc; + vu_dev->req_fd = req_fds[0]; + + rc = um_request_irq_tt(UM_IRQ_ALLOC, vu_dev->req_fd, IRQ_READ, + vu_req_interrupt, IRQF_SHARED, + vu_dev->pdev->name, vu_dev, + vu_req_interrupt_comm_handler); + if (rc < 0) + goto err_close; + + vu_dev->irq = rc; + + rc = vhost_user_send_no_payload_fd(vu_dev, VHOST_USER_SET_SLAVE_REQ_FD, + req_fds[1]); + if (rc) + goto err_free_irq; + + goto out; + +err_free_irq: + um_free_irq(vu_dev->irq, vu_dev); +err_close: + os_close_file(req_fds[0]); +out: + /* Close unused write end of request fds */ + os_close_file(req_fds[1]); + return rc; +} + +static int vhost_user_init(struct virtio_uml_device *vu_dev) +{ + int rc = vhost_user_set_owner(vu_dev); + + if (rc) + return rc; + rc = vhost_user_get_features(vu_dev, &vu_dev->features); + if (rc) + return rc; + + if (vu_dev->features & BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES)) { + rc = vhost_user_get_protocol_features(vu_dev, + &vu_dev->protocol_features); + if (rc) + return rc; + vu_dev->protocol_features &= VHOST_USER_SUPPORTED_PROTOCOL_F; + rc = vhost_user_set_protocol_features(vu_dev, + vu_dev->protocol_features); + if (rc) + return rc; + } + + if (vu_dev->protocol_features & + BIT_ULL(VHOST_USER_PROTOCOL_F_SLAVE_REQ)) { + rc = vhost_user_init_slave_req(vu_dev); + if (rc) + return rc; + } + + return 0; +} + +static void vhost_user_get_config(struct virtio_uml_device *vu_dev, + u32 offset, void *buf, u32 len) +{ + u32 cfg_size = offset + len; + struct vhost_user_msg *msg; + size_t payload_size = sizeof(msg->payload.config) + cfg_size; + size_t msg_size = sizeof(msg->header) + payload_size; + int rc; + + if (!(vu_dev->protocol_features & + BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG))) + return; + + msg = kzalloc(msg_size, GFP_KERNEL); + if (!msg) + return; + msg->header.request = VHOST_USER_GET_CONFIG; + msg->header.size = payload_size; + msg->payload.config.offset = 0; + msg->payload.config.size = cfg_size; + + rc = vhost_user_send(vu_dev, true, msg, NULL, 0); + if (rc) { + vu_err(vu_dev, "sending VHOST_USER_GET_CONFIG failed: %d\n", + rc); + goto free; + } + + rc = vhost_user_recv_resp(vu_dev, msg, msg_size); + if (rc) { + vu_err(vu_dev, + "receiving VHOST_USER_GET_CONFIG response failed: %d\n", + rc); + goto free; + } + + if (msg->header.size != payload_size || + msg->payload.config.size != cfg_size) { + rc = -EPROTO; + vu_err(vu_dev, + "Invalid VHOST_USER_GET_CONFIG sizes (payload %d expected %zu, config %u expected %u)\n", + msg->header.size, payload_size, + msg->payload.config.size, cfg_size); + goto free; + } + memcpy(buf, msg->payload.config.payload + offset, len); + +free: + kfree(msg); +} + +static void vhost_user_set_config(struct virtio_uml_device *vu_dev, + u32 offset, const void *buf, u32 len) +{ + struct vhost_user_msg *msg; + size_t payload_size = sizeof(msg->payload.config) + len; + size_t msg_size = sizeof(msg->header) + payload_size; + int rc; + + if (!(vu_dev->protocol_features & + BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG))) + return; + + msg = kzalloc(msg_size, GFP_KERNEL); + if (!msg) + return; + msg->header.request = VHOST_USER_SET_CONFIG; + msg->header.size = payload_size; + msg->payload.config.offset = offset; + msg->payload.config.size = len; + memcpy(msg->payload.config.payload, buf, len); + + rc = vhost_user_send(vu_dev, false, msg, NULL, 0); + if (rc) + vu_err(vu_dev, "sending VHOST_USER_SET_CONFIG failed: %d\n", + rc); + + kfree(msg); +} + +static int vhost_user_init_mem_region(u64 addr, u64 size, int *fd_out, + struct vhost_user_mem_region *region_out) +{ + unsigned long long mem_offset; + int rc = phys_mapping(addr, &mem_offset); + + if (WARN(rc < 0, "phys_mapping of 0x%llx returned %d\n", addr, rc)) + return -EFAULT; + *fd_out = rc; + region_out->guest_addr = addr; + region_out->user_addr = addr; + region_out->size = size; + region_out->mmap_offset = mem_offset; + + /* Ensure mapping is valid for the entire region */ + rc = phys_mapping(addr + size - 1, &mem_offset); + if (WARN(rc != *fd_out, "phys_mapping of 0x%llx failed: %d != %d\n", + addr + size - 1, rc, *fd_out)) + return -EFAULT; + return 0; +} + +static int vhost_user_set_mem_table(struct virtio_uml_device *vu_dev) +{ + struct vhost_user_msg msg = { + .header.request = VHOST_USER_SET_MEM_TABLE, + .header.size = sizeof(msg.payload.mem_regions), + .payload.mem_regions.num = 1, + }; + unsigned long reserved = uml_reserved - uml_physmem; + int fds[2]; + int rc; + + /* + * This is a bit tricky, see also the comment with setup_physmem(). + * + * Essentially, setup_physmem() uses a file to mmap() our physmem, + * but the code and data we *already* have is omitted. To us, this + * is no difference, since they both become part of our address + * space and memory consumption. To somebody looking in from the + * outside, however, it is different because the part of our memory + * consumption that's already part of the binary (code/data) is not + * mapped from the file, so it's not visible to another mmap from + * the file descriptor. + * + * Thus, don't advertise this space to the vhost-user slave. This + * means that the slave will likely abort or similar when we give + * it an address from the hidden range, since it's not marked as + * a valid address, but at least that way we detect the issue and + * don't just have the slave read an all-zeroes buffer from the + * shared memory file, or write something there that we can never + * see (depending on the direction of the virtqueue traffic.) + * + * Since we usually don't want to use .text for virtio buffers, + * this effectively means that you cannot use + * 1) global variables, which are in the .bss and not in the shm + * file-backed memory + * 2) the stack in some processes, depending on where they have + * their stack (or maybe only no interrupt stack?) + * + * The stack is already not typically valid for DMA, so this isn't + * much of a restriction, but global variables might be encountered. + * + * It might be possible to fix it by copying around the data that's + * between bss_start and where we map the file now, but it's not + * something that you typically encounter with virtio drivers, so + * it didn't seem worthwhile. + */ + rc = vhost_user_init_mem_region(reserved, physmem_size - reserved, + &fds[0], + &msg.payload.mem_regions.regions[0]); + + if (rc < 0) + return rc; + if (highmem) { + msg.payload.mem_regions.num++; + rc = vhost_user_init_mem_region(__pa(end_iomem), highmem, + &fds[1], &msg.payload.mem_regions.regions[1]); + if (rc < 0) + return rc; + } + + return vhost_user_send(vu_dev, false, &msg, fds, + msg.payload.mem_regions.num); +} + +static int vhost_user_set_vring_state(struct virtio_uml_device *vu_dev, + u32 request, u32 index, u32 num) +{ + struct vhost_user_msg msg = { + .header.request = request, + .header.size = sizeof(msg.payload.vring_state), + .payload.vring_state.index = index, + .payload.vring_state.num = num, + }; + + return vhost_user_send(vu_dev, false, &msg, NULL, 0); +} + +static int vhost_user_set_vring_num(struct virtio_uml_device *vu_dev, + u32 index, u32 num) +{ + return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_NUM, + index, num); +} + +static int vhost_user_set_vring_base(struct virtio_uml_device *vu_dev, + u32 index, u32 offset) +{ + return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_BASE, + index, offset); +} + +static int vhost_user_set_vring_addr(struct virtio_uml_device *vu_dev, + u32 index, u64 desc, u64 used, u64 avail, + u64 log) +{ + struct vhost_user_msg msg = { + .header.request = VHOST_USER_SET_VRING_ADDR, + .header.size = sizeof(msg.payload.vring_addr), + .payload.vring_addr.index = index, + .payload.vring_addr.desc = desc, + .payload.vring_addr.used = used, + .payload.vring_addr.avail = avail, + .payload.vring_addr.log = log, + }; + + return vhost_user_send(vu_dev, false, &msg, NULL, 0); +} + +static int vhost_user_set_vring_fd(struct virtio_uml_device *vu_dev, + u32 request, int index, int fd) +{ + struct vhost_user_msg msg = { + .header.request = request, + .header.size = sizeof(msg.payload.integer), + .payload.integer = index, + }; + + if (index & ~VHOST_USER_VRING_INDEX_MASK) + return -EINVAL; + if (fd < 0) { + msg.payload.integer |= VHOST_USER_VRING_POLL_MASK; + return vhost_user_send(vu_dev, false, &msg, NULL, 0); + } + return vhost_user_send(vu_dev, false, &msg, &fd, 1); +} + +static int vhost_user_set_vring_call(struct virtio_uml_device *vu_dev, + int index, int fd) +{ + return vhost_user_set_vring_fd(vu_dev, VHOST_USER_SET_VRING_CALL, + index, fd); +} + +static int vhost_user_set_vring_kick(struct virtio_uml_device *vu_dev, + int index, int fd) +{ + return vhost_user_set_vring_fd(vu_dev, VHOST_USER_SET_VRING_KICK, + index, fd); +} + +static int vhost_user_set_vring_enable(struct virtio_uml_device *vu_dev, + u32 index, bool enable) +{ + if (!(vu_dev->features & BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES))) + return 0; + + return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_ENABLE, + index, enable); +} + + +/* Virtio interface */ + +static bool vu_notify(struct virtqueue *vq) +{ + struct virtio_uml_vq_info *info = vq->priv; + const uint64_t n = 1; + int rc; + + if (info->suspended) + return true; + + time_travel_propagate_time(); + + if (info->kick_fd < 0) { + struct virtio_uml_device *vu_dev; + + vu_dev = to_virtio_uml_device(vq->vdev); + + return vhost_user_set_vring_state(vu_dev, VHOST_USER_VRING_KICK, + vq->index, 0) == 0; + } + + do { + rc = os_write_file(info->kick_fd, &n, sizeof(n)); + } while (rc == -EINTR); + return !WARN(rc != sizeof(n), "write returned %d\n", rc); +} + +static irqreturn_t vu_interrupt(int irq, void *opaque) +{ + struct virtqueue *vq = opaque; + struct virtio_uml_vq_info *info = vq->priv; + uint64_t n; + int rc; + irqreturn_t ret = IRQ_NONE; + + do { + rc = os_read_file(info->call_fd, &n, sizeof(n)); + if (rc == sizeof(n)) + ret |= vring_interrupt(irq, vq); + } while (rc == sizeof(n) || rc == -EINTR); + WARN(rc != -EAGAIN, "read returned %d\n", rc); + return ret; +} + + +static void vu_get(struct virtio_device *vdev, unsigned offset, + void *buf, unsigned len) +{ + struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev); + + vhost_user_get_config(vu_dev, offset, buf, len); +} + +static void vu_set(struct virtio_device *vdev, unsigned offset, + const void *buf, unsigned len) +{ + struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev); + + vhost_user_set_config(vu_dev, offset, buf, len); +} + +static u8 vu_get_status(struct virtio_device *vdev) +{ + struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev); + + return vu_dev->status; +} + +static void vu_set_status(struct virtio_device *vdev, u8 status) +{ + struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev); + + vu_dev->status = status; +} + +static void vu_reset(struct virtio_device *vdev) +{ + struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev); + + vu_dev->status = 0; +} + +static void vu_del_vq(struct virtqueue *vq) +{ + struct virtio_uml_vq_info *info = vq->priv; + + if (info->call_fd >= 0) { + struct virtio_uml_device *vu_dev; + + vu_dev = to_virtio_uml_device(vq->vdev); + + um_free_irq(vu_dev->irq, vq); + os_close_file(info->call_fd); + } + + if (info->kick_fd >= 0) + os_close_file(info->kick_fd); + + vring_del_virtqueue(vq); + kfree(info); +} + +static void vu_del_vqs(struct virtio_device *vdev) +{ + struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev); + struct virtqueue *vq, *n; + u64 features; + + /* Note: reverse order as a workaround to a decoding bug in snabb */ + list_for_each_entry_reverse(vq, &vdev->vqs, list) + WARN_ON(vhost_user_set_vring_enable(vu_dev, vq->index, false)); + + /* Ensure previous messages have been processed */ + WARN_ON(vhost_user_get_features(vu_dev, &features)); + + list_for_each_entry_safe(vq, n, &vdev->vqs, list) + vu_del_vq(vq); +} + +static int vu_setup_vq_call_fd(struct virtio_uml_device *vu_dev, + struct virtqueue *vq) +{ + struct virtio_uml_vq_info *info = vq->priv; + int call_fds[2]; + int rc; + + /* no call FD needed/desired in this case */ + if (vu_dev->protocol_features & + BIT_ULL(VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS) && + vu_dev->protocol_features & + BIT_ULL(VHOST_USER_PROTOCOL_F_SLAVE_REQ)) { + info->call_fd = -1; + return 0; + } + + /* Use a pipe for call fd, since SIGIO is not supported for eventfd */ + rc = os_pipe(call_fds, true, true); + if (rc < 0) + return rc; + + info->call_fd = call_fds[0]; + rc = um_request_irq(vu_dev->irq, info->call_fd, IRQ_READ, + vu_interrupt, IRQF_SHARED, info->name, vq); + if (rc < 0) + goto close_both; + + rc = vhost_user_set_vring_call(vu_dev, vq->index, call_fds[1]); + if (rc) + goto release_irq; + + goto out; + +release_irq: + um_free_irq(vu_dev->irq, vq); +close_both: + os_close_file(call_fds[0]); +out: + /* Close (unused) write end of call fds */ + os_close_file(call_fds[1]); + + return rc; +} + +static struct virtqueue *vu_setup_vq(struct virtio_device *vdev, + unsigned index, vq_callback_t *callback, + const char *name, bool ctx) +{ + struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev); + struct platform_device *pdev = vu_dev->pdev; + struct virtio_uml_vq_info *info; + struct virtqueue *vq; + int num = MAX_SUPPORTED_QUEUE_SIZE; + int rc; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) { + rc = -ENOMEM; + goto error_kzalloc; + } + snprintf(info->name, sizeof(info->name), "%s.%d-%s", pdev->name, + pdev->id, name); + + vq = vring_create_virtqueue(index, num, PAGE_SIZE, vdev, true, true, + ctx, vu_notify, callback, info->name); + if (!vq) { + rc = -ENOMEM; + goto error_create; + } + vq->priv = info; + vq->num_max = num; + num = virtqueue_get_vring_size(vq); + + if (vu_dev->protocol_features & + BIT_ULL(VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS)) { + info->kick_fd = -1; + } else { + rc = os_eventfd(0, 0); + if (rc < 0) + goto error_kick; + info->kick_fd = rc; + } + + rc = vu_setup_vq_call_fd(vu_dev, vq); + if (rc) + goto error_call; + + rc = vhost_user_set_vring_num(vu_dev, index, num); + if (rc) + goto error_setup; + + rc = vhost_user_set_vring_base(vu_dev, index, 0); + if (rc) + goto error_setup; + + rc = vhost_user_set_vring_addr(vu_dev, index, + virtqueue_get_desc_addr(vq), + virtqueue_get_used_addr(vq), + virtqueue_get_avail_addr(vq), + (u64) -1); + if (rc) + goto error_setup; + + return vq; + +error_setup: + if (info->call_fd >= 0) { + um_free_irq(vu_dev->irq, vq); + os_close_file(info->call_fd); + } +error_call: + if (info->kick_fd >= 0) + os_close_file(info->kick_fd); +error_kick: + vring_del_virtqueue(vq); +error_create: + kfree(info); +error_kzalloc: + return ERR_PTR(rc); +} + +static int vu_find_vqs(struct virtio_device *vdev, unsigned nvqs, + struct virtqueue *vqs[], vq_callback_t *callbacks[], + const char * const names[], const bool *ctx, + struct irq_affinity *desc) +{ + struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev); + int i, queue_idx = 0, rc; + struct virtqueue *vq; + + /* not supported for now */ + if (WARN_ON(nvqs > 64)) + return -EINVAL; + + rc = vhost_user_set_mem_table(vu_dev); + if (rc) + return rc; + + for (i = 0; i < nvqs; ++i) { + if (!names[i]) { + vqs[i] = NULL; + continue; + } + + vqs[i] = vu_setup_vq(vdev, queue_idx++, callbacks[i], names[i], + ctx ? ctx[i] : false); + if (IS_ERR(vqs[i])) { + rc = PTR_ERR(vqs[i]); + goto error_setup; + } + } + + list_for_each_entry(vq, &vdev->vqs, list) { + struct virtio_uml_vq_info *info = vq->priv; + + if (info->kick_fd >= 0) { + rc = vhost_user_set_vring_kick(vu_dev, vq->index, + info->kick_fd); + if (rc) + goto error_setup; + } + + rc = vhost_user_set_vring_enable(vu_dev, vq->index, true); + if (rc) + goto error_setup; + } + + return 0; + +error_setup: + vu_del_vqs(vdev); + return rc; +} + +static u64 vu_get_features(struct virtio_device *vdev) +{ + struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev); + + return vu_dev->features; +} + +static int vu_finalize_features(struct virtio_device *vdev) +{ + struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev); + u64 supported = vdev->features & VHOST_USER_SUPPORTED_F; + + vring_transport_features(vdev); + vu_dev->features = vdev->features | supported; + + return vhost_user_set_features(vu_dev, vu_dev->features); +} + +static const char *vu_bus_name(struct virtio_device *vdev) +{ + struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev); + + return vu_dev->pdev->name; +} + +static const struct virtio_config_ops virtio_uml_config_ops = { + .get = vu_get, + .set = vu_set, + .get_status = vu_get_status, + .set_status = vu_set_status, + .reset = vu_reset, + .find_vqs = vu_find_vqs, + .del_vqs = vu_del_vqs, + .get_features = vu_get_features, + .finalize_features = vu_finalize_features, + .bus_name = vu_bus_name, +}; + +static void virtio_uml_release_dev(struct device *d) +{ + struct virtio_device *vdev = + container_of(d, struct virtio_device, dev); + struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev); + + time_travel_propagate_time(); + + /* might not have been opened due to not negotiating the feature */ + if (vu_dev->req_fd >= 0) { + um_free_irq(vu_dev->irq, vu_dev); + os_close_file(vu_dev->req_fd); + } + + os_close_file(vu_dev->sock); + kfree(vu_dev); +} + +void virtio_uml_set_no_vq_suspend(struct virtio_device *vdev, + bool no_vq_suspend) +{ + struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev); + + if (WARN_ON(vdev->config != &virtio_uml_config_ops)) + return; + + vu_dev->no_vq_suspend = no_vq_suspend; + dev_info(&vdev->dev, "%sabled VQ suspend\n", + no_vq_suspend ? "dis" : "en"); +} + +static void vu_of_conn_broken(struct work_struct *wk) +{ + struct virtio_uml_platform_data *pdata; + struct virtio_uml_device *vu_dev; + + pdata = container_of(wk, struct virtio_uml_platform_data, conn_broken_wk); + + vu_dev = platform_get_drvdata(pdata->pdev); + + virtio_break_device(&vu_dev->vdev); + + /* + * We can't remove the device from the devicetree so the only thing we + * can do is warn. + */ + WARN_ON(1); +} + +/* Platform device */ + +static struct virtio_uml_platform_data * +virtio_uml_create_pdata(struct platform_device *pdev) +{ + struct device_node *np = pdev->dev.of_node; + struct virtio_uml_platform_data *pdata; + int ret; + + if (!np) + return ERR_PTR(-EINVAL); + + pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL); + if (!pdata) + return ERR_PTR(-ENOMEM); + + INIT_WORK(&pdata->conn_broken_wk, vu_of_conn_broken); + pdata->pdev = pdev; + + ret = of_property_read_string(np, "socket-path", &pdata->socket_path); + if (ret) + return ERR_PTR(ret); + + ret = of_property_read_u32(np, "virtio-device-id", + &pdata->virtio_device_id); + if (ret) + return ERR_PTR(ret); + + return pdata; +} + +static int virtio_uml_probe(struct platform_device *pdev) +{ + struct virtio_uml_platform_data *pdata = pdev->dev.platform_data; + struct virtio_uml_device *vu_dev; + int rc; + + if (!pdata) { + pdata = virtio_uml_create_pdata(pdev); + if (IS_ERR(pdata)) + return PTR_ERR(pdata); + } + + vu_dev = kzalloc(sizeof(*vu_dev), GFP_KERNEL); + if (!vu_dev) + return -ENOMEM; + + vu_dev->pdata = pdata; + vu_dev->vdev.dev.parent = &pdev->dev; + vu_dev->vdev.dev.release = virtio_uml_release_dev; + vu_dev->vdev.config = &virtio_uml_config_ops; + vu_dev->vdev.id.device = pdata->virtio_device_id; + vu_dev->vdev.id.vendor = VIRTIO_DEV_ANY_ID; + vu_dev->pdev = pdev; + vu_dev->req_fd = -1; + + time_travel_propagate_time(); + + do { + rc = os_connect_socket(pdata->socket_path); + } while (rc == -EINTR); + if (rc < 0) + goto error_free; + vu_dev->sock = rc; + + spin_lock_init(&vu_dev->sock_lock); + + rc = vhost_user_init(vu_dev); + if (rc) + goto error_init; + + platform_set_drvdata(pdev, vu_dev); + + device_set_wakeup_capable(&vu_dev->vdev.dev, true); + + rc = register_virtio_device(&vu_dev->vdev); + if (rc) + put_device(&vu_dev->vdev.dev); + vu_dev->registered = 1; + return rc; + +error_init: + os_close_file(vu_dev->sock); +error_free: + kfree(vu_dev); + return rc; +} + +static int virtio_uml_remove(struct platform_device *pdev) +{ + struct virtio_uml_device *vu_dev = platform_get_drvdata(pdev); + + unregister_virtio_device(&vu_dev->vdev); + return 0; +} + +/* Command line device list */ + +static void vu_cmdline_release_dev(struct device *d) +{ +} + +static struct device vu_cmdline_parent = { + .init_name = "virtio-uml-cmdline", + .release = vu_cmdline_release_dev, +}; + +static bool vu_cmdline_parent_registered; +static int vu_cmdline_id; + +static int vu_unregister_cmdline_device(struct device *dev, void *data) +{ + struct platform_device *pdev = to_platform_device(dev); + struct virtio_uml_platform_data *pdata = pdev->dev.platform_data; + + kfree(pdata->socket_path); + platform_device_unregister(pdev); + return 0; +} + +static void vu_conn_broken(struct work_struct *wk) +{ + struct virtio_uml_platform_data *pdata; + struct virtio_uml_device *vu_dev; + + pdata = container_of(wk, struct virtio_uml_platform_data, conn_broken_wk); + + vu_dev = platform_get_drvdata(pdata->pdev); + + virtio_break_device(&vu_dev->vdev); + + vu_unregister_cmdline_device(&pdata->pdev->dev, NULL); +} + +static int vu_cmdline_set(const char *device, const struct kernel_param *kp) +{ + const char *ids = strchr(device, ':'); + unsigned int virtio_device_id; + int processed, consumed, err; + char *socket_path; + struct virtio_uml_platform_data pdata, *ppdata; + struct platform_device *pdev; + + if (!ids || ids == device) + return -EINVAL; + + processed = sscanf(ids, ":%u%n:%d%n", + &virtio_device_id, &consumed, + &vu_cmdline_id, &consumed); + + if (processed < 1 || ids[consumed]) + return -EINVAL; + + if (!vu_cmdline_parent_registered) { + err = device_register(&vu_cmdline_parent); + if (err) { + pr_err("Failed to register parent device!\n"); + put_device(&vu_cmdline_parent); + return err; + } + vu_cmdline_parent_registered = true; + } + + socket_path = kmemdup_nul(device, ids - device, GFP_KERNEL); + if (!socket_path) + return -ENOMEM; + + pdata.virtio_device_id = (u32) virtio_device_id; + pdata.socket_path = socket_path; + + pr_info("Registering device virtio-uml.%d id=%d at %s\n", + vu_cmdline_id, virtio_device_id, socket_path); + + pdev = platform_device_register_data(&vu_cmdline_parent, "virtio-uml", + vu_cmdline_id++, &pdata, + sizeof(pdata)); + err = PTR_ERR_OR_ZERO(pdev); + if (err) + goto free; + + ppdata = pdev->dev.platform_data; + ppdata->pdev = pdev; + INIT_WORK(&ppdata->conn_broken_wk, vu_conn_broken); + + return 0; + +free: + kfree(socket_path); + return err; +} + +static int vu_cmdline_get_device(struct device *dev, void *data) +{ + struct platform_device *pdev = to_platform_device(dev); + struct virtio_uml_platform_data *pdata = pdev->dev.platform_data; + char *buffer = data; + unsigned int len = strlen(buffer); + + snprintf(buffer + len, PAGE_SIZE - len, "%s:%d:%d\n", + pdata->socket_path, pdata->virtio_device_id, pdev->id); + return 0; +} + +static int vu_cmdline_get(char *buffer, const struct kernel_param *kp) +{ + buffer[0] = '\0'; + if (vu_cmdline_parent_registered) + device_for_each_child(&vu_cmdline_parent, buffer, + vu_cmdline_get_device); + return strlen(buffer) + 1; +} + +static const struct kernel_param_ops vu_cmdline_param_ops = { + .set = vu_cmdline_set, + .get = vu_cmdline_get, +}; + +device_param_cb(device, &vu_cmdline_param_ops, NULL, S_IRUSR); +__uml_help(vu_cmdline_param_ops, +"virtio_uml.device=<socket>:<virtio_id>[:<platform_id>]\n" +" Configure a virtio device over a vhost-user socket.\n" +" See virtio_ids.h for a list of possible virtio device id values.\n" +" Optionally use a specific platform_device id.\n\n" +); + + +static void vu_unregister_cmdline_devices(void) +{ + if (vu_cmdline_parent_registered) { + device_for_each_child(&vu_cmdline_parent, NULL, + vu_unregister_cmdline_device); + device_unregister(&vu_cmdline_parent); + vu_cmdline_parent_registered = false; + } +} + +/* Platform driver */ + +static const struct of_device_id virtio_uml_match[] = { + { .compatible = "virtio,uml", }, + { } +}; +MODULE_DEVICE_TABLE(of, virtio_uml_match); + +static int virtio_uml_suspend(struct platform_device *pdev, pm_message_t state) +{ + struct virtio_uml_device *vu_dev = platform_get_drvdata(pdev); + + if (!vu_dev->no_vq_suspend) { + struct virtqueue *vq; + + virtio_device_for_each_vq((&vu_dev->vdev), vq) { + struct virtio_uml_vq_info *info = vq->priv; + + info->suspended = true; + vhost_user_set_vring_enable(vu_dev, vq->index, false); + } + } + + if (!device_may_wakeup(&vu_dev->vdev.dev)) { + vu_dev->suspended = true; + return 0; + } + + return irq_set_irq_wake(vu_dev->irq, 1); +} + +static int virtio_uml_resume(struct platform_device *pdev) +{ + struct virtio_uml_device *vu_dev = platform_get_drvdata(pdev); + + if (!vu_dev->no_vq_suspend) { + struct virtqueue *vq; + + virtio_device_for_each_vq((&vu_dev->vdev), vq) { + struct virtio_uml_vq_info *info = vq->priv; + + info->suspended = false; + vhost_user_set_vring_enable(vu_dev, vq->index, true); + } + } + + vu_dev->suspended = false; + + if (!device_may_wakeup(&vu_dev->vdev.dev)) + return 0; + + return irq_set_irq_wake(vu_dev->irq, 0); +} + +static struct platform_driver virtio_uml_driver = { + .probe = virtio_uml_probe, + .remove = virtio_uml_remove, + .driver = { + .name = "virtio-uml", + .of_match_table = virtio_uml_match, + }, + .suspend = virtio_uml_suspend, + .resume = virtio_uml_resume, +}; + +static int __init virtio_uml_init(void) +{ + return platform_driver_register(&virtio_uml_driver); +} + +static void __exit virtio_uml_exit(void) +{ + platform_driver_unregister(&virtio_uml_driver); + vu_unregister_cmdline_devices(); +} + +module_init(virtio_uml_init); +module_exit(virtio_uml_exit); +__uml_exitcall(virtio_uml_exit); + +MODULE_DESCRIPTION("UML driver for vhost-user virtio devices"); +MODULE_LICENSE("GPL"); |