diff options
Diffstat (limited to 'src/spdk/dpdk/drivers/bus/vmbus')
-rw-r--r-- | src/spdk/dpdk/drivers/bus/vmbus/Makefile | 33 | ||||
-rw-r--r-- | src/spdk/dpdk/drivers/bus/vmbus/linux/Makefile | 3 | ||||
-rw-r--r-- | src/spdk/dpdk/drivers/bus/vmbus/linux/vmbus_bus.c | 376 | ||||
-rw-r--r-- | src/spdk/dpdk/drivers/bus/vmbus/linux/vmbus_uio.c | 453 | ||||
-rw-r--r-- | src/spdk/dpdk/drivers/bus/vmbus/meson.build | 17 | ||||
-rw-r--r-- | src/spdk/dpdk/drivers/bus/vmbus/private.h | 141 | ||||
-rw-r--r-- | src/spdk/dpdk/drivers/bus/vmbus/rte_bus_vmbus.h | 421 | ||||
-rw-r--r-- | src/spdk/dpdk/drivers/bus/vmbus/rte_bus_vmbus_version.map | 28 | ||||
-rw-r--r-- | src/spdk/dpdk/drivers/bus/vmbus/rte_vmbus_reg.h | 344 | ||||
-rw-r--r-- | src/spdk/dpdk/drivers/bus/vmbus/vmbus_bufring.c | 244 | ||||
-rw-r--r-- | src/spdk/dpdk/drivers/bus/vmbus/vmbus_channel.c | 446 | ||||
-rw-r--r-- | src/spdk/dpdk/drivers/bus/vmbus/vmbus_common.c | 307 | ||||
-rw-r--r-- | src/spdk/dpdk/drivers/bus/vmbus/vmbus_common_uio.c | 234 |
13 files changed, 3047 insertions, 0 deletions
diff --git a/src/spdk/dpdk/drivers/bus/vmbus/Makefile b/src/spdk/dpdk/drivers/bus/vmbus/Makefile new file mode 100644 index 000000000..335df6a0b --- /dev/null +++ b/src/spdk/dpdk/drivers/bus/vmbus/Makefile @@ -0,0 +1,33 @@ +# SPDX-License-Identifier: BSD-3-Clause + +include $(RTE_SDK)/mk/rte.vars.mk + +LIB = librte_bus_vmbus.a +EXPORT_MAP := rte_bus_vmbus_version.map + +CFLAGS += -I$(SRCDIR) +CFLAGS += -O3 $(WERROR_FLAGS) + +ifneq ($(CONFIG_RTE_EXEC_ENV_LINUX),) +SYSTEM := linux +endif +ifneq ($(CONFIG_RTE_EXEC_ENV_FREEBSD),) +$(error "VMBUS not implemented for BSD yet") +endif + +CFLAGS += -I$(RTE_SDK)/drivers/bus/vmbus/$(SYSTEM) +CFLAGS += -I$(RTE_SDK)/lib/librte_eal/common + +LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring +LDLIBS += -lrte_ethdev + +include $(RTE_SDK)/drivers/bus/vmbus/$(SYSTEM)/Makefile +SRCS-$(CONFIG_RTE_LIBRTE_VMBUS) := $(addprefix $(SYSTEM)/,$(SRCS)) +SRCS-$(CONFIG_RTE_LIBRTE_VMBUS) += vmbus_common.c +SRCS-$(CONFIG_RTE_LIBRTE_VMBUS) += vmbus_channel.c vmbus_bufring.c +SRCS-$(CONFIG_RTE_LIBRTE_VMBUS) += vmbus_common_uio.c + +SYMLINK-$(CONFIG_RTE_LIBRTE_VMBUS)-include += rte_bus_vmbus.h +SYMLINK-$(CONFIG_RTE_LIBRTE_VMBUS)-include += rte_vmbus_reg.h + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/src/spdk/dpdk/drivers/bus/vmbus/linux/Makefile b/src/spdk/dpdk/drivers/bus/vmbus/linux/Makefile new file mode 100644 index 000000000..ef0d30b2d --- /dev/null +++ b/src/spdk/dpdk/drivers/bus/vmbus/linux/Makefile @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: BSD-3-Clause + +SRCS += vmbus_bus.c vmbus_uio.c diff --git a/src/spdk/dpdk/drivers/bus/vmbus/linux/vmbus_bus.c b/src/spdk/dpdk/drivers/bus/vmbus/linux/vmbus_bus.c new file mode 100644 index 000000000..3c924eee1 --- /dev/null +++ b/src/spdk/dpdk/drivers/bus/vmbus/linux/vmbus_bus.c @@ -0,0 +1,376 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2018, Microsoft Corporation. + * All Rights Reserved. 
+ */ + +#include <string.h> +#include <unistd.h> +#include <dirent.h> +#include <fcntl.h> +#include <sys/mman.h> +#include <sys/stat.h> + +#include <rte_eal.h> +#include <rte_uuid.h> +#include <rte_tailq.h> +#include <rte_log.h> +#include <rte_devargs.h> +#include <rte_memory.h> +#include <rte_malloc.h> +#include <rte_bus_vmbus.h> + +#include "eal_filesystem.h" +#include "private.h" + +/** Pathname of VMBUS devices directory. */ +#define SYSFS_VMBUS_DEVICES "/sys/bus/vmbus/devices" + +/* + * GUID associated with network devices + * {f8615163-df3e-46c5-913f-f2d2f965ed0e} + */ +static const rte_uuid_t vmbus_nic_uuid = { + 0xf8, 0x61, 0x51, 0x63, + 0xdf, 0x3e, + 0x46, 0xc5, + 0x91, 0x3f, + 0xf2, 0xd2, 0xf9, 0x65, 0xed, 0xe +}; + +extern struct rte_vmbus_bus rte_vmbus_bus; + +/* Read sysfs file to get UUID */ +static int +parse_sysfs_uuid(const char *filename, rte_uuid_t uu) +{ + char buf[BUFSIZ]; + char *cp, *in = buf; + FILE *f; + + f = fopen(filename, "r"); + if (f == NULL) { + VMBUS_LOG(ERR, "cannot open sysfs value %s: %s", + filename, strerror(errno)); + return -1; + } + + if (fgets(buf, sizeof(buf), f) == NULL) { + VMBUS_LOG(ERR, "cannot read sysfs value %s", + filename); + fclose(f); + return -1; + } + fclose(f); + + cp = strchr(buf, '\n'); + if (cp) + *cp = '\0'; + + /* strip { } notation */ + if (buf[0] == '{') { + in = buf + 1; + cp = strchr(in, '}'); + if (cp) + *cp = '\0'; + } + + if (rte_uuid_parse(in, uu) < 0) { + VMBUS_LOG(ERR, "%s %s not a valid UUID", + filename, buf); + return -1; + } + + return 0; +} + +static int +get_sysfs_string(const char *filename, char *buf, size_t buflen) +{ + char *cp; + FILE *f; + + f = fopen(filename, "r"); + if (f == NULL) { + VMBUS_LOG(ERR, "cannot open sysfs value %s:%s", + filename, strerror(errno)); + return -1; + } + + if (fgets(buf, buflen, f) == NULL) { + VMBUS_LOG(ERR, "cannot read sysfs value %s", + filename); + fclose(f); + return -1; + } + fclose(f); + + /* remove trailing newline */ + cp = memchr(buf, '\n', 
buflen); + if (cp) + *cp = '\0'; + + return 0; +} + +static int +vmbus_get_uio_dev(const struct rte_vmbus_device *dev, + char *dstbuf, size_t buflen) +{ + char dirname[PATH_MAX]; + unsigned int uio_num; + struct dirent *e; + DIR *dir; + + /* Assume recent kernel where uio is in uio/uioX */ + snprintf(dirname, sizeof(dirname), + SYSFS_VMBUS_DEVICES "/%s/uio", dev->device.name); + + dir = opendir(dirname); + if (dir == NULL) + return -1; /* Not a UIO device */ + + /* take the first file starting with "uio" */ + while ((e = readdir(dir)) != NULL) { + const int prefix_len = 3; + char *endptr; + + if (strncmp(e->d_name, "uio", prefix_len) != 0) + continue; + + /* try uio%d */ + errno = 0; + uio_num = strtoull(e->d_name + prefix_len, &endptr, 10); + if (errno == 0 && endptr != (e->d_name + prefix_len)) { + snprintf(dstbuf, buflen, "%s/uio%u", dirname, uio_num); + break; + } + } + closedir(dir); + + if (e == NULL) + return -1; + + return uio_num; +} + +/* Check map names with kernel names */ +static const char *map_names[VMBUS_MAX_RESOURCE] = { + [HV_TXRX_RING_MAP] = "txrx_rings", + [HV_INT_PAGE_MAP] = "int_page", + [HV_MON_PAGE_MAP] = "monitor_page", + [HV_RECV_BUF_MAP] = "recv:", + [HV_SEND_BUF_MAP] = "send:", +}; + + +/* map the resources of a vmbus device in virtual memory */ +int +rte_vmbus_map_device(struct rte_vmbus_device *dev) +{ + char uioname[PATH_MAX], filename[PATH_MAX]; + char dirname[PATH_MAX], mapname[64]; + int i; + + dev->uio_num = vmbus_get_uio_dev(dev, uioname, sizeof(uioname)); + if (dev->uio_num < 0) { + VMBUS_LOG(DEBUG, "Not managed by UIO driver, skipped"); + return 1; + } + + /* Extract resource value */ + for (i = 0; i < VMBUS_MAX_RESOURCE; i++) { + struct rte_mem_resource *res = &dev->resource[i]; + unsigned long len, gpad = 0; + char *cp; + + snprintf(dirname, sizeof(dirname), + "%s/maps/map%d", uioname, i); + + snprintf(filename, sizeof(filename), + "%s/name", dirname); + + if (get_sysfs_string(filename, mapname, sizeof(mapname)) < 0) { + 
VMBUS_LOG(ERR, "could not read %s", filename); + return -1; + } + + if (strncmp(map_names[i], mapname, strlen(map_names[i])) != 0) { + VMBUS_LOG(ERR, + "unexpected resource %s (expected %s)", + mapname, map_names[i]); + return -1; + } + + snprintf(filename, sizeof(filename), + "%s/size", dirname); + if (eal_parse_sysfs_value(filename, &len) < 0) { + VMBUS_LOG(ERR, + "could not read %s", filename); + return -1; + } + res->len = len; + + /* both send and receive buffers have gpad in name */ + cp = memchr(mapname, ':', sizeof(mapname)); + if (cp) + gpad = strtoul(cp+1, NULL, 0); + + /* put the GPAD value in physical address */ + res->phys_addr = gpad; + } + + return vmbus_uio_map_resource(dev); +} + +void +rte_vmbus_unmap_device(struct rte_vmbus_device *dev) +{ + vmbus_uio_unmap_resource(dev); +} + +/* Scan one vmbus sysfs entry, and fill the devices list from it. */ +static int +vmbus_scan_one(const char *name) +{ + struct rte_vmbus_device *dev, *dev2; + char filename[PATH_MAX]; + char dirname[PATH_MAX]; + unsigned long tmp; + + dev = calloc(1, sizeof(*dev)); + if (dev == NULL) + return -1; + + dev->device.bus = &rte_vmbus_bus.bus; + dev->device.name = strdup(name); + if (!dev->device.name) + goto error; + + /* sysfs base directory + * /sys/bus/vmbus/devices/7a08391f-f5a0-4ac0-9802-d13fd964f8df + * or on older kernel + * /sys/bus/vmbus/devices/vmbus_1 + */ + snprintf(dirname, sizeof(dirname), "%s/%s", + SYSFS_VMBUS_DEVICES, name); + + /* get device class */ + snprintf(filename, sizeof(filename), "%s/class_id", dirname); + if (parse_sysfs_uuid(filename, dev->class_id) < 0) + goto error; + + /* skip non-network devices */ + if (rte_uuid_compare(dev->class_id, vmbus_nic_uuid) != 0) { + free(dev); + return 0; + } + + /* get device id */ + snprintf(filename, sizeof(filename), "%s/device_id", dirname); + if (parse_sysfs_uuid(filename, dev->device_id) < 0) + goto error; + + /* get relid */ + snprintf(filename, sizeof(filename), "%s/id", dirname); + if 
(eal_parse_sysfs_value(filename, &tmp) < 0) + goto error; + dev->relid = tmp; + + /* get monitor id */ + snprintf(filename, sizeof(filename), "%s/monitor_id", dirname); + if (eal_parse_sysfs_value(filename, &tmp) < 0) + goto error; + dev->monitor_id = tmp; + + /* get numa node (if present) */ + snprintf(filename, sizeof(filename), "%s/numa_node", + dirname); + + if (access(filename, R_OK) == 0) { + if (eal_parse_sysfs_value(filename, &tmp) < 0) + goto error; + dev->device.numa_node = tmp; + } else { + /* if no NUMA support, set default to 0 */ + dev->device.numa_node = SOCKET_ID_ANY; + } + + dev->device.devargs = vmbus_devargs_lookup(dev); + + /* device is valid, add in list (sorted) */ + VMBUS_LOG(DEBUG, "Adding vmbus device %s", name); + + TAILQ_FOREACH(dev2, &rte_vmbus_bus.device_list, next) { + int ret; + + ret = rte_uuid_compare(dev->device_id, dev2->device_id); + if (ret > 0) + continue; + + if (ret < 0) { + vmbus_insert_device(dev2, dev); + } else { /* already registered */ + VMBUS_LOG(NOTICE, + "%s already registered", name); + free(dev); + } + return 0; + } + + vmbus_add_device(dev); + return 0; +error: + VMBUS_LOG(DEBUG, "failed"); + + free(dev); + return -1; +} + +/* + * Scan the content of the vmbus, and the devices in the devices list + */ +int +rte_vmbus_scan(void) +{ + struct dirent *e; + DIR *dir; + + dir = opendir(SYSFS_VMBUS_DEVICES); + if (dir == NULL) { + if (errno == ENOENT) + return 0; + + VMBUS_LOG(ERR, "opendir %s failed: %s", + SYSFS_VMBUS_DEVICES, strerror(errno)); + return -1; + } + + while ((e = readdir(dir)) != NULL) { + if (e->d_name[0] == '.') + continue; + + if (vmbus_scan_one(e->d_name) < 0) + goto error; + } + closedir(dir); + return 0; + +error: + closedir(dir); + return -1; +} + +void rte_vmbus_irq_mask(struct rte_vmbus_device *device) +{ + vmbus_uio_irq_control(device, 1); +} + +void rte_vmbus_irq_unmask(struct rte_vmbus_device *device) +{ + vmbus_uio_irq_control(device, 0); +} + +int rte_vmbus_irq_read(struct rte_vmbus_device 
*device) +{ + return vmbus_uio_irq_read(device); +} diff --git a/src/spdk/dpdk/drivers/bus/vmbus/linux/vmbus_uio.c b/src/spdk/dpdk/drivers/bus/vmbus/linux/vmbus_uio.c new file mode 100644 index 000000000..5451bfd15 --- /dev/null +++ b/src/spdk/dpdk/drivers/bus/vmbus/linux/vmbus_uio.c @@ -0,0 +1,453 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2018, Microsoft Corporation. + * All Rights Reserved. + */ + +#include <string.h> +#include <unistd.h> +#include <fcntl.h> +#include <dirent.h> +#include <inttypes.h> +#include <sys/stat.h> +#include <sys/mman.h> + +#include <rte_log.h> +#include <rte_bus.h> +#include <rte_memory.h> +#include <rte_common.h> +#include <rte_malloc.h> +#include <rte_bus_vmbus.h> +#include <rte_string_fns.h> + +#include "private.h" + +/** Pathname of VMBUS devices directory. */ +#define SYSFS_VMBUS_DEVICES "/sys/bus/vmbus/devices" + +static void *vmbus_map_addr; + +/* Control interrupts */ +void vmbus_uio_irq_control(struct rte_vmbus_device *dev, int32_t onoff) +{ + if (write(dev->intr_handle.fd, &onoff, sizeof(onoff)) < 0) { + VMBUS_LOG(ERR, "cannot write to %d:%s", + dev->intr_handle.fd, strerror(errno)); + } +} + +int vmbus_uio_irq_read(struct rte_vmbus_device *dev) +{ + int32_t count; + int cc; + + cc = read(dev->intr_handle.fd, &count, sizeof(count)); + if (cc < (int)sizeof(count)) { + if (cc < 0) { + VMBUS_LOG(ERR, "IRQ read failed %s", + strerror(errno)); + return -errno; + } + VMBUS_LOG(ERR, "can't read IRQ count"); + return -EINVAL; + } + + return count; +} + +void +vmbus_uio_free_resource(struct rte_vmbus_device *dev, + struct mapped_vmbus_resource *uio_res) +{ + rte_free(uio_res); + + if (dev->intr_handle.uio_cfg_fd >= 0) { + close(dev->intr_handle.uio_cfg_fd); + dev->intr_handle.uio_cfg_fd = -1; + } + + if (dev->intr_handle.fd >= 0) { + close(dev->intr_handle.fd); + dev->intr_handle.fd = -1; + dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; + } +} + +int +vmbus_uio_alloc_resource(struct rte_vmbus_device *dev, + 
struct mapped_vmbus_resource **uio_res) +{ + char devname[PATH_MAX]; /* contains the /dev/uioX */ + + /* save fd if in primary process */ + snprintf(devname, sizeof(devname), "/dev/uio%u", dev->uio_num); + dev->intr_handle.fd = open(devname, O_RDWR); + if (dev->intr_handle.fd < 0) { + VMBUS_LOG(ERR, "Cannot open %s: %s", + devname, strerror(errno)); + goto error; + } + dev->intr_handle.type = RTE_INTR_HANDLE_UIO_INTX; + + /* allocate the mapping details for secondary processes*/ + *uio_res = rte_zmalloc("UIO_RES", sizeof(**uio_res), 0); + if (*uio_res == NULL) { + VMBUS_LOG(ERR, "cannot store uio mmap details"); + goto error; + } + + strlcpy((*uio_res)->path, devname, PATH_MAX); + rte_uuid_copy((*uio_res)->id, dev->device_id); + + return 0; + +error: + vmbus_uio_free_resource(dev, *uio_res); + return -1; +} + +static int +find_max_end_va(const struct rte_memseg_list *msl, void *arg) +{ + size_t sz = msl->memseg_arr.len * msl->page_sz; + void *end_va = RTE_PTR_ADD(msl->base_va, sz); + void **max_va = arg; + + if (*max_va < end_va) + *max_va = end_va; + return 0; +} + +/* + * TODO: this should be part of memseg api. + * code is duplicated from PCI. 
+ */ +static void * +vmbus_find_max_end_va(void) +{ + void *va = NULL; + + rte_memseg_list_walk(find_max_end_va, &va); + return va; +} + +int +vmbus_uio_map_resource_by_index(struct rte_vmbus_device *dev, int idx, + struct mapped_vmbus_resource *uio_res, + int flags) +{ + size_t size = dev->resource[idx].len; + struct vmbus_map *maps = uio_res->maps; + void *mapaddr; + off_t offset; + int fd; + + /* devname for mmap */ + fd = open(uio_res->path, O_RDWR); + if (fd < 0) { + VMBUS_LOG(ERR, "Cannot open %s: %s", + uio_res->path, strerror(errno)); + return -1; + } + + /* try mapping somewhere close to the end of hugepages */ + if (vmbus_map_addr == NULL) + vmbus_map_addr = vmbus_find_max_end_va(); + + /* offset is special in uio it indicates which resource */ + offset = idx * PAGE_SIZE; + + mapaddr = vmbus_map_resource(vmbus_map_addr, fd, offset, size, flags); + close(fd); + + if (mapaddr == MAP_FAILED) + return -1; + + dev->resource[idx].addr = mapaddr; + vmbus_map_addr = RTE_PTR_ADD(mapaddr, size); + + /* Record result of successful mapping for use by secondary */ + maps[idx].addr = mapaddr; + maps[idx].size = size; + + return 0; +} + +static int vmbus_uio_map_primary(struct vmbus_channel *chan, + void **ring_buf, uint32_t *ring_size) +{ + struct mapped_vmbus_resource *uio_res; + + uio_res = vmbus_uio_find_resource(chan->device); + if (!uio_res) { + VMBUS_LOG(ERR, "can not find resources!"); + return -ENOMEM; + } + + if (uio_res->nb_maps < VMBUS_MAX_RESOURCE) { + VMBUS_LOG(ERR, "VMBUS: only %u resources found!", + uio_res->nb_maps); + return -EINVAL; + } + + *ring_size = uio_res->maps[HV_TXRX_RING_MAP].size / 2; + *ring_buf = uio_res->maps[HV_TXRX_RING_MAP].addr; + return 0; +} + +static int vmbus_uio_map_subchan(const struct rte_vmbus_device *dev, + const struct vmbus_channel *chan, + void **ring_buf, uint32_t *ring_size) +{ + char ring_path[PATH_MAX]; + size_t file_size; + struct stat sb; + void *mapaddr; + int fd; + + snprintf(ring_path, sizeof(ring_path), + 
"%s/%s/channels/%u/ring", + SYSFS_VMBUS_DEVICES, dev->device.name, + chan->relid); + + fd = open(ring_path, O_RDWR); + if (fd < 0) { + VMBUS_LOG(ERR, "Cannot open %s: %s", + ring_path, strerror(errno)); + return -errno; + } + + if (fstat(fd, &sb) < 0) { + VMBUS_LOG(ERR, "Cannot state %s: %s", + ring_path, strerror(errno)); + close(fd); + return -errno; + } + file_size = sb.st_size; + + if (file_size == 0 || (file_size & (PAGE_SIZE - 1))) { + VMBUS_LOG(ERR, "incorrect size %s: %zu", + ring_path, file_size); + + close(fd); + return -EINVAL; + } + + mapaddr = vmbus_map_resource(vmbus_map_addr, fd, + 0, file_size, 0); + close(fd); + + if (mapaddr == MAP_FAILED) + return -EIO; + + *ring_size = file_size / 2; + *ring_buf = mapaddr; + + vmbus_map_addr = RTE_PTR_ADD(ring_buf, file_size); + return 0; +} + +int +vmbus_uio_map_secondary_subchan(const struct rte_vmbus_device *dev, + const struct vmbus_channel *chan) +{ + const struct vmbus_br *br = &chan->txbr; + char ring_path[PATH_MAX]; + void *mapaddr, *ring_buf; + uint32_t ring_size; + int fd; + + snprintf(ring_path, sizeof(ring_path), + "%s/%s/channels/%u/ring", + SYSFS_VMBUS_DEVICES, dev->device.name, + chan->relid); + + ring_buf = br->vbr; + ring_size = br->dsize + sizeof(struct vmbus_bufring); + VMBUS_LOG(INFO, "secondary ring_buf %p size %u", + ring_buf, ring_size); + + fd = open(ring_path, O_RDWR); + if (fd < 0) { + VMBUS_LOG(ERR, "Cannot open %s: %s", + ring_path, strerror(errno)); + return -errno; + } + + mapaddr = vmbus_map_resource(ring_buf, fd, 0, 2 * ring_size, 0); + close(fd); + + if (mapaddr == ring_buf) + return 0; + + if (mapaddr == MAP_FAILED) + VMBUS_LOG(ERR, + "mmap subchan %u in secondary failed", chan->relid); + else { + VMBUS_LOG(ERR, + "mmap subchan %u in secondary address mismatch", + chan->relid); + vmbus_unmap_resource(mapaddr, 2 * ring_size); + } + return -1; +} + +int vmbus_uio_map_rings(struct vmbus_channel *chan) +{ + const struct rte_vmbus_device *dev = chan->device; + uint32_t ring_size; + 
void *ring_buf; + int ret; + + /* Primary channel */ + if (chan->subchannel_id == 0) + ret = vmbus_uio_map_primary(chan, &ring_buf, &ring_size); + else + ret = vmbus_uio_map_subchan(dev, chan, &ring_buf, &ring_size); + + if (ret) + return ret; + + vmbus_br_setup(&chan->txbr, ring_buf, ring_size); + vmbus_br_setup(&chan->rxbr, (char *)ring_buf + ring_size, ring_size); + return 0; +} + +static int vmbus_uio_sysfs_read(const char *dir, const char *name, + unsigned long *val, unsigned long max_range) +{ + char path[PATH_MAX]; + FILE *f; + int ret; + + snprintf(path, sizeof(path), "%s/%s", dir, name); + f = fopen(path, "r"); + if (!f) { + VMBUS_LOG(ERR, "can't open %s:%s", + path, strerror(errno)); + return -errno; + } + + if (fscanf(f, "%lu", val) != 1) + ret = -EIO; + else if (*val > max_range) + ret = -ERANGE; + else + ret = 0; + fclose(f); + + return ret; +} + +static bool vmbus_uio_ring_present(const struct rte_vmbus_device *dev, + uint32_t relid) +{ + char ring_path[PATH_MAX]; + + /* Check if kernel has subchannel sysfs files */ + snprintf(ring_path, sizeof(ring_path), + "%s/%s/channels/%u/ring", + SYSFS_VMBUS_DEVICES, dev->device.name, relid); + + return access(ring_path, R_OK|W_OK) == 0; +} + +bool vmbus_uio_subchannels_supported(const struct rte_vmbus_device *dev, + const struct vmbus_channel *chan) +{ + return vmbus_uio_ring_present(dev, chan->relid); +} + +static bool vmbus_isnew_subchannel(struct vmbus_channel *primary, + unsigned long id) +{ + const struct vmbus_channel *c; + + STAILQ_FOREACH(c, &primary->subchannel_list, next) { + if (c->relid == id) + return false; + } + return true; +} + +int vmbus_uio_get_subchan(struct vmbus_channel *primary, + struct vmbus_channel **subchan) +{ + const struct rte_vmbus_device *dev = primary->device; + char chan_path[PATH_MAX], subchan_path[PATH_MAX]; + struct dirent *ent; + DIR *chan_dir; + int err; + + snprintf(chan_path, sizeof(chan_path), + "%s/%s/channels", + SYSFS_VMBUS_DEVICES, dev->device.name); + + chan_dir = 
opendir(chan_path); + if (!chan_dir) { + VMBUS_LOG(ERR, "cannot open %s: %s", + chan_path, strerror(errno)); + return -errno; + } + + while ((ent = readdir(chan_dir))) { + unsigned long relid, subid, monid; + char *endp; + + if (ent->d_name[0] == '.') + continue; + + errno = 0; + relid = strtoul(ent->d_name, &endp, 0); + if (*endp || errno != 0 || relid > UINT16_MAX) { + VMBUS_LOG(NOTICE, "not a valid channel relid: %s", + ent->d_name); + continue; + } + + if (!vmbus_isnew_subchannel(primary, relid)) { + VMBUS_LOG(DEBUG, "skip already found channel: %lu", + relid); + continue; + } + + if (!vmbus_uio_ring_present(dev, relid)) { + VMBUS_LOG(DEBUG, "ring mmap not found (yet) for: %lu", + relid); + continue; + } + + snprintf(subchan_path, sizeof(subchan_path), "%s/%lu", + chan_path, relid); + err = vmbus_uio_sysfs_read(subchan_path, "subchannel_id", + &subid, UINT16_MAX); + if (err) { + VMBUS_LOG(NOTICE, "no subchannel_id in %s:%s", + subchan_path, strerror(-err)); + goto fail; + } + + if (subid == 0) + continue; /* skip primary channel */ + + err = vmbus_uio_sysfs_read(subchan_path, "monitor_id", + &monid, UINT8_MAX); + if (err) { + VMBUS_LOG(NOTICE, "no monitor_id in %s:%s", + subchan_path, strerror(-err)); + goto fail; + } + + err = vmbus_chan_create(dev, relid, subid, monid, subchan); + if (err) { + VMBUS_LOG(ERR, "subchannel setup failed"); + goto fail; + } + break; + } + closedir(chan_dir); + + return (ent == NULL) ? 
/*
 * Driver-side view of one direction of a VMBus ring buffer.
 *
 * A channel holds two of these (transmit and receive); they are
 * initialised by vmbus_br_setup() from a mapped ring region.
 */
struct vmbus_br {
	struct vmbus_bufring *vbr;	/* ring region, starts with the bufring header */
	uint32_t	dsize;		/* size used for the free/used space arithmetic;
					 * presumably the region length minus the
					 * vmbus_bufring header -- confirm in
					 * vmbus_br_setup()
					 */
	uint32_t	windex; /* next available location */
};
/*
 * State of one VMBus channel (primary or sub-channel).
 */
struct vmbus_channel {
	/* sub-channels hanging off this channel (walked on the primary) */
	STAILQ_HEAD(, vmbus_channel) subchannel_list;
	/* linkage in a subchannel_list */
	STAILQ_ENTRY(vmbus_channel) next;
	/* device this channel belongs to */
	const struct rte_vmbus_device *device;

	/* receive ring: set up on the second half of the mapped ring region */
	struct vmbus_br rxbr;
	/* transmit ring: set up on the first half of the mapped ring region */
	struct vmbus_br txbr;

	/* channel id; names the channels/<relid> directory in sysfs */
	uint16_t relid;
	/* 0 for the primary channel, non-zero for sub-channels */
	uint16_t subchannel_id;
	/* value of the channel's monitor_id sysfs attribute */
	uint8_t monitor_id;
};
vmbus_uio_subchannels_supported(const struct rte_vmbus_device *dev, + const struct vmbus_channel *chan); +int vmbus_uio_get_subchan(struct vmbus_channel *primary, + struct vmbus_channel **subchan); +int vmbus_uio_map_rings(struct vmbus_channel *chan); +int vmbus_uio_map_secondary_subchan(const struct rte_vmbus_device *dev, + const struct vmbus_channel *chan); + +void vmbus_br_setup(struct vmbus_br *br, void *buf, unsigned int blen); + +/* Amount of space available for write */ +static inline uint32_t +vmbus_br_availwrite(const struct vmbus_br *br, uint32_t windex) +{ + uint32_t rindex = br->vbr->rindex; + + if (windex >= rindex) + return br->dsize - (windex - rindex); + else + return rindex - windex; +} + +static inline uint32_t +vmbus_br_availread(const struct vmbus_br *br) +{ + return br->dsize - vmbus_br_availwrite(br, br->vbr->windex); +} + +int vmbus_txbr_write(struct vmbus_br *tbr, const struct iovec iov[], int iovlen, + bool *need_sig); + +int vmbus_rxbr_peek(const struct vmbus_br *rbr, void *data, size_t dlen); + +int vmbus_rxbr_read(struct vmbus_br *rbr, void *data, size_t dlen, size_t hlen); + +#endif /* _VMBUS_PRIVATE_H_ */ diff --git a/src/spdk/dpdk/drivers/bus/vmbus/rte_bus_vmbus.h b/src/spdk/dpdk/drivers/bus/vmbus/rte_bus_vmbus.h new file mode 100644 index 000000000..4cf73ce81 --- /dev/null +++ b/src/spdk/dpdk/drivers/bus/vmbus/rte_bus_vmbus.h @@ -0,0 +1,421 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2018, Microsoft Corporation. + * All Rights Reserved. 
/**
 * Indexes of the memory regions exposed for a VMBUS device.
 *
 * The value is used both as the UIO map number (maps/map<N> in sysfs)
 * and as the index into rte_vmbus_device.resource[].
 */
enum hv_uio_map {
	HV_TXRX_RING_MAP = 0,	/**< transmit/receive ring buffers */
	HV_INT_PAGE_MAP,	/**< interrupt page */
	HV_MON_PAGE_MAP,	/**< monitor page */
	HV_RECV_BUF_MAP,	/**< receive buffer */
	HV_SEND_BUF_MAP		/**< send buffer */
};
/** Maximum number of VMBUS resources (number of hv_uio_map entries). */
#define VMBUS_MAX_RESOURCE 5
/**
 * Removal function for the driver; this is the type of the
 * rte_vmbus_driver "remove" callback (the counterpart of vmbus_probe_t).
 * Presumably invoked when a device is detached/unplugged -- confirm
 * against the bus remove path.
 */
typedef int (vmbus_remove_t)(struct rte_vmbus_device *);
/**
 * Map the VMBUS device resources in user space virtual memory address
 *
 * @param dev
 *   A pointer to a rte_vmbus_device structure describing the device
 *   to use
 *
 * @return
 *   0 on success, negative on error and positive if the device is
 *   not managed by a UIO driver (in which case it is skipped).
 */
int rte_vmbus_map_device(struct rte_vmbus_device *dev);
/**
 * Explicitly signal host that data is available
 *
 * @param channel
 *   Pointer to vmbus_channel structure.
 *
 * Used when batching multiple sends and only signaling host
 * after the last send.
 */
void rte_vmbus_chan_signal_tx(const struct vmbus_channel *channel);
+ * @param gpa + * Array of buffers to send + * @param gpacnt + * Number of elements in gpa + * @param data + * Pointer to the buffer of additional data to send + * @param dlen + * Number of bytes of additional data to send + * @param xact + * Identifier of the request + * @param need_sig + * Optional. If non-NULL, it is OR-ed with true when the host needs + * to be signaled and the caller is expected to signal later. + * If NULL, the host is signaled from within this call when required. + * + * Sends data in buffer directly to hyper-v via the vmbus. + * The packet is always sent with type VMBUS_CHANPKT_TYPE_GPA and + * flags VMBUS_CHANPKT_FLAG_RC. + */ +int rte_vmbus_chan_send_sglist(struct vmbus_channel *channel, + struct vmbus_gpa gpa[], uint32_t gpacnt, + void *data, uint32_t dlen, + uint64_t xact, bool *need_sig); +/** + * Receive response to request on the given channel + * skips the channel header. + * + * @param chan + * Pointer to vmbus_channel structure. + * @param data + * Pointer to the buffer you want to receive the data into. + * @param len + * Pointer to size of receive buffer (in/out). On return it holds + * the length of the packet data, also when -ENOBUFS is returned. + * @param request_id + * Pointer to received transaction_id (may be NULL) + * @return + * On success, returns 0 + * On failure, returns negative errno (-ENOBUFS if the receive + * buffer is too small for the packet data). + */ +int rte_vmbus_chan_recv(struct vmbus_channel *chan, + void *data, uint32_t *len, + uint64_t *request_id); + +/** + * Receive response to request on the given channel + * includes the channel header. + * + * @param chan + * Pointer to vmbus_channel structure. + * @param data + * Pointer to the buffer you want to receive the data into. + * @param len + * Pointer to size of receive buffer (in/out) + * @return + * On success, returns the number of bytes consumed from the ring + * (packet length plus the 8 byte trailing offset). + * On failure, returns negative errno. + */ +int rte_vmbus_chan_recv_raw(struct vmbus_channel *chan, + void *data, uint32_t *len); + +/** + * Notify host of bytes read (after recv_raw) + * Signals host if required. + * + * @param chan + * Pointer to vmbus_channel structure. 
+ * @param bytes_read + * Number of bytes read since last signal + */ +void rte_vmbus_chan_signal_read(struct vmbus_channel *chan, uint32_t bytes_read); + +/** + * Determine sub channel index of the given channel + * + * @param chan + * Pointer to vmbus_channel structure. + * @return + * Sub channel index (0 for primary) + */ +uint16_t rte_vmbus_sub_channel_index(const struct vmbus_channel *chan); + +/** + * Set the host monitor latency hint + * + * @param dev + * VMBUS device + * @param chan + * Pointer to vmbus_channel structure. + * @param latency + * Approximate wait period between hypervisor examinations of + * the trigger page (in nanoseconds). The hint is stored in units + * of 100 ns; values of UINT16_MAX * 100 or more are rejected with + * an error log and leave the hint unchanged. + */ +void rte_vmbus_set_latency(const struct rte_vmbus_device *dev, + const struct vmbus_channel *chan, + uint32_t latency); + +/** + * Register a VMBUS driver. + * + * @param driver + * A pointer to a rte_vmbus_driver structure describing the driver + * to be registered. + */ +void rte_vmbus_register(struct rte_vmbus_driver *driver); + +/** + * Dump the state of the channel's ring buffers, for debugging. + * + * @param f + * Output stream the dump is written to. + * @param chan + * Pointer to vmbus_channel structure. + */ +void rte_vmbus_chan_dump(FILE *f, const struct vmbus_channel *chan); + +/** + * Unregister a VMBUS driver. + * + * @param driver + * A pointer to a rte_vmbus_driver structure describing the driver + * to be unregistered. 
+ */ +void rte_vmbus_unregister(struct rte_vmbus_driver *driver); + +/** Helper for VMBUS device registration from driver instance */ +#define RTE_PMD_REGISTER_VMBUS(nm, vmbus_drv) \ + RTE_INIT(vmbusinitfn_ ##nm) \ + { \ + (vmbus_drv).driver.name = RTE_STR(nm); \ + rte_vmbus_register(&vmbus_drv); \ + } \ + RTE_PMD_EXPORT_NAME(nm, __COUNTER__) + +#ifdef __cplusplus +} +#endif + +#endif /* _VMBUS_H_ */ diff --git a/src/spdk/dpdk/drivers/bus/vmbus/rte_bus_vmbus_version.map b/src/spdk/dpdk/drivers/bus/vmbus/rte_bus_vmbus_version.map new file mode 100644 index 000000000..cbaaebc06 --- /dev/null +++ b/src/spdk/dpdk/drivers/bus/vmbus/rte_bus_vmbus_version.map @@ -0,0 +1,28 @@ +DPDK_20.0 { + global: + + rte_vmbus_chan_close; + rte_vmbus_chan_open; + rte_vmbus_chan_recv; + rte_vmbus_chan_recv_raw; + rte_vmbus_chan_rx_empty; + rte_vmbus_chan_send; + rte_vmbus_chan_send_sglist; + rte_vmbus_chan_signal_read; + rte_vmbus_chan_signal_tx; + rte_vmbus_irq_mask; + rte_vmbus_irq_read; + rte_vmbus_irq_unmask; + rte_vmbus_map_device; + rte_vmbus_max_channels; + rte_vmbus_probe; + rte_vmbus_register; + rte_vmbus_scan; + rte_vmbus_set_latency; + rte_vmbus_sub_channel_index; + rte_vmbus_subchan_open; + rte_vmbus_unmap_device; + rte_vmbus_unregister; + + local: *; +}; diff --git a/src/spdk/dpdk/drivers/bus/vmbus/rte_vmbus_reg.h b/src/spdk/dpdk/drivers/bus/vmbus/rte_vmbus_reg.h new file mode 100644 index 000000000..f5a0693dc --- /dev/null +++ b/src/spdk/dpdk/drivers/bus/vmbus/rte_vmbus_reg.h @@ -0,0 +1,344 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2018, Microsoft Corporation. + * All Rights Reserved. + */ + +#ifndef _VMBUS_REG_H_ +#define _VMBUS_REG_H_ + +/* + * Hyper-V SynIC message format. 
+ */ +#define VMBUS_MSG_DSIZE_MAX 240 +#define VMBUS_MSG_SIZE 256 + +struct vmbus_message { + uint32_t type; /* HYPERV_MSGTYPE_ */ + uint8_t dsize; /* data size */ + uint8_t flags; /* VMBUS_MSGFLAG_ */ + uint16_t rsvd; + uint64_t id; + uint8_t data[VMBUS_MSG_DSIZE_MAX]; +} __rte_packed; + +#define VMBUS_MSGFLAG_PENDING 0x01 + +/* + * Hyper-V Monitor Notification Facility + */ + +struct vmbus_mon_trig { + uint32_t pending; + uint32_t armed; +} __rte_packed; + +#define VMBUS_MONTRIGS_MAX 4 +#define VMBUS_MONTRIG_LEN 32 + +/* + * Hyper-V Monitor Notification Facility + */ +struct hyperv_mon_param { + uint32_t connid; + uint16_t evtflag_ofs; + uint16_t rsvd; +} __rte_packed; + +struct vmbus_mon_page { + uint32_t state; + uint32_t rsvd1; + + struct vmbus_mon_trig trigs[VMBUS_MONTRIGS_MAX]; + uint8_t rsvd2[536]; + + uint16_t lat[VMBUS_MONTRIGS_MAX][VMBUS_MONTRIG_LEN]; + uint8_t rsvd3[256]; + + struct hyperv_mon_param + param[VMBUS_MONTRIGS_MAX][VMBUS_MONTRIG_LEN]; + uint8_t rsvd4[1984]; +} __rte_packed; + +/* + * Buffer ring + */ + +struct vmbus_bufring { + volatile uint32_t windex; + volatile uint32_t rindex; + + /* + * Interrupt mask {0,1} + * + * For TX bufring, host set this to 1, when it is processing + * the TX bufring, so that we can safely skip the TX event + * notification to host. + * + * For RX bufring, once this is set to 1 by us, host will not + * further dispatch interrupts to us, even if there are data + * pending on the RX bufring. This effectively disables the + * interrupt of the channel to which this RX bufring is attached. + */ + volatile uint32_t imask; + + /* + * Win8 uses some of the reserved bits to implement + * interrupt driven flow management. On the send side + * we can request that the receiver interrupt the sender + * when the ring transitions from being full to being able + * to handle a message of size "pending_send_sz". + * + * Add necessary state for this enhancement. 
+ */ + volatile uint32_t pending_send; + uint32_t reserved1[12]; + + union { + struct { + uint32_t feat_pending_send_sz:1; + }; + uint32_t value; + } feature_bits; + + /* Pad it to PAGE_SIZE so that data starts on page boundary */ + uint8_t reserved2[4028]; + + /* + * Ring data starts here + RingDataStartOffset + * !!! DO NOT place any fields below this !!! + */ + uint8_t data[0]; +} __rte_packed; + +/* + * Channel packets + */ + +/* Channel packet flags */ +#define VMBUS_CHANPKT_TYPE_INBAND 0x0006 +#define VMBUS_CHANPKT_TYPE_RXBUF 0x0007 +#define VMBUS_CHANPKT_TYPE_GPA 0x0009 +#define VMBUS_CHANPKT_TYPE_COMP 0x000b + +#define VMBUS_CHANPKT_FLAG_NONE 0 +#define VMBUS_CHANPKT_FLAG_RC 0x0001 /* report completion */ + +#define VMBUS_CHANPKT_SIZE_SHIFT 3 +#define VMBUS_CHANPKT_SIZE_ALIGN (1 << VMBUS_CHANPKT_SIZE_SHIFT) +#define VMBUS_CHANPKT_HLEN_MIN \ + (sizeof(struct vmbus_chanpkt_hdr) >> VMBUS_CHANPKT_SIZE_SHIFT) + +static inline uint32_t +vmbus_chanpkt_getlen(uint16_t pktlen) +{ + return (uint32_t)pktlen << VMBUS_CHANPKT_SIZE_SHIFT; +} + +/* + * GPA stuffs. 
+ */ +struct vmbus_gpa_range { + uint32_t len; + uint32_t ofs; + uint64_t page[0]; +} __rte_packed; + +/* Same layout as vmbus_gpa_range with exactly one page[] entry */ +struct vmbus_gpa { + uint32_t len; + uint32_t ofs; + uint64_t page; +} __rte_packed; + +struct vmbus_chanpkt_hdr { + uint16_t type; /* VMBUS_CHANPKT_TYPE_ */ + uint16_t hlen; /* header len, in units of 8 bytes */ + uint16_t tlen; /* total len, in units of 8 bytes */ + uint16_t flags; /* VMBUS_CHANPKT_FLAG_ */ + uint64_t xactid; +} __rte_packed; + +/* Payload length in bytes: total packet length minus header length */ +static inline uint32_t +vmbus_chanpkt_datalen(const struct vmbus_chanpkt_hdr *pkt) +{ + return vmbus_chanpkt_getlen(pkt->tlen) + - vmbus_chanpkt_getlen(pkt->hlen); +} + +struct vmbus_chanpkt { + struct vmbus_chanpkt_hdr hdr; +} __rte_packed; + +struct vmbus_rxbuf_desc { + uint32_t len; + uint32_t ofs; +} __rte_packed; + +struct vmbus_chanpkt_rxbuf { + struct vmbus_chanpkt_hdr hdr; + uint16_t rxbuf_id; + uint16_t rsvd; + uint32_t rxbuf_cnt; + struct vmbus_rxbuf_desc rxbuf[]; +} __rte_packed; + +struct vmbus_chanpkt_sglist { + struct vmbus_chanpkt_hdr hdr; + uint32_t rsvd; + uint32_t gpa_cnt; + struct vmbus_gpa gpa[]; +} __rte_packed; + +/* + * Channel messages + * - Embedded in vmbus_message.data, e.g. response and notification. + * - Embedded in hypercall_postmsg_in.hc_data, e.g. request. 
+ */ + +#define VMBUS_CHANMSG_TYPE_CHOFFER 1 /* NOTE */ +#define VMBUS_CHANMSG_TYPE_CHRESCIND 2 /* NOTE */ +#define VMBUS_CHANMSG_TYPE_CHREQUEST 3 /* REQ */ +#define VMBUS_CHANMSG_TYPE_CHOFFER_DONE 4 /* NOTE */ +#define VMBUS_CHANMSG_TYPE_CHOPEN 5 /* REQ */ +#define VMBUS_CHANMSG_TYPE_CHOPEN_RESP 6 /* RESP */ +#define VMBUS_CHANMSG_TYPE_CHCLOSE 7 /* REQ */ +#define VMBUS_CHANMSG_TYPE_GPADL_CONN 8 /* REQ */ +#define VMBUS_CHANMSG_TYPE_GPADL_SUBCONN 9 /* REQ */ +#define VMBUS_CHANMSG_TYPE_GPADL_CONNRESP 10 /* RESP */ +#define VMBUS_CHANMSG_TYPE_GPADL_DISCONN 11 /* REQ */ +#define VMBUS_CHANMSG_TYPE_GPADL_DISCONNRESP 12 /* RESP */ +#define VMBUS_CHANMSG_TYPE_CHFREE 13 /* REQ */ +#define VMBUS_CHANMSG_TYPE_CONNECT 14 /* REQ */ +#define VMBUS_CHANMSG_TYPE_CONNECT_RESP 15 /* RESP */ +#define VMBUS_CHANMSG_TYPE_DISCONNECT 16 /* REQ */ +#define VMBUS_CHANMSG_TYPE_MAX 22 + +struct vmbus_chanmsg_hdr { + uint32_t type; /* VMBUS_CHANMSG_TYPE_ */ + uint32_t rsvd; +} __rte_packed; + +/* VMBUS_CHANMSG_TYPE_CONNECT */ +struct vmbus_chanmsg_connect { + struct vmbus_chanmsg_hdr hdr; + uint32_t ver; + uint32_t rsvd; + uint64_t evtflags; + uint64_t mnf1; + uint64_t mnf2; +} __rte_packed; + +/* VMBUS_CHANMSG_TYPE_CONNECT_RESP */ +struct vmbus_chanmsg_connect_resp { + struct vmbus_chanmsg_hdr hdr; + uint8_t done; +} __rte_packed; + +/* VMBUS_CHANMSG_TYPE_CHREQUEST */ +struct vmbus_chanmsg_chrequest { + struct vmbus_chanmsg_hdr hdr; +} __rte_packed; + +/* VMBUS_CHANMSG_TYPE_DISCONNECT */ +struct vmbus_chanmsg_disconnect { + struct vmbus_chanmsg_hdr hdr; +} __rte_packed; + +/* VMBUS_CHANMSG_TYPE_CHOPEN */ +struct vmbus_chanmsg_chopen { + struct vmbus_chanmsg_hdr hdr; + uint32_t chanid; + uint32_t openid; + uint32_t gpadl; + uint32_t vcpuid; + uint32_t txbr_pgcnt; +#define VMBUS_CHANMSG_CHOPEN_UDATA_SIZE 120 + uint8_t udata[VMBUS_CHANMSG_CHOPEN_UDATA_SIZE]; +} __rte_packed; + +/* VMBUS_CHANMSG_TYPE_CHOPEN_RESP */ +struct vmbus_chanmsg_chopen_resp { + struct vmbus_chanmsg_hdr hdr; + 
uint32_t chanid; + uint32_t openid; + uint32_t status; +} __rte_packed; + +/* VMBUS_CHANMSG_TYPE_GPADL_CONN */ +struct vmbus_chanmsg_gpadl_conn { + struct vmbus_chanmsg_hdr hdr; + uint32_t chanid; + uint32_t gpadl; + uint16_t range_len; + uint16_t range_cnt; + struct vmbus_gpa_range range; +} __rte_packed; + +#define VMBUS_CHANMSG_GPADL_CONN_PGMAX 26 + +/* VMBUS_CHANMSG_TYPE_GPADL_SUBCONN */ +struct vmbus_chanmsg_gpadl_subconn { + struct vmbus_chanmsg_hdr hdr; + uint32_t msgno; + uint32_t gpadl; + uint64_t gpa_page[]; +} __rte_packed; + +#define VMBUS_CHANMSG_GPADL_SUBCONN_PGMAX 28 + +/* VMBUS_CHANMSG_TYPE_GPADL_CONNRESP */ +struct vmbus_chanmsg_gpadl_connresp { + struct vmbus_chanmsg_hdr hdr; + uint32_t chanid; + uint32_t gpadl; + uint32_t status; +} __rte_packed; + +/* VMBUS_CHANMSG_TYPE_CHCLOSE */ +struct vmbus_chanmsg_chclose { + struct vmbus_chanmsg_hdr hdr; + uint32_t chanid; +} __rte_packed; + +/* VMBUS_CHANMSG_TYPE_GPADL_DISCONN */ +struct vmbus_chanmsg_gpadl_disconn { + struct vmbus_chanmsg_hdr hdr; + uint32_t chanid; + uint32_t gpadl; +} __rte_packed; + +/* VMBUS_CHANMSG_TYPE_CHFREE */ +struct vmbus_chanmsg_chfree { + struct vmbus_chanmsg_hdr hdr; + uint32_t chanid; +} __rte_packed; + +/* VMBUS_CHANMSG_TYPE_CHRESCIND */ +struct vmbus_chanmsg_chrescind { + struct vmbus_chanmsg_hdr hdr; + uint32_t chanid; +} __rte_packed; + +/* VMBUS_CHANMSG_TYPE_CHOFFER */ +struct vmbus_chanmsg_choffer { + struct vmbus_chanmsg_hdr hdr; + rte_uuid_t chtype; + rte_uuid_t chinst; + uint64_t chlat; /* unit: 100ns */ + uint32_t chrev; + uint32_t svrctx_sz; + uint16_t chflags; + uint16_t mmio_sz; /* unit: MB */ + uint8_t udata[120]; + uint16_t subidx; + uint16_t rsvd; + uint32_t chanid; + uint8_t montrig; + uint8_t flags1; /* VMBUS_CHOFFER_FLAG1_ */ + uint16_t flags2; + uint32_t connid; +} __rte_packed; + +#define VMBUS_CHOFFER_FLAG1_HASMNF 0x01 + +#endif /* !_VMBUS_REG_H_ */ diff --git a/src/spdk/dpdk/drivers/bus/vmbus/vmbus_bufring.c 
b/src/spdk/dpdk/drivers/bus/vmbus/vmbus_bufring.c new file mode 100644 index 000000000..c4aa07b30 --- /dev/null +++ b/src/spdk/dpdk/drivers/bus/vmbus/vmbus_bufring.c @@ -0,0 +1,244 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2009-2012,2016 Microsoft Corp. + * Copyright (c) 2012 NetApp Inc. + * Copyright (c) 2012 Citrix Inc. + * All rights reserved. + */ + +#include <unistd.h> +#include <stdint.h> +#include <stdbool.h> +#include <string.h> +#include <sys/uio.h> + +#include <rte_eal.h> +#include <rte_tailq.h> +#include <rte_log.h> +#include <rte_malloc.h> +#include <rte_bus.h> +#include <rte_atomic.h> +#include <rte_memory.h> +#include <rte_pause.h> +#include <rte_bus_vmbus.h> + +#include "private.h" + +/* Increase bufring index by inc with wraparound */ +static inline uint32_t vmbus_br_idxinc(uint32_t idx, uint32_t inc, uint32_t sz) +{ + idx += inc; + if (idx >= sz) + idx -= sz; + + return idx; +} + +void vmbus_br_setup(struct vmbus_br *br, void *buf, unsigned int blen) +{ + br->vbr = buf; + br->windex = br->vbr->windex; + br->dsize = blen - sizeof(struct vmbus_bufring); +} + +/* + * When we write to the ring buffer, check if the host needs to be + * signaled. + * + * The contract: + * - The host guarantees that while it is draining the TX bufring, + * it will set the br_imask to indicate it does not need to be + * interrupted when new data are added. + * - The host guarantees that it will completely drain the TX bufring + * before exiting the read loop. Further, once the TX bufring is + * empty, it will clear the br_imask and re-check to see if new + * data have arrived. + */ +static inline bool +vmbus_txbr_need_signal(const struct vmbus_bufring *vbr, uint32_t old_windex) +{ + rte_smp_mb(); + if (vbr->imask) + return false; + + rte_smp_rmb(); + + /* + * This is the only case we need to signal when the + * ring transitions from being empty to non-empty. 
+ */ + return old_windex == vbr->rindex; +} + +static inline uint32_t +vmbus_txbr_copyto(const struct vmbus_br *tbr, uint32_t windex, + const void *src0, uint32_t cplen) +{ + uint8_t *br_data = tbr->vbr->data; + uint32_t br_dsize = tbr->dsize; + const uint8_t *src = src0; + + /* XXX use double mapping like Linux kernel? */ + if (cplen > br_dsize - windex) { + uint32_t fraglen = br_dsize - windex; + + /* Wrap-around detected */ + memcpy(br_data + windex, src, fraglen); + memcpy(br_data, src + fraglen, cplen - fraglen); + } else { + memcpy(br_data + windex, src, cplen); + } + + return vmbus_br_idxinc(windex, cplen, br_dsize); +} + +/* + * Write scattered channel packet to TX bufring. + * + * The offset of this channel packet is written as a 64bits value + * immediately after this channel packet. + * + * The write goes through three stages: + * 1. Reserve space in ring buffer for the new data. + * Writer atomically moves priv_write_index. + * 2. Copy the new data into the ring. + * 3. Update the tail of the ring (visible to host) that indicates + * next read location. Writer updates write_index + */ +int +vmbus_txbr_write(struct vmbus_br *tbr, const struct iovec iov[], int iovlen, + bool *need_sig) +{ + struct vmbus_bufring *vbr = tbr->vbr; + uint32_t ring_size = tbr->dsize; + uint32_t old_windex, next_windex, windex, total; + uint64_t save_windex; + int i; + + total = 0; + for (i = 0; i < iovlen; i++) + total += iov[i].iov_len; + total += sizeof(save_windex); + + /* Reserve space in ring */ + do { + uint32_t avail; + + /* Get current free location */ + old_windex = tbr->windex; + + /* Prevent compiler reordering this with calculation */ + rte_compiler_barrier(); + + avail = vmbus_br_availwrite(tbr, old_windex); + + /* If not enough space in ring, then tell caller. 
*/ + if (avail <= total) + return -EAGAIN; + + next_windex = vmbus_br_idxinc(old_windex, total, ring_size); + + /* Atomic update of next write_index for other threads */ + } while (!rte_atomic32_cmpset(&tbr->windex, old_windex, next_windex)); + + /* Space from old..new is now reserved */ + windex = old_windex; + for (i = 0; i < iovlen; i++) { + windex = vmbus_txbr_copyto(tbr, windex, + iov[i].iov_base, iov[i].iov_len); + } + + /* Set the offset of the current channel packet. */ + save_windex = ((uint64_t)old_windex) << 32; + windex = vmbus_txbr_copyto(tbr, windex, &save_windex, + sizeof(save_windex)); + + /* The region reserved should match region used */ + RTE_ASSERT(windex == next_windex); + + /* Ensure that data is available before updating host index */ + rte_smp_wmb(); + + /* Checkin for our reservation. wait for our turn to update host */ + while (!rte_atomic32_cmpset(&vbr->windex, old_windex, next_windex)) + rte_pause(); + + /* If host had read all data before this, then need to signal */ + *need_sig |= vmbus_txbr_need_signal(vbr, old_windex); + return 0; +} + +static inline uint32_t +vmbus_rxbr_copyfrom(const struct vmbus_br *rbr, uint32_t rindex, + void *dst0, size_t cplen) +{ + const uint8_t *br_data = rbr->vbr->data; + uint32_t br_dsize = rbr->dsize; + uint8_t *dst = dst0; + + if (cplen > br_dsize - rindex) { + uint32_t fraglen = br_dsize - rindex; + + /* Wrap-around detected. */ + memcpy(dst, br_data + rindex, fraglen); + memcpy(dst + fraglen, br_data, cplen - fraglen); + } else { + memcpy(dst, br_data + rindex, cplen); + } + + return vmbus_br_idxinc(rindex, cplen, br_dsize); +} + +/* Copy data from receive ring but don't change index */ +int +vmbus_rxbr_peek(const struct vmbus_br *rbr, void *data, size_t dlen) +{ + uint32_t avail; + + /* + * The requested data and the 64bits channel packet + * offset should be there at least. 
+ */ + avail = vmbus_br_availread(rbr); + if (avail < dlen + sizeof(uint64_t)) + return -EAGAIN; + + vmbus_rxbr_copyfrom(rbr, rbr->vbr->rindex, data, dlen); + return 0; +} + +/* + * Copy data from receive ring and change index + * NOTE: + * We assume (dlen + skip) == sizeof(channel packet). + */ +int +vmbus_rxbr_read(struct vmbus_br *rbr, void *data, size_t dlen, size_t skip) +{ + struct vmbus_bufring *vbr = rbr->vbr; + uint32_t br_dsize = rbr->dsize; + uint32_t rindex; + + if (vmbus_br_availread(rbr) < dlen + skip + sizeof(uint64_t)) + return -EAGAIN; + + /* Record where host was when we started read (for debug) */ + rbr->windex = rbr->vbr->windex; + + /* + * Copy channel packet from RX bufring. + */ + rindex = vmbus_br_idxinc(rbr->vbr->rindex, skip, br_dsize); + rindex = vmbus_rxbr_copyfrom(rbr, rindex, data, dlen); + + /* + * Discard this channel packet's 64bits offset, which is useless to us. + */ + rindex = vmbus_br_idxinc(rindex, sizeof(uint64_t), br_dsize); + + /* Update the read index _after_ the channel packet is fetched. */ + rte_compiler_barrier(); + + vbr->rindex = rindex; + + return 0; +} diff --git a/src/spdk/dpdk/drivers/bus/vmbus/vmbus_channel.c b/src/spdk/dpdk/drivers/bus/vmbus/vmbus_channel.c new file mode 100644 index 000000000..ff2985c25 --- /dev/null +++ b/src/spdk/dpdk/drivers/bus/vmbus/vmbus_channel.c @@ -0,0 +1,446 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2018, Microsoft Corporation. + * All Rights Reserved. 
+ */ + +#include <unistd.h> +#include <stdint.h> +#include <string.h> +#include <sys/uio.h> + +#include <rte_eal.h> +#include <rte_tailq.h> +#include <rte_log.h> +#include <rte_malloc.h> +#include <rte_bus.h> +#include <rte_atomic.h> +#include <rte_memory.h> +#include <rte_bus_vmbus.h> + +#include "private.h" + +/* Atomically OR mask into the 32-bit word at addr */ +static inline void +vmbus_sync_set_bit(volatile uint32_t *addr, uint32_t mask) +{ + /* Use the GCC builtin that performs an atomic OR operation */ + __sync_or_and_fetch(addr, mask); +} + +/* Set the bit for relid in the device interrupt page (32 relids per word) */ +static inline void +vmbus_send_interrupt(const struct rte_vmbus_device *dev, uint32_t relid) +{ + uint32_t *int_addr; + uint32_t int_mask; + + int_addr = dev->int_page + relid / 32; + int_mask = 1u << (relid % 32); + + vmbus_sync_set_bit(int_addr, int_mask); +} + +/* + * Set the pending bit for monitor_id in its trigger group of the monitor page. + * NOTE(review): unlike rte_vmbus_set_latency(), trigger_index is not checked + * against the size of trigs[] here - confirm callers only pass valid ids. + */ +static inline void +vmbus_set_monitor(const struct rte_vmbus_device *dev, uint32_t monitor_id) +{ + uint32_t *monitor_addr, monitor_mask; + unsigned int trigger_index; + + trigger_index = monitor_id / HV_MON_TRIG_LEN; + monitor_mask = 1u << (monitor_id % HV_MON_TRIG_LEN); + + monitor_addr = &dev->monitor_page->trigs[trigger_index].pending; + vmbus_sync_set_bit(monitor_addr, monitor_mask); +} + +/* Flag the channel in both the interrupt page and the monitor page */ +static void +vmbus_set_event(const struct rte_vmbus_device *dev, + const struct vmbus_channel *chan) +{ + vmbus_send_interrupt(dev, chan->relid); + vmbus_set_monitor(dev, chan->monitor_id); +} + +/* + * Set the wait between when hypervisor examines the trigger. 
+ */ +void +rte_vmbus_set_latency(const struct rte_vmbus_device *dev, + const struct vmbus_channel *chan, + uint32_t latency) +{ + uint32_t trig_idx = chan->monitor_id / VMBUS_MONTRIG_LEN; + uint32_t trig_offs = chan->monitor_id % VMBUS_MONTRIG_LEN; + + /* The stored value is latency / 100; reject anything reaching UINT16_MAX */ + if (latency >= UINT16_MAX * 100) { + VMBUS_LOG(ERR, "invalid latency value %u", latency); + return; + } + + /* Guard the lat[][] row index derived from the monitor id */ + if (trig_idx >= VMBUS_MONTRIGS_MAX) { + VMBUS_LOG(ERR, "invalid monitor trigger %u", + trig_idx); + return; + } + + /* Host value is expressed in 100 nanosecond units */ + dev->monitor_page->lat[trig_idx][trig_offs] = latency / 100; +} + +/* + * Notify host that there are data pending on our TX bufring. + * + * Since this is in userspace, rely on the monitor page. + * Can't do a hypercall from userspace. + */ +void +rte_vmbus_chan_signal_tx(const struct vmbus_channel *chan) +{ + const struct rte_vmbus_device *dev = chan->device; + const struct vmbus_br *tbr = &chan->txbr; + + /* Make sure all updates are done before signaling host */ + rte_smp_wmb(); + + /* Host has set the interrupt mask on the TX bufring: skip the signal */ + if (tbr->vbr->imask) + return; + + vmbus_set_event(dev, chan); +} + + +/* Do a simple send directly using transmit ring. 
*/ +int rte_vmbus_chan_send(struct vmbus_channel *chan, uint16_t type, + void *data, uint32_t dlen, + uint64_t xactid, uint32_t flags, bool *need_sig) +{ + struct vmbus_chanpkt pkt; + unsigned int pktlen, pad_pktlen; + const uint32_t hlen = sizeof(pkt); + bool send_evt = false; + uint64_t pad = 0; + struct iovec iov[3]; + int error; + + pktlen = hlen + dlen; + pad_pktlen = RTE_ALIGN(pktlen, sizeof(uint64_t)); + + pkt.hdr.type = type; + pkt.hdr.flags = flags; + pkt.hdr.hlen = hlen >> VMBUS_CHANPKT_SIZE_SHIFT; + pkt.hdr.tlen = pad_pktlen >> VMBUS_CHANPKT_SIZE_SHIFT; + pkt.hdr.xactid = xactid; + + iov[0].iov_base = &pkt; + iov[0].iov_len = hlen; + iov[1].iov_base = data; + iov[1].iov_len = dlen; + iov[2].iov_base = &pad; + iov[2].iov_len = pad_pktlen - pktlen; + + error = vmbus_txbr_write(&chan->txbr, iov, 3, &send_evt); + + /* + * caller sets need_sig to non-NULL if it will handle + * signaling if required later. + * if need_sig is NULL, signal now if needed. + */ + if (need_sig) + *need_sig |= send_evt; + else if (error == 0 && send_evt) + rte_vmbus_chan_signal_tx(chan); + return error; +} + +/* Do a scatter/gather send where the descriptor points to data. 
*/ +int rte_vmbus_chan_send_sglist(struct vmbus_channel *chan, + struct vmbus_gpa sg[], uint32_t sglen, + void *data, uint32_t dlen, + uint64_t xactid, bool *need_sig) +{ + struct vmbus_chanpkt_sglist pkt; + unsigned int pktlen, pad_pktlen, hlen; + bool send_evt = false; + struct iovec iov[4]; + uint64_t pad = 0; + int error; + + hlen = offsetof(struct vmbus_chanpkt_sglist, gpa[sglen]); + pktlen = hlen + dlen; + pad_pktlen = RTE_ALIGN(pktlen, sizeof(uint64_t)); + + pkt.hdr.type = VMBUS_CHANPKT_TYPE_GPA; + pkt.hdr.flags = VMBUS_CHANPKT_FLAG_RC; + pkt.hdr.hlen = hlen >> VMBUS_CHANPKT_SIZE_SHIFT; + pkt.hdr.tlen = pad_pktlen >> VMBUS_CHANPKT_SIZE_SHIFT; + pkt.hdr.xactid = xactid; + pkt.rsvd = 0; + pkt.gpa_cnt = sglen; + + iov[0].iov_base = &pkt; + iov[0].iov_len = sizeof(pkt); + iov[1].iov_base = sg; + iov[1].iov_len = sizeof(struct vmbus_gpa) * sglen; + iov[2].iov_base = data; + iov[2].iov_len = dlen; + iov[3].iov_base = &pad; + iov[3].iov_len = pad_pktlen - pktlen; + + error = vmbus_txbr_write(&chan->txbr, iov, 4, &send_evt); + + /* if caller is batching, just propagate the status */ + if (need_sig) + *need_sig |= send_evt; + else if (error == 0 && send_evt) + rte_vmbus_chan_signal_tx(chan); + return error; +} + +bool rte_vmbus_chan_rx_empty(const struct vmbus_channel *channel) +{ + const struct vmbus_br *br = &channel->rxbr; + + rte_smp_rmb(); + return br->vbr->rindex == br->vbr->windex; +} + +/* Signal host after reading N bytes */ +void rte_vmbus_chan_signal_read(struct vmbus_channel *chan, uint32_t bytes_read) +{ + struct vmbus_br *rbr = &chan->rxbr; + uint32_t write_sz, pending_sz; + + /* No need for signaling on older versions */ + if (!rbr->vbr->feature_bits.feat_pending_send_sz) + return; + + /* Make sure reading of pending happens after new read index */ + rte_mb(); + + pending_sz = rbr->vbr->pending_send; + if (!pending_sz) + return; + + rte_smp_rmb(); + write_sz = vmbus_br_availwrite(rbr, rbr->vbr->windex); + + /* If there was space before then host was 
not blocked */
+	if (write_sz - bytes_read > pending_sz)
+		return;
+
+	/* If pending write will not fit */
+	if (write_sz <= pending_sz)
+		return;
+
+	vmbus_set_event(chan->device, chan);
+}
+
+/*
+ * Receive the payload of the next packet queued on the channel's RX ring.
+ *
+ * On entry *len holds the capacity of data in bytes.  On return it holds
+ * the payload length of the packet that was examined, or 0 when no valid
+ * packet header could be read.
+ *
+ * request_id may be NULL; otherwise it receives the packet's xactid.
+ *
+ * Returns 0 on success, the error reported by the ring peek/read helpers,
+ * -EIO when the header lengths are inconsistent, or -ENOBUFS when the
+ * payload does not fit in data (no read is attempted in that case and
+ * *len tells the caller how much room is required).
+ */
+int rte_vmbus_chan_recv(struct vmbus_channel *chan, void *data, uint32_t *len,
+			uint64_t *request_id)
+{
+	struct vmbus_chanpkt_hdr pkt;
+	uint32_t dlen, hlen, bufferlen = *len;
+	int error;
+
+	*len = 0;
+
+	error = vmbus_rxbr_peek(&chan->rxbr, &pkt, sizeof(pkt));
+	if (error)
+		return error;
+
+	if (unlikely(pkt.hlen < VMBUS_CHANPKT_HLEN_MIN)) {
+		VMBUS_LOG(ERR, "VMBUS recv, invalid hlen %u", pkt.hlen);
+		/* XXX this channel is dead actually. */
+		return -EIO;
+	}
+
+	if (unlikely(pkt.hlen > pkt.tlen)) {
+		VMBUS_LOG(ERR, "VMBUS recv,invalid hlen %u and tlen %u",
+			  pkt.hlen, pkt.tlen);
+		return -EIO;
+	}
+
+	/* Lengths are in quad words; shift to convert them to bytes */
+	hlen = pkt.hlen << VMBUS_CHANPKT_SIZE_SHIFT;
+	dlen = (pkt.tlen << VMBUS_CHANPKT_SIZE_SHIFT) - hlen;
+	*len = dlen;
+
+	/* If caller buffer is not large enough */
+	if (unlikely(dlen > bufferlen))
+		return -ENOBUFS;
+
+	if (request_id)
+		*request_id = pkt.xactid;
+
+	/* Read data and skip packet header */
+	error = vmbus_rxbr_read(&chan->rxbr, data, dlen, hlen);
+	if (error)
+		return error;
+
+	/*
+	 * Let rte_vmbus_chan_signal_read() decide whether the host has to
+	 * be notified.
+	 * NOTE(review): the extra sizeof(uint64_t) is presumably a per-packet
+	 * trailer kept in the ring -- confirm against vmbus_rxbr_read().
+	 */
+	rte_vmbus_chan_signal_read(chan, dlen + hlen + sizeof(uint64_t));
+	return 0;
+}
+
+/*
+ * Receive the next packet from the RX ring, packet header included.
+ *
+ * On entry *len holds the capacity of data in bytes.  Once the header has
+ * been validated *len is set to the total packet length in bytes.
+ *
+ * Unlike rte_vmbus_chan_recv() this function does not call
+ * rte_vmbus_chan_signal_read(); on success it returns the positive byte
+ * count (packet length plus sizeof(uint64_t)) instead.
+ *
+ * Returns a negative value on failure: the error from the ring helpers,
+ * -EIO for inconsistent header lengths, -ENOBUFS if data is too small.
+ */
+/* TODO: replace this with inplace ring buffer (no copy) */
+int rte_vmbus_chan_recv_raw(struct vmbus_channel *chan,
+			    void *data, uint32_t *len)
+{
+	struct vmbus_chanpkt_hdr pkt;
+	uint32_t dlen, bufferlen = *len;
+	int error;
+
+	error = vmbus_rxbr_peek(&chan->rxbr, &pkt, sizeof(pkt));
+	if (error)
+		return error;
+
+	if (unlikely(pkt.hlen < VMBUS_CHANPKT_HLEN_MIN)) {
+		VMBUS_LOG(ERR, "VMBUS recv, invalid hlen %u", pkt.hlen);
+		/* XXX this channel is dead actually. */
+		return -EIO;
+	}
+
+	if (unlikely(pkt.hlen > pkt.tlen)) {
+		VMBUS_LOG(ERR, "VMBUS recv,invalid hlen %u and tlen %u",
+			  pkt.hlen, pkt.tlen);
+		return -EIO;
+	}
+
+	/* Lengths are in quad words; shift to convert them to bytes */
+	dlen = pkt.tlen << VMBUS_CHANPKT_SIZE_SHIFT;
+	*len = dlen;
+
+	/* If caller buffer is not large enough */
+	if (unlikely(dlen > bufferlen))
+		return -ENOBUFS;
+
+	/* Read the whole packet; nothing is skipped, the header is kept */
+	error = vmbus_rxbr_read(&chan->rxbr, data, dlen, 0);
+	if (error)
+		return error;
+
+	/* Return the number of bytes read */
+	return dlen + sizeof(uint64_t);
+}
+
+/*
+ * Allocate a channel object for the given device and map its rings.
+ *
+ * The channel is zero-allocated on the device's NUMA node and initialised
+ * with relid, subid and monitor_id before vmbus_uio_map_rings() is called.
+ *
+ * Returns 0 with *new_chan pointing at the new channel, -ENOMEM if the
+ * allocation fails (*new_chan is left untouched), or the error from
+ * vmbus_uio_map_rings() (*new_chan is reset to NULL, because the channel
+ * has been freed and must not be dereferenced by the caller).
+ */
+int vmbus_chan_create(const struct rte_vmbus_device *device,
+		      uint16_t relid, uint16_t subid, uint8_t monitor_id,
+		      struct vmbus_channel **new_chan)
+{
+	struct vmbus_channel *chan;
+	int err;
+
+	chan = rte_zmalloc_socket("VMBUS", sizeof(*chan), RTE_CACHE_LINE_SIZE,
+				  device->device.numa_node);
+	if (!chan)
+		return -ENOMEM;
+
+	STAILQ_INIT(&chan->subchannel_list);
+	chan->device = device;
+	chan->subchannel_id = subid;
+	chan->relid = relid;
+	chan->monitor_id = monitor_id;
+	*new_chan = chan;
+
+	err = vmbus_uio_map_rings(chan);
+	if (err) {
+		rte_free(chan);
+		/* do not hand a pointer to freed memory back to the caller */
+		*new_chan = NULL;
+		return err;
+	}
+
+	return 0;
+}
+
+/*
+ * Setup the primary channel.
+ *
+ * Requires that a uio resource has already been recorded for the device;
+ * returns -EINVAL otherwise.  On success the new channel is stored in
+ * *new_chan, in device->primary and in the uio resource's primary field.
+ * On failure the error from vmbus_chan_create() is returned.
+ */
+int rte_vmbus_chan_open(struct rte_vmbus_device *device,
+			struct vmbus_channel **new_chan)
+{
+	struct mapped_vmbus_resource *uio_res;
+	int err;
+
+	uio_res = vmbus_uio_find_resource(device);
+	if (!uio_res) {
+		VMBUS_LOG(ERR, "can't find uio resource");
+		return -EINVAL;
+	}
+
+	err = vmbus_chan_create(device, device->relid, 0,
+				device->monitor_id, new_chan);
+	if (!err) {
+		device->primary = *new_chan;
+		uio_res->primary = *new_chan;
+	}
+
+	return err;
+}
+
+/*
+ * Number of channels usable on the device: VMBUS_MAX_CHANNELS when
+ * vmbus_uio_subchannels_supported() reports support, otherwise 1.
+ */
+int rte_vmbus_max_channels(const struct rte_vmbus_device *device)
+{
+	if (vmbus_uio_subchannels_supported(device, device->primary))
+		return VMBUS_MAX_CHANNELS;
+	else
+		return 1;
+}
+
+/*
+ * Setup secondary channel.
+ *
+ * Obtains a subchannel through vmbus_uio_get_subchan(), appends it to the
+ * primary's subchannel list and returns it in *new_chan.  Returns 0 on
+ * success or the error from vmbus_uio_get_subchan().
+ */
+int rte_vmbus_subchan_open(struct vmbus_channel *primary,
+			   struct vmbus_channel **new_chan)
+{
+	struct vmbus_channel *chan;
+	int err;
+
+	err = vmbus_uio_get_subchan(primary, &chan);
+	if (err)
+		return err;
+
+	STAILQ_INSERT_TAIL(&primary->subchannel_list, chan, next);
+	*new_chan = chan;
+	return 0;
+}
+
+/* Return the subchannel id stored in the channel */
+uint16_t rte_vmbus_sub_channel_index(const struct vmbus_channel *chan)
+{
+	return chan->subchannel_id;
+}
+
+/*
+ * Close a channel.  A subchannel is unlinked from the primary's
+ * subchannel list and freed; the primary channel itself is left alone.
+ */
+void rte_vmbus_chan_close(struct vmbus_channel *chan)
+{
+	const struct rte_vmbus_device *device = chan->device;
+	struct vmbus_channel *primary = device->primary;
+
+	/*
+	 * intentionally leak primary channel because
+	 * secondary may still reference it
+	 */
+	if (chan != primary) {
+		STAILQ_REMOVE(&primary->subchannel_list, chan,
+			      vmbus_channel, next);
+		rte_free(chan);
+	}
+
+}
+
+/*
+ * Print the state of one ring buffer to f, followed by the header of the
+ * next packet when one can be peeked.
+ * NOTE(review): the caller also passes the TX ring here; confirm that
+ * peeking it with vmbus_rxbr_peek() is meaningful.
+ */
+static void vmbus_dump_ring(FILE *f, const char *id, const struct vmbus_br *br)
+{
+	const struct vmbus_bufring *vbr = br->vbr;
+	struct vmbus_chanpkt_hdr pkt;
+
+	fprintf(f, "%s windex=%u rindex=%u mask=%u pending=%u feature=%#x\n",
+		id, vbr->windex, vbr->rindex, vbr->imask,
+		vbr->pending_send, vbr->feature_bits.value);
+	fprintf(f, " size=%u avail write=%u read=%u\n",
+		br->dsize, vmbus_br_availwrite(br, vbr->windex),
+		vmbus_br_availread(br));
+
+	if (vmbus_rxbr_peek(br, &pkt, sizeof(pkt)) == 0)
+		fprintf(f, " pkt type %#x len %u flags %#x xactid %#"PRIx64"\n",
+			pkt.type,
+			pkt.tlen << VMBUS_CHANPKT_SIZE_SHIFT,
+			pkt.flags, pkt.xactid);
+}
+
+/* Print the channel identifiers and the state of both rings to f */
+void rte_vmbus_chan_dump(FILE *f, const struct vmbus_channel *chan)
+{
+	fprintf(f, "channel[%u] relid=%u monitor=%u\n",
+		chan->subchannel_id, chan->relid, chan->monitor_id);
+	vmbus_dump_ring(f, "rxbr", &chan->rxbr);
+	vmbus_dump_ring(f, "txbr", &chan->txbr);
+}
diff --git a/src/spdk/dpdk/drivers/bus/vmbus/vmbus_common.c b/src/spdk/dpdk/drivers/bus/vmbus/vmbus_common.c
new file mode 100644
index 000000000..3adef01c9
--- /dev/null
+++ b/src/spdk/dpdk/drivers/bus/vmbus/vmbus_common.c
@@ -0,0 +1,307 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2018, Microsoft Corporation.
+ * All Rights Reserved.
+ */
+
+#include <string.h>
+#include <unistd.h>
+#include <dirent.h>
+#include <fcntl.h>
+#include <sys/queue.h>
+#include <sys/mman.h>
+
+#include <rte_log.h>
+#include <rte_bus.h>
+#include <rte_eal.h>
+#include <rte_tailq.h>
+#include <rte_devargs.h>
+#include <rte_malloc.h>
+#include <rte_errno.h>
+#include <rte_memory.h>
+#include <rte_bus_vmbus.h>
+
+#include "private.h"
+
+int vmbus_logtype_bus;
+extern struct rte_vmbus_bus rte_vmbus_bus;
+
+/*
+ * Map size bytes of the file behind fd, starting at offset, as a shared
+ * read/write mapping.  requested_addr and flags are handed to mmap()
+ * as the address hint and as extra mapping flags.
+ *
+ * Returns whatever mmap() returned; a failure is logged and reported to
+ * the caller as MAP_FAILED.
+ */
+void *
+vmbus_map_resource(void *requested_addr, int fd, off_t offset, size_t size,
+		   int flags)
+{
+	void *va = mmap(requested_addr, size, PROT_READ | PROT_WRITE,
+			MAP_SHARED | flags, fd, offset);
+
+	if (va != MAP_FAILED)
+		return va;
+
+	VMBUS_LOG(ERR,
+		  "mmap(%d, %p, %zu, %ld) failed: %s",
+		  fd, requested_addr, size, (long)offset,
+		  strerror(errno));
+	return va;
+}
+
+/*
+ * Undo a mapping created by vmbus_map_resource().  A NULL address is
+ * ignored; the munmap() outcome is only logged, never returned.
+ */
+void
+vmbus_unmap_resource(void *requested_addr, size_t size)
+{
+	if (requested_addr == NULL)
+		return;
+
+	if (munmap(requested_addr, size) == 0) {
+		VMBUS_LOG(DEBUG, " VMBUS memory unmapped at %p",
+			  requested_addr);
+		return;
+	}
+
+	VMBUS_LOG(ERR, "munmap(%p, 0x%lx) failed: %s",
+		  requested_addr, (unsigned long)size,
+		  strerror(errno));
+}
+
+/**
+ * Check whether a driver's UUID table lists the class of a device.
+ *
+ * @param dr
+ *	VMBUS driver whose id_table is walked
+ * @param dev
+ *	VMBUS device whose class_id is looked up
+ * @return
+ *	true if an entry of the table equals dev->class_id
+ *	false if the terminating null UUID is reached first
+ */
+static bool
+vmbus_match(const struct rte_vmbus_driver *dr,
+	    const struct rte_vmbus_device *dev)
+{
+	const rte_uuid_t *entry = dr->id_table;
+
+	/* the table ends at the first null UUID */
+	while (!rte_uuid_is_null(*entry)) {
+		if (rte_uuid_compare(*entry, dev->class_id) == 0)
+			return true;
+		entry++;
+	}
+
+	return false;
+}
+/*
+ * If device ID match, call the devinit() function of 
the driver.
+ */
+/*
+ * Returns 1 when the driver does not list the device's class, 0 when the
+ * driver's probe() accepted the device, and otherwise the non-zero value
+ * reported by rte_vmbus_map_device() or by probe().
+ */
+static int
+vmbus_probe_one_driver(struct rte_vmbus_driver *dr,
+		       struct rte_vmbus_device *dev)
+{
+	char uuid_str[RTE_UUID_STRLEN];
+	int rc;
+
+	/* not supported by this driver */
+	if (!vmbus_match(dr, dev))
+		return 1;
+
+	rte_uuid_unparse(dev->device_id, uuid_str, sizeof(uuid_str));
+	VMBUS_LOG(INFO, "VMBUS device %s on NUMA socket %i",
+		  uuid_str, dev->device.numa_node);
+
+	/* TODO add blacklisted */
+
+	/* the device resources have to be mapped before probing */
+	rc = rte_vmbus_map_device(dev);
+	if (rc != 0)
+		return rc;
+
+	/* the driver may look at dev->driver from inside probe() */
+	dev->driver = dr;
+
+	if (dev->device.numa_node < 0) {
+		VMBUS_LOG(WARNING, " Invalid NUMA socket, default to 0");
+		dev->device.numa_node = 0;
+	}
+
+	VMBUS_LOG(INFO, " probe driver: %s", dr->driver.name);
+	rc = dr->probe(dr, dev);
+	if (rc == 0) {
+		/* probe succeeded: publish the generic driver pointer */
+		dev->device.driver = &dr->driver;
+		return rc;
+	}
+
+	/* probe failed: drop the driver reference and the mappings */
+	dev->driver = NULL;
+	rte_vmbus_unmap_device(dev);
+	return rc;
+}
+
+/*
+ * Offer the device to every registered driver until one of them takes it.
+ *
+ * Returns 0 if the device already has a driver or a driver probed it
+ * successfully, -1 if a matching driver reported an error, and 1 if no
+ * registered driver supports the device.
+ */
+static int
+vmbus_probe_all_drivers(struct rte_vmbus_device *dev)
+{
+	struct rte_vmbus_driver *drv;
+
+	/* nothing to do when a driver is already attached */
+	if (rte_dev_is_probed(&dev->device)) {
+		VMBUS_LOG(DEBUG, "VMBUS driver already loaded");
+		return 0;
+	}
+
+	FOREACH_DRIVER_ON_VMBUS(drv) {
+		int status = vmbus_probe_one_driver(drv, dev);
+
+		if (status == 0)
+			return 0;
+
+		if (status < 0) /* negative is an error */
+			return -1;
+
+		/* positive: this driver doesn't support it, try the next */
+	}
+	return 1;
+}
+
+/*
+ * Scan the vmbus, and call the devinit() function for
+ * all registered drivers that have a matching entry in its id_table
+ * for discovered devices.
+ */
+/*
+ * Returns -1 only when at least one device is on the bus and every one of
+ * them failed to probe; otherwise 0.  A device for which no driver was
+ * found does not count as a failure.
+ */
+int
+rte_vmbus_probe(void)
+{
+	struct rte_vmbus_device *dev;
+	size_t probed = 0, failed = 0;
+	char ubuf[RTE_UUID_STRLEN];
+
+	FOREACH_DEVICE_ON_VMBUS(dev) {
+		probed++;
+
+		rte_uuid_unparse(dev->device_id, ubuf, sizeof(ubuf));
+
+		/* TODO: add whitelist/blacklist */
+
+		if (vmbus_probe_all_drivers(dev) < 0) {
+			VMBUS_LOG(NOTICE,
+				"Requested device %s cannot be used", ubuf);
+			rte_errno = errno;
+			failed++;
+		}
+	}
+
+	return (probed && probed == failed) ? -1 : 0;
+}
+
+/*
+ * Parse name as a UUID.  When parsing succeeds and addr is not NULL the
+ * resulting rte_uuid_t is copied to addr; addr is left untouched on
+ * failure.  Returns the result of rte_uuid_parse() (0 on success).
+ */
+static int
+vmbus_parse(const char *name, void *addr)
+{
+	rte_uuid_t guid;
+	int ret;
+
+	ret = rte_uuid_parse(name, guid);
+	if (ret == 0 && addr)
+		memcpy(addr, &guid, sizeof(guid));
+
+	return ret;
+}
+
+/*
+ * scan for matching device args on command line
+ * example:
+ *	-w 'vmbus:635a7ae3-091e-4410-ad59-667c4f8c04c3,latency=20'
+ *
+ * Returns the first "vmbus" devargs entry whose name parses to the
+ * device's device_id, or NULL when there is none.
+ */
+struct rte_devargs *
+vmbus_devargs_lookup(struct rte_vmbus_device *dev)
+{
+	struct rte_devargs *devargs;
+	rte_uuid_t addr;
+
+	RTE_EAL_DEVARGS_FOREACH("vmbus", devargs) {
+		/*
+		 * vmbus_parse() leaves addr untouched when the name is not
+		 * a valid UUID; skip the entry instead of comparing
+		 * against uninitialized memory.
+		 */
+		if (vmbus_parse(devargs->name, &addr) != 0)
+			continue;
+
+		if (rte_uuid_compare(dev->device_id, addr) == 0)
+			return devargs;
+	}
+	return NULL;
+
+}
+
+/* register vmbus driver: append it to the bus driver list */
+void
+rte_vmbus_register(struct rte_vmbus_driver *driver)
+{
+	VMBUS_LOG(DEBUG,
+		"Registered driver %s", driver->driver.name);
+
+	TAILQ_INSERT_TAIL(&rte_vmbus_bus.driver_list, driver, next);
+	driver->bus = &rte_vmbus_bus;
+}
+
+/* unregister vmbus driver: unlink it from the bus driver list */
+void
+rte_vmbus_unregister(struct rte_vmbus_driver *driver)
+{
+	TAILQ_REMOVE(&rte_vmbus_bus.driver_list, driver, next);
+	driver->bus = NULL;
+}
+
+/* Add a device to VMBUS bus (appended at the tail of the device list) */
+void
+vmbus_add_device(struct rte_vmbus_device *vmbus_dev)
+{
+	TAILQ_INSERT_TAIL(&rte_vmbus_bus.device_list, vmbus_dev, next);
+}
+
+/* Insert a device into a predefined position in VMBUS bus */
+void
+vmbus_insert_device(struct rte_vmbus_device *exist_vmbus_dev,
+		      struct rte_vmbus_device *new_vmbus_dev)
+{
+	/* the new device is linked in front of the existing one */
+	TAILQ_INSERT_BEFORE(exist_vmbus_dev, new_vmbus_dev, next);
+}
+
+/* Remove a 
device from VMBUS bus */
+void
+vmbus_remove_device(struct rte_vmbus_device *vmbus_dev)
+{
+	TAILQ_REMOVE(&rte_vmbus_bus.device_list, vmbus_dev, next);
+}
+
+/*
+ * Find the first device on the bus for which cmp(device, data) returns 0.
+ *
+ * When start is not NULL the search resumes after start: every device up
+ * to and including start is skipped without being compared, so a caller
+ * that iterates by passing the previous result never gets an earlier
+ * device back.  Returns NULL when nothing matches.
+ */
+/* VMBUS doesn't support hotplug */
+static struct rte_device *
+vmbus_find_device(const struct rte_device *start, rte_dev_cmp_t cmp,
+		  const void *data)
+{
+	struct rte_vmbus_device *dev;
+
+	FOREACH_DEVICE_ON_VMBUS(dev) {
+		if (start != NULL) {
+			/* still in front of the resume point */
+			if (&dev->device == start)
+				start = NULL;
+			continue;
+		}
+		if (cmp(&dev->device, data) == 0)
+			return &dev->device;
+	}
+
+	return NULL;
+}
+
+
+/* The VMBUS bus object: bus callbacks plus empty device/driver lists */
+struct rte_vmbus_bus rte_vmbus_bus = {
+	.bus = {
+		.scan = rte_vmbus_scan,
+		.probe = rte_vmbus_probe,
+		.find_device = vmbus_find_device,
+		.parse = vmbus_parse,
+	},
+	.device_list = TAILQ_HEAD_INITIALIZER(rte_vmbus_bus.device_list),
+	.driver_list = TAILQ_HEAD_INITIALIZER(rte_vmbus_bus.driver_list),
+};
+
+RTE_REGISTER_BUS(vmbus, rte_vmbus_bus.bus);
+
+/* Register the "bus.vmbus" log type and default it to NOTICE level */
+RTE_INIT(vmbus_init_log)
+{
+	vmbus_logtype_bus = rte_log_register("bus.vmbus");
+	if (vmbus_logtype_bus >= 0)
+		rte_log_set_level(vmbus_logtype_bus, RTE_LOG_NOTICE);
+}
diff --git a/src/spdk/dpdk/drivers/bus/vmbus/vmbus_common_uio.c b/src/spdk/dpdk/drivers/bus/vmbus/vmbus_common_uio.c
new file mode 100644
index 000000000..8e476f2ea
--- /dev/null
+++ b/src/spdk/dpdk/drivers/bus/vmbus/vmbus_common_uio.c
@@ -0,0 +1,234 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2018, Microsoft Corporation.
+ * All Rights Reserved.
+ */
+
+#include <fcntl.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/mman.h>
+
+#include <rte_eal.h>
+#include <rte_tailq.h>
+#include <rte_log.h>
+#include <rte_malloc.h>
+#include <rte_bus.h>
+#include <rte_bus_vmbus.h>
+
+#include "private.h"
+
+/* Tail queue holding one mapped_vmbus_resource per mapped device */
+static struct rte_tailq_elem vmbus_tailq = {
+	.name = "VMBUS_RESOURCE_LIST",
+};
+EAL_REGISTER_TAILQ(vmbus_tailq)
+
+/*
+ * Look up the recorded uio resource whose id equals dev->device_id.
+ * Returns NULL when dev is NULL or no entry matches.
+ */
+struct mapped_vmbus_resource *
+vmbus_uio_find_resource(const struct rte_vmbus_device *dev)
+{
+	struct mapped_vmbus_resource *uio_res;
+	struct mapped_vmbus_res_list *uio_res_list =
+			RTE_TAILQ_CAST(vmbus_tailq.head, mapped_vmbus_res_list);
+
+	if (dev == NULL)
+		return NULL;
+
+	TAILQ_FOREACH(uio_res, uio_res_list, next) {
+		if (rte_uuid_compare(uio_res->id, dev->device_id) == 0)
+			return uio_res;
+	}
+	return NULL;
+}
+
+/*
+ * Secondary process: map every recorded region at the address stored in
+ * the resource entry, then map the rings of all recorded subchannels.
+ *
+ * A region that cannot be mapped, or that ends up at a different address,
+ * is an error; the regions mapped before it are unmapped again so that a
+ * failed call does not leave them behind.
+ *
+ * Returns 0 on success, -1 on any failure.
+ */
+static int
+vmbus_uio_map_secondary(struct rte_vmbus_device *dev)
+{
+	struct mapped_vmbus_resource *uio_res;
+	struct vmbus_channel *chan;
+	int fd, i;
+
+	uio_res = vmbus_uio_find_resource(dev);
+	if (!uio_res) {
+		VMBUS_LOG(ERR, "Cannot find resource for device");
+		return -1;
+	}
+
+	/* open /dev/uioX */
+	fd = open(uio_res->path, O_RDWR);
+	if (fd < 0) {
+		VMBUS_LOG(ERR, "Cannot open %s: %s",
+			  uio_res->path, strerror(errno));
+		return -1;
+	}
+
+	for (i = 0; i != uio_res->nb_maps; i++) {
+		void *mapaddr;
+		/* map index i is selected through a page-sized offset */
+		off_t offset = i * PAGE_SIZE;
+
+		mapaddr = vmbus_map_resource(uio_res->maps[i].addr,
+					     fd, offset,
+					     uio_res->maps[i].size, 0);
+
+		if (mapaddr == uio_res->maps[i].addr)
+			continue;	/* successful map */
+
+		if (mapaddr == MAP_FAILED)
+			VMBUS_LOG(ERR,
+				  "mmap resource %d in secondary failed", i);
+		else {
+			VMBUS_LOG(ERR,
+				  "mmap resource %d address mismatch", i);
+			vmbus_unmap_resource(mapaddr, uio_res->maps[i].size);
+		}
+
+		/* release the regions that were mapped before this one */
+		while (--i >= 0) {
+			vmbus_unmap_resource(uio_res->maps[i].addr,
+					     (size_t)uio_res->maps[i].size);
+		}
+
+		close(fd);
+		return -1;
+	}
+
+	/* fd is not needed in secondary process, close it */
+	close(fd);
+
+	dev->primary = uio_res->primary;
+	if (!dev->primary) {
+		VMBUS_LOG(ERR, "missing primary channel");
+		return -1;
+	}
+
+	STAILQ_FOREACH(chan, &dev->primary->subchannel_list, next) {
+		if (vmbus_uio_map_secondary_subchan(dev, chan) != 0) {
+			VMBUS_LOG(ERR, "cannot map secondary subchan");
+			return -1;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Primary process: allocate the uio resource entry, map each device
+ * resource up to the first one with a zero length, and record the entry
+ * in the shared list.
+ *
+ * Returns 0 on success, the error from vmbus_uio_alloc_resource(), or -1
+ * if a mapping fails (regions mapped so far are unmapped and the entry is
+ * freed).
+ */
+static int
+vmbus_uio_map_primary(struct rte_vmbus_device *dev)
+{
+	int i, ret;
+	struct mapped_vmbus_resource *uio_res = NULL;
+	struct mapped_vmbus_res_list *uio_res_list =
+		RTE_TAILQ_CAST(vmbus_tailq.head, mapped_vmbus_res_list);
+
+	/* allocate uio resource */
+	ret = vmbus_uio_alloc_resource(dev, &uio_res);
+	if (ret)
+		return ret;
+
+	/* Map the resources */
+	for (i = 0; i < VMBUS_MAX_RESOURCE; i++) {
+		/* stop at empty BAR */
+		if (dev->resource[i].len == 0)
+			break;
+
+		ret = vmbus_uio_map_resource_by_index(dev, i, uio_res, 0);
+		if (ret)
+			goto error;
+	}
+
+	uio_res->nb_maps = i;
+
+	TAILQ_INSERT_TAIL(uio_res_list, uio_res, next);
+
+	return 0;
+error:
+	/* i is the index that failed; unwind the ones mapped before it */
+	while (--i >= 0) {
+		vmbus_unmap_resource(uio_res->maps[i].addr,
+				(size_t)uio_res->maps[i].size);
+	}
+	vmbus_uio_free_resource(dev, uio_res);
+	return -1;
+}
+
+/*
+ * map the VMBUS resource of a VMBUS device in virtual memory
+ *
+ * Resets the interrupt handle, maps the regions (primary or secondary
+ * path depending on the EAL process type) and points dev->int_page and
+ * dev->monitor_page into the mapped regions.
+ *
+ * Returns 0 on success, the error from the mapping helper, -EIO if the
+ * resource entry cannot be found afterwards, or -EINVAL if fewer regions
+ * than needed for the monitor page were mapped.
+ */
+int
+vmbus_uio_map_resource(struct rte_vmbus_device *dev)
+{
+	struct mapped_vmbus_resource *uio_res;
+	int ret;
+
+	/* TODO: handle rescind */
+	dev->intr_handle.fd = -1;
+	dev->intr_handle.uio_cfg_fd = -1;
+	dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
+
+	/* secondary processes - use already recorded details */
+	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+		ret = vmbus_uio_map_secondary(dev);
+	else
+		ret = vmbus_uio_map_primary(dev);
+
+	if (ret != 0)
+		return ret;
+
+	uio_res = vmbus_uio_find_resource(dev);
+	if (!uio_res) {
+		VMBUS_LOG(ERR, "can not find resources!");
+		return -EIO;
+	}
+
+	if (uio_res->nb_maps <= HV_MON_PAGE_MAP) {
+		VMBUS_LOG(ERR, "VMBUS: only %u resources found!",
+			uio_res->nb_maps);
+		return -EINVAL;
+	}
+
+	/* int_page starts half a page into the interrupt page mapping */
+	dev->int_page = (uint32_t *)((char *)uio_res->maps[HV_INT_PAGE_MAP].addr
+				     + (PAGE_SIZE >> 1));
+	dev->monitor_page = uio_res->maps[HV_MON_PAGE_MAP].addr;
+	return 0;
+}
+
+/* Unmap every region recorded in uio_res; a NULL entry is ignored */
+static void
+vmbus_uio_unmap(struct mapped_vmbus_resource *uio_res)
+{
+	int i;
+
+	if (uio_res == NULL)
+		return;
+
+	for (i = 0; i != uio_res->nb_maps; i++) {
+		vmbus_unmap_resource(uio_res->maps[i].addr,
+				     (size_t)uio_res->maps[i].size);
+	}
+}
+
+/*
+ * unmap the VMBUS resource of a VMBUS device in virtual memory
+ *
+ * A secondary process only drops its own mappings.  The primary process
+ * also removes the entry from the shared list, frees it, closes the
+ * descriptors held in the interrupt handle and resets the handle.
+ * Nothing happens when dev is NULL or has no recorded resource.
+ */
+void
+vmbus_uio_unmap_resource(struct rte_vmbus_device *dev)
+{
+	struct mapped_vmbus_resource *uio_res;
+	struct mapped_vmbus_res_list *uio_res_list =
+			RTE_TAILQ_CAST(vmbus_tailq.head, mapped_vmbus_res_list);
+
+	if (dev == NULL)
+		return;
+
+	/* find an entry for the device */
+	uio_res = vmbus_uio_find_resource(dev);
+	if (uio_res == NULL)
+		return;
+
+	/* secondary processes - just free maps */
+	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+		vmbus_uio_unmap(uio_res);
+		return;
+	}
+
+	TAILQ_REMOVE(uio_res_list, uio_res, next);
+
+	/* unmap all resources */
+	vmbus_uio_unmap(uio_res);
+
+	/* free uio resource */
+	rte_free(uio_res);
+
+	/* close fd if in primary process; skip descriptors never opened */
+	if (dev->intr_handle.fd >= 0)
+		close(dev->intr_handle.fd);
+	if (dev->intr_handle.uio_cfg_fd >= 0) {
+		close(dev->intr_handle.uio_cfg_fd);
+		dev->intr_handle.uio_cfg_fd = -1;
+	}
+
+	dev->intr_handle.fd = -1;
+	dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
+} |