diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
commit | e6918187568dbd01842d8d1d2c808ce16a894239 (patch) | |
tree | 64f88b554b444a49f656b6c656111a145cbbaa28 /src/spdk/dpdk/drivers/bus/pci | |
parent | Initial commit. (diff) | |
download | ceph-e6918187568dbd01842d8d1d2c808ce16a894239.tar.xz ceph-e6918187568dbd01842d8d1d2c808ce16a894239.zip |
Adding upstream version 18.2.2.upstream/18.2.2
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/spdk/dpdk/drivers/bus/pci')
-rw-r--r-- | src/spdk/dpdk/drivers/bus/pci/Makefile | 33 | ||||
-rw-r--r-- | src/spdk/dpdk/drivers/bus/pci/bsd/Makefile | 4 | ||||
-rw-r--r-- | src/spdk/dpdk/drivers/bus/pci/bsd/pci.c | 676 | ||||
-rw-r--r-- | src/spdk/dpdk/drivers/bus/pci/linux/Makefile | 6 | ||||
-rw-r--r-- | src/spdk/dpdk/drivers/bus/pci/linux/pci.c | 878 | ||||
-rw-r--r-- | src/spdk/dpdk/drivers/bus/pci/linux/pci_init.h | 88 | ||||
-rw-r--r-- | src/spdk/dpdk/drivers/bus/pci/linux/pci_uio.c | 569 | ||||
-rw-r--r-- | src/spdk/dpdk/drivers/bus/pci/linux/pci_vfio.c | 1070 | ||||
-rw-r--r-- | src/spdk/dpdk/drivers/bus/pci/meson.build | 19 | ||||
-rw-r--r-- | src/spdk/dpdk/drivers/bus/pci/pci_common.c | 687 | ||||
-rw-r--r-- | src/spdk/dpdk/drivers/bus/pci/pci_common_uio.c | 239 | ||||
-rw-r--r-- | src/spdk/dpdk/drivers/bus/pci/pci_params.c | 78 | ||||
-rw-r--r-- | src/spdk/dpdk/drivers/bus/pci/private.h | 220 | ||||
-rw-r--r-- | src/spdk/dpdk/drivers/bus/pci/rte_bus_pci.h | 361 | ||||
-rw-r--r-- | src/spdk/dpdk/drivers/bus/pci/rte_bus_pci_version.map | 18 |
15 files changed, 4946 insertions, 0 deletions
diff --git a/src/spdk/dpdk/drivers/bus/pci/Makefile b/src/spdk/dpdk/drivers/bus/pci/Makefile new file mode 100644 index 000000000..f4102d0a7 --- /dev/null +++ b/src/spdk/dpdk/drivers/bus/pci/Makefile @@ -0,0 +1,33 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2017 6WIND S.A. + +include $(RTE_SDK)/mk/rte.vars.mk + +LIB = librte_bus_pci.a +EXPORT_MAP := rte_bus_pci_version.map + +CFLAGS := -I$(SRCDIR) $(CFLAGS) +CFLAGS += -O3 $(WERROR_FLAGS) + +ifneq ($(CONFIG_RTE_EXEC_ENV_LINUX),) +SYSTEM := linux +endif +ifneq ($(CONFIG_RTE_EXEC_ENV_FREEBSD),) +SYSTEM := bsd +endif + +CFLAGS += -I$(RTE_SDK)/drivers/bus/pci/$(SYSTEM) +CFLAGS += -I$(RTE_SDK)/lib/librte_eal/common + +LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring +LDLIBS += -lrte_pci -lrte_kvargs + +include $(RTE_SDK)/drivers/bus/pci/$(SYSTEM)/Makefile +SRCS-$(CONFIG_RTE_LIBRTE_PCI_BUS) := $(addprefix $(SYSTEM)/,$(SRCS)) +SRCS-$(CONFIG_RTE_LIBRTE_PCI_BUS) += pci_params.c +SRCS-$(CONFIG_RTE_LIBRTE_PCI_BUS) += pci_common.c +SRCS-$(CONFIG_RTE_LIBRTE_PCI_BUS) += pci_common_uio.c + +SYMLINK-$(CONFIG_RTE_LIBRTE_PCI_BUS)-include += rte_bus_pci.h + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/src/spdk/dpdk/drivers/bus/pci/bsd/Makefile b/src/spdk/dpdk/drivers/bus/pci/bsd/Makefile new file mode 100644 index 000000000..c1b54c05e --- /dev/null +++ b/src/spdk/dpdk/drivers/bus/pci/bsd/Makefile @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2017 6WIND S.A. + +SRCS += pci.c diff --git a/src/spdk/dpdk/drivers/bus/pci/bsd/pci.c b/src/spdk/dpdk/drivers/bus/pci/bsd/pci.c new file mode 100644 index 000000000..6ec27b4b5 --- /dev/null +++ b/src/spdk/dpdk/drivers/bus/pci/bsd/pci.c @@ -0,0 +1,676 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2014 Intel Corporation + */ + +#include <ctype.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdarg.h> +#include <unistd.h> +#include <inttypes.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <errno.h> +#include <dirent.h> +#include <limits.h> +#include <sys/queue.h> +#include <sys/mman.h> +#include <sys/ioctl.h> +#include <sys/pciio.h> +#include <dev/pci/pcireg.h> + +#if defined(RTE_ARCH_X86) +#include <machine/cpufunc.h> +#endif + +#include <rte_interrupts.h> +#include <rte_log.h> +#include <rte_pci.h> +#include <rte_bus_pci.h> +#include <rte_common.h> +#include <rte_launch.h> +#include <rte_memory.h> +#include <rte_eal.h> +#include <rte_per_lcore.h> +#include <rte_lcore.h> +#include <rte_malloc.h> +#include <rte_string_fns.h> +#include <rte_debug.h> +#include <rte_devargs.h> + +#include "eal_filesystem.h" +#include "private.h" + +/** + * @file + * PCI probing under BSD + * + * This code is used to simulate a PCI probe by parsing information in + * sysfs. Moreover, when a registered driver matches a device, the + * kernel driver currently using it is unloaded and replaced by + * igb_uio module, which is a very minimal userland driver for Intel + * network card, only providing access to PCI BAR to applications, and + * enabling bus master. + */ + +extern struct rte_pci_bus rte_pci_bus; + +/* Map pci device */ +int +rte_pci_map_device(struct rte_pci_device *dev) +{ + int ret = -1; + + /* try mapping the NIC resources */ + switch (dev->kdrv) { + case RTE_KDRV_NIC_UIO: + /* map resources for devices that use uio */ + ret = pci_uio_map_resource(dev); + break; + default: + RTE_LOG(DEBUG, EAL, + " Not managed by a supported kernel driver, skipped\n"); + ret = 1; + break; + } + + return ret; +} + +/* Unmap pci device */ +void +rte_pci_unmap_device(struct rte_pci_device *dev) +{ + /* try unmapping the NIC resources */ + switch (dev->kdrv) { + case RTE_KDRV_NIC_UIO: + /* unmap resources for devices that use uio */ + pci_uio_unmap_resource(dev); + break; + default: + RTE_LOG(DEBUG, EAL, + " Not managed by a supported kernel driver, skipped\n"); + break; + } +} + +void +pci_uio_free_resource(struct rte_pci_device *dev, + struct mapped_pci_resource *uio_res) +{ + rte_free(uio_res); + + if (dev->intr_handle.fd) { + close(dev->intr_handle.fd); + dev->intr_handle.fd = -1; + dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; + } +} + +int +pci_uio_alloc_resource(struct rte_pci_device *dev, + struct mapped_pci_resource **uio_res) +{ + char devname[PATH_MAX]; /* contains the /dev/uioX */ + struct rte_pci_addr *loc; + + loc = &dev->addr; + + snprintf(devname, sizeof(devname), "/dev/uio@pci:%u:%u:%u", + dev->addr.bus, dev->addr.devid, dev->addr.function); + + if (access(devname, O_RDWR) < 0) { + RTE_LOG(WARNING, EAL, " "PCI_PRI_FMT" not managed by UIO driver, " + "skipping\n", loc->domain, loc->bus, loc->devid, loc->function); + return 1; + } + + /* save fd if in primary process */ + dev->intr_handle.fd = open(devname, O_RDWR); + if (dev->intr_handle.fd < 0) { + RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", + devname, strerror(errno)); + goto error; + } + dev->intr_handle.type = RTE_INTR_HANDLE_UIO; + + /* allocate the mapping details for secondary processes*/ + *uio_res = rte_zmalloc("UIO_RES", sizeof(**uio_res), 0); + if (*uio_res == NULL) { + RTE_LOG(ERR, EAL, + "%s(): cannot store uio mmap details\n", __func__); + goto error; + } + + strlcpy((*uio_res)->path, devname, sizeof((*uio_res)->path)); + memcpy(&(*uio_res)->pci_addr, &dev->addr, sizeof((*uio_res)->pci_addr)); + + return 0; + +error: + pci_uio_free_resource(dev, *uio_res); + return -1; +} + +int +pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx, + struct mapped_pci_resource *uio_res, int map_idx) +{ + int fd; + char *devname; + void *mapaddr; + uint64_t offset; + uint64_t pagesz; + struct pci_map *maps; + + maps = uio_res->maps; + devname = uio_res->path; + pagesz = sysconf(_SC_PAGESIZE); + + /* allocate memory to keep path */ + maps[map_idx].path = rte_malloc(NULL, strlen(devname) + 1, 0); + if (maps[map_idx].path == NULL) { + RTE_LOG(ERR, EAL, "Cannot allocate memory for path: %s\n", + strerror(errno)); + return -1; + } + + /* + * open resource file, to mmap it + */ + fd = open(devname, O_RDWR); + if (fd < 0) { + RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", + devname, strerror(errno)); + goto error; + } + + /* if matching map is found, then use it */ + offset = res_idx * pagesz; + mapaddr = pci_map_resource(NULL, fd, (off_t)offset, + (size_t)dev->mem_resource[res_idx].len, 0); + close(fd); + if (mapaddr == MAP_FAILED) + goto error; + + maps[map_idx].phaddr = dev->mem_resource[res_idx].phys_addr; + maps[map_idx].size = dev->mem_resource[res_idx].len; + maps[map_idx].addr = mapaddr; + maps[map_idx].offset = offset; + strcpy(maps[map_idx].path, devname); + dev->mem_resource[res_idx].addr = mapaddr; + + return 0; + +error: + rte_free(maps[map_idx].path); + return -1; +} + +static int +pci_scan_one(int dev_pci_fd, struct pci_conf *conf) +{ + struct rte_pci_device *dev; + struct pci_bar_io bar; + unsigned i, max; + + dev = malloc(sizeof(*dev)); + if (dev == NULL) { + return -1; + } + + memset(dev, 0, sizeof(*dev)); + dev->device.bus = &rte_pci_bus.bus; + + dev->addr.domain = conf->pc_sel.pc_domain; + dev->addr.bus = conf->pc_sel.pc_bus; + dev->addr.devid = conf->pc_sel.pc_dev; + dev->addr.function = conf->pc_sel.pc_func; + + /* get vendor id */ + dev->id.vendor_id = conf->pc_vendor; + + /* get device id */ + dev->id.device_id = conf->pc_device; + + /* get subsystem_vendor id */ + dev->id.subsystem_vendor_id = conf->pc_subvendor; + + /* get subsystem_device id */ + dev->id.subsystem_device_id = conf->pc_subdevice; + + /* get class id */ + dev->id.class_id = (conf->pc_class << 16) | + (conf->pc_subclass << 8) | + (conf->pc_progif); + + /* TODO: get max_vfs */ + dev->max_vfs = 0; + + /* FreeBSD has no NUMA support (yet) */ + dev->device.numa_node = 0; + + pci_name_set(dev); + + /* FreeBSD has only one pass through driver */ + dev->kdrv = RTE_KDRV_NIC_UIO; + + /* parse resources */ + switch (conf->pc_hdr & PCIM_HDRTYPE) { + case PCIM_HDRTYPE_NORMAL: + max = PCIR_MAX_BAR_0; + break; + case PCIM_HDRTYPE_BRIDGE: + max = PCIR_MAX_BAR_1; + break; + case PCIM_HDRTYPE_CARDBUS: + max = PCIR_MAX_BAR_2; + break; + default: + goto skipdev; + } + + for (i = 0; i <= max; i++) { + bar.pbi_sel = conf->pc_sel; + bar.pbi_reg = PCIR_BAR(i); + if (ioctl(dev_pci_fd, PCIOCGETBAR, &bar) < 0) + continue; + + dev->mem_resource[i].len = bar.pbi_length; + if (PCI_BAR_IO(bar.pbi_base)) { + dev->mem_resource[i].addr = (void *)(bar.pbi_base & ~((uint64_t)0xf)); + continue; + } + dev->mem_resource[i].phys_addr = bar.pbi_base & ~((uint64_t)0xf); + } + + /* device is valid, add in list (sorted) */ + if (TAILQ_EMPTY(&rte_pci_bus.device_list)) { + rte_pci_add_device(dev); + } + else { + struct rte_pci_device *dev2 = NULL; + int ret; + + TAILQ_FOREACH(dev2, &rte_pci_bus.device_list, next) { + ret = rte_pci_addr_cmp(&dev->addr, &dev2->addr); + if (ret > 0) + continue; + else if (ret < 0) { + rte_pci_insert_device(dev2, dev); + } else { /* already registered */ + dev2->kdrv = dev->kdrv; + dev2->max_vfs = dev->max_vfs; + pci_name_set(dev2); + memmove(dev2->mem_resource, + dev->mem_resource, + sizeof(dev->mem_resource)); + free(dev); + } + return 0; + } + rte_pci_add_device(dev); + } + + return 0; + +skipdev: + free(dev); + return 0; +} + +/* + * Scan the content of the PCI bus, and add the devices in the devices + * list. Call pci_scan_one() for each pci entry found. + */ +int +rte_pci_scan(void) +{ + int fd; + unsigned dev_count = 0; + struct pci_conf matches[16]; + struct pci_conf_io conf_io = { + .pat_buf_len = 0, + .num_patterns = 0, + .patterns = NULL, + .match_buf_len = sizeof(matches), + .matches = &matches[0], + }; + struct rte_pci_addr pci_addr; + + /* for debug purposes, PCI can be disabled */ + if (!rte_eal_has_pci()) + return 0; + + fd = open("/dev/pci", O_RDONLY); + if (fd < 0) { + RTE_LOG(ERR, EAL, "%s(): error opening /dev/pci\n", __func__); + goto error; + } + + do { + unsigned i; + if (ioctl(fd, PCIOCGETCONF, &conf_io) < 0) { + RTE_LOG(ERR, EAL, "%s(): error with ioctl on /dev/pci: %s\n", + __func__, strerror(errno)); + goto error; + } + + for (i = 0; i < conf_io.num_matches; i++) { + pci_addr.domain = matches[i].pc_sel.pc_domain; + pci_addr.bus = matches[i].pc_sel.pc_bus; + pci_addr.devid = matches[i].pc_sel.pc_dev; + pci_addr.function = matches[i].pc_sel.pc_func; + + if (rte_pci_ignore_device(&pci_addr)) + continue; + + if (pci_scan_one(fd, &matches[i]) < 0) + goto error; + } + + dev_count += conf_io.num_matches; + } while(conf_io.status == PCI_GETCONF_MORE_DEVS); + + close(fd); + + RTE_LOG(DEBUG, EAL, "PCI scan found %u devices\n", dev_count); + return 0; + +error: + if (fd >= 0) + close(fd); + return -1; +} + +bool +pci_device_iommu_support_va(__rte_unused const struct rte_pci_device *dev) +{ + return false; +} + +enum rte_iova_mode +pci_device_iova_mode(const struct rte_pci_driver *pdrv __rte_unused, + const struct rte_pci_device *pdev) +{ + /* Supports only RTE_KDRV_NIC_UIO */ + if (pdev->kdrv != RTE_KDRV_NIC_UIO) + RTE_LOG(DEBUG, EAL, "Unsupported kernel driver? Defaulting to IOVA as 'PA'\n"); + + return RTE_IOVA_PA; +} + +int +pci_update_device(const struct rte_pci_addr *addr) +{ + int fd; + struct pci_conf matches[2]; + struct pci_match_conf match = { + .pc_sel = { + .pc_domain = addr->domain, + .pc_bus = addr->bus, + .pc_dev = addr->devid, + .pc_func = addr->function, + }, + }; + struct pci_conf_io conf_io = { + .pat_buf_len = 0, + .num_patterns = 1, + .patterns = &match, + .match_buf_len = sizeof(matches), + .matches = &matches[0], + }; + + fd = open("/dev/pci", O_RDONLY); + if (fd < 0) { + RTE_LOG(ERR, EAL, "%s(): error opening /dev/pci\n", __func__); + goto error; + } + + if (ioctl(fd, PCIOCGETCONF, &conf_io) < 0) { + RTE_LOG(ERR, EAL, "%s(): error with ioctl on /dev/pci: %s\n", + __func__, strerror(errno)); + goto error; + } + + if (conf_io.num_matches != 1) + goto error; + + if (pci_scan_one(fd, &matches[0]) < 0) + goto error; + + close(fd); + + return 0; + +error: + if (fd >= 0) + close(fd); + return -1; +} + +/* Read PCI config space. */ +int rte_pci_read_config(const struct rte_pci_device *dev, + void *buf, size_t len, off_t offset) +{ + int fd = -1; + int size; + /* Copy Linux implementation's behaviour */ + const int return_len = len; + struct pci_io pi = { + .pi_sel = { + .pc_domain = dev->addr.domain, + .pc_bus = dev->addr.bus, + .pc_dev = dev->addr.devid, + .pc_func = dev->addr.function, + }, + .pi_reg = offset, + }; + + fd = open("/dev/pci", O_RDWR); + if (fd < 0) { + RTE_LOG(ERR, EAL, "%s(): error opening /dev/pci\n", __func__); + goto error; + } + + while (len > 0) { + size = (len >= 4) ? 4 : ((len >= 2) ? 2 : 1); + pi.pi_width = size; + + if (ioctl(fd, PCIOCREAD, &pi) < 0) + goto error; + memcpy(buf, &pi.pi_data, size); + + buf = (char *)buf + size; + pi.pi_reg += size; + len -= size; + } + close(fd); + + return return_len; + + error: + if (fd >= 0) + close(fd); + return -1; +} + +/* Write PCI config space. */ +int rte_pci_write_config(const struct rte_pci_device *dev, + const void *buf, size_t len, off_t offset) +{ + int fd = -1; + + struct pci_io pi = { + .pi_sel = { + .pc_domain = dev->addr.domain, + .pc_bus = dev->addr.bus, + .pc_dev = dev->addr.devid, + .pc_func = dev->addr.function, + }, + .pi_reg = offset, + .pi_data = *(const uint32_t *)buf, + .pi_width = len, + }; + + if (len == 3 || len > sizeof(pi.pi_data)) { + RTE_LOG(ERR, EAL, "%s(): invalid pci read length\n", __func__); + goto error; + } + + memcpy(&pi.pi_data, buf, len); + + fd = open("/dev/pci", O_RDWR); + if (fd < 0) { + RTE_LOG(ERR, EAL, "%s(): error opening /dev/pci\n", __func__); + goto error; + } + + if (ioctl(fd, PCIOCWRITE, &pi) < 0) + goto error; + + close(fd); + return 0; + + error: + if (fd >= 0) + close(fd); + return -1; +} + +int +rte_pci_ioport_map(struct rte_pci_device *dev, int bar, + struct rte_pci_ioport *p) +{ + int ret; + + switch (dev->kdrv) { +#if defined(RTE_ARCH_X86) + case RTE_KDRV_NIC_UIO: + if (rte_eal_iopl_init() != 0) { + RTE_LOG(ERR, EAL, "%s(): insufficient ioport permissions for PCI device %s\n", + __func__, dev->name); + return -1; + } + if ((uintptr_t) dev->mem_resource[bar].addr <= UINT16_MAX) { + p->base = (uintptr_t)dev->mem_resource[bar].addr; + ret = 0; + } else + ret = -1; + break; +#endif + default: + ret = -1; + break; + } + + if (!ret) + p->dev = dev; + + return ret; +} + +static void +pci_uio_ioport_read(struct rte_pci_ioport *p, + void *data, size_t len, off_t offset) +{ +#if defined(RTE_ARCH_X86) + uint8_t *d; + int size; + unsigned short reg = p->base + offset; + + for (d = data; len > 0; d += size, reg += size, len -= size) { + if (len >= 4) { + size = 4; + *(uint32_t *)d = inl(reg); + } else if (len >= 2) { + size = 2; + *(uint16_t *)d = inw(reg); + } else { + size = 1; + *d = inb(reg); + } + } +#else + RTE_SET_USED(p); + RTE_SET_USED(data); + RTE_SET_USED(len); + RTE_SET_USED(offset); +#endif +} + +void +rte_pci_ioport_read(struct rte_pci_ioport *p, + void *data, size_t len, off_t offset) +{ + switch (p->dev->kdrv) { + case RTE_KDRV_NIC_UIO: + pci_uio_ioport_read(p, data, len, offset); + break; + default: + break; + } +} + +static void +pci_uio_ioport_write(struct rte_pci_ioport *p, + const void *data, size_t len, off_t offset) +{ +#if defined(RTE_ARCH_X86) + const uint8_t *s; + int size; + unsigned short reg = p->base + offset; + + for (s = data; len > 0; s += size, reg += size, len -= size) { + if (len >= 4) { + size = 4; + outl(reg, *(const uint32_t *)s); + } else if (len >= 2) { + size = 2; + outw(reg, *(const uint16_t *)s); + } else { + size = 1; + outb(reg, *s); + } + } +#else + RTE_SET_USED(p); + RTE_SET_USED(data); + RTE_SET_USED(len); + RTE_SET_USED(offset); +#endif +} + +void +rte_pci_ioport_write(struct rte_pci_ioport *p, + const void *data, size_t len, off_t offset) +{ + switch (p->dev->kdrv) { + case RTE_KDRV_NIC_UIO: + pci_uio_ioport_write(p, data, len, offset); + break; + default: + break; + } +} + +int +rte_pci_ioport_unmap(struct rte_pci_ioport *p) +{ + int ret; + + switch (p->dev->kdrv) { +#if defined(RTE_ARCH_X86) + case RTE_KDRV_NIC_UIO: + ret = 0; + break; +#endif + default: + ret = -1; + break; + } + + return ret; +} diff --git a/src/spdk/dpdk/drivers/bus/pci/linux/Makefile b/src/spdk/dpdk/drivers/bus/pci/linux/Makefile new file mode 100644 index 000000000..90404468b --- /dev/null +++ b/src/spdk/dpdk/drivers/bus/pci/linux/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2017 6WIND S.A. + +SRCS += pci.c +SRCS += pci_uio.c +SRCS += pci_vfio.c diff --git a/src/spdk/dpdk/drivers/bus/pci/linux/pci.c b/src/spdk/dpdk/drivers/bus/pci/linux/pci.c new file mode 100644 index 000000000..313e8ffe6 --- /dev/null +++ b/src/spdk/dpdk/drivers/bus/pci/linux/pci.c @@ -0,0 +1,878 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2014 Intel Corporation + */ + +#include <string.h> +#include <dirent.h> + +#include <rte_log.h> +#include <rte_bus.h> +#include <rte_pci.h> +#include <rte_bus_pci.h> +#include <rte_malloc.h> +#include <rte_devargs.h> +#include <rte_memcpy.h> +#include <rte_vfio.h> + +#include "eal_filesystem.h" + +#include "private.h" +#include "pci_init.h" + +/** + * @file + * PCI probing under linux + * + * This code is used to simulate a PCI probe by parsing information in sysfs. + * When a registered device matches a driver, it is then initialized with + * IGB_UIO driver (or doesn't initialize, if the device wasn't bound to it). + */ + +extern struct rte_pci_bus rte_pci_bus; + +static int +pci_get_kernel_driver_by_path(const char *filename, char *dri_name, + size_t len) +{ + int count; + char path[PATH_MAX]; + char *name; + + if (!filename || !dri_name) + return -1; + + count = readlink(filename, path, PATH_MAX); + if (count >= PATH_MAX) + return -1; + + /* For device does not have a driver */ + if (count < 0) + return 1; + + path[count] = '\0'; + + name = strrchr(path, '/'); + if (name) { + strlcpy(dri_name, name + 1, len); + return 0; + } + + return -1; +} + +/* Map pci device */ +int +rte_pci_map_device(struct rte_pci_device *dev) +{ + int ret = -1; + + /* try mapping the NIC resources using VFIO if it exists */ + switch (dev->kdrv) { + case RTE_KDRV_VFIO: +#ifdef VFIO_PRESENT + if (pci_vfio_is_enabled()) + ret = pci_vfio_map_resource(dev); +#endif + break; + case RTE_KDRV_IGB_UIO: + case RTE_KDRV_UIO_GENERIC: + if (rte_eal_using_phys_addrs()) { + /* map resources for devices that use uio */ + ret = pci_uio_map_resource(dev); + } + break; + default: + RTE_LOG(DEBUG, EAL, + " Not managed by a supported kernel driver, skipped\n"); + ret = 1; + break; + } + + return ret; +} + +/* Unmap pci device */ +void +rte_pci_unmap_device(struct rte_pci_device *dev) +{ + /* try unmapping the NIC resources using VFIO if it exists */ + switch (dev->kdrv) { + case RTE_KDRV_VFIO: +#ifdef VFIO_PRESENT + if (pci_vfio_is_enabled()) + pci_vfio_unmap_resource(dev); +#endif + break; + case RTE_KDRV_IGB_UIO: + case RTE_KDRV_UIO_GENERIC: + /* unmap resources for devices that use uio */ + pci_uio_unmap_resource(dev); + break; + default: + RTE_LOG(DEBUG, EAL, + " Not managed by a supported kernel driver, skipped\n"); + break; + } +} + +static int +find_max_end_va(const struct rte_memseg_list *msl, void *arg) +{ + size_t sz = msl->len; + void *end_va = RTE_PTR_ADD(msl->base_va, sz); + void **max_va = arg; + + if (*max_va < end_va) + *max_va = end_va; + return 0; +} + +void * +pci_find_max_end_va(void) +{ + void *va = NULL; + + rte_memseg_list_walk(find_max_end_va, &va); + return va; +} + + +/* parse one line of the "resource" sysfs file (note that the 'line' + * string is modified) + */ +int +pci_parse_one_sysfs_resource(char *line, size_t len, uint64_t *phys_addr, + uint64_t *end_addr, uint64_t *flags) +{ + union pci_resource_info { + struct { + char *phys_addr; + char *end_addr; + char *flags; + }; + char *ptrs[PCI_RESOURCE_FMT_NVAL]; + } res_info; + + if (rte_strsplit(line, len, res_info.ptrs, 3, ' ') != 3) { + RTE_LOG(ERR, EAL, + "%s(): bad resource format\n", __func__); + return -1; + } + errno = 0; + *phys_addr = strtoull(res_info.phys_addr, NULL, 16); + *end_addr = strtoull(res_info.end_addr, NULL, 16); + *flags = strtoull(res_info.flags, NULL, 16); + if (errno != 0) { + RTE_LOG(ERR, EAL, + "%s(): bad resource format\n", __func__); + return -1; + } + + return 0; +} + +/* parse the "resource" sysfs file */ +static int +pci_parse_sysfs_resource(const char *filename, struct rte_pci_device *dev) +{ + FILE *f; + char buf[BUFSIZ]; + int i; + uint64_t phys_addr, end_addr, flags; + + f = fopen(filename, "r"); + if (f == NULL) { + RTE_LOG(ERR, EAL, "Cannot open sysfs resource\n"); + return -1; + } + + for (i = 0; i<PCI_MAX_RESOURCE; i++) { + + if (fgets(buf, sizeof(buf), f) == NULL) { + RTE_LOG(ERR, EAL, + "%s(): cannot read resource\n", __func__); + goto error; + } + if (pci_parse_one_sysfs_resource(buf, sizeof(buf), &phys_addr, + &end_addr, &flags) < 0) + goto error; + + if (flags & IORESOURCE_MEM) { + dev->mem_resource[i].phys_addr = phys_addr; + dev->mem_resource[i].len = end_addr - phys_addr + 1; + /* not mapped for now */ + dev->mem_resource[i].addr = NULL; + } + } + fclose(f); + return 0; + +error: + fclose(f); + return -1; +} + +/* Scan one pci sysfs entry, and fill the devices list from it. */ +static int +pci_scan_one(const char *dirname, const struct rte_pci_addr *addr) +{ + char filename[PATH_MAX]; + unsigned long tmp; + struct rte_pci_device *dev; + char driver[PATH_MAX]; + int ret; + + dev = malloc(sizeof(*dev)); + if (dev == NULL) + return -1; + + memset(dev, 0, sizeof(*dev)); + dev->device.bus = &rte_pci_bus.bus; + dev->addr = *addr; + + /* get vendor id */ + snprintf(filename, sizeof(filename), "%s/vendor", dirname); + if (eal_parse_sysfs_value(filename, &tmp) < 0) { + free(dev); + return -1; + } + dev->id.vendor_id = (uint16_t)tmp; + + /* get device id */ + snprintf(filename, sizeof(filename), "%s/device", dirname); + if (eal_parse_sysfs_value(filename, &tmp) < 0) { + free(dev); + return -1; + } + dev->id.device_id = (uint16_t)tmp; + + /* get subsystem_vendor id */ + snprintf(filename, sizeof(filename), "%s/subsystem_vendor", + dirname); + if (eal_parse_sysfs_value(filename, &tmp) < 0) { + free(dev); + return -1; + } + dev->id.subsystem_vendor_id = (uint16_t)tmp; + + /* get subsystem_device id */ + snprintf(filename, sizeof(filename), "%s/subsystem_device", + dirname); + if (eal_parse_sysfs_value(filename, &tmp) < 0) { + free(dev); + return -1; + } + dev->id.subsystem_device_id = (uint16_t)tmp; + + /* get class_id */ + snprintf(filename, sizeof(filename), "%s/class", + dirname); + if (eal_parse_sysfs_value(filename, &tmp) < 0) { + free(dev); + return -1; + } + /* the least 24 bits are valid: class, subclass, program interface */ + dev->id.class_id = (uint32_t)tmp & RTE_CLASS_ANY_ID; + + /* get max_vfs */ + dev->max_vfs = 0; + snprintf(filename, sizeof(filename), "%s/max_vfs", dirname); + if (!access(filename, F_OK) && + eal_parse_sysfs_value(filename, &tmp) == 0) + dev->max_vfs = (uint16_t)tmp; + else { + /* for non igb_uio driver, need kernel version >= 3.8 */ + snprintf(filename, sizeof(filename), + "%s/sriov_numvfs", dirname); + if (!access(filename, F_OK) && + eal_parse_sysfs_value(filename, &tmp) == 0) + dev->max_vfs = (uint16_t)tmp; + } + + /* get numa node, default to 0 if not present */ + snprintf(filename, sizeof(filename), "%s/numa_node", + dirname); + + if (access(filename, F_OK) != -1) { + if (eal_parse_sysfs_value(filename, &tmp) == 0) + dev->device.numa_node = tmp; + else + dev->device.numa_node = -1; + } else { + dev->device.numa_node = 0; + } + + pci_name_set(dev); + + /* parse resources */ + snprintf(filename, sizeof(filename), "%s/resource", dirname); + if (pci_parse_sysfs_resource(filename, dev) < 0) { + RTE_LOG(ERR, EAL, "%s(): cannot parse resource\n", __func__); + free(dev); + return -1; + } + + /* parse driver */ + snprintf(filename, sizeof(filename), "%s/driver", dirname); + ret = pci_get_kernel_driver_by_path(filename, driver, sizeof(driver)); + if (ret < 0) { + RTE_LOG(ERR, EAL, "Fail to get kernel driver\n"); + free(dev); + return -1; + } + + if (!ret) { + if (!strcmp(driver, "vfio-pci")) + dev->kdrv = RTE_KDRV_VFIO; + else if (!strcmp(driver, "igb_uio")) + dev->kdrv = RTE_KDRV_IGB_UIO; + else if (!strcmp(driver, "uio_pci_generic")) + dev->kdrv = RTE_KDRV_UIO_GENERIC; + else + dev->kdrv = RTE_KDRV_UNKNOWN; + } else + dev->kdrv = RTE_KDRV_NONE; + + /* device is valid, add in list (sorted) */ + if (TAILQ_EMPTY(&rte_pci_bus.device_list)) { + rte_pci_add_device(dev); + } else { + struct rte_pci_device *dev2; + int ret; + + TAILQ_FOREACH(dev2, &rte_pci_bus.device_list, next) { + ret = rte_pci_addr_cmp(&dev->addr, &dev2->addr); + if (ret > 0) + continue; + + if (ret < 0) { + rte_pci_insert_device(dev2, dev); + } else { /* already registered */ + if (!rte_dev_is_probed(&dev2->device)) { + dev2->kdrv = dev->kdrv; + dev2->max_vfs = dev->max_vfs; + memcpy(&dev2->id, &dev->id, sizeof(dev2->id)); + pci_name_set(dev2); + memmove(dev2->mem_resource, + dev->mem_resource, + sizeof(dev->mem_resource)); + } else { + /** + * If device is plugged and driver is + * probed already, (This happens when + * we call rte_dev_probe which will + * scan all device on the bus) we don't + * need to do anything here unless... + **/ + if (dev2->kdrv != dev->kdrv || + dev2->max_vfs != dev->max_vfs || + memcmp(&dev2->id, &dev->id, sizeof(dev2->id))) + /* + * This should not happens. + * But it is still possible if + * we unbind a device from + * vfio or uio before hotplug + * remove and rebind it with + * a different configure. + * So we just print out the + * error as an alarm. + */ + RTE_LOG(ERR, EAL, "Unexpected device scan at %s!\n", + filename); + else if (dev2->device.devargs != + dev->device.devargs) { + rte_devargs_remove(dev2->device.devargs); + pci_name_set(dev2); + } + } + free(dev); + } + return 0; + } + + rte_pci_add_device(dev); + } + + return 0; +} + +int +pci_update_device(const struct rte_pci_addr *addr) +{ + char filename[PATH_MAX]; + + snprintf(filename, sizeof(filename), "%s/" PCI_PRI_FMT, + rte_pci_get_sysfs_path(), addr->domain, addr->bus, addr->devid, + addr->function); + + return pci_scan_one(filename, addr); +} + +/* + * split up a pci address into its constituent parts. + */ +static int +parse_pci_addr_format(const char *buf, int bufsize, struct rte_pci_addr *addr) +{ + /* first split on ':' */ + union splitaddr { + struct { + char *domain; + char *bus; + char *devid; + char *function; + }; + char *str[PCI_FMT_NVAL]; /* last element-separator is "." not ":" */ + } splitaddr; + + char *buf_copy = strndup(buf, bufsize); + if (buf_copy == NULL) + return -1; + + if (rte_strsplit(buf_copy, bufsize, splitaddr.str, PCI_FMT_NVAL, ':') + != PCI_FMT_NVAL - 1) + goto error; + /* final split is on '.' between devid and function */ + splitaddr.function = strchr(splitaddr.devid,'.'); + if (splitaddr.function == NULL) + goto error; + *splitaddr.function++ = '\0'; + + /* now convert to int values */ + errno = 0; + addr->domain = strtoul(splitaddr.domain, NULL, 16); + addr->bus = strtoul(splitaddr.bus, NULL, 16); + addr->devid = strtoul(splitaddr.devid, NULL, 16); + addr->function = strtoul(splitaddr.function, NULL, 10); + if (errno != 0) + goto error; + + free(buf_copy); /* free the copy made with strdup */ + return 0; +error: + free(buf_copy); + return -1; +} + +/* + * Scan the content of the PCI bus, and the devices in the devices + * list + */ +int +rte_pci_scan(void) +{ + struct dirent *e; + DIR *dir; + char dirname[PATH_MAX]; + struct rte_pci_addr addr; + + /* for debug purposes, PCI can be disabled */ + if (!rte_eal_has_pci()) + return 0; + +#ifdef VFIO_PRESENT + if (!pci_vfio_is_enabled()) + RTE_LOG(DEBUG, EAL, "VFIO PCI modules not loaded\n"); +#endif + + dir = opendir(rte_pci_get_sysfs_path()); + if (dir == NULL) { + RTE_LOG(ERR, EAL, "%s(): opendir failed: %s\n", + __func__, strerror(errno)); + return -1; + } + + while ((e = readdir(dir)) != NULL) { + if (e->d_name[0] == '.') + continue; + + if (parse_pci_addr_format(e->d_name, sizeof(e->d_name), &addr) != 0) + continue; + + if (rte_pci_ignore_device(&addr)) + continue; + + snprintf(dirname, sizeof(dirname), "%s/%s", + rte_pci_get_sysfs_path(), e->d_name); + + if (pci_scan_one(dirname, &addr) < 0) + goto error; + } + closedir(dir); + return 0; + +error: + closedir(dir); + return -1; +} + +#if defined(RTE_ARCH_X86) +bool +pci_device_iommu_support_va(const struct rte_pci_device *dev) +{ +#define VTD_CAP_MGAW_SHIFT 16 +#define VTD_CAP_MGAW_MASK (0x3fULL << VTD_CAP_MGAW_SHIFT) + const struct rte_pci_addr *addr = &dev->addr; + char filename[PATH_MAX]; + FILE *fp; + uint64_t mgaw, vtd_cap_reg = 0; + + snprintf(filename, sizeof(filename), + "%s/" PCI_PRI_FMT "/iommu/intel-iommu/cap", + rte_pci_get_sysfs_path(), addr->domain, addr->bus, addr->devid, + addr->function); + + fp = fopen(filename, "r"); + if (fp == NULL) { + /* We don't have an Intel IOMMU, assume VA supported */ + if (errno == ENOENT) + return true; + + RTE_LOG(ERR, EAL, "%s(): can't open %s: %s\n", + __func__, filename, strerror(errno)); + return false; + } + + /* We have an Intel IOMMU */ + if (fscanf(fp, "%" PRIx64, &vtd_cap_reg) != 1) { + RTE_LOG(ERR, EAL, "%s(): can't read %s\n", __func__, filename); + fclose(fp); + return false; + } + + fclose(fp); + + mgaw = ((vtd_cap_reg & VTD_CAP_MGAW_MASK) >> VTD_CAP_MGAW_SHIFT) + 1; + + /* + * Assuming there is no limitation by now. We can not know at this point + * because the memory has not been initialized yet. Setting the dma mask + * will force a check once memory initialization is done. We can not do + * a fallback to IOVA PA now, but if the dma check fails, the error + * message should advice for using '--iova-mode pa' if IOVA VA is the + * current mode. + */ + rte_mem_set_dma_mask(mgaw); + return true; +} +#elif defined(RTE_ARCH_PPC_64) +bool +pci_device_iommu_support_va(__rte_unused const struct rte_pci_device *dev) +{ + /* + * IOMMU is always present on a PowerNV host (IOMMUv2). + * IOMMU is also present in a KVM/QEMU VM (IOMMUv1) but is not + * currently supported by DPDK. Test for our current environment + * and report VA support as appropriate. + */ + + char *line = NULL; + size_t len = 0; + char filename[PATH_MAX] = "/proc/cpuinfo"; + FILE *fp = fopen(filename, "r"); + bool ret = false; + + if (fp == NULL) { + RTE_LOG(ERR, EAL, "%s(): can't open %s: %s\n", + __func__, filename, strerror(errno)); + return ret; + } + + /* Check for a PowerNV platform */ + while (getline(&line, &len, fp) != -1) { + if (strstr(line, "platform") != NULL) + continue; + + if (strstr(line, "PowerNV") != NULL) { + RTE_LOG(DEBUG, EAL, "Running on a PowerNV system\n"); + ret = true; + break; + } + } + + free(line); + fclose(fp); + return ret; +} +#else +bool +pci_device_iommu_support_va(__rte_unused const struct rte_pci_device *dev) +{ + return true; +} +#endif + +enum rte_iova_mode +pci_device_iova_mode(const struct rte_pci_driver *pdrv, + const struct rte_pci_device *pdev) +{ + enum rte_iova_mode iova_mode = RTE_IOVA_DC; + + switch (pdev->kdrv) { + case RTE_KDRV_VFIO: { +#ifdef VFIO_PRESENT + static int is_vfio_noiommu_enabled = -1; + + if (is_vfio_noiommu_enabled == -1) { + if (rte_vfio_noiommu_is_enabled() == 1) + is_vfio_noiommu_enabled = 1; + else + is_vfio_noiommu_enabled = 0; + } + if (is_vfio_noiommu_enabled != 0) + iova_mode = RTE_IOVA_PA; + else if ((pdrv->drv_flags & RTE_PCI_DRV_NEED_IOVA_AS_VA) != 0) + iova_mode = RTE_IOVA_VA; +#endif + break; + } + + case RTE_KDRV_IGB_UIO: + case RTE_KDRV_UIO_GENERIC: + iova_mode = RTE_IOVA_PA; + break; + + default: + if ((pdrv->drv_flags & RTE_PCI_DRV_NEED_IOVA_AS_VA) != 0) + iova_mode = RTE_IOVA_VA; + break; + } + return iova_mode; +} + +/* Read PCI config space. */ +int rte_pci_read_config(const struct rte_pci_device *device, + void *buf, size_t len, off_t offset) +{ + char devname[RTE_DEV_NAME_MAX_LEN] = ""; + const struct rte_intr_handle *intr_handle = &device->intr_handle; + + switch (device->kdrv) { + case RTE_KDRV_IGB_UIO: + case RTE_KDRV_UIO_GENERIC: + return pci_uio_read_config(intr_handle, buf, len, offset); +#ifdef VFIO_PRESENT + case RTE_KDRV_VFIO: + return pci_vfio_read_config(intr_handle, buf, len, offset); +#endif + default: + rte_pci_device_name(&device->addr, devname, + RTE_DEV_NAME_MAX_LEN); + RTE_LOG(ERR, EAL, + "Unknown driver type for %s\n", devname); + return -1; + } +} + +/* Write PCI config space. */ +int rte_pci_write_config(const struct rte_pci_device *device, + const void *buf, size_t len, off_t offset) +{ + char devname[RTE_DEV_NAME_MAX_LEN] = ""; + const struct rte_intr_handle *intr_handle = &device->intr_handle; + + switch (device->kdrv) { + case RTE_KDRV_IGB_UIO: + case RTE_KDRV_UIO_GENERIC: + return pci_uio_write_config(intr_handle, buf, len, offset); +#ifdef VFIO_PRESENT + case RTE_KDRV_VFIO: + return pci_vfio_write_config(intr_handle, buf, len, offset); +#endif + default: + rte_pci_device_name(&device->addr, devname, + RTE_DEV_NAME_MAX_LEN); + RTE_LOG(ERR, EAL, + "Unknown driver type for %s\n", devname); + return -1; + } +} + +#if defined(RTE_ARCH_X86) +static int +pci_ioport_map(struct rte_pci_device *dev, int bar __rte_unused, + struct rte_pci_ioport *p) +{ + uint16_t start, end; + FILE *fp; + char *line = NULL; + char pci_id[16]; + int found = 0; + size_t linesz; + + if (rte_eal_iopl_init() != 0) { + RTE_LOG(ERR, EAL, "%s(): insufficient ioport permissions for PCI device %s\n", + __func__, dev->name); + return -1; + } + + snprintf(pci_id, sizeof(pci_id), PCI_PRI_FMT, + dev->addr.domain, dev->addr.bus, + dev->addr.devid, dev->addr.function); + + fp = fopen("/proc/ioports", "r"); + if (fp == NULL) { + RTE_LOG(ERR, EAL, "%s(): can't open ioports\n", __func__); + return -1; + } + + while (getdelim(&line, &linesz, '\n', fp) > 0) { + char *ptr = line; + char *left; + int n; + + n = strcspn(ptr, ":"); + ptr[n] = 0; + left = &ptr[n + 1]; + + while (*left && isspace(*left)) + left++; + + if (!strncmp(left, pci_id, strlen(pci_id))) { + found = 1; + + while (*ptr && isspace(*ptr)) + ptr++; + + sscanf(ptr, "%04hx-%04hx", &start, &end); + + break; + } + } + + free(line); + fclose(fp); + + if (!found) + return -1; + + p->base = start; + RTE_LOG(DEBUG, EAL, "PCI Port IO found start=0x%x\n", start); + + return 0; +} +#endif + +int +rte_pci_ioport_map(struct rte_pci_device *dev, int bar, + struct rte_pci_ioport *p) +{ + int ret = -1; + + switch (dev->kdrv) { +#ifdef VFIO_PRESENT + case RTE_KDRV_VFIO: + if (pci_vfio_is_enabled()) + ret = pci_vfio_ioport_map(dev, bar, p); + break; +#endif + case RTE_KDRV_IGB_UIO: + ret = pci_uio_ioport_map(dev, bar, p); + break; + case RTE_KDRV_UIO_GENERIC: +#if defined(RTE_ARCH_X86) + ret = pci_ioport_map(dev, bar, p); +#else + ret = pci_uio_ioport_map(dev, bar, p); +#endif + break; + case RTE_KDRV_NONE: +#if defined(RTE_ARCH_X86) + ret = pci_ioport_map(dev, bar, p); +#endif + break; + default: + break; + } + + if (!ret) + p->dev = dev; + + return ret; +} + +void +rte_pci_ioport_read(struct rte_pci_ioport *p, + void *data, size_t len, off_t offset) +{ + switch (p->dev->kdrv) { +#ifdef VFIO_PRESENT + case RTE_KDRV_VFIO: + pci_vfio_ioport_read(p, data, len, offset); + break; +#endif + case RTE_KDRV_IGB_UIO: + pci_uio_ioport_read(p, data, len, offset); + break; + case RTE_KDRV_UIO_GENERIC: + pci_uio_ioport_read(p, data, len, offset); + break; + case RTE_KDRV_NONE: +#if defined(RTE_ARCH_X86) + pci_uio_ioport_read(p, data, len, offset); +#endif + break; + default: + break; + } +} + +void +rte_pci_ioport_write(struct rte_pci_ioport *p, + const void *data, size_t len, off_t offset) +{ + switch (p->dev->kdrv) { +#ifdef VFIO_PRESENT + case RTE_KDRV_VFIO: + pci_vfio_ioport_write(p, data, len, offset); + break; +#endif + case RTE_KDRV_IGB_UIO: + pci_uio_ioport_write(p, data, len, offset); + break; + case RTE_KDRV_UIO_GENERIC: + pci_uio_ioport_write(p, data, len, offset); + break; + case RTE_KDRV_NONE: +#if defined(RTE_ARCH_X86) + pci_uio_ioport_write(p, data, len, offset); +#endif + break; + default: + break; + } +} + +int +rte_pci_ioport_unmap(struct rte_pci_ioport *p) +{ + int ret = -1; + + switch (p->dev->kdrv) { +#ifdef VFIO_PRESENT + case RTE_KDRV_VFIO: + if (pci_vfio_is_enabled()) + ret = pci_vfio_ioport_unmap(p); + break; +#endif + case RTE_KDRV_IGB_UIO: + ret = pci_uio_ioport_unmap(p); + break; + case RTE_KDRV_UIO_GENERIC: +#if defined(RTE_ARCH_X86) + ret = 0; +#else + ret = pci_uio_ioport_unmap(p); +#endif + break; + case RTE_KDRV_NONE: +#if defined(RTE_ARCH_X86) + ret = 0; +#endif + break; + default: + break; + } + + return ret; +} diff --git a/src/spdk/dpdk/drivers/bus/pci/linux/pci_init.h b/src/spdk/dpdk/drivers/bus/pci/linux/pci_init.h new file mode 100644 index 000000000..c2e603a37 --- /dev/null +++ b/src/spdk/dpdk/drivers/bus/pci/linux/pci_init.h @@ -0,0 +1,88 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2014 Intel Corporation + */ + +#ifndef EAL_PCI_INIT_H_ +#define EAL_PCI_INIT_H_ + +#include <rte_vfio.h> + +/** IO resource type: */ +#define IORESOURCE_IO 0x00000100 +#define IORESOURCE_MEM 0x00000200 + +/* + * Helper function to map PCI resources right after hugepages in virtual memory + */ +extern void *pci_map_addr; +void *pci_find_max_end_va(void); + +/* parse one line of the "resource" sysfs file (note that the 'line' + * string is modified) + */ +int pci_parse_one_sysfs_resource(char *line, size_t len, uint64_t *phys_addr, + uint64_t *end_addr, uint64_t *flags); + +int pci_uio_alloc_resource(struct rte_pci_device *dev, + struct mapped_pci_resource **uio_res); +void pci_uio_free_resource(struct rte_pci_device *dev, + struct mapped_pci_resource *uio_res); +int pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx, + struct mapped_pci_resource *uio_res, int map_idx); + +int pci_uio_read_config(const struct rte_intr_handle *intr_handle, + void *buf, size_t len, off_t offs); +int pci_uio_write_config(const struct rte_intr_handle *intr_handle, + const void *buf, size_t len, off_t offs); + +int pci_uio_ioport_map(struct rte_pci_device *dev, int bar, + struct rte_pci_ioport *p); +void pci_uio_ioport_read(struct rte_pci_ioport *p, + void *data, size_t len, off_t offset); +void pci_uio_ioport_write(struct rte_pci_ioport *p, + const void *data, size_t len, off_t offset); +int pci_uio_ioport_unmap(struct rte_pci_ioport *p); + +#ifdef VFIO_PRESENT + +#ifdef PCI_MSIX_TABLE_BIR +#define RTE_PCI_MSIX_TABLE_BIR PCI_MSIX_TABLE_BIR +#else +#define RTE_PCI_MSIX_TABLE_BIR 0x7 +#endif + +#ifdef PCI_MSIX_TABLE_OFFSET +#define RTE_PCI_MSIX_TABLE_OFFSET PCI_MSIX_TABLE_OFFSET +#else +#define RTE_PCI_MSIX_TABLE_OFFSET 0xfffffff8 +#endif + +#ifdef PCI_MSIX_FLAGS_QSIZE +#define RTE_PCI_MSIX_FLAGS_QSIZE PCI_MSIX_FLAGS_QSIZE +#else +#define RTE_PCI_MSIX_FLAGS_QSIZE 0x07ff +#endif + +/* access config space */ +int pci_vfio_read_config(const struct rte_intr_handle *intr_handle, + void *buf, size_t len, off_t offs); +int pci_vfio_write_config(const struct rte_intr_handle *intr_handle, + const void *buf, size_t len, off_t offs); + +int pci_vfio_ioport_map(struct rte_pci_device *dev, int bar, + struct rte_pci_ioport *p); +void pci_vfio_ioport_read(struct rte_pci_ioport *p, + void *data, size_t len, off_t offset); +void pci_vfio_ioport_write(struct rte_pci_ioport *p, + const void *data, size_t len, off_t offset); +int pci_vfio_ioport_unmap(struct rte_pci_ioport *p); + +/* map/unmap VFIO resource prototype */ +int pci_vfio_map_resource(struct rte_pci_device *dev); +int pci_vfio_unmap_resource(struct rte_pci_device *dev); + +int pci_vfio_is_enabled(void); + +#endif + +#endif /* EAL_PCI_INIT_H_ */ diff --git a/src/spdk/dpdk/drivers/bus/pci/linux/pci_uio.c b/src/spdk/dpdk/drivers/bus/pci/linux/pci_uio.c new file mode 100644 index 000000000..097dc1922 --- /dev/null +++ b/src/spdk/dpdk/drivers/bus/pci/linux/pci_uio.c @@ -0,0 +1,569 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2014 Intel Corporation + */ + +#include <string.h> +#include <unistd.h> +#include <fcntl.h> +#include <dirent.h> +#include <inttypes.h> +#include <sys/stat.h> +#include <sys/mman.h> +#include <sys/sysmacros.h> +#include <linux/pci_regs.h> + +#if defined(RTE_ARCH_X86) +#include <sys/io.h> +#endif + +#include <rte_string_fns.h> +#include <rte_log.h> +#include <rte_pci.h> +#include <rte_bus_pci.h> +#include <rte_common.h> +#include <rte_malloc.h> + +#include "eal_filesystem.h" +#include "pci_init.h" + +void *pci_map_addr = NULL; + +#define OFF_MAX ((uint64_t)(off_t)-1) + +int +pci_uio_read_config(const struct rte_intr_handle *intr_handle, + void *buf, size_t len, off_t offset) +{ + return pread(intr_handle->uio_cfg_fd, buf, len, offset); +} + +int +pci_uio_write_config(const struct rte_intr_handle *intr_handle, + const void *buf, size_t len, off_t offset) +{ + return pwrite(intr_handle->uio_cfg_fd, buf, len, offset); +} + +static int +pci_uio_set_bus_master(int dev_fd) +{ + uint16_t reg; + int ret; + + ret = pread(dev_fd, ®, sizeof(reg), PCI_COMMAND); + if (ret != sizeof(reg)) { + RTE_LOG(ERR, EAL, + "Cannot read command from PCI config space!\n"); + return -1; + } + + /* return if bus mastering is already on */ + if (reg & PCI_COMMAND_MASTER) + return 0; + + reg |= PCI_COMMAND_MASTER; + + ret = pwrite(dev_fd, ®, sizeof(reg), PCI_COMMAND); + if (ret != sizeof(reg)) { + RTE_LOG(ERR, EAL, + "Cannot write command to PCI config space!\n"); + return -1; + } + + return 0; +} + +static int +pci_mknod_uio_dev(const char *sysfs_uio_path, unsigned uio_num) +{ + FILE *f; + char filename[PATH_MAX]; + int ret; + unsigned major, minor; + dev_t dev; + + /* get the name of the sysfs file that contains the major and minor + * of the uio device and read its content */ + snprintf(filename, sizeof(filename), "%s/dev", sysfs_uio_path); + + f = fopen(filename, "r"); + if (f == NULL) { + RTE_LOG(ERR, EAL, "%s(): cannot open sysfs to get major:minor\n", + __func__); + return -1; + } + + ret = fscanf(f, "%u:%u", &major, &minor); + if (ret != 2) { + RTE_LOG(ERR, EAL, "%s(): cannot parse sysfs to get major:minor\n", + __func__); + fclose(f); + return -1; + } + fclose(f); + + /* create the char device "mknod /dev/uioX c major minor" */ + snprintf(filename, sizeof(filename), "/dev/uio%u", uio_num); + dev = makedev(major, minor); + ret = mknod(filename, S_IFCHR | S_IRUSR | S_IWUSR, dev); + if (ret != 0) { + RTE_LOG(ERR, EAL, "%s(): mknod() failed %s\n", + __func__, strerror(errno)); + return -1; + } + + return ret; +} + +/* + * Return the uioX char device used for a pci device. On success, return + * the UIO number and fill dstbuf string with the path of the device in + * sysfs. On error, return a negative value. In this case dstbuf is + * invalid. + */ +static int +pci_get_uio_dev(struct rte_pci_device *dev, char *dstbuf, + unsigned int buflen, int create) +{ + struct rte_pci_addr *loc = &dev->addr; + int uio_num = -1; + struct dirent *e; + DIR *dir; + char dirname[PATH_MAX]; + + /* depending on kernel version, uio can be located in uio/uioX + * or uio:uioX */ + + snprintf(dirname, sizeof(dirname), + "%s/" PCI_PRI_FMT "/uio", rte_pci_get_sysfs_path(), + loc->domain, loc->bus, loc->devid, loc->function); + + dir = opendir(dirname); + if (dir == NULL) { + /* retry with the parent directory */ + snprintf(dirname, sizeof(dirname), + "%s/" PCI_PRI_FMT, rte_pci_get_sysfs_path(), + loc->domain, loc->bus, loc->devid, loc->function); + dir = opendir(dirname); + + if (dir == NULL) { + RTE_LOG(ERR, EAL, "Cannot opendir %s\n", dirname); + return -1; + } + } + + /* take the first file starting with "uio" */ + while ((e = readdir(dir)) != NULL) { + /* format could be uio%d ...*/ + int shortprefix_len = sizeof("uio") - 1; + /* ... or uio:uio%d */ + int longprefix_len = sizeof("uio:uio") - 1; + char *endptr; + + if (strncmp(e->d_name, "uio", 3) != 0) + continue; + + /* first try uio%d */ + errno = 0; + uio_num = strtoull(e->d_name + shortprefix_len, &endptr, 10); + if (errno == 0 && endptr != (e->d_name + shortprefix_len)) { + snprintf(dstbuf, buflen, "%s/uio%u", dirname, uio_num); + break; + } + + /* then try uio:uio%d */ + errno = 0; + uio_num = strtoull(e->d_name + longprefix_len, &endptr, 10); + if (errno == 0 && endptr != (e->d_name + longprefix_len)) { + snprintf(dstbuf, buflen, "%s/uio:uio%u", dirname, uio_num); + break; + } + } + closedir(dir); + + /* No uio resource found */ + if (e == NULL) + return -1; + + /* create uio device if we've been asked to */ + if (rte_eal_create_uio_dev() && create && + pci_mknod_uio_dev(dstbuf, uio_num) < 0) + RTE_LOG(WARNING, EAL, "Cannot create /dev/uio%u\n", uio_num); + + return uio_num; +} + +void +pci_uio_free_resource(struct rte_pci_device *dev, + struct mapped_pci_resource *uio_res) +{ + rte_free(uio_res); + + if (dev->intr_handle.uio_cfg_fd >= 0) { + close(dev->intr_handle.uio_cfg_fd); + dev->intr_handle.uio_cfg_fd = -1; + } + if (dev->intr_handle.fd >= 0) { + close(dev->intr_handle.fd); + dev->intr_handle.fd = -1; + dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; + } +} + +int +pci_uio_alloc_resource(struct rte_pci_device *dev, + struct mapped_pci_resource **uio_res) +{ + char dirname[PATH_MAX]; + char cfgname[PATH_MAX]; + char devname[PATH_MAX]; /* contains the /dev/uioX */ + int uio_num; + struct rte_pci_addr *loc; + + loc = &dev->addr; + + /* find uio resource */ + uio_num = pci_get_uio_dev(dev, dirname, sizeof(dirname), 1); + if (uio_num < 0) { + RTE_LOG(WARNING, EAL, " "PCI_PRI_FMT" not managed by UIO driver, " + "skipping\n", loc->domain, loc->bus, loc->devid, loc->function); + return 1; + } + snprintf(devname, sizeof(devname), "/dev/uio%u", uio_num); + + /* save fd if in primary process */ + dev->intr_handle.fd = open(devname, O_RDWR); + if (dev->intr_handle.fd < 0) { + RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", + devname, strerror(errno)); + goto error; + } + + snprintf(cfgname, sizeof(cfgname), + "/sys/class/uio/uio%u/device/config", uio_num); + dev->intr_handle.uio_cfg_fd = open(cfgname, O_RDWR); + if (dev->intr_handle.uio_cfg_fd < 0) { + RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", + cfgname, strerror(errno)); + goto error; + } + + if (dev->kdrv == RTE_KDRV_IGB_UIO) + dev->intr_handle.type = RTE_INTR_HANDLE_UIO; + else { + dev->intr_handle.type = RTE_INTR_HANDLE_UIO_INTX; + + /* set bus master that is not done by uio_pci_generic */ + if (pci_uio_set_bus_master(dev->intr_handle.uio_cfg_fd)) { + RTE_LOG(ERR, EAL, "Cannot set up bus mastering!\n"); + goto error; + } + } + + /* allocate the mapping details for secondary processes*/ + *uio_res = rte_zmalloc("UIO_RES", sizeof(**uio_res), 0); + if (*uio_res == NULL) { + RTE_LOG(ERR, EAL, + "%s(): cannot store uio mmap details\n", __func__); + goto error; + } + + strlcpy((*uio_res)->path, devname, sizeof((*uio_res)->path)); + memcpy(&(*uio_res)->pci_addr, &dev->addr, sizeof((*uio_res)->pci_addr)); + + return 0; + +error: + pci_uio_free_resource(dev, *uio_res); + return -1; +} + +int +pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx, + struct mapped_pci_resource *uio_res, int map_idx) +{ + int fd = -1; + char devname[PATH_MAX]; + void *mapaddr; + struct rte_pci_addr *loc; + struct pci_map *maps; + int wc_activate = 0; + + if (dev->driver != NULL) + wc_activate = dev->driver->drv_flags & RTE_PCI_DRV_WC_ACTIVATE; + + loc = &dev->addr; + maps = uio_res->maps; + + /* allocate memory to keep path */ + maps[map_idx].path = rte_malloc(NULL, sizeof(devname), 0); + if (maps[map_idx].path == NULL) { + RTE_LOG(ERR, EAL, "Cannot allocate memory for path: %s\n", + strerror(errno)); + return -1; + } + + /* + * open resource file, to mmap it + */ + if (wc_activate) { + /* update devname for mmap */ + snprintf(devname, sizeof(devname), + "%s/" PCI_PRI_FMT "/resource%d_wc", + rte_pci_get_sysfs_path(), + loc->domain, loc->bus, loc->devid, + loc->function, res_idx); + + fd = open(devname, O_RDWR); + if (fd < 0 && errno != ENOENT) { + RTE_LOG(INFO, EAL, "%s cannot be mapped. " + "Fall-back to non prefetchable mode.\n", + devname); + } + } + + if (!wc_activate || fd < 0) { + snprintf(devname, sizeof(devname), + "%s/" PCI_PRI_FMT "/resource%d", + rte_pci_get_sysfs_path(), + loc->domain, loc->bus, loc->devid, + loc->function, res_idx); + + /* then try to map resource file */ + fd = open(devname, O_RDWR); + if (fd < 0) { + RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", + devname, strerror(errno)); + goto error; + } + } + + /* try mapping somewhere close to the end of hugepages */ + if (pci_map_addr == NULL) + pci_map_addr = pci_find_max_end_va(); + + mapaddr = pci_map_resource(pci_map_addr, fd, 0, + (size_t)dev->mem_resource[res_idx].len, 0); + close(fd); + if (mapaddr == MAP_FAILED) + goto error; + + pci_map_addr = RTE_PTR_ADD(mapaddr, + (size_t)dev->mem_resource[res_idx].len); + + pci_map_addr = RTE_PTR_ALIGN(pci_map_addr, sysconf(_SC_PAGE_SIZE)); + + maps[map_idx].phaddr = dev->mem_resource[res_idx].phys_addr; + maps[map_idx].size = dev->mem_resource[res_idx].len; + maps[map_idx].addr = mapaddr; + maps[map_idx].offset = 0; + strcpy(maps[map_idx].path, devname); + dev->mem_resource[res_idx].addr = mapaddr; + + return 0; + +error: + rte_free(maps[map_idx].path); + return -1; +} + +#if defined(RTE_ARCH_X86) +int +pci_uio_ioport_map(struct rte_pci_device *dev, int bar, + struct rte_pci_ioport *p) +{ + char dirname[PATH_MAX]; + char filename[PATH_MAX]; + int uio_num; + unsigned long start; + + if (rte_eal_iopl_init() != 0) { + RTE_LOG(ERR, EAL, "%s(): insufficient ioport permissions for PCI device %s\n", + __func__, dev->name); + return -1; + } + + uio_num = pci_get_uio_dev(dev, dirname, sizeof(dirname), 0); + if (uio_num < 0) + return -1; + + /* get portio start */ + snprintf(filename, sizeof(filename), + "%s/portio/port%d/start", dirname, bar); + if (eal_parse_sysfs_value(filename, &start) < 0) { + RTE_LOG(ERR, EAL, "%s(): cannot parse portio start\n", + __func__); + return -1; + } + /* ensure we don't get anything funny here, read/write will cast to + * uin16_t */ + if (start > UINT16_MAX) + return -1; + + /* FIXME only for primary process ? */ + if (dev->intr_handle.type == RTE_INTR_HANDLE_UNKNOWN) { + + snprintf(filename, sizeof(filename), "/dev/uio%u", uio_num); + dev->intr_handle.fd = open(filename, O_RDWR); + if (dev->intr_handle.fd < 0) { + RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", + filename, strerror(errno)); + return -1; + } + dev->intr_handle.type = RTE_INTR_HANDLE_UIO; + } + + RTE_LOG(DEBUG, EAL, "PCI Port IO found start=0x%lx\n", start); + + p->base = start; + p->len = 0; + return 0; +} +#else +int +pci_uio_ioport_map(struct rte_pci_device *dev, int bar, + struct rte_pci_ioport *p) +{ + FILE *f; + char buf[BUFSIZ]; + char filename[PATH_MAX]; + uint64_t phys_addr, end_addr, flags; + int fd, i; + void *addr; + + /* open and read addresses of the corresponding resource in sysfs */ + snprintf(filename, sizeof(filename), "%s/" PCI_PRI_FMT "/resource", + rte_pci_get_sysfs_path(), dev->addr.domain, dev->addr.bus, + dev->addr.devid, dev->addr.function); + f = fopen(filename, "r"); + if (f == NULL) { + RTE_LOG(ERR, EAL, "Cannot open sysfs resource: %s\n", + strerror(errno)); + return -1; + } + for (i = 0; i < bar + 1; i++) { + if (fgets(buf, sizeof(buf), f) == NULL) { + RTE_LOG(ERR, EAL, "Cannot read sysfs resource\n"); + goto error; + } + } + if (pci_parse_one_sysfs_resource(buf, sizeof(buf), &phys_addr, + &end_addr, &flags) < 0) + goto error; + if ((flags & IORESOURCE_IO) == 0) { + RTE_LOG(ERR, EAL, "BAR %d is not an IO resource\n", bar); + goto error; + } + snprintf(filename, sizeof(filename), "%s/" PCI_PRI_FMT "/resource%d", + rte_pci_get_sysfs_path(), dev->addr.domain, dev->addr.bus, + dev->addr.devid, dev->addr.function, bar); + + /* mmap the pci resource */ + fd = open(filename, O_RDWR); + if (fd < 0) { + RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", filename, + strerror(errno)); + goto error; + } + addr = mmap(NULL, end_addr + 1, PROT_READ | PROT_WRITE, + MAP_SHARED, fd, 0); + close(fd); + if (addr == MAP_FAILED) { + RTE_LOG(ERR, EAL, "Cannot mmap IO port resource: %s\n", + strerror(errno)); + goto error; + } + + /* strangely, the base address is mmap addr + phys_addr */ + p->base = (uintptr_t)addr + phys_addr; + p->len = end_addr + 1; + RTE_LOG(DEBUG, EAL, "PCI Port IO found start=0x%"PRIx64"\n", p->base); + fclose(f); + + return 0; + +error: + fclose(f); + return -1; +} +#endif + +void +pci_uio_ioport_read(struct rte_pci_ioport *p, + void *data, size_t len, off_t offset) +{ + uint8_t *d; + int size; + uintptr_t reg = p->base + offset; + + for (d = data; len > 0; d += size, reg += size, len -= size) { + if (len >= 4) { + size = 4; +#if defined(RTE_ARCH_X86) + *(uint32_t *)d = inl(reg); +#else + *(uint32_t *)d = *(volatile uint32_t *)reg; +#endif + } else if (len >= 2) { + size = 2; +#if defined(RTE_ARCH_X86) + *(uint16_t *)d = inw(reg); +#else + *(uint16_t *)d = *(volatile uint16_t *)reg; +#endif + } else { + size = 1; +#if defined(RTE_ARCH_X86) + *d = inb(reg); +#else + *d = *(volatile uint8_t *)reg; +#endif + } + } +} + +void +pci_uio_ioport_write(struct rte_pci_ioport *p, + const void *data, size_t len, off_t offset) +{ + const uint8_t *s; + int size; + uintptr_t reg = p->base + offset; + + for (s = data; len > 0; s += size, reg += size, len -= size) { + if (len >= 4) { + size = 4; +#if defined(RTE_ARCH_X86) + outl_p(*(const uint32_t *)s, reg); +#else + *(volatile uint32_t *)reg = *(const uint32_t *)s; +#endif + } else if (len >= 2) { + size = 2; +#if defined(RTE_ARCH_X86) + outw_p(*(const uint16_t *)s, reg); +#else + *(volatile uint16_t *)reg = *(const uint16_t *)s; +#endif + } else { + size = 1; +#if defined(RTE_ARCH_X86) + outb_p(*s, reg); +#else + *(volatile uint8_t *)reg = *s; +#endif + } + } +} + +int +pci_uio_ioport_unmap(struct rte_pci_ioport *p) +{ +#if defined(RTE_ARCH_X86) + RTE_SET_USED(p); + /* FIXME close intr fd ? */ + return 0; +#else + return munmap((void *)(uintptr_t)p->base, p->len); +#endif +} diff --git a/src/spdk/dpdk/drivers/bus/pci/linux/pci_vfio.c b/src/spdk/dpdk/drivers/bus/pci/linux/pci_vfio.c new file mode 100644 index 000000000..64cd84a68 --- /dev/null +++ b/src/spdk/dpdk/drivers/bus/pci/linux/pci_vfio.c @@ -0,0 +1,1070 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2014 Intel Corporation + */ + +#include <string.h> +#include <fcntl.h> +#include <linux/pci_regs.h> +#include <sys/eventfd.h> +#include <sys/socket.h> +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <stdbool.h> + +#include <rte_log.h> +#include <rte_pci.h> +#include <rte_bus_pci.h> +#include <rte_malloc.h> +#include <rte_vfio.h> +#include <rte_eal.h> +#include <rte_bus.h> +#include <rte_spinlock.h> +#include <rte_tailq.h> + +#include "eal_filesystem.h" + +#include "pci_init.h" +#include "private.h" + +/** + * @file + * PCI probing under linux (VFIO version) + * + * This code tries to determine if the PCI device is bound to VFIO driver, + * and initialize it (map BARs, set up interrupts) if that's the case. + * + * This file is only compiled if CONFIG_RTE_EAL_VFIO is set to "y". + */ + +#ifdef VFIO_PRESENT + +#ifndef PAGE_SIZE +#define PAGE_SIZE (sysconf(_SC_PAGESIZE)) +#endif +#define PAGE_MASK (~(PAGE_SIZE - 1)) + +static struct rte_tailq_elem rte_vfio_tailq = { + .name = "VFIO_RESOURCE_LIST", +}; +EAL_REGISTER_TAILQ(rte_vfio_tailq) + +int +pci_vfio_read_config(const struct rte_intr_handle *intr_handle, + void *buf, size_t len, off_t offs) +{ + return pread64(intr_handle->vfio_dev_fd, buf, len, + VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + offs); +} + +int +pci_vfio_write_config(const struct rte_intr_handle *intr_handle, + const void *buf, size_t len, off_t offs) +{ + return pwrite64(intr_handle->vfio_dev_fd, buf, len, + VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + offs); +} + +/* get PCI BAR number where MSI-X interrupts are */ +static int +pci_vfio_get_msix_bar(int fd, struct pci_msix_table *msix_table) +{ + int ret; + uint32_t reg; + uint16_t flags; + uint8_t cap_id, cap_offset; + + /* read PCI capability pointer from config space */ + ret = pread64(fd, ®, sizeof(reg), + VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + + PCI_CAPABILITY_LIST); + if (ret != sizeof(reg)) { + RTE_LOG(ERR, EAL, "Cannot read capability pointer from PCI " + "config space!\n"); + return -1; + } + + /* we need first byte */ + cap_offset = reg & 0xFF; + + while (cap_offset) { + + /* read PCI capability ID */ + ret = pread64(fd, ®, sizeof(reg), + VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + + cap_offset); + if (ret != sizeof(reg)) { + RTE_LOG(ERR, EAL, "Cannot read capability ID from PCI " + "config space!\n"); + return -1; + } + + /* we need first byte */ + cap_id = reg & 0xFF; + + /* if we haven't reached MSI-X, check next capability */ + if (cap_id != PCI_CAP_ID_MSIX) { + ret = pread64(fd, ®, sizeof(reg), + VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + + cap_offset); + if (ret != sizeof(reg)) { + RTE_LOG(ERR, EAL, "Cannot read capability pointer from PCI " + "config space!\n"); + return -1; + } + + /* we need second byte */ + cap_offset = (reg & 0xFF00) >> 8; + + continue; + } + /* else, read table offset */ + else { + /* table offset resides in the next 4 bytes */ + ret = pread64(fd, ®, sizeof(reg), + VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + + cap_offset + 4); + if (ret != sizeof(reg)) { + RTE_LOG(ERR, EAL, "Cannot read table offset from PCI config " + "space!\n"); + return -1; + } + + ret = pread64(fd, &flags, sizeof(flags), + VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + + cap_offset + 2); + if (ret != sizeof(flags)) { + RTE_LOG(ERR, EAL, "Cannot read table flags from PCI config " + "space!\n"); + return -1; + } + + msix_table->bar_index = reg & RTE_PCI_MSIX_TABLE_BIR; + msix_table->offset = reg & RTE_PCI_MSIX_TABLE_OFFSET; + msix_table->size = + 16 * (1 + (flags & RTE_PCI_MSIX_FLAGS_QSIZE)); + + return 0; + } + } + return 0; +} + +/* set PCI bus mastering */ +static int +pci_vfio_set_bus_master(int dev_fd, bool op) +{ + uint16_t reg; + int ret; + + ret = pread64(dev_fd, ®, sizeof(reg), + VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + + PCI_COMMAND); + if (ret != sizeof(reg)) { + RTE_LOG(ERR, EAL, "Cannot read command from PCI config space!\n"); + return -1; + } + + if (op) + /* set the master bit */ + reg |= PCI_COMMAND_MASTER; + else + reg &= ~(PCI_COMMAND_MASTER); + + ret = pwrite64(dev_fd, ®, sizeof(reg), + VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + + PCI_COMMAND); + + if (ret != sizeof(reg)) { + RTE_LOG(ERR, EAL, "Cannot write command to PCI config space!\n"); + return -1; + } + + return 0; +} + +/* set up interrupt support (but not enable interrupts) */ +static int +pci_vfio_setup_interrupts(struct rte_pci_device *dev, int vfio_dev_fd) +{ + int i, ret, intr_idx; + enum rte_intr_mode intr_mode; + + /* default to invalid index */ + intr_idx = VFIO_PCI_NUM_IRQS; + + /* Get default / configured intr_mode */ + intr_mode = rte_eal_vfio_intr_mode(); + + /* get interrupt type from internal config (MSI-X by default, can be + * overridden from the command line + */ + switch (intr_mode) { + case RTE_INTR_MODE_MSIX: + intr_idx = VFIO_PCI_MSIX_IRQ_INDEX; + break; + case RTE_INTR_MODE_MSI: + intr_idx = VFIO_PCI_MSI_IRQ_INDEX; + break; + case RTE_INTR_MODE_LEGACY: + intr_idx = VFIO_PCI_INTX_IRQ_INDEX; + break; + /* don't do anything if we want to automatically determine interrupt type */ + case RTE_INTR_MODE_NONE: + break; + default: + RTE_LOG(ERR, EAL, " unknown default interrupt type!\n"); + return -1; + } + + /* start from MSI-X interrupt type */ + for (i = VFIO_PCI_MSIX_IRQ_INDEX; i >= 0; i--) { + struct vfio_irq_info irq = { .argsz = sizeof(irq) }; + int fd = -1; + + /* skip interrupt modes we don't want */ + if (intr_mode != RTE_INTR_MODE_NONE && + i != intr_idx) + continue; + + irq.index = i; + + ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_IRQ_INFO, &irq); + if (ret < 0) { + RTE_LOG(ERR, EAL, " cannot get IRQ info, " + "error %i (%s)\n", errno, strerror(errno)); + return -1; + } + + /* if this vector cannot be used with eventfd, fail if we explicitly + * specified interrupt type, otherwise continue */ + if ((irq.flags & VFIO_IRQ_INFO_EVENTFD) == 0) { + if (intr_mode != RTE_INTR_MODE_NONE) { + RTE_LOG(ERR, EAL, + " interrupt vector does not support eventfd!\n"); + return -1; + } else + continue; + } + + /* set up an eventfd for interrupts */ + fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); + if (fd < 0) { + RTE_LOG(ERR, EAL, " cannot set up eventfd, " + "error %i (%s)\n", errno, strerror(errno)); + return -1; + } + + dev->intr_handle.fd = fd; + dev->intr_handle.vfio_dev_fd = vfio_dev_fd; + + switch (i) { + case VFIO_PCI_MSIX_IRQ_INDEX: + intr_mode = RTE_INTR_MODE_MSIX; + dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_MSIX; + break; + case VFIO_PCI_MSI_IRQ_INDEX: + intr_mode = RTE_INTR_MODE_MSI; + dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_MSI; + break; + case VFIO_PCI_INTX_IRQ_INDEX: + intr_mode = RTE_INTR_MODE_LEGACY; + dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_LEGACY; + break; + default: + RTE_LOG(ERR, EAL, " unknown interrupt type!\n"); + return -1; + } + + return 0; + } + + /* if we're here, we haven't found a suitable interrupt vector */ + return -1; +} + +#ifdef HAVE_VFIO_DEV_REQ_INTERFACE +/* + * Spinlock for device hot-unplug failure handling. + * If it tries to access bus or device, such as handle sigbus on bus + * or handle memory failure for device, just need to use this lock. + * It could protect the bus and the device to avoid race condition. + */ +static rte_spinlock_t failure_handle_lock = RTE_SPINLOCK_INITIALIZER; + +static void +pci_vfio_req_handler(void *param) +{ + struct rte_bus *bus; + int ret; + struct rte_device *device = (struct rte_device *)param; + + rte_spinlock_lock(&failure_handle_lock); + bus = rte_bus_find_by_device(device); + if (bus == NULL) { + RTE_LOG(ERR, EAL, "Cannot find bus for device (%s)\n", + device->name); + goto handle_end; + } + + /* + * vfio kernel module request user space to release allocated + * resources before device be deleted in kernel, so it can directly + * call the vfio bus hot-unplug handler to process it. + */ + ret = bus->hot_unplug_handler(device); + if (ret) + RTE_LOG(ERR, EAL, + "Can not handle hot-unplug for device (%s)\n", + device->name); +handle_end: + rte_spinlock_unlock(&failure_handle_lock); +} + +/* enable notifier (only enable req now) */ +static int +pci_vfio_enable_notifier(struct rte_pci_device *dev, int vfio_dev_fd) +{ + int ret; + int fd = -1; + + /* set up an eventfd for req notifier */ + fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); + if (fd < 0) { + RTE_LOG(ERR, EAL, "Cannot set up eventfd, error %i (%s)\n", + errno, strerror(errno)); + return -1; + } + + dev->vfio_req_intr_handle.fd = fd; + dev->vfio_req_intr_handle.type = RTE_INTR_HANDLE_VFIO_REQ; + dev->vfio_req_intr_handle.vfio_dev_fd = vfio_dev_fd; + + ret = rte_intr_callback_register(&dev->vfio_req_intr_handle, + pci_vfio_req_handler, + (void *)&dev->device); + if (ret) { + RTE_LOG(ERR, EAL, "Fail to register req notifier handler.\n"); + goto error; + } + + ret = rte_intr_enable(&dev->vfio_req_intr_handle); + if (ret) { + RTE_LOG(ERR, EAL, "Fail to enable req notifier.\n"); + ret = rte_intr_callback_unregister(&dev->vfio_req_intr_handle, + pci_vfio_req_handler, + (void *)&dev->device); + if (ret < 0) + RTE_LOG(ERR, EAL, + "Fail to unregister req notifier handler.\n"); + goto error; + } + + return 0; +error: + close(fd); + + dev->vfio_req_intr_handle.fd = -1; + dev->vfio_req_intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; + dev->vfio_req_intr_handle.vfio_dev_fd = -1; + + return -1; +} + +/* disable notifier (only disable req now) */ +static int +pci_vfio_disable_notifier(struct rte_pci_device *dev) +{ + int ret; + + ret = rte_intr_disable(&dev->vfio_req_intr_handle); + if (ret) { + RTE_LOG(ERR, EAL, "fail to disable req notifier.\n"); + return -1; + } + + ret = rte_intr_callback_unregister(&dev->vfio_req_intr_handle, + pci_vfio_req_handler, + (void *)&dev->device); + if (ret < 0) { + RTE_LOG(ERR, EAL, + "fail to unregister req notifier handler.\n"); + return -1; + } + + close(dev->vfio_req_intr_handle.fd); + + dev->vfio_req_intr_handle.fd = -1; + dev->vfio_req_intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; + dev->vfio_req_intr_handle.vfio_dev_fd = -1; + + return 0; +} +#endif + +static int +pci_vfio_is_ioport_bar(int vfio_dev_fd, int bar_index) +{ + uint32_t ioport_bar; + int ret; + + ret = pread64(vfio_dev_fd, &ioport_bar, sizeof(ioport_bar), + VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + + PCI_BASE_ADDRESS_0 + bar_index*4); + if (ret != sizeof(ioport_bar)) { + RTE_LOG(ERR, EAL, "Cannot read command (%x) from config space!\n", + PCI_BASE_ADDRESS_0 + bar_index*4); + return -1; + } + + return (ioport_bar & PCI_BASE_ADDRESS_SPACE_IO) != 0; +} + +static int +pci_rte_vfio_setup_device(struct rte_pci_device *dev, int vfio_dev_fd) +{ + if (pci_vfio_setup_interrupts(dev, vfio_dev_fd) != 0) { + RTE_LOG(ERR, EAL, "Error setting up interrupts!\n"); + return -1; + } + + /* set bus mastering for the device */ + if (pci_vfio_set_bus_master(vfio_dev_fd, true)) { + RTE_LOG(ERR, EAL, "Cannot set up bus mastering!\n"); + return -1; + } + + /* + * Reset the device. If the device is not capable of resetting, + * then it updates errno as EINVAL. + */ + if (ioctl(vfio_dev_fd, VFIO_DEVICE_RESET) && errno != EINVAL) { + RTE_LOG(ERR, EAL, "Unable to reset device! Error: %d (%s)\n", + errno, strerror(errno)); + return -1; + } + + return 0; +} + +static int +pci_vfio_mmap_bar(int vfio_dev_fd, struct mapped_pci_resource *vfio_res, + int bar_index, int additional_flags) +{ + struct memreg { + uint64_t offset; + size_t size; + } memreg[2] = {}; + void *bar_addr; + struct pci_msix_table *msix_table = &vfio_res->msix_table; + struct pci_map *bar = &vfio_res->maps[bar_index]; + + if (bar->size == 0) { + RTE_LOG(DEBUG, EAL, "Bar size is 0, skip BAR%d\n", bar_index); + return 0; + } + + if (msix_table->bar_index == bar_index) { + /* + * VFIO will not let us map the MSI-X table, + * but we can map around it. + */ + uint32_t table_start = msix_table->offset; + uint32_t table_end = table_start + msix_table->size; + table_end = RTE_ALIGN(table_end, PAGE_SIZE); + table_start = RTE_ALIGN_FLOOR(table_start, PAGE_SIZE); + + /* If page-aligned start of MSI-X table is less than the + * actual MSI-X table start address, reassign to the actual + * start address. + */ + if (table_start < msix_table->offset) + table_start = msix_table->offset; + + if (table_start == 0 && table_end >= bar->size) { + /* Cannot map this BAR */ + RTE_LOG(DEBUG, EAL, "Skipping BAR%d\n", bar_index); + bar->size = 0; + bar->addr = 0; + return 0; + } + + memreg[0].offset = bar->offset; + memreg[0].size = table_start; + if (bar->size < table_end) { + /* + * If MSI-X table end is beyond BAR end, don't attempt + * to perform second mapping. + */ + memreg[1].offset = 0; + memreg[1].size = 0; + } else { + memreg[1].offset = bar->offset + table_end; + memreg[1].size = bar->size - table_end; + } + + RTE_LOG(DEBUG, EAL, + "Trying to map BAR%d that contains the MSI-X " + "table. Trying offsets: " + "0x%04" PRIx64 ":0x%04zx, 0x%04" PRIx64 ":0x%04zx\n", + bar_index, + memreg[0].offset, memreg[0].size, + memreg[1].offset, memreg[1].size); + } else { + memreg[0].offset = bar->offset; + memreg[0].size = bar->size; + } + + /* reserve the address using an inaccessible mapping */ + bar_addr = mmap(bar->addr, bar->size, 0, MAP_PRIVATE | + MAP_ANONYMOUS | additional_flags, -1, 0); + if (bar_addr != MAP_FAILED) { + void *map_addr = NULL; + if (memreg[0].size) { + /* actual map of first part */ + map_addr = pci_map_resource(bar_addr, vfio_dev_fd, + memreg[0].offset, + memreg[0].size, + MAP_FIXED); + } + + /* if there's a second part, try to map it */ + if (map_addr != MAP_FAILED + && memreg[1].offset && memreg[1].size) { + void *second_addr = RTE_PTR_ADD(bar_addr, + (uintptr_t)(memreg[1].offset - + bar->offset)); + map_addr = pci_map_resource(second_addr, + vfio_dev_fd, + memreg[1].offset, + memreg[1].size, + MAP_FIXED); + } + + if (map_addr == MAP_FAILED || !map_addr) { + munmap(bar_addr, bar->size); + bar_addr = MAP_FAILED; + RTE_LOG(ERR, EAL, "Failed to map pci BAR%d\n", + bar_index); + return -1; + } + } else { + RTE_LOG(ERR, EAL, + "Failed to create inaccessible mapping for BAR%d\n", + bar_index); + return -1; + } + + bar->addr = bar_addr; + return 0; +} + +/* + * region info may contain capability headers, so we need to keep reallocating + * the memory until we match allocated memory size with argsz. + */ +static int +pci_vfio_get_region_info(int vfio_dev_fd, struct vfio_region_info **info, + int region) +{ + struct vfio_region_info *ri; + size_t argsz = sizeof(*ri); + int ret; + + ri = malloc(sizeof(*ri)); + if (ri == NULL) { + RTE_LOG(ERR, EAL, "Cannot allocate memory for region info\n"); + return -1; + } +again: + memset(ri, 0, argsz); + ri->argsz = argsz; + ri->index = region; + + ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_REGION_INFO, ri); + if (ret < 0) { + free(ri); + return ret; + } + if (ri->argsz != argsz) { + struct vfio_region_info *tmp; + + argsz = ri->argsz; + tmp = realloc(ri, argsz); + + if (tmp == NULL) { + /* realloc failed but the ri is still there */ + free(ri); + RTE_LOG(ERR, EAL, "Cannot reallocate memory for region info\n"); + return -1; + } + ri = tmp; + goto again; + } + *info = ri; + + return 0; +} + +static struct vfio_info_cap_header * +pci_vfio_info_cap(struct vfio_region_info *info, int cap) +{ + struct vfio_info_cap_header *h; + size_t offset; + + if ((info->flags & RTE_VFIO_INFO_FLAG_CAPS) == 0) { + /* VFIO info does not advertise capabilities */ + return NULL; + } + + offset = VFIO_CAP_OFFSET(info); + while (offset != 0) { + h = RTE_PTR_ADD(info, offset); + if (h->id == cap) + return h; + offset = h->next; + } + return NULL; +} + +static int +pci_vfio_msix_is_mappable(int vfio_dev_fd, int msix_region) +{ + struct vfio_region_info *info; + int ret; + + ret = pci_vfio_get_region_info(vfio_dev_fd, &info, msix_region); + if (ret < 0) + return -1; + + ret = pci_vfio_info_cap(info, RTE_VFIO_CAP_MSIX_MAPPABLE) != NULL; + + /* cleanup */ + free(info); + + return ret; +} + + +static int +pci_vfio_map_resource_primary(struct rte_pci_device *dev) +{ + struct vfio_device_info device_info = { .argsz = sizeof(device_info) }; + char pci_addr[PATH_MAX] = {0}; + int vfio_dev_fd; + struct rte_pci_addr *loc = &dev->addr; + int i, ret; + struct mapped_pci_resource *vfio_res = NULL; + struct mapped_pci_res_list *vfio_res_list = + RTE_TAILQ_CAST(rte_vfio_tailq.head, mapped_pci_res_list); + + struct pci_map *maps; + + dev->intr_handle.fd = -1; +#ifdef HAVE_VFIO_DEV_REQ_INTERFACE + dev->vfio_req_intr_handle.fd = -1; +#endif + + /* store PCI address string */ + snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT, + loc->domain, loc->bus, loc->devid, loc->function); + + ret = rte_vfio_setup_device(rte_pci_get_sysfs_path(), pci_addr, + &vfio_dev_fd, &device_info); + if (ret) + return ret; + + /* allocate vfio_res and get region info */ + vfio_res = rte_zmalloc("VFIO_RES", sizeof(*vfio_res), 0); + if (vfio_res == NULL) { + RTE_LOG(ERR, EAL, + "%s(): cannot store vfio mmap details\n", __func__); + goto err_vfio_dev_fd; + } + memcpy(&vfio_res->pci_addr, &dev->addr, sizeof(vfio_res->pci_addr)); + + /* get number of registers (up to BAR5) */ + vfio_res->nb_maps = RTE_MIN((int) device_info.num_regions, + VFIO_PCI_BAR5_REGION_INDEX + 1); + + /* map BARs */ + maps = vfio_res->maps; + + vfio_res->msix_table.bar_index = -1; + /* get MSI-X BAR, if any (we have to know where it is because we can't + * easily mmap it when using VFIO) + */ + ret = pci_vfio_get_msix_bar(vfio_dev_fd, &vfio_res->msix_table); + if (ret < 0) { + RTE_LOG(ERR, EAL, " %s cannot get MSI-X BAR number!\n", + pci_addr); + goto err_vfio_res; + } + /* if we found our MSI-X BAR region, check if we can mmap it */ + if (vfio_res->msix_table.bar_index != -1) { + int ret = pci_vfio_msix_is_mappable(vfio_dev_fd, + vfio_res->msix_table.bar_index); + if (ret < 0) { + RTE_LOG(ERR, EAL, "Couldn't check if MSI-X BAR is mappable\n"); + goto err_vfio_res; + } else if (ret != 0) { + /* we can map it, so we don't care where it is */ + RTE_LOG(DEBUG, EAL, "VFIO reports MSI-X BAR as mappable\n"); + vfio_res->msix_table.bar_index = -1; + } + } + + for (i = 0; i < (int) vfio_res->nb_maps; i++) { + struct vfio_region_info *reg = NULL; + void *bar_addr; + + ret = pci_vfio_get_region_info(vfio_dev_fd, ®, i); + if (ret < 0) { + RTE_LOG(ERR, EAL, " %s cannot get device region info " + "error %i (%s)\n", pci_addr, errno, + strerror(errno)); + goto err_vfio_res; + } + + /* chk for io port region */ + ret = pci_vfio_is_ioport_bar(vfio_dev_fd, i); + if (ret < 0) { + free(reg); + goto err_vfio_res; + } else if (ret) { + RTE_LOG(INFO, EAL, "Ignore mapping IO port bar(%d)\n", + i); + free(reg); + continue; + } + + /* skip non-mmapable BARs */ + if ((reg->flags & VFIO_REGION_INFO_FLAG_MMAP) == 0) { + free(reg); + continue; + } + + /* try mapping somewhere close to the end of hugepages */ + if (pci_map_addr == NULL) + pci_map_addr = pci_find_max_end_va(); + + bar_addr = pci_map_addr; + pci_map_addr = RTE_PTR_ADD(bar_addr, (size_t) reg->size); + + pci_map_addr = RTE_PTR_ALIGN(pci_map_addr, + sysconf(_SC_PAGE_SIZE)); + + maps[i].addr = bar_addr; + maps[i].offset = reg->offset; + maps[i].size = reg->size; + maps[i].path = NULL; /* vfio doesn't have per-resource paths */ + + ret = pci_vfio_mmap_bar(vfio_dev_fd, vfio_res, i, 0); + if (ret < 0) { + RTE_LOG(ERR, EAL, " %s mapping BAR%i failed: %s\n", + pci_addr, i, strerror(errno)); + free(reg); + goto err_vfio_res; + } + + dev->mem_resource[i].addr = maps[i].addr; + + free(reg); + } + + if (pci_rte_vfio_setup_device(dev, vfio_dev_fd) < 0) { + RTE_LOG(ERR, EAL, " %s setup device failed\n", pci_addr); + goto err_vfio_res; + } + +#ifdef HAVE_VFIO_DEV_REQ_INTERFACE + if (pci_vfio_enable_notifier(dev, vfio_dev_fd) != 0) { + RTE_LOG(ERR, EAL, "Error setting up notifier!\n"); + goto err_vfio_res; + } + +#endif + TAILQ_INSERT_TAIL(vfio_res_list, vfio_res, next); + + return 0; +err_vfio_res: + rte_free(vfio_res); +err_vfio_dev_fd: + close(vfio_dev_fd); + return -1; +} + +static int +pci_vfio_map_resource_secondary(struct rte_pci_device *dev) +{ + struct vfio_device_info device_info = { .argsz = sizeof(device_info) }; + char pci_addr[PATH_MAX] = {0}; + int vfio_dev_fd; + struct rte_pci_addr *loc = &dev->addr; + int i, ret; + struct mapped_pci_resource *vfio_res = NULL; + struct mapped_pci_res_list *vfio_res_list = + RTE_TAILQ_CAST(rte_vfio_tailq.head, mapped_pci_res_list); + + struct pci_map *maps; + + dev->intr_handle.fd = -1; +#ifdef HAVE_VFIO_DEV_REQ_INTERFACE + dev->vfio_req_intr_handle.fd = -1; +#endif + + /* store PCI address string */ + snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT, + loc->domain, loc->bus, loc->devid, loc->function); + + /* if we're in a secondary process, just find our tailq entry */ + TAILQ_FOREACH(vfio_res, vfio_res_list, next) { + if (rte_pci_addr_cmp(&vfio_res->pci_addr, + &dev->addr)) + continue; + break; + } + /* if we haven't found our tailq entry, something's wrong */ + if (vfio_res == NULL) { + RTE_LOG(ERR, EAL, " %s cannot find TAILQ entry for PCI device!\n", + pci_addr); + return -1; + } + + ret = rte_vfio_setup_device(rte_pci_get_sysfs_path(), pci_addr, + &vfio_dev_fd, &device_info); + if (ret) + return ret; + + /* map BARs */ + maps = vfio_res->maps; + + for (i = 0; i < (int) vfio_res->nb_maps; i++) { + ret = pci_vfio_mmap_bar(vfio_dev_fd, vfio_res, i, MAP_FIXED); + if (ret < 0) { + RTE_LOG(ERR, EAL, " %s mapping BAR%i failed: %s\n", + pci_addr, i, strerror(errno)); + goto err_vfio_dev_fd; + } + + dev->mem_resource[i].addr = maps[i].addr; + } + + /* we need save vfio_dev_fd, so it can be used during release */ + dev->intr_handle.vfio_dev_fd = vfio_dev_fd; +#ifdef HAVE_VFIO_DEV_REQ_INTERFACE + dev->vfio_req_intr_handle.vfio_dev_fd = vfio_dev_fd; +#endif + + return 0; +err_vfio_dev_fd: + close(vfio_dev_fd); + return -1; +} + +/* + * map the PCI resources of a PCI device in virtual memory (VFIO version). + * primary and secondary processes follow almost exactly the same path + */ +int +pci_vfio_map_resource(struct rte_pci_device *dev) +{ + if (rte_eal_process_type() == RTE_PROC_PRIMARY) + return pci_vfio_map_resource_primary(dev); + else + return pci_vfio_map_resource_secondary(dev); +} + +static struct mapped_pci_resource * +find_and_unmap_vfio_resource(struct mapped_pci_res_list *vfio_res_list, + struct rte_pci_device *dev, + const char *pci_addr) +{ + struct mapped_pci_resource *vfio_res = NULL; + struct pci_map *maps; + int i; + + /* Get vfio_res */ + TAILQ_FOREACH(vfio_res, vfio_res_list, next) { + if (rte_pci_addr_cmp(&vfio_res->pci_addr, &dev->addr)) + continue; + break; + } + + if (vfio_res == NULL) + return vfio_res; + + RTE_LOG(INFO, EAL, "Releasing pci mapped resource for %s\n", + pci_addr); + + maps = vfio_res->maps; + for (i = 0; i < (int) vfio_res->nb_maps; i++) { + + /* + * We do not need to be aware of MSI-X table BAR mappings as + * when mapping. Just using current maps array is enough + */ + if (maps[i].addr) { + RTE_LOG(INFO, EAL, "Calling pci_unmap_resource for %s at %p\n", + pci_addr, maps[i].addr); + pci_unmap_resource(maps[i].addr, maps[i].size); + } + } + + return vfio_res; +} + +static int +pci_vfio_unmap_resource_primary(struct rte_pci_device *dev) +{ + char pci_addr[PATH_MAX] = {0}; + struct rte_pci_addr *loc = &dev->addr; + struct mapped_pci_resource *vfio_res = NULL; + struct mapped_pci_res_list *vfio_res_list; + int ret; + + /* store PCI address string */ + snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT, + loc->domain, loc->bus, loc->devid, loc->function); + +#ifdef HAVE_VFIO_DEV_REQ_INTERFACE + ret = pci_vfio_disable_notifier(dev); + if (ret) { + RTE_LOG(ERR, EAL, "fail to disable req notifier.\n"); + return -1; + } + +#endif + if (close(dev->intr_handle.fd) < 0) { + RTE_LOG(INFO, EAL, "Error when closing eventfd file descriptor for %s\n", + pci_addr); + return -1; + } + + if (pci_vfio_set_bus_master(dev->intr_handle.vfio_dev_fd, false)) { + RTE_LOG(ERR, EAL, " %s cannot unset bus mastering for PCI device!\n", + pci_addr); + return -1; + } + + ret = rte_vfio_release_device(rte_pci_get_sysfs_path(), pci_addr, + dev->intr_handle.vfio_dev_fd); + if (ret < 0) { + RTE_LOG(ERR, EAL, + "%s(): cannot release device\n", __func__); + return ret; + } + + vfio_res_list = + RTE_TAILQ_CAST(rte_vfio_tailq.head, mapped_pci_res_list); + vfio_res = find_and_unmap_vfio_resource(vfio_res_list, dev, pci_addr); + + /* if we haven't found our tailq entry, something's wrong */ + if (vfio_res == NULL) { + RTE_LOG(ERR, EAL, " %s cannot find TAILQ entry for PCI device!\n", + pci_addr); + return -1; + } + + TAILQ_REMOVE(vfio_res_list, vfio_res, next); + + return 0; +} + +static int +pci_vfio_unmap_resource_secondary(struct rte_pci_device *dev) +{ + char pci_addr[PATH_MAX] = {0}; + struct rte_pci_addr *loc = &dev->addr; + struct mapped_pci_resource *vfio_res = NULL; + struct mapped_pci_res_list *vfio_res_list; + int ret; + + /* store PCI address string */ + snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT, + loc->domain, loc->bus, loc->devid, loc->function); + + ret = rte_vfio_release_device(rte_pci_get_sysfs_path(), pci_addr, + dev->intr_handle.vfio_dev_fd); + if (ret < 0) { + RTE_LOG(ERR, EAL, + "%s(): cannot release device\n", __func__); + return ret; + } + + vfio_res_list = + RTE_TAILQ_CAST(rte_vfio_tailq.head, mapped_pci_res_list); + vfio_res = find_and_unmap_vfio_resource(vfio_res_list, dev, pci_addr); + + /* if we haven't found our tailq entry, something's wrong */ + if (vfio_res == NULL) { + RTE_LOG(ERR, EAL, " %s cannot find TAILQ entry for PCI device!\n", + pci_addr); + return -1; + } + + return 0; +} + +int +pci_vfio_unmap_resource(struct rte_pci_device *dev) +{ + if (rte_eal_process_type() == RTE_PROC_PRIMARY) + return pci_vfio_unmap_resource_primary(dev); + else + return pci_vfio_unmap_resource_secondary(dev); +} + +int +pci_vfio_ioport_map(struct rte_pci_device *dev, int bar, + struct rte_pci_ioport *p) +{ + if (bar < VFIO_PCI_BAR0_REGION_INDEX || + bar > VFIO_PCI_BAR5_REGION_INDEX) { + RTE_LOG(ERR, EAL, "invalid bar (%d)!\n", bar); + return -1; + } + + p->dev = dev; + p->base = VFIO_GET_REGION_ADDR(bar); + return 0; +} + +void +pci_vfio_ioport_read(struct rte_pci_ioport *p, + void *data, size_t len, off_t offset) +{ + const struct rte_intr_handle *intr_handle = &p->dev->intr_handle; + + if (pread64(intr_handle->vfio_dev_fd, data, + len, p->base + offset) <= 0) + RTE_LOG(ERR, EAL, + "Can't read from PCI bar (%" PRIu64 ") : offset (%x)\n", + VFIO_GET_REGION_IDX(p->base), (int)offset); +} + +void +pci_vfio_ioport_write(struct rte_pci_ioport *p, + const void *data, size_t len, off_t offset) +{ + const struct rte_intr_handle *intr_handle = &p->dev->intr_handle; + + if (pwrite64(intr_handle->vfio_dev_fd, data, + len, p->base + offset) <= 0) + RTE_LOG(ERR, EAL, + "Can't write to PCI bar (%" PRIu64 ") : offset (%x)\n", + VFIO_GET_REGION_IDX(p->base), (int)offset); +} + +int +pci_vfio_ioport_unmap(struct rte_pci_ioport *p) +{ + RTE_SET_USED(p); + return -1; +} + +int +pci_vfio_is_enabled(void) +{ + return rte_vfio_is_enabled("vfio_pci"); +} +#endif diff --git a/src/spdk/dpdk/drivers/bus/pci/meson.build b/src/spdk/dpdk/drivers/bus/pci/meson.build new file mode 100644 index 000000000..b520bdfc1 --- /dev/null +++ b/src/spdk/dpdk/drivers/bus/pci/meson.build @@ -0,0 +1,19 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2017 Intel Corporation + +deps += ['pci'] +install_headers('rte_bus_pci.h') +sources = files('pci_common.c', + 'pci_common_uio.c', + 'pci_params.c') +if is_linux + sources += files('linux/pci.c', + 'linux/pci_uio.c', + 'linux/pci_vfio.c') + includes += include_directories('linux') +else + sources += files('bsd/pci.c') + includes += include_directories('bsd') +endif + +deps += ['kvargs'] diff --git a/src/spdk/dpdk/drivers/bus/pci/pci_common.c b/src/spdk/dpdk/drivers/bus/pci/pci_common.c new file mode 100644 index 000000000..245d94f59 --- /dev/null +++ b/src/spdk/dpdk/drivers/bus/pci/pci_common.c @@ -0,0 +1,687 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2014 Intel Corporation. + * Copyright 2013-2014 6WIND S.A. + */ + +#include <string.h> +#include <inttypes.h> +#include <stdint.h> +#include <stdbool.h> +#include <stdlib.h> +#include <stdio.h> +#include <sys/queue.h> +#include <sys/mman.h> + +#include <rte_errno.h> +#include <rte_interrupts.h> +#include <rte_log.h> +#include <rte_bus.h> +#include <rte_pci.h> +#include <rte_bus_pci.h> +#include <rte_per_lcore.h> +#include <rte_memory.h> +#include <rte_eal.h> +#include <rte_string_fns.h> +#include <rte_common.h> +#include <rte_devargs.h> +#include <rte_vfio.h> + +#include "private.h" + + +#define SYSFS_PCI_DEVICES "/sys/bus/pci/devices" + +const char *rte_pci_get_sysfs_path(void) +{ + const char *path = NULL; + + path = getenv("SYSFS_PCI_DEVICES"); + if (path == NULL) + return SYSFS_PCI_DEVICES; + + return path; +} + +static struct rte_devargs * +pci_devargs_lookup(const struct rte_pci_addr *pci_addr) +{ + struct rte_devargs *devargs; + struct rte_pci_addr addr; + + RTE_EAL_DEVARGS_FOREACH("pci", devargs) { + devargs->bus->parse(devargs->name, &addr); + if (!rte_pci_addr_cmp(pci_addr, &addr)) + return devargs; + } + return NULL; +} + +void +pci_name_set(struct rte_pci_device *dev) +{ + struct rte_devargs *devargs; + + /* Each device has its internal, canonical name set. */ + rte_pci_device_name(&dev->addr, + dev->name, sizeof(dev->name)); + devargs = pci_devargs_lookup(&dev->addr); + dev->device.devargs = devargs; + /* In blacklist mode, if the device is not blacklisted, no + * rte_devargs exists for it. + */ + if (devargs != NULL) + /* If an rte_devargs exists, the generic rte_device uses the + * given name as its name. + */ + dev->device.name = dev->device.devargs->name; + else + /* Otherwise, it uses the internal, canonical form. */ + dev->device.name = dev->name; +} + +/* + * Match the PCI Driver and Device using the ID Table + */ +int +rte_pci_match(const struct rte_pci_driver *pci_drv, + const struct rte_pci_device *pci_dev) +{ + const struct rte_pci_id *id_table; + + for (id_table = pci_drv->id_table; id_table->vendor_id != 0; + id_table++) { + /* check if device's identifiers match the driver's ones */ + if (id_table->vendor_id != pci_dev->id.vendor_id && + id_table->vendor_id != PCI_ANY_ID) + continue; + if (id_table->device_id != pci_dev->id.device_id && + id_table->device_id != PCI_ANY_ID) + continue; + if (id_table->subsystem_vendor_id != + pci_dev->id.subsystem_vendor_id && + id_table->subsystem_vendor_id != PCI_ANY_ID) + continue; + if (id_table->subsystem_device_id != + pci_dev->id.subsystem_device_id && + id_table->subsystem_device_id != PCI_ANY_ID) + continue; + if (id_table->class_id != pci_dev->id.class_id && + id_table->class_id != RTE_CLASS_ANY_ID) + continue; + + return 1; + } + + return 0; +} + +/* + * If vendor/device ID match, call the probe() function of the + * driver. + */ +static int +rte_pci_probe_one_driver(struct rte_pci_driver *dr, + struct rte_pci_device *dev) +{ + int ret; + bool already_probed; + struct rte_pci_addr *loc; + + if ((dr == NULL) || (dev == NULL)) + return -EINVAL; + + loc = &dev->addr; + + /* The device is not blacklisted; Check if driver supports it */ + if (!rte_pci_match(dr, dev)) + /* Match of device and driver failed */ + return 1; + + RTE_LOG(DEBUG, EAL, "PCI device "PCI_PRI_FMT" on NUMA socket %i\n", + loc->domain, loc->bus, loc->devid, loc->function, + dev->device.numa_node); + + /* no initialization when blacklisted, return without error */ + if (dev->device.devargs != NULL && + dev->device.devargs->policy == + RTE_DEV_BLACKLISTED) { + RTE_LOG(INFO, EAL, " Device is blacklisted, not" + " initializing\n"); + return 1; + } + + if (dev->device.numa_node < 0) { + RTE_LOG(WARNING, EAL, " Invalid NUMA socket, default to 0\n"); + dev->device.numa_node = 0; + } + + already_probed = rte_dev_is_probed(&dev->device); + if (already_probed && !(dr->drv_flags & RTE_PCI_DRV_PROBE_AGAIN)) { + RTE_LOG(DEBUG, EAL, "Device %s is already probed\n", + dev->device.name); + return -EEXIST; + } + + RTE_LOG(DEBUG, EAL, " probe driver: %x:%x %s\n", dev->id.vendor_id, + dev->id.device_id, dr->driver.name); + + /* + * reference driver structure + * This needs to be before rte_pci_map_device(), as it enables to use + * driver flags for adjusting configuration. + */ + if (!already_probed) { + enum rte_iova_mode dev_iova_mode; + enum rte_iova_mode iova_mode; + + dev_iova_mode = pci_device_iova_mode(dr, dev); + iova_mode = rte_eal_iova_mode(); + if (dev_iova_mode != RTE_IOVA_DC && + dev_iova_mode != iova_mode) { + RTE_LOG(ERR, EAL, " Expecting '%s' IOVA mode but current mode is '%s', not initializing\n", + dev_iova_mode == RTE_IOVA_PA ? "PA" : "VA", + iova_mode == RTE_IOVA_PA ? "PA" : "VA"); + return -EINVAL; + } + + dev->driver = dr; + } + + if (!already_probed && (dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING)) { + /* map resources for devices that use igb_uio */ + ret = rte_pci_map_device(dev); + if (ret != 0) { + dev->driver = NULL; + return ret; + } + } + + RTE_LOG(INFO, EAL, "Probe PCI driver: %s (%x:%x) device: "PCI_PRI_FMT" (socket %i)\n", + dr->driver.name, dev->id.vendor_id, dev->id.device_id, + loc->domain, loc->bus, loc->devid, loc->function, + dev->device.numa_node); + /* call the driver probe() function */ + ret = dr->probe(dr, dev); + if (already_probed) + return ret; /* no rollback if already succeeded earlier */ + if (ret) { + dev->driver = NULL; + if ((dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING) && + /* Don't unmap if device is unsupported and + * driver needs mapped resources. + */ + !(ret > 0 && + (dr->drv_flags & RTE_PCI_DRV_KEEP_MAPPED_RES))) + rte_pci_unmap_device(dev); + } else { + dev->device.driver = &dr->driver; + } + + return ret; +} + +/* + * If vendor/device ID match, call the remove() function of the + * driver. + */ +static int +rte_pci_detach_dev(struct rte_pci_device *dev) +{ + struct rte_pci_addr *loc; + struct rte_pci_driver *dr; + int ret = 0; + + if (dev == NULL) + return -EINVAL; + + dr = dev->driver; + loc = &dev->addr; + + RTE_LOG(DEBUG, EAL, "PCI device "PCI_PRI_FMT" on NUMA socket %i\n", + loc->domain, loc->bus, loc->devid, + loc->function, dev->device.numa_node); + + RTE_LOG(DEBUG, EAL, " remove driver: %x:%x %s\n", dev->id.vendor_id, + dev->id.device_id, dr->driver.name); + + if (dr->remove) { + ret = dr->remove(dev); + if (ret < 0) + return ret; + } + + /* clear driver structure */ + dev->driver = NULL; + dev->device.driver = NULL; + + if (dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING) + /* unmap resources for devices that use igb_uio */ + rte_pci_unmap_device(dev); + + return 0; +} + +/* + * If vendor/device ID match, call the probe() function of all + * registered driver for the given device. Return < 0 if initialization + * failed, return 1 if no driver is found for this device. + */ +static int +pci_probe_all_drivers(struct rte_pci_device *dev) +{ + struct rte_pci_driver *dr = NULL; + int rc = 0; + + if (dev == NULL) + return -EINVAL; + + FOREACH_DRIVER_ON_PCIBUS(dr) { + rc = rte_pci_probe_one_driver(dr, dev); + if (rc < 0) + /* negative value is an error */ + return rc; + if (rc > 0) + /* positive value means driver doesn't support it */ + continue; + return 0; + } + return 1; +} + +/* + * Scan the content of the PCI bus, and call the probe() function for + * all registered drivers that have a matching entry in its id_table + * for discovered devices. + */ +static int +pci_probe(void) +{ + struct rte_pci_device *dev = NULL; + size_t probed = 0, failed = 0; + int ret = 0; + + FOREACH_DEVICE_ON_PCIBUS(dev) { + probed++; + + ret = pci_probe_all_drivers(dev); + if (ret < 0) { + if (ret != -EEXIST) { + RTE_LOG(ERR, EAL, "Requested device " + PCI_PRI_FMT " cannot be used\n", + dev->addr.domain, dev->addr.bus, + dev->addr.devid, dev->addr.function); + rte_errno = errno; + failed++; + } + ret = 0; + } + } + + return (probed && probed == failed) ? -1 : 0; +} + +/* dump one device */ +static int +pci_dump_one_device(FILE *f, struct rte_pci_device *dev) +{ + int i; + + fprintf(f, PCI_PRI_FMT, dev->addr.domain, dev->addr.bus, + dev->addr.devid, dev->addr.function); + fprintf(f, " - vendor:%x device:%x\n", dev->id.vendor_id, + dev->id.device_id); + + for (i = 0; i != sizeof(dev->mem_resource) / + sizeof(dev->mem_resource[0]); i++) { + fprintf(f, " %16.16"PRIx64" %16.16"PRIx64"\n", + dev->mem_resource[i].phys_addr, + dev->mem_resource[i].len); + } + return 0; +} + +/* dump devices on the bus */ +void +rte_pci_dump(FILE *f) +{ + struct rte_pci_device *dev = NULL; + + FOREACH_DEVICE_ON_PCIBUS(dev) { + pci_dump_one_device(f, dev); + } +} + +static int +pci_parse(const char *name, void *addr) +{ + struct rte_pci_addr *out = addr; + struct rte_pci_addr pci_addr; + bool parse; + + parse = (rte_pci_addr_parse(name, &pci_addr) == 0); + if (parse && addr != NULL) + *out = pci_addr; + return parse == false; +} + +/* register a driver */ +void +rte_pci_register(struct rte_pci_driver *driver) +{ + TAILQ_INSERT_TAIL(&rte_pci_bus.driver_list, driver, next); + driver->bus = &rte_pci_bus; +} + +/* unregister a driver */ +void +rte_pci_unregister(struct rte_pci_driver *driver) +{ + TAILQ_REMOVE(&rte_pci_bus.driver_list, driver, next); + driver->bus = NULL; +} + +/* Add a device to PCI bus */ +void +rte_pci_add_device(struct rte_pci_device *pci_dev) +{ + TAILQ_INSERT_TAIL(&rte_pci_bus.device_list, pci_dev, next); +} + +/* Insert a device into a predefined position in PCI bus */ +void +rte_pci_insert_device(struct rte_pci_device *exist_pci_dev, + struct rte_pci_device *new_pci_dev) +{ + TAILQ_INSERT_BEFORE(exist_pci_dev, new_pci_dev, next); +} + +/* Remove a device from PCI bus */ +static void +rte_pci_remove_device(struct rte_pci_device *pci_dev) +{ + TAILQ_REMOVE(&rte_pci_bus.device_list, pci_dev, next); +} + +static struct rte_device * +pci_find_device(const struct rte_device *start, rte_dev_cmp_t cmp, + const void *data) +{ + const struct rte_pci_device *pstart; + struct rte_pci_device *pdev; + + if (start != NULL) { + pstart = RTE_DEV_TO_PCI_CONST(start); + pdev = TAILQ_NEXT(pstart, next); + } else { + pdev = TAILQ_FIRST(&rte_pci_bus.device_list); + } + while (pdev != NULL) { + if (cmp(&pdev->device, data) == 0) + return &pdev->device; + pdev = TAILQ_NEXT(pdev, next); + } + return NULL; +} + +/* + * find the device which encounter the failure, by iterate over all device on + * PCI bus to check if the memory failure address is located in the range + * of the BARs of the device. + */ +static struct rte_pci_device * +pci_find_device_by_addr(const void *failure_addr) +{ + struct rte_pci_device *pdev = NULL; + uint64_t check_point, start, end, len; + int i; + + check_point = (uint64_t)(uintptr_t)failure_addr; + + FOREACH_DEVICE_ON_PCIBUS(pdev) { + for (i = 0; i != RTE_DIM(pdev->mem_resource); i++) { + start = (uint64_t)(uintptr_t)pdev->mem_resource[i].addr; + len = pdev->mem_resource[i].len; + end = start + len; + if (check_point >= start && check_point < end) { + RTE_LOG(DEBUG, EAL, "Failure address %16.16" + PRIx64" belongs to device %s!\n", + check_point, pdev->device.name); + return pdev; + } + } + } + return NULL; +} + +static int +pci_hot_unplug_handler(struct rte_device *dev) +{ + struct rte_pci_device *pdev = NULL; + int ret = 0; + + pdev = RTE_DEV_TO_PCI(dev); + if (!pdev) + return -1; + + switch (pdev->kdrv) { +#ifdef HAVE_VFIO_DEV_REQ_INTERFACE + case RTE_KDRV_VFIO: + /* + * vfio kernel module guaranty the pci device would not be + * deleted until the user space release the resource, so no + * need to remap BARs resource here, just directly notify + * the req event to the user space to handle it. + */ + rte_dev_event_callback_process(dev->name, + RTE_DEV_EVENT_REMOVE); + break; +#endif + case RTE_KDRV_IGB_UIO: + case RTE_KDRV_UIO_GENERIC: + case RTE_KDRV_NIC_UIO: + /* BARs resource is invalid, remap it to be safe. */ + ret = pci_uio_remap_resource(pdev); + break; + default: + RTE_LOG(DEBUG, EAL, + "Not managed by a supported kernel driver, skipped\n"); + ret = -1; + break; + } + + return ret; +} + +static int +pci_sigbus_handler(const void *failure_addr) +{ + struct rte_pci_device *pdev = NULL; + int ret = 0; + + pdev = pci_find_device_by_addr(failure_addr); + if (!pdev) { + /* It is a generic sigbus error, no bus would handle it. */ + ret = 1; + } else { + /* The sigbus error is caused of hot-unplug. */ + ret = pci_hot_unplug_handler(&pdev->device); + if (ret) { + RTE_LOG(ERR, EAL, + "Failed to handle hot-unplug for device %s", + pdev->name); + ret = -1; + } + } + return ret; +} + +static int +pci_plug(struct rte_device *dev) +{ + return pci_probe_all_drivers(RTE_DEV_TO_PCI(dev)); +} + +static int +pci_unplug(struct rte_device *dev) +{ + struct rte_pci_device *pdev; + int ret; + + pdev = RTE_DEV_TO_PCI(dev); + ret = rte_pci_detach_dev(pdev); + if (ret == 0) { + rte_pci_remove_device(pdev); + rte_devargs_remove(dev->devargs); + free(pdev); + } + return ret; +} + +static int +pci_dma_map(struct rte_device *dev, void *addr, uint64_t iova, size_t len) +{ + struct rte_pci_device *pdev = RTE_DEV_TO_PCI(dev); + + if (!pdev || !pdev->driver) { + rte_errno = EINVAL; + return -1; + } + if (pdev->driver->dma_map) + return pdev->driver->dma_map(pdev, addr, iova, len); + /** + * In case driver don't provides any specific mapping + * try fallback to VFIO. + */ + if (pdev->kdrv == RTE_KDRV_VFIO) + return rte_vfio_container_dma_map + (RTE_VFIO_DEFAULT_CONTAINER_FD, (uintptr_t)addr, + iova, len); + rte_errno = ENOTSUP; + return -1; +} + +static int +pci_dma_unmap(struct rte_device *dev, void *addr, uint64_t iova, size_t len) +{ + struct rte_pci_device *pdev = RTE_DEV_TO_PCI(dev); + + if (!pdev || !pdev->driver) { + rte_errno = EINVAL; + return -1; + } + if (pdev->driver->dma_unmap) + return pdev->driver->dma_unmap(pdev, addr, iova, len); + /** + * In case driver don't provides any specific mapping + * try fallback to VFIO. + */ + if (pdev->kdrv == RTE_KDRV_VFIO) + return rte_vfio_container_dma_unmap + (RTE_VFIO_DEFAULT_CONTAINER_FD, (uintptr_t)addr, + iova, len); + rte_errno = ENOTSUP; + return -1; +} + +bool +rte_pci_ignore_device(const struct rte_pci_addr *pci_addr) +{ + struct rte_devargs *devargs = pci_devargs_lookup(pci_addr); + + switch (rte_pci_bus.bus.conf.scan_mode) { + case RTE_BUS_SCAN_WHITELIST: + if (devargs && devargs->policy == RTE_DEV_WHITELISTED) + return false; + break; + case RTE_BUS_SCAN_UNDEFINED: + case RTE_BUS_SCAN_BLACKLIST: + if (devargs == NULL || + devargs->policy != RTE_DEV_BLACKLISTED) + return false; + break; + } + return true; +} + +enum rte_iova_mode +rte_pci_get_iommu_class(void) +{ + enum rte_iova_mode iova_mode = RTE_IOVA_DC; + const struct rte_pci_device *dev; + const struct rte_pci_driver *drv; + bool devices_want_va = false; + bool devices_want_pa = false; + int iommu_no_va = -1; + + FOREACH_DEVICE_ON_PCIBUS(dev) { + /* + * We can check this only once, because the IOMMU hardware is + * the same for all of them. + */ + if (iommu_no_va == -1) + iommu_no_va = pci_device_iommu_support_va(dev) + ? 0 : 1; + + if (dev->kdrv == RTE_KDRV_UNKNOWN || + dev->kdrv == RTE_KDRV_NONE) + continue; + FOREACH_DRIVER_ON_PCIBUS(drv) { + enum rte_iova_mode dev_iova_mode; + + if (!rte_pci_match(drv, dev)) + continue; + + dev_iova_mode = pci_device_iova_mode(drv, dev); + RTE_LOG(DEBUG, EAL, "PCI driver %s for device " + PCI_PRI_FMT " wants IOVA as '%s'\n", + drv->driver.name, + dev->addr.domain, dev->addr.bus, + dev->addr.devid, dev->addr.function, + dev_iova_mode == RTE_IOVA_DC ? "DC" : + (dev_iova_mode == RTE_IOVA_PA ? "PA" : "VA")); + if (dev_iova_mode == RTE_IOVA_PA) + devices_want_pa = true; + else if (dev_iova_mode == RTE_IOVA_VA) + devices_want_va = true; + } + } + if (iommu_no_va == 1) { + iova_mode = RTE_IOVA_PA; + if (devices_want_va) { + RTE_LOG(WARNING, EAL, "Some devices want 'VA' but IOMMU does not support 'VA'.\n"); + RTE_LOG(WARNING, EAL, "The devices that want 'VA' won't initialize.\n"); + } + } else if (devices_want_va && !devices_want_pa) { + iova_mode = RTE_IOVA_VA; + } else if (devices_want_pa && !devices_want_va) { + iova_mode = RTE_IOVA_PA; + } else { + iova_mode = RTE_IOVA_DC; + if (devices_want_va) { + RTE_LOG(WARNING, EAL, "Some devices want 'VA' but forcing 'DC' because other devices want 'PA'.\n"); + RTE_LOG(WARNING, EAL, "Depending on the final decision by the EAL, not all devices may be able to initialize.\n"); + } + } + return iova_mode; +} + +struct rte_pci_bus rte_pci_bus = { + .bus = { + .scan = rte_pci_scan, + .probe = pci_probe, + .find_device = pci_find_device, + .plug = pci_plug, + .unplug = pci_unplug, + .parse = pci_parse, + .dma_map = pci_dma_map, + .dma_unmap = pci_dma_unmap, + .get_iommu_class = rte_pci_get_iommu_class, + .dev_iterate = rte_pci_dev_iterate, + .hot_unplug_handler = pci_hot_unplug_handler, + .sigbus_handler = pci_sigbus_handler, + }, + .device_list = TAILQ_HEAD_INITIALIZER(rte_pci_bus.device_list), + .driver_list = TAILQ_HEAD_INITIALIZER(rte_pci_bus.driver_list), +}; + +RTE_REGISTER_BUS(pci, rte_pci_bus.bus); diff --git a/src/spdk/dpdk/drivers/bus/pci/pci_common_uio.c b/src/spdk/dpdk/drivers/bus/pci/pci_common_uio.c new file mode 100644 index 000000000..f4dca9da9 --- /dev/null +++ b/src/spdk/dpdk/drivers/bus/pci/pci_common_uio.c @@ -0,0 +1,239 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2015 Intel Corporation + */ + +#include <fcntl.h> +#include <string.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/mman.h> + +#include <rte_eal.h> +#include <rte_pci.h> +#include <rte_bus_pci.h> +#include <rte_tailq.h> +#include <rte_log.h> +#include <rte_malloc.h> + +#include "private.h" + +static struct rte_tailq_elem rte_uio_tailq = { + .name = "UIO_RESOURCE_LIST", +}; +EAL_REGISTER_TAILQ(rte_uio_tailq) + +static int +pci_uio_map_secondary(struct rte_pci_device *dev) +{ + int fd, i, j; + struct mapped_pci_resource *uio_res; + struct mapped_pci_res_list *uio_res_list = + RTE_TAILQ_CAST(rte_uio_tailq.head, mapped_pci_res_list); + + TAILQ_FOREACH(uio_res, uio_res_list, next) { + + /* skip this element if it doesn't match our PCI address */ + if (rte_pci_addr_cmp(&uio_res->pci_addr, &dev->addr)) + continue; + + for (i = 0; i != uio_res->nb_maps; i++) { + /* + * open devname, to mmap it + */ + fd = open(uio_res->maps[i].path, O_RDWR); + if (fd < 0) { + RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", + uio_res->maps[i].path, strerror(errno)); + return -1; + } + + void *mapaddr = pci_map_resource(uio_res->maps[i].addr, + fd, (off_t)uio_res->maps[i].offset, + (size_t)uio_res->maps[i].size, 0); + /* fd is not needed in slave process, close it */ + close(fd); + if (mapaddr != uio_res->maps[i].addr) { + RTE_LOG(ERR, EAL, + "Cannot mmap device resource file %s to address: %p\n", + uio_res->maps[i].path, + uio_res->maps[i].addr); + if (mapaddr != MAP_FAILED) { + /* unmap addrs correctly mapped */ + for (j = 0; j < i; j++) + pci_unmap_resource( + uio_res->maps[j].addr, + (size_t)uio_res->maps[j].size); + /* unmap addr wrongly mapped */ + pci_unmap_resource(mapaddr, + (size_t)uio_res->maps[i].size); + } + return -1; + } + dev->mem_resource[i].addr = mapaddr; + } + return 0; + } + + RTE_LOG(ERR, EAL, "Cannot find resource for device\n"); + return 1; +} + +/* map the PCI resource of a PCI device in virtual memory */ +int +pci_uio_map_resource(struct rte_pci_device *dev) +{ + int i, map_idx = 0, ret; + uint64_t phaddr; + struct mapped_pci_resource *uio_res = NULL; + struct mapped_pci_res_list *uio_res_list = + RTE_TAILQ_CAST(rte_uio_tailq.head, mapped_pci_res_list); + + dev->intr_handle.fd = -1; + dev->intr_handle.uio_cfg_fd = -1; + + /* secondary processes - use already recorded details */ + if (rte_eal_process_type() != RTE_PROC_PRIMARY) + return pci_uio_map_secondary(dev); + + /* allocate uio resource */ + ret = pci_uio_alloc_resource(dev, &uio_res); + if (ret) + return ret; + + /* Map all BARs */ + for (i = 0; i != PCI_MAX_RESOURCE; i++) { + /* skip empty BAR */ + phaddr = dev->mem_resource[i].phys_addr; + if (phaddr == 0) + continue; + + ret = pci_uio_map_resource_by_index(dev, i, + uio_res, map_idx); + if (ret) + goto error; + + map_idx++; + } + + uio_res->nb_maps = map_idx; + + TAILQ_INSERT_TAIL(uio_res_list, uio_res, next); + + return 0; +error: + for (i = 0; i < map_idx; i++) { + pci_unmap_resource(uio_res->maps[i].addr, + (size_t)uio_res->maps[i].size); + rte_free(uio_res->maps[i].path); + } + pci_uio_free_resource(dev, uio_res); + return -1; +} + +static void +pci_uio_unmap(struct mapped_pci_resource *uio_res) +{ + int i; + + if (uio_res == NULL) + return; + + for (i = 0; i != uio_res->nb_maps; i++) { + pci_unmap_resource(uio_res->maps[i].addr, + (size_t)uio_res->maps[i].size); + if (rte_eal_process_type() == RTE_PROC_PRIMARY) + rte_free(uio_res->maps[i].path); + } +} + +/* remap the PCI resource of a PCI device in anonymous virtual memory */ +int +pci_uio_remap_resource(struct rte_pci_device *dev) +{ + int i; + void *map_address; + + if (dev == NULL) + return -1; + + /* Remap all BARs */ + for (i = 0; i != PCI_MAX_RESOURCE; i++) { + /* skip empty BAR */ + if (dev->mem_resource[i].phys_addr == 0) + continue; + map_address = mmap(dev->mem_resource[i].addr, + (size_t)dev->mem_resource[i].len, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); + if (map_address == MAP_FAILED) { + RTE_LOG(ERR, EAL, + "Cannot remap resource for device %s\n", + dev->name); + return -1; + } + RTE_LOG(INFO, EAL, + "Successful remap resource for device %s\n", + dev->name); + } + + return 0; +} + +static struct mapped_pci_resource * +pci_uio_find_resource(struct rte_pci_device *dev) +{ + struct mapped_pci_resource *uio_res; + struct mapped_pci_res_list *uio_res_list = + RTE_TAILQ_CAST(rte_uio_tailq.head, mapped_pci_res_list); + + if (dev == NULL) + return NULL; + + TAILQ_FOREACH(uio_res, uio_res_list, next) { + + /* skip this element if it doesn't match our PCI address */ + if (!rte_pci_addr_cmp(&uio_res->pci_addr, &dev->addr)) + return uio_res; + } + return NULL; +} + +/* unmap the PCI resource of a PCI device in virtual memory */ +void +pci_uio_unmap_resource(struct rte_pci_device *dev) +{ + struct mapped_pci_resource *uio_res; + struct mapped_pci_res_list *uio_res_list = + RTE_TAILQ_CAST(rte_uio_tailq.head, mapped_pci_res_list); + + if (dev == NULL) + return; + + /* find an entry for the device */ + uio_res = pci_uio_find_resource(dev); + if (uio_res == NULL) + return; + + /* secondary processes - just free maps */ + if (rte_eal_process_type() != RTE_PROC_PRIMARY) + return pci_uio_unmap(uio_res); + + TAILQ_REMOVE(uio_res_list, uio_res, next); + + /* unmap all resources */ + pci_uio_unmap(uio_res); + + /* free uio resource */ + rte_free(uio_res); + + /* close fd if in primary process */ + close(dev->intr_handle.fd); + if (dev->intr_handle.uio_cfg_fd >= 0) { + close(dev->intr_handle.uio_cfg_fd); + dev->intr_handle.uio_cfg_fd = -1; + } + + dev->intr_handle.fd = -1; + dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; +} diff --git a/src/spdk/dpdk/drivers/bus/pci/pci_params.c b/src/spdk/dpdk/drivers/bus/pci/pci_params.c new file mode 100644 index 000000000..3192e9c96 --- /dev/null +++ b/src/spdk/dpdk/drivers/bus/pci/pci_params.c @@ -0,0 +1,78 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2018 GaĆ«tan Rivet + */ + +#include <rte_bus.h> +#include <rte_bus_pci.h> +#include <rte_dev.h> +#include <rte_errno.h> +#include <rte_kvargs.h> +#include <rte_pci.h> + +#include "private.h" + +enum pci_params { + RTE_PCI_PARAM_ADDR, + RTE_PCI_PARAM_MAX, +}; + +static const char * const pci_params_keys[] = { + [RTE_PCI_PARAM_ADDR] = "addr", + [RTE_PCI_PARAM_MAX] = NULL, +}; + +static int +pci_addr_kv_cmp(const char *key __rte_unused, + const char *value, + void *_addr2) +{ + struct rte_pci_addr _addr1; + struct rte_pci_addr *addr1 = &_addr1; + struct rte_pci_addr *addr2 = _addr2; + + if (rte_pci_addr_parse(value, addr1)) + return -1; + return -abs(rte_pci_addr_cmp(addr1, addr2)); +} + +static int +pci_dev_match(const struct rte_device *dev, + const void *_kvlist) +{ + const struct rte_kvargs *kvlist = _kvlist; + const struct rte_pci_device *pdev; + + if (kvlist == NULL) + /* Empty string matches everything. */ + return 0; + pdev = RTE_DEV_TO_PCI_CONST(dev); + /* if any field does not match. */ + if (rte_kvargs_process(kvlist, pci_params_keys[RTE_PCI_PARAM_ADDR], + &pci_addr_kv_cmp, + (void *)(intptr_t)&pdev->addr)) + return 1; + return 0; +} + +void * +rte_pci_dev_iterate(const void *start, + const char *str, + const struct rte_dev_iterator *it __rte_unused) +{ + rte_bus_find_device_t find_device; + struct rte_kvargs *kvargs = NULL; + struct rte_device *dev; + + if (str != NULL) { + kvargs = rte_kvargs_parse(str, pci_params_keys); + if (kvargs == NULL) { + RTE_LOG(ERR, EAL, "cannot parse argument list\n"); + rte_errno = EINVAL; + return NULL; + } + } + find_device = rte_pci_bus.bus.find_device; + dev = find_device(start, pci_dev_match, kvargs); + rte_kvargs_free(kvargs); + return dev; +} diff --git a/src/spdk/dpdk/drivers/bus/pci/private.h b/src/spdk/dpdk/drivers/bus/pci/private.h new file mode 100644 index 000000000..367cdd9a6 --- /dev/null +++ b/src/spdk/dpdk/drivers/bus/pci/private.h @@ -0,0 +1,220 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017 6WIND S.A. + */ + +#ifndef _PCI_PRIVATE_H_ +#define _PCI_PRIVATE_H_ + +#include <stdbool.h> +#include <stdio.h> +#include <rte_pci.h> +#include <rte_bus_pci.h> + +extern struct rte_pci_bus rte_pci_bus; + +struct rte_pci_driver; +struct rte_pci_device; + +extern struct rte_pci_bus rte_pci_bus; + +/** + * Scan the content of the PCI bus, and the devices in the devices + * list + * + * @return + * 0 on success, negative on error + */ +int rte_pci_scan(void); + +/** + * Find the name of a PCI device. + */ +void +pci_name_set(struct rte_pci_device *dev); + +/** + * Validate whether a device with given PCI address should be ignored or not. + * + * @param pci_addr + * PCI address of device to be validated + * @return + * true: if device is to be ignored, + * false: if device is to be scanned, + */ +bool rte_pci_ignore_device(const struct rte_pci_addr *pci_addr); + +/** + * Add a PCI device to the PCI Bus (append to PCI Device list). This function + * also updates the bus references of the PCI Device (and the generic device + * object embedded within. + * + * @param pci_dev + * PCI device to add + * @return void + */ +void rte_pci_add_device(struct rte_pci_device *pci_dev); + +/** + * Insert a PCI device in the PCI Bus at a particular location in the device + * list. It also updates the PCI Bus reference of the new devices to be + * inserted. + * + * @param exist_pci_dev + * Existing PCI device in PCI Bus + * @param new_pci_dev + * PCI device to be added before exist_pci_dev + * @return void + */ +void rte_pci_insert_device(struct rte_pci_device *exist_pci_dev, + struct rte_pci_device *new_pci_dev); + +/** + * Update a pci device object by asking the kernel for the latest information. + * + * This function is private to EAL. + * + * @param addr + * The PCI Bus-Device-Function address to look for + * @return + * - 0 on success. + * - negative on error. + */ +int pci_update_device(const struct rte_pci_addr *addr); + +/** + * Map the PCI resource of a PCI device in virtual memory + * + * This function is private to EAL. + * + * @return + * 0 on success, negative on error + */ +int pci_uio_map_resource(struct rte_pci_device *dev); + +/** + * Unmap the PCI resource of a PCI device + * + * This function is private to EAL. + */ +void pci_uio_unmap_resource(struct rte_pci_device *dev); + +/** + * Allocate uio resource for PCI device + * + * This function is private to EAL. + * + * @param dev + * PCI device to allocate uio resource + * @param uio_res + * Pointer to uio resource. + * If the function returns 0, the pointer will be filled. + * @return + * 0 on success, negative on error + */ +int pci_uio_alloc_resource(struct rte_pci_device *dev, + struct mapped_pci_resource **uio_res); + +/** + * Free uio resource for PCI device + * + * This function is private to EAL. + * + * @param dev + * PCI device to free uio resource + * @param uio_res + * Pointer to uio resource. + */ +void pci_uio_free_resource(struct rte_pci_device *dev, + struct mapped_pci_resource *uio_res); + +/** + * Remap the PCI resource of a PCI device in anonymous virtual memory. + * + * @param dev + * Point to the struct rte pci device. + * @return + * - On success, zero. + * - On failure, a negative value. + */ +int +pci_uio_remap_resource(struct rte_pci_device *dev); + +/** + * Map device memory to uio resource + * + * This function is private to EAL. + * + * @param dev + * PCI device that has memory information. + * @param res_idx + * Memory resource index of the PCI device. + * @param uio_res + * uio resource that will keep mapping information. + * @param map_idx + * Mapping information index of the uio resource. + * @return + * 0 on success, negative on error + */ +int pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx, + struct mapped_pci_resource *uio_res, int map_idx); + +/* + * Match the PCI Driver and Device using the ID Table + * + * @param pci_drv + * PCI driver from which ID table would be extracted + * @param pci_dev + * PCI device to match against the driver + * @return + * 1 for successful match + * 0 for unsuccessful match + */ +int +rte_pci_match(const struct rte_pci_driver *pci_drv, + const struct rte_pci_device *pci_dev); + +/** + * OS specific callbacks for rte_pci_get_iommu_class + * + */ +bool +pci_device_iommu_support_va(const struct rte_pci_device *dev); + +enum rte_iova_mode +pci_device_iova_mode(const struct rte_pci_driver *pci_drv, + const struct rte_pci_device *pci_dev); + +/** + * Get iommu class of PCI devices on the bus. + * And return their preferred iova mapping mode. + * + * @return + * - enum rte_iova_mode. + */ +enum rte_iova_mode +rte_pci_get_iommu_class(void); + +/* + * Iterate over internal devices, + * matching any device against the provided + * string. + * + * @param start + * Iteration starting point. + * + * @param str + * Device string to match against. + * + * @param it + * (unused) iterator structure. + * + * @return + * A pointer to the next matching device if any. + * NULL otherwise. + */ +void * +rte_pci_dev_iterate(const void *start, + const char *str, + const struct rte_dev_iterator *it); + +#endif /* _PCI_PRIVATE_H_ */ diff --git a/src/spdk/dpdk/drivers/bus/pci/rte_bus_pci.h b/src/spdk/dpdk/drivers/bus/pci/rte_bus_pci.h new file mode 100644 index 000000000..29bea6d70 --- /dev/null +++ b/src/spdk/dpdk/drivers/bus/pci/rte_bus_pci.h @@ -0,0 +1,361 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2015 Intel Corporation. + * Copyright 2013-2014 6WIND S.A. + */ + +#ifndef _RTE_BUS_PCI_H_ +#define _RTE_BUS_PCI_H_ + +/** + * @file + * + * RTE PCI Bus Interface + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include <stdio.h> +#include <stdlib.h> +#include <limits.h> +#include <errno.h> +#include <sys/queue.h> +#include <stdint.h> +#include <inttypes.h> + +#include <rte_debug.h> +#include <rte_interrupts.h> +#include <rte_dev.h> +#include <rte_bus.h> +#include <rte_pci.h> + +/** Pathname of PCI devices directory. */ +const char *rte_pci_get_sysfs_path(void); + +/* Forward declarations */ +struct rte_pci_device; +struct rte_pci_driver; + +/** List of PCI devices */ +TAILQ_HEAD(rte_pci_device_list, rte_pci_device); +/** List of PCI drivers */ +TAILQ_HEAD(rte_pci_driver_list, rte_pci_driver); + +/* PCI Bus iterators */ +#define FOREACH_DEVICE_ON_PCIBUS(p) \ + TAILQ_FOREACH(p, &(rte_pci_bus.device_list), next) + +#define FOREACH_DRIVER_ON_PCIBUS(p) \ + TAILQ_FOREACH(p, &(rte_pci_bus.driver_list), next) + +struct rte_devargs; + +/** + * A structure describing a PCI device. + */ +struct rte_pci_device { + TAILQ_ENTRY(rte_pci_device) next; /**< Next probed PCI device. */ + struct rte_device device; /**< Inherit core device */ + struct rte_pci_addr addr; /**< PCI location. */ + struct rte_pci_id id; /**< PCI ID. */ + struct rte_mem_resource mem_resource[PCI_MAX_RESOURCE]; + /**< PCI Memory Resource */ + struct rte_intr_handle intr_handle; /**< Interrupt handle */ + struct rte_pci_driver *driver; /**< PCI driver used in probing */ + uint16_t max_vfs; /**< sriov enable if not zero */ + enum rte_kernel_driver kdrv; /**< Kernel driver passthrough */ + char name[PCI_PRI_STR_SIZE+1]; /**< PCI location (ASCII) */ + struct rte_intr_handle vfio_req_intr_handle; + /**< Handler of VFIO request interrupt */ +}; + +/** + * @internal + * Helper macro for drivers that need to convert to struct rte_pci_device. + */ +#define RTE_DEV_TO_PCI(ptr) container_of(ptr, struct rte_pci_device, device) + +#define RTE_DEV_TO_PCI_CONST(ptr) \ + container_of(ptr, const struct rte_pci_device, device) + +#define RTE_ETH_DEV_TO_PCI(eth_dev) RTE_DEV_TO_PCI((eth_dev)->device) + +/** Any PCI device identifier (vendor, device, ...) */ +#define PCI_ANY_ID (0xffff) +#define RTE_CLASS_ANY_ID (0xffffff) + +#ifdef __cplusplus +/** C++ macro used to help building up tables of device IDs */ +#define RTE_PCI_DEVICE(vend, dev) \ + RTE_CLASS_ANY_ID, \ + (vend), \ + (dev), \ + PCI_ANY_ID, \ + PCI_ANY_ID +#else +/** Macro used to help building up tables of device IDs */ +#define RTE_PCI_DEVICE(vend, dev) \ + .class_id = RTE_CLASS_ANY_ID, \ + .vendor_id = (vend), \ + .device_id = (dev), \ + .subsystem_vendor_id = PCI_ANY_ID, \ + .subsystem_device_id = PCI_ANY_ID +#endif + +/** + * Initialisation function for the driver called during PCI probing. + */ +typedef int (pci_probe_t)(struct rte_pci_driver *, struct rte_pci_device *); + +/** + * Uninitialisation function for the driver called during hotplugging. + */ +typedef int (pci_remove_t)(struct rte_pci_device *); + +/** + * Driver-specific DMA mapping. After a successful call the device + * will be able to read/write from/to this segment. + * + * @param dev + * Pointer to the PCI device. + * @param addr + * Starting virtual address of memory to be mapped. + * @param iova + * Starting IOVA address of memory to be mapped. + * @param len + * Length of memory segment being mapped. + * @return + * - 0 On success. + * - Negative value and rte_errno is set otherwise. + */ +typedef int (pci_dma_map_t)(struct rte_pci_device *dev, void *addr, + uint64_t iova, size_t len); + +/** + * Driver-specific DMA un-mapping. After a successful call the device + * will not be able to read/write from/to this segment. + * + * @param dev + * Pointer to the PCI device. + * @param addr + * Starting virtual address of memory to be unmapped. + * @param iova + * Starting IOVA address of memory to be unmapped. + * @param len + * Length of memory segment being unmapped. + * @return + * - 0 On success. + * - Negative value and rte_errno is set otherwise. + */ +typedef int (pci_dma_unmap_t)(struct rte_pci_device *dev, void *addr, + uint64_t iova, size_t len); + +/** + * A structure describing a PCI driver. + */ +struct rte_pci_driver { + TAILQ_ENTRY(rte_pci_driver) next; /**< Next in list. */ + struct rte_driver driver; /**< Inherit core driver. */ + struct rte_pci_bus *bus; /**< PCI bus reference. */ + pci_probe_t *probe; /**< Device Probe function. */ + pci_remove_t *remove; /**< Device Remove function. */ + pci_dma_map_t *dma_map; /**< device dma map function. */ + pci_dma_unmap_t *dma_unmap; /**< device dma unmap function. */ + const struct rte_pci_id *id_table; /**< ID table, NULL terminated. */ + uint32_t drv_flags; /**< Flags RTE_PCI_DRV_*. */ +}; + +/** + * Structure describing the PCI bus + */ +struct rte_pci_bus { + struct rte_bus bus; /**< Inherit the generic class */ + struct rte_pci_device_list device_list; /**< List of PCI devices */ + struct rte_pci_driver_list driver_list; /**< List of PCI drivers */ +}; + +/** Device needs PCI BAR mapping (done with either IGB_UIO or VFIO) */ +#define RTE_PCI_DRV_NEED_MAPPING 0x0001 +/** Device needs PCI BAR mapping with enabled write combining (wc) */ +#define RTE_PCI_DRV_WC_ACTIVATE 0x0002 +/** Device already probed can be probed again to check for new ports. */ +#define RTE_PCI_DRV_PROBE_AGAIN 0x0004 +/** Device driver supports link state interrupt */ +#define RTE_PCI_DRV_INTR_LSC 0x0008 +/** Device driver supports device removal interrupt */ +#define RTE_PCI_DRV_INTR_RMV 0x0010 +/** Device driver needs to keep mapped resources if unsupported dev detected */ +#define RTE_PCI_DRV_KEEP_MAPPED_RES 0x0020 +/** Device driver needs IOVA as VA and cannot work with IOVA as PA */ +#define RTE_PCI_DRV_NEED_IOVA_AS_VA 0x0040 + +/** + * Map the PCI device resources in user space virtual memory address + * + * Note that driver should not call this function when flag + * RTE_PCI_DRV_NEED_MAPPING is set, as EAL will do that for + * you when it's on. + * + * @param dev + * A pointer to a rte_pci_device structure describing the device + * to use + * + * @return + * 0 on success, negative on error and positive if no driver + * is found for the device. + */ +int rte_pci_map_device(struct rte_pci_device *dev); + +/** + * Unmap this device + * + * @param dev + * A pointer to a rte_pci_device structure describing the device + * to use + */ +void rte_pci_unmap_device(struct rte_pci_device *dev); + +/** + * Dump the content of the PCI bus. + * + * @param f + * A pointer to a file for output + */ +void rte_pci_dump(FILE *f); + +/** + * Register a PCI driver. + * + * @param driver + * A pointer to a rte_pci_driver structure describing the driver + * to be registered. + */ +void rte_pci_register(struct rte_pci_driver *driver); + +/** Helper for PCI device registration from driver (eth, crypto) instance */ +#define RTE_PMD_REGISTER_PCI(nm, pci_drv) \ +RTE_INIT(pciinitfn_ ##nm) \ +{\ + (pci_drv).driver.name = RTE_STR(nm);\ + rte_pci_register(&pci_drv); \ +} \ +RTE_PMD_EXPORT_NAME(nm, __COUNTER__) + +/** + * Unregister a PCI driver. + * + * @param driver + * A pointer to a rte_pci_driver structure describing the driver + * to be unregistered. + */ +void rte_pci_unregister(struct rte_pci_driver *driver); + +/** + * Read PCI config space. + * + * @param device + * A pointer to a rte_pci_device structure describing the device + * to use + * @param buf + * A data buffer where the bytes should be read into + * @param len + * The length of the data buffer. + * @param offset + * The offset into PCI config space + * @return + * Number of bytes read on success, negative on error. + */ +int rte_pci_read_config(const struct rte_pci_device *device, + void *buf, size_t len, off_t offset); + +/** + * Write PCI config space. + * + * @param device + * A pointer to a rte_pci_device structure describing the device + * to use + * @param buf + * A data buffer containing the bytes should be written + * @param len + * The length of the data buffer. + * @param offset + * The offset into PCI config space + */ +int rte_pci_write_config(const struct rte_pci_device *device, + const void *buf, size_t len, off_t offset); + +/** + * A structure used to access io resources for a pci device. + * rte_pci_ioport is arch, os, driver specific, and should not be used outside + * of pci ioport api. + */ +struct rte_pci_ioport { + struct rte_pci_device *dev; + uint64_t base; + uint64_t len; /* only filled for memory mapped ports */ +}; + +/** + * Initialize a rte_pci_ioport object for a pci device io resource. + * + * This object is then used to gain access to those io resources (see below). + * + * @param dev + * A pointer to a rte_pci_device structure describing the device + * to use. + * @param bar + * Index of the io pci resource we want to access. + * @param p + * The rte_pci_ioport object to be initialized. + * @return + * 0 on success, negative on error. + */ +int rte_pci_ioport_map(struct rte_pci_device *dev, int bar, + struct rte_pci_ioport *p); + +/** + * Release any resources used in a rte_pci_ioport object. + * + * @param p + * The rte_pci_ioport object to be uninitialized. + * @return + * 0 on success, negative on error. + */ +int rte_pci_ioport_unmap(struct rte_pci_ioport *p); + +/** + * Read from a io pci resource. + * + * @param p + * The rte_pci_ioport object from which we want to read. + * @param data + * A data buffer where the bytes should be read into + * @param len + * The length of the data buffer. + * @param offset + * The offset into the pci io resource. + */ +void rte_pci_ioport_read(struct rte_pci_ioport *p, + void *data, size_t len, off_t offset); + +/** + * Write to a io pci resource. + * + * @param p + * The rte_pci_ioport object to which we want to write. + * @param data + * A data buffer where the bytes should be read into + * @param len + * The length of the data buffer. + * @param offset + * The offset into the pci io resource. + */ +void rte_pci_ioport_write(struct rte_pci_ioport *p, + const void *data, size_t len, off_t offset); + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_BUS_PCI_H_ */ diff --git a/src/spdk/dpdk/drivers/bus/pci/rte_bus_pci_version.map b/src/spdk/dpdk/drivers/bus/pci/rte_bus_pci_version.map new file mode 100644 index 000000000..012d817e1 --- /dev/null +++ b/src/spdk/dpdk/drivers/bus/pci/rte_bus_pci_version.map @@ -0,0 +1,18 @@ +DPDK_20.0 { + global: + + rte_pci_dump; + rte_pci_get_sysfs_path; + rte_pci_ioport_map; + rte_pci_ioport_read; + rte_pci_ioport_unmap; + rte_pci_ioport_write; + rte_pci_map_device; + rte_pci_read_config; + rte_pci_register; + rte_pci_unmap_device; + rte_pci_unregister; + rte_pci_write_config; + + local: *; +}; |