diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
commit | e6918187568dbd01842d8d1d2c808ce16a894239 (patch) | |
tree | 64f88b554b444a49f656b6c656111a145cbbaa28 /src/spdk/lib/env_dpdk/pci.c | |
parent | Initial commit. (diff) | |
download | ceph-e6918187568dbd01842d8d1d2c808ce16a894239.tar.xz ceph-e6918187568dbd01842d8d1d2c808ce16a894239.zip |
Adding upstream version 18.2.2.upstream/18.2.2
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/spdk/lib/env_dpdk/pci.c')
-rw-r--r-- | src/spdk/lib/env_dpdk/pci.c | 1063 |
1 files changed, 1063 insertions, 0 deletions
diff --git a/src/spdk/lib/env_dpdk/pci.c b/src/spdk/lib/env_dpdk/pci.c new file mode 100644 index 000000000..5fd1b4abd --- /dev/null +++ b/src/spdk/lib/env_dpdk/pci.c @@ -0,0 +1,1063 @@ +/*- + * BSD LICENSE + * + * Copyright (c) Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "env_internal.h" + +#include <rte_alarm.h> +#include <rte_devargs.h> +#include "spdk/env.h" + +#define SYSFS_PCI_DRIVERS "/sys/bus/pci/drivers" + +#define PCI_CFG_SIZE 256 +#define PCI_EXT_CAP_ID_SN 0x03 + +/* DPDK 18.11+ hotplug isn't robust. Multiple apps starting at the same time + * might cause the internal IPC to misbehave. Just retry in such case. + */ +#define DPDK_HOTPLUG_RETRY_COUNT 4 + +/* DPDK alarm/interrupt thread */ +static pthread_mutex_t g_pci_mutex = PTHREAD_MUTEX_INITIALIZER; +static TAILQ_HEAD(, spdk_pci_device) g_pci_devices = TAILQ_HEAD_INITIALIZER(g_pci_devices); +/* devices hotplugged on a dpdk thread */ +static TAILQ_HEAD(, spdk_pci_device) g_pci_hotplugged_devices = + TAILQ_HEAD_INITIALIZER(g_pci_hotplugged_devices); +static TAILQ_HEAD(, spdk_pci_driver) g_pci_drivers = TAILQ_HEAD_INITIALIZER(g_pci_drivers); + +static int +map_bar_rte(struct spdk_pci_device *device, uint32_t bar, + void **mapped_addr, uint64_t *phys_addr, uint64_t *size) +{ + struct rte_pci_device *dev = device->dev_handle; + + *mapped_addr = dev->mem_resource[bar].addr; + *phys_addr = (uint64_t)dev->mem_resource[bar].phys_addr; + *size = (uint64_t)dev->mem_resource[bar].len; + + return 0; +} + +static int +unmap_bar_rte(struct spdk_pci_device *device, uint32_t bar, void *addr) +{ + return 0; +} + +static int +cfg_read_rte(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset) +{ + int rc; + + rc = rte_pci_read_config(dev->dev_handle, value, len, offset); + + return (rc > 0 && (uint32_t) rc == len) ? 0 : -1; +} + +static int +cfg_write_rte(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset) +{ + int rc; + + rc = rte_pci_write_config(dev->dev_handle, value, len, offset); + +#ifdef __FreeBSD__ + /* DPDK returns 0 on success and -1 on failure */ + return rc; +#endif + return (rc > 0 && (uint32_t) rc == len) ? 0 : -1; +} + +static void +remove_rte_dev(struct rte_pci_device *rte_dev) +{ + char bdf[32]; + int i = 0, rc; + + snprintf(bdf, sizeof(bdf), "%s", rte_dev->device.name); + do { + rc = rte_eal_hotplug_remove("pci", bdf); + } while (rc == -ENOMSG && ++i <= DPDK_HOTPLUG_RETRY_COUNT); +} + +static void +detach_rte_cb(void *_dev) +{ + remove_rte_dev(_dev); +} + +static void +detach_rte(struct spdk_pci_device *dev) +{ + struct rte_pci_device *rte_dev = dev->dev_handle; + int i; + bool removed; + + if (!spdk_process_is_primary()) { + remove_rte_dev(rte_dev); + return; + } + + pthread_mutex_lock(&g_pci_mutex); + dev->internal.attached = false; + /* prevent the hotremove notification from removing this device */ + dev->internal.pending_removal = true; + pthread_mutex_unlock(&g_pci_mutex); + + rte_eal_alarm_set(1, detach_rte_cb, rte_dev); + + /* wait up to 2s for the cb to execute */ + for (i = 2000; i > 0; i--) { + + spdk_delay_us(1000); + pthread_mutex_lock(&g_pci_mutex); + removed = dev->internal.removed; + pthread_mutex_unlock(&g_pci_mutex); + + if (removed) { + break; + } + } + + /* besides checking the removed flag, we also need to wait + * for the dpdk detach function to unwind, as it's doing some + * operations even after calling our detach callback. Simply + * cancel the alarm - if it started executing already, this + * call will block and wait for it to finish. + */ + rte_eal_alarm_cancel(detach_rte_cb, rte_dev); + + /* the device could have been finally removed, so just check + * it again. + */ + pthread_mutex_lock(&g_pci_mutex); + removed = dev->internal.removed; + pthread_mutex_unlock(&g_pci_mutex); + if (!removed) { + fprintf(stderr, "Timeout waiting for DPDK to remove PCI device %s.\n", + rte_dev->name); + /* If we reach this state, then the device couldn't be removed and most likely + a subsequent hot add of a device in the same BDF will fail */ + } +} + +void +spdk_pci_driver_register(const char *name, struct spdk_pci_id *id_table, uint32_t flags) +{ + struct spdk_pci_driver *driver; + + driver = calloc(1, sizeof(*driver)); + if (!driver) { + /* we can't do any better than bailing atm */ + return; + } + + driver->name = name; + driver->id_table = id_table; + driver->drv_flags = flags; + TAILQ_INSERT_TAIL(&g_pci_drivers, driver, tailq); +} + +struct spdk_pci_driver * +spdk_pci_nvme_get_driver(void) +{ + return spdk_pci_get_driver("nvme"); +} + +struct spdk_pci_driver * +spdk_pci_get_driver(const char *name) +{ + struct spdk_pci_driver *driver; + + TAILQ_FOREACH(driver, &g_pci_drivers, tailq) { + if (strcmp(driver->name, name) == 0) { + return driver; + } + } + + return NULL; +} + +static void +pci_device_rte_hotremove(const char *device_name, + enum rte_dev_event_type event, + void *cb_arg) +{ + struct spdk_pci_device *dev; + bool can_detach = false; + + if (event != RTE_DEV_EVENT_REMOVE) { + return; + } + + pthread_mutex_lock(&g_pci_mutex); + TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) { + struct rte_pci_device *rte_dev = dev->dev_handle; + + if (strcmp(rte_dev->name, device_name) == 0 && + !dev->internal.pending_removal) { + can_detach = !dev->internal.attached; + /* prevent any further attaches */ + dev->internal.pending_removal = true; + break; + } + } + pthread_mutex_unlock(&g_pci_mutex); + + if (dev != NULL && can_detach) { + /* if device is not attached we can remove it right away. + * Otherwise it will be removed at detach. + */ + remove_rte_dev(dev->dev_handle); + } +} + +static void +cleanup_pci_devices(void) +{ + struct spdk_pci_device *dev, *tmp; + + pthread_mutex_lock(&g_pci_mutex); + /* cleanup removed devices */ + TAILQ_FOREACH_SAFE(dev, &g_pci_devices, internal.tailq, tmp) { + if (!dev->internal.removed) { + continue; + } + + vtophys_pci_device_removed(dev->dev_handle); + TAILQ_REMOVE(&g_pci_devices, dev, internal.tailq); + free(dev); + } + + /* add newly-attached devices */ + TAILQ_FOREACH_SAFE(dev, &g_pci_hotplugged_devices, internal.tailq, tmp) { + TAILQ_REMOVE(&g_pci_hotplugged_devices, dev, internal.tailq); + TAILQ_INSERT_TAIL(&g_pci_devices, dev, internal.tailq); + vtophys_pci_device_added(dev->dev_handle); + } + pthread_mutex_unlock(&g_pci_mutex); +} + +static int scan_pci_bus(bool delay_init); + +/* translate spdk_pci_driver to an rte_pci_driver and register it to dpdk */ +static int +register_rte_driver(struct spdk_pci_driver *driver) +{ + unsigned pci_id_count = 0; + struct rte_pci_id *rte_id_table; + char *rte_name; + size_t rte_name_len; + uint32_t rte_flags; + + assert(driver->id_table); + while (driver->id_table[pci_id_count].vendor_id) { + pci_id_count++; + } + assert(pci_id_count > 0); + + rte_id_table = calloc(pci_id_count + 1, sizeof(*rte_id_table)); + if (!rte_id_table) { + return -ENOMEM; + } + + while (pci_id_count > 0) { + struct rte_pci_id *rte_id = &rte_id_table[pci_id_count - 1]; + const struct spdk_pci_id *spdk_id = &driver->id_table[pci_id_count - 1]; + + rte_id->class_id = spdk_id->class_id; + rte_id->vendor_id = spdk_id->vendor_id; + rte_id->device_id = spdk_id->device_id; + rte_id->subsystem_vendor_id = spdk_id->subvendor_id; + rte_id->subsystem_device_id = spdk_id->subdevice_id; + pci_id_count--; + } + + assert(driver->name); + rte_name_len = strlen(driver->name) + strlen("spdk_") + 1; + rte_name = calloc(rte_name_len, 1); + if (!rte_name) { + free(rte_id_table); + return -ENOMEM; + } + + snprintf(rte_name, rte_name_len, "spdk_%s", driver->name); + driver->driver.driver.name = rte_name; + driver->driver.id_table = rte_id_table; + + rte_flags = 0; + if (driver->drv_flags & SPDK_PCI_DRIVER_NEED_MAPPING) { + rte_flags |= RTE_PCI_DRV_NEED_MAPPING; + } + if (driver->drv_flags & SPDK_PCI_DRIVER_WC_ACTIVATE) { + rte_flags |= RTE_PCI_DRV_WC_ACTIVATE; + } + driver->driver.drv_flags = rte_flags; + + driver->driver.probe = pci_device_init; + driver->driver.remove = pci_device_fini; + + rte_pci_register(&driver->driver); + return 0; +} + +static inline void +_pci_env_init(void) +{ + /* We assume devices were present on the bus for more than 2 seconds + * before initializing SPDK and there's no need to wait more. We scan + * the bus, but we don't blacklist any devices. + */ + scan_pci_bus(false); + + /* Register a single hotremove callback for all devices. */ + if (spdk_process_is_primary()) { + rte_dev_event_callback_register(NULL, pci_device_rte_hotremove, NULL); + } +} + +void +pci_env_init(void) +{ + struct spdk_pci_driver *driver; + + TAILQ_FOREACH(driver, &g_pci_drivers, tailq) { + register_rte_driver(driver); + } + + _pci_env_init(); +} + +void +pci_env_reinit(void) +{ + /* There is no need to register pci drivers again, since they were + * already pre-registered in pci_env_init. + */ + + _pci_env_init(); +} + +void +pci_env_fini(void) +{ + struct spdk_pci_device *dev; + char bdf[32]; + + cleanup_pci_devices(); + TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) { + if (dev->internal.attached) { + spdk_pci_addr_fmt(bdf, sizeof(bdf), &dev->addr); + fprintf(stderr, "Device %s is still attached at shutdown!\n", bdf); + } + } + + if (spdk_process_is_primary()) { + rte_dev_event_callback_unregister(NULL, pci_device_rte_hotremove, NULL); + } +} + +int +pci_device_init(struct rte_pci_driver *_drv, + struct rte_pci_device *_dev) +{ + struct spdk_pci_driver *driver = (struct spdk_pci_driver *)_drv; + struct spdk_pci_device *dev; + int rc; + + dev = calloc(1, sizeof(*dev)); + if (dev == NULL) { + return -1; + } + + dev->dev_handle = _dev; + + dev->addr.domain = _dev->addr.domain; + dev->addr.bus = _dev->addr.bus; + dev->addr.dev = _dev->addr.devid; + dev->addr.func = _dev->addr.function; + dev->id.class_id = _dev->id.class_id; + dev->id.vendor_id = _dev->id.vendor_id; + dev->id.device_id = _dev->id.device_id; + dev->id.subvendor_id = _dev->id.subsystem_vendor_id; + dev->id.subdevice_id = _dev->id.subsystem_device_id; + dev->socket_id = _dev->device.numa_node; + dev->type = "pci"; + + dev->map_bar = map_bar_rte; + dev->unmap_bar = unmap_bar_rte; + dev->cfg_read = cfg_read_rte; + dev->cfg_write = cfg_write_rte; + + dev->internal.driver = driver; + dev->internal.claim_fd = -1; + + if (driver->cb_fn != NULL) { + rc = driver->cb_fn(driver->cb_arg, dev); + if (rc != 0) { + free(dev); + return rc; + } + dev->internal.attached = true; + } + + pthread_mutex_lock(&g_pci_mutex); + TAILQ_INSERT_TAIL(&g_pci_hotplugged_devices, dev, internal.tailq); + pthread_mutex_unlock(&g_pci_mutex); + return 0; +} + +int +pci_device_fini(struct rte_pci_device *_dev) +{ + struct spdk_pci_device *dev; + + pthread_mutex_lock(&g_pci_mutex); + TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) { + if (dev->dev_handle == _dev) { + break; + } + } + + if (dev == NULL || dev->internal.attached) { + /* The device might be still referenced somewhere in SPDK. */ + pthread_mutex_unlock(&g_pci_mutex); + return -1; + } + + /* remove our whitelist_at option */ + if (_dev->device.devargs) { + _dev->device.devargs->data = NULL; + } + + assert(!dev->internal.removed); + dev->internal.removed = true; + pthread_mutex_unlock(&g_pci_mutex); + return 0; + +} + +void +spdk_pci_device_detach(struct spdk_pci_device *dev) +{ + assert(dev->internal.attached); + + if (dev->internal.claim_fd >= 0) { + spdk_pci_device_unclaim(dev); + } + + if (strcmp(dev->type, "pci") == 0) { + /* if it's a physical device we need to deal with DPDK on + * a different process and we can't just unset one flag + * here. We also want to stop using any device resources + * so that the device isn't "in use" by the userspace driver + * once we detach it. This would allow attaching the device + * to a different process, or to a kernel driver like nvme. + */ + detach_rte(dev); + } else { + dev->internal.attached = false; + } + + cleanup_pci_devices(); +} + +static int +scan_pci_bus(bool delay_init) +{ + struct spdk_pci_driver *driver; + struct rte_pci_device *rte_dev; + uint64_t now; + + rte_bus_scan(); + now = spdk_get_ticks(); + + driver = TAILQ_FIRST(&g_pci_drivers); + if (!driver) { + return 0; + } + + TAILQ_FOREACH(rte_dev, &driver->driver.bus->device_list, next) { + struct rte_devargs *da; + + da = rte_dev->device.devargs; + if (!da) { + char devargs_str[128]; + + /* the device was never blacklisted or whitelisted */ + da = calloc(1, sizeof(*da)); + if (!da) { + return -1; + } + + snprintf(devargs_str, sizeof(devargs_str), "pci:%s", rte_dev->device.name); + if (rte_devargs_parse(da, devargs_str) != 0) { + free(da); + return -1; + } + + rte_devargs_insert(&da); + rte_dev->device.devargs = da; + } + + if (da->data) { + uint64_t whitelist_at = (uint64_t)(uintptr_t)da->data; + + /* this device was seen by spdk before... */ + if (da->policy == RTE_DEV_BLACKLISTED && whitelist_at <= now) { + da->policy = RTE_DEV_WHITELISTED; + } + } else if ((driver->driver.bus->bus.conf.scan_mode == RTE_BUS_SCAN_WHITELIST && + da->policy == RTE_DEV_WHITELISTED) || da->policy != RTE_DEV_BLACKLISTED) { + /* override the policy only if not permanently blacklisted */ + + if (delay_init) { + da->policy = RTE_DEV_BLACKLISTED; + da->data = (void *)(now + 2 * spdk_get_ticks_hz()); + } else { + da->policy = RTE_DEV_WHITELISTED; + da->data = (void *)(uintptr_t)now; + } + } + } + + return 0; +} + +int +spdk_pci_device_attach(struct spdk_pci_driver *driver, + spdk_pci_enum_cb enum_cb, + void *enum_ctx, struct spdk_pci_addr *pci_address) +{ + struct spdk_pci_device *dev; + struct rte_pci_device *rte_dev; + struct rte_devargs *da; + int rc; + char bdf[32]; + + spdk_pci_addr_fmt(bdf, sizeof(bdf), pci_address); + + cleanup_pci_devices(); + + TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) { + if (spdk_pci_addr_compare(&dev->addr, pci_address) == 0) { + break; + } + } + + if (dev != NULL && dev->internal.driver == driver) { + pthread_mutex_lock(&g_pci_mutex); + if (dev->internal.attached || dev->internal.pending_removal) { + pthread_mutex_unlock(&g_pci_mutex); + return -1; + } + + rc = enum_cb(enum_ctx, dev); + if (rc == 0) { + dev->internal.attached = true; + } + pthread_mutex_unlock(&g_pci_mutex); + return rc; + } + + driver->cb_fn = enum_cb; + driver->cb_arg = enum_ctx; + + int i = 0; + + do { + rc = rte_eal_hotplug_add("pci", bdf, ""); + } while (rc == -ENOMSG && ++i <= DPDK_HOTPLUG_RETRY_COUNT); + + if (i > 1 && rc == -EEXIST) { + /* Even though the previous request timed out, the device + * was attached successfully. + */ + rc = 0; + } + + driver->cb_arg = NULL; + driver->cb_fn = NULL; + + cleanup_pci_devices(); + + if (rc != 0) { + return -1; + } + + /* explicit attach ignores the whitelist, so if we blacklisted this + * device before let's enable it now - just for clarity. + */ + TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) { + if (spdk_pci_addr_compare(&dev->addr, pci_address) == 0) { + break; + } + } + assert(dev != NULL); + + rte_dev = dev->dev_handle; + da = rte_dev->device.devargs; + if (da && da->data) { + da->data = (void *)(uintptr_t)spdk_get_ticks(); + da->policy = RTE_DEV_WHITELISTED; + } + + return 0; +} + +/* Note: You can call spdk_pci_enumerate from more than one thread + * simultaneously safely, but you cannot call spdk_pci_enumerate + * and rte_eal_pci_probe simultaneously. + */ +int +spdk_pci_enumerate(struct spdk_pci_driver *driver, + spdk_pci_enum_cb enum_cb, + void *enum_ctx) +{ + struct spdk_pci_device *dev; + int rc; + + cleanup_pci_devices(); + + pthread_mutex_lock(&g_pci_mutex); + TAILQ_FOREACH(dev, &g_pci_devices, internal.tailq) { + if (dev->internal.attached || + dev->internal.driver != driver || + dev->internal.pending_removal) { + continue; + } + + rc = enum_cb(enum_ctx, dev); + if (rc == 0) { + dev->internal.attached = true; + } else if (rc < 0) { + pthread_mutex_unlock(&g_pci_mutex); + return -1; + } + } + pthread_mutex_unlock(&g_pci_mutex); + + if (scan_pci_bus(true) != 0) { + return -1; + } + + driver->cb_fn = enum_cb; + driver->cb_arg = enum_ctx; + + if (rte_bus_probe() != 0) { + driver->cb_arg = NULL; + driver->cb_fn = NULL; + return -1; + } + + driver->cb_arg = NULL; + driver->cb_fn = NULL; + + cleanup_pci_devices(); + return 0; +} + +struct spdk_pci_device * +spdk_pci_get_first_device(void) +{ + return TAILQ_FIRST(&g_pci_devices); +} + +struct spdk_pci_device * +spdk_pci_get_next_device(struct spdk_pci_device *prev) +{ + return TAILQ_NEXT(prev, internal.tailq); +} + +int +spdk_pci_device_map_bar(struct spdk_pci_device *dev, uint32_t bar, + void **mapped_addr, uint64_t *phys_addr, uint64_t *size) +{ + return dev->map_bar(dev, bar, mapped_addr, phys_addr, size); +} + +int +spdk_pci_device_unmap_bar(struct spdk_pci_device *dev, uint32_t bar, void *addr) +{ + return dev->unmap_bar(dev, bar, addr); +} + +uint32_t +spdk_pci_device_get_domain(struct spdk_pci_device *dev) +{ + return dev->addr.domain; +} + +uint8_t +spdk_pci_device_get_bus(struct spdk_pci_device *dev) +{ + return dev->addr.bus; +} + +uint8_t +spdk_pci_device_get_dev(struct spdk_pci_device *dev) +{ + return dev->addr.dev; +} + +uint8_t +spdk_pci_device_get_func(struct spdk_pci_device *dev) +{ + return dev->addr.func; +} + +uint16_t +spdk_pci_device_get_vendor_id(struct spdk_pci_device *dev) +{ + return dev->id.vendor_id; +} + +uint16_t +spdk_pci_device_get_device_id(struct spdk_pci_device *dev) +{ + return dev->id.device_id; +} + +uint16_t +spdk_pci_device_get_subvendor_id(struct spdk_pci_device *dev) +{ + return dev->id.subvendor_id; +} + +uint16_t +spdk_pci_device_get_subdevice_id(struct spdk_pci_device *dev) +{ + return dev->id.subdevice_id; +} + +struct spdk_pci_id +spdk_pci_device_get_id(struct spdk_pci_device *dev) +{ + return dev->id; +} + +int +spdk_pci_device_get_socket_id(struct spdk_pci_device *dev) +{ + return dev->socket_id; +} + +int +spdk_pci_device_cfg_read(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset) +{ + return dev->cfg_read(dev, value, len, offset); +} + +int +spdk_pci_device_cfg_write(struct spdk_pci_device *dev, void *value, uint32_t len, uint32_t offset) +{ + return dev->cfg_write(dev, value, len, offset); +} + +int +spdk_pci_device_cfg_read8(struct spdk_pci_device *dev, uint8_t *value, uint32_t offset) +{ + return spdk_pci_device_cfg_read(dev, value, 1, offset); +} + +int +spdk_pci_device_cfg_write8(struct spdk_pci_device *dev, uint8_t value, uint32_t offset) +{ + return spdk_pci_device_cfg_write(dev, &value, 1, offset); +} + +int +spdk_pci_device_cfg_read16(struct spdk_pci_device *dev, uint16_t *value, uint32_t offset) +{ + return spdk_pci_device_cfg_read(dev, value, 2, offset); +} + +int +spdk_pci_device_cfg_write16(struct spdk_pci_device *dev, uint16_t value, uint32_t offset) +{ + return spdk_pci_device_cfg_write(dev, &value, 2, offset); +} + +int +spdk_pci_device_cfg_read32(struct spdk_pci_device *dev, uint32_t *value, uint32_t offset) +{ + return spdk_pci_device_cfg_read(dev, value, 4, offset); +} + +int +spdk_pci_device_cfg_write32(struct spdk_pci_device *dev, uint32_t value, uint32_t offset) +{ + return spdk_pci_device_cfg_write(dev, &value, 4, offset); +} + +int +spdk_pci_device_get_serial_number(struct spdk_pci_device *dev, char *sn, size_t len) +{ + int err; + uint32_t pos, header = 0; + uint32_t i, buf[2]; + + if (len < 17) { + return -1; + } + + err = spdk_pci_device_cfg_read32(dev, &header, PCI_CFG_SIZE); + if (err || !header) { + return -1; + } + + pos = PCI_CFG_SIZE; + while (1) { + if ((header & 0x0000ffff) == PCI_EXT_CAP_ID_SN) { + if (pos) { + /* skip the header */ + pos += 4; + for (i = 0; i < 2; i++) { + err = spdk_pci_device_cfg_read32(dev, &buf[i], pos + 4 * i); + if (err) { + return -1; + } + } + snprintf(sn, len, "%08x%08x", buf[1], buf[0]); + return 0; + } + } + pos = (header >> 20) & 0xffc; + /* 0 if no other items exist */ + if (pos < PCI_CFG_SIZE) { + return -1; + } + err = spdk_pci_device_cfg_read32(dev, &header, pos); + if (err) { + return -1; + } + } + return -1; +} + +struct spdk_pci_addr +spdk_pci_device_get_addr(struct spdk_pci_device *dev) +{ + return dev->addr; +} + +bool +spdk_pci_device_is_removed(struct spdk_pci_device *dev) +{ + return dev->internal.pending_removal; +} + +int +spdk_pci_addr_compare(const struct spdk_pci_addr *a1, const struct spdk_pci_addr *a2) +{ + if (a1->domain > a2->domain) { + return 1; + } else if (a1->domain < a2->domain) { + return -1; + } else if (a1->bus > a2->bus) { + return 1; + } else if (a1->bus < a2->bus) { + return -1; + } else if (a1->dev > a2->dev) { + return 1; + } else if (a1->dev < a2->dev) { + return -1; + } else if (a1->func > a2->func) { + return 1; + } else if (a1->func < a2->func) { + return -1; + } + + return 0; +} + +#ifdef __linux__ +int +spdk_pci_device_claim(struct spdk_pci_device *dev) +{ + int dev_fd; + char dev_name[64]; + int pid; + void *dev_map; + struct flock pcidev_lock = { + .l_type = F_WRLCK, + .l_whence = SEEK_SET, + .l_start = 0, + .l_len = 0, + }; + + snprintf(dev_name, sizeof(dev_name), "/tmp/spdk_pci_lock_%04x:%02x:%02x.%x", + dev->addr.domain, dev->addr.bus, dev->addr.dev, dev->addr.func); + + dev_fd = open(dev_name, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR); + if (dev_fd == -1) { + fprintf(stderr, "could not open %s\n", dev_name); + return -errno; + } + + if (ftruncate(dev_fd, sizeof(int)) != 0) { + fprintf(stderr, "could not truncate %s\n", dev_name); + close(dev_fd); + return -errno; + } + + dev_map = mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE, + MAP_SHARED, dev_fd, 0); + if (dev_map == MAP_FAILED) { + fprintf(stderr, "could not mmap dev %s (%d)\n", dev_name, errno); + close(dev_fd); + return -errno; + } + + if (fcntl(dev_fd, F_SETLK, &pcidev_lock) != 0) { + pid = *(int *)dev_map; + fprintf(stderr, "Cannot create lock on device %s, probably" + " process %d has claimed it\n", dev_name, pid); + munmap(dev_map, sizeof(int)); + close(dev_fd); + /* F_SETLK returns unspecified errnos, normalize them */ + return -EACCES; + } + + *(int *)dev_map = (int)getpid(); + munmap(dev_map, sizeof(int)); + dev->internal.claim_fd = dev_fd; + /* Keep dev_fd open to maintain the lock. */ + return 0; +} + +void +spdk_pci_device_unclaim(struct spdk_pci_device *dev) +{ + char dev_name[64]; + + snprintf(dev_name, sizeof(dev_name), "/tmp/spdk_pci_lock_%04x:%02x:%02x.%x", + dev->addr.domain, dev->addr.bus, dev->addr.dev, dev->addr.func); + + close(dev->internal.claim_fd); + dev->internal.claim_fd = -1; + unlink(dev_name); +} +#endif /* __linux__ */ + +#ifdef __FreeBSD__ +int +spdk_pci_device_claim(struct spdk_pci_device *dev) +{ + /* TODO */ + return 0; +} + +void +spdk_pci_device_unclaim(struct spdk_pci_device *dev) +{ + /* TODO */ +} +#endif /* __FreeBSD__ */ + +int +spdk_pci_addr_parse(struct spdk_pci_addr *addr, const char *bdf) +{ + unsigned domain, bus, dev, func; + + if (addr == NULL || bdf == NULL) { + return -EINVAL; + } + + if ((sscanf(bdf, "%x:%x:%x.%x", &domain, &bus, &dev, &func) == 4) || + (sscanf(bdf, "%x.%x.%x.%x", &domain, &bus, &dev, &func) == 4)) { + /* Matched a full address - all variables are initialized */ + } else if (sscanf(bdf, "%x:%x:%x", &domain, &bus, &dev) == 3) { + func = 0; + } else if ((sscanf(bdf, "%x:%x.%x", &bus, &dev, &func) == 3) || + (sscanf(bdf, "%x.%x.%x", &bus, &dev, &func) == 3)) { + domain = 0; + } else if ((sscanf(bdf, "%x:%x", &bus, &dev) == 2) || + (sscanf(bdf, "%x.%x", &bus, &dev) == 2)) { + domain = 0; + func = 0; + } else { + return -EINVAL; + } + + if (bus > 0xFF || dev > 0x1F || func > 7) { + return -EINVAL; + } + + addr->domain = domain; + addr->bus = bus; + addr->dev = dev; + addr->func = func; + + return 0; +} + +int +spdk_pci_addr_fmt(char *bdf, size_t sz, const struct spdk_pci_addr *addr) +{ + int rc; + + rc = snprintf(bdf, sz, "%04x:%02x:%02x.%x", + addr->domain, addr->bus, + addr->dev, addr->func); + + if (rc > 0 && (size_t)rc < sz) { + return 0; + } + + return -1; +} + +void +spdk_pci_hook_device(struct spdk_pci_driver *drv, struct spdk_pci_device *dev) +{ + assert(dev->map_bar != NULL); + assert(dev->unmap_bar != NULL); + assert(dev->cfg_read != NULL); + assert(dev->cfg_write != NULL); + dev->internal.driver = drv; + TAILQ_INSERT_TAIL(&g_pci_devices, dev, internal.tailq); +} + +void +spdk_pci_unhook_device(struct spdk_pci_device *dev) +{ + assert(!dev->internal.attached); + TAILQ_REMOVE(&g_pci_devices, dev, internal.tailq); +} + +const char * +spdk_pci_device_get_type(const struct spdk_pci_device *dev) +{ + return dev->type; +} |