Diffstat (limited to 'src/spdk/dpdk/drivers/event')
82 files changed, 31453 insertions, 0 deletions
diff --git a/src/spdk/dpdk/drivers/event/Makefile b/src/spdk/dpdk/drivers/event/Makefile new file mode 100644 index 000000000..86be41b9e --- /dev/null +++ b/src/spdk/dpdk/drivers/event/Makefile @@ -0,0 +1,20 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2016 Cavium, Inc +# + +include $(RTE_SDK)/mk/rte.vars.mk + +DIRS-$(CONFIG_RTE_LIBRTE_PMD_SKELETON_EVENTDEV) += skeleton +DIRS-$(CONFIG_RTE_LIBRTE_PMD_SW_EVENTDEV) += sw +DIRS-$(CONFIG_RTE_LIBRTE_PMD_DSW_EVENTDEV) += dsw +DIRS-$(CONFIG_RTE_LIBRTE_PMD_OCTEONTX_SSOVF) += octeontx +DIRS-$(CONFIG_RTE_LIBRTE_PMD_OCTEONTX2_EVENTDEV) += octeontx2 +ifeq ($(CONFIG_RTE_LIBRTE_DPAA_BUS),y) +DIRS-$(CONFIG_RTE_LIBRTE_PMD_DPAA_EVENTDEV) += dpaa +endif +ifeq ($(CONFIG_RTE_EAL_VFIO)$(CONFIG_RTE_LIBRTE_FSLMC_BUS),yy) +DIRS-$(CONFIG_RTE_LIBRTE_PMD_DPAA2_EVENTDEV) += dpaa2 +endif +DIRS-$(CONFIG_RTE_LIBRTE_PMD_OPDL_EVENTDEV) += opdl + +include $(RTE_SDK)/mk/rte.subdir.mk diff --git a/src/spdk/dpdk/drivers/event/dpaa/Makefile b/src/spdk/dpdk/drivers/event/dpaa/Makefile new file mode 100644 index 000000000..a39dc27c6 --- /dev/null +++ b/src/spdk/dpdk/drivers/event/dpaa/Makefile @@ -0,0 +1,39 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2017 NXP +# + +include $(RTE_SDK)/mk/rte.vars.mk +RTE_SDK_DPAA=$(RTE_SDK)/drivers/net/dpaa + +# +# library name +# +LIB = librte_pmd_dpaa_event.a + +CFLAGS := -I$(SRCDIR) $(CFLAGS) +CFLAGS += -O3 $(WERROR_FLAGS) +CFLAGS += -Wno-pointer-arith +CFLAGS += -I$(RTE_SDK_DPAA)/ +CFLAGS += -I$(RTE_SDK_DPAA)/include +CFLAGS += -I$(RTE_SDK)/drivers/bus/dpaa +CFLAGS += -I$(RTE_SDK)/drivers/bus/dpaa/include/ +CFLAGS += -I$(RTE_SDK)/drivers/mempool/dpaa +CFLAGS += -I$(RTE_SDK)/drivers/common/dpaax +CFLAGS += -I$(RTE_SDK)/lib/librte_eal/include + +LDLIBS += -lrte_pmd_dpaa_sec +CFLAGS += -I$(RTE_SDK)/drivers/crypto/dpaa_sec + +EXPORT_MAP := rte_pmd_dpaa_event_version.map + +# Interfaces with DPDK +SRCS-$(CONFIG_RTE_LIBRTE_PMD_DPAA_EVENTDEV) += dpaa_eventdev.c + +LDLIBS += -lrte_bus_dpaa +LDLIBS += -lrte_mempool_dpaa +LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring +LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs +LDLIBS += -lrte_eventdev -lrte_pmd_dpaa -lrte_bus_vdev +LDLIBS += -lrte_common_dpaax + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/src/spdk/dpdk/drivers/event/dpaa/dpaa_eventdev.c b/src/spdk/dpdk/drivers/event/dpaa/dpaa_eventdev.c new file mode 100644 index 000000000..5a018d487 --- /dev/null +++ b/src/spdk/dpdk/drivers/event/dpaa/dpaa_eventdev.c @@ -0,0 +1,1077 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2017-2019 NXP + */ + +#include <assert.h> +#include <stdio.h> +#include <stdbool.h> +#include <errno.h> +#include <stdint.h> +#include <string.h> +#include <sys/epoll.h> + +#include <rte_atomic.h> +#include <rte_byteorder.h> +#include <rte_common.h> +#include <rte_debug.h> +#include <rte_dev.h> +#include <rte_eal.h> +#include <rte_lcore.h> +#include <rte_log.h> +#include <rte_malloc.h> +#include <rte_memcpy.h> +#include <rte_memory.h> +#include <rte_memzone.h> +#include <rte_pci.h> +#include <rte_eventdev.h> +#include <rte_eventdev_pmd_vdev.h> +#include <rte_ethdev.h> +#include <rte_event_eth_rx_adapter.h> +#include <rte_event_eth_tx_adapter.h> +#include <rte_cryptodev.h> +#include <rte_dpaa_bus.h> +#include <rte_dpaa_logs.h> +#include <rte_cycles.h> +#include <rte_kvargs.h> + +#include <dpaa_ethdev.h> +#include <dpaa_sec_event.h> +#include "dpaa_eventdev.h" +#include <dpaa_mempool.h> + +/* + * Clarifications + * Evendev = Virtual Instance for SoC + * Eventport = Portal Instance + * 
Eventqueue = Channel Instance + * 1 Eventdev can have N Eventqueue + */ +int dpaa_logtype_eventdev; + +#define DISABLE_INTR_MODE "disable_intr" + +static int +dpaa_event_dequeue_timeout_ticks(struct rte_eventdev *dev, uint64_t ns, + uint64_t *timeout_ticks) +{ + EVENTDEV_INIT_FUNC_TRACE(); + + RTE_SET_USED(dev); + + uint64_t cycles_per_second; + + cycles_per_second = rte_get_timer_hz(); + *timeout_ticks = (ns * cycles_per_second) / NS_PER_S; + + return 0; +} + +static int +dpaa_event_dequeue_timeout_ticks_intr(struct rte_eventdev *dev, uint64_t ns, + uint64_t *timeout_ticks) +{ + RTE_SET_USED(dev); + + *timeout_ticks = ns/1000; + return 0; +} + +static void +dpaa_eventq_portal_add(u16 ch_id) +{ + uint32_t sdqcr; + + sdqcr = QM_SDQCR_CHANNELS_POOL_CONV(ch_id); + qman_static_dequeue_add(sdqcr, NULL); +} + +static uint16_t +dpaa_event_enqueue_burst(void *port, const struct rte_event ev[], + uint16_t nb_events) +{ + uint16_t i; + struct rte_mbuf *mbuf; + + RTE_SET_USED(port); + /*Release all the contexts saved previously*/ + for (i = 0; i < nb_events; i++) { + switch (ev[i].op) { + case RTE_EVENT_OP_RELEASE: + qman_dca_index(ev[i].impl_opaque, 0); + mbuf = DPAA_PER_LCORE_DQRR_MBUF(i); + mbuf->seqn = DPAA_INVALID_MBUF_SEQN; + DPAA_PER_LCORE_DQRR_HELD &= ~(1 << i); + DPAA_PER_LCORE_DQRR_SIZE--; + break; + default: + break; + } + } + + return nb_events; +} + +static uint16_t +dpaa_event_enqueue(void *port, const struct rte_event *ev) +{ + return dpaa_event_enqueue_burst(port, ev, 1); +} + +static void drain_4_bytes(int fd, fd_set *fdset) +{ + if (FD_ISSET(fd, fdset)) { + /* drain 4 bytes */ + uint32_t junk; + ssize_t sjunk = read(qman_thread_fd(), &junk, sizeof(junk)); + if (sjunk != sizeof(junk)) + DPAA_EVENTDEV_ERR("UIO irq read error"); + } +} + +static inline int +dpaa_event_dequeue_wait(uint64_t timeout_ticks) +{ + int fd_qman, nfds; + int ret; + fd_set readset; + + /* Go into (and back out of) IRQ mode for each select, + * it simplifies exit-path considerations and other + * potential nastiness. + */ + struct timeval tv = { + .tv_sec = timeout_ticks / 1000000, + .tv_usec = timeout_ticks % 1000000 + }; + + fd_qman = qman_thread_fd(); + nfds = fd_qman + 1; + FD_ZERO(&readset); + FD_SET(fd_qman, &readset); + + qman_irqsource_add(QM_PIRQ_DQRI); + + ret = select(nfds, &readset, NULL, NULL, &tv); + if (ret < 0) + return ret; + /* Calling irqsource_remove() prior to thread_irq() + * means thread_irq() will not process whatever caused + * the interrupts, however it does ensure that, once + * thread_irq() re-enables interrupts, they won't fire + * again immediately. + */ + qman_irqsource_remove(~0); + drain_4_bytes(fd_qman, &readset); + qman_thread_irq(); + + return ret; +} + +static uint16_t +dpaa_event_dequeue_burst(void *port, struct rte_event ev[], + uint16_t nb_events, uint64_t timeout_ticks) +{ + int ret; + u16 ch_id; + void *buffers[8]; + u32 num_frames, i, irq = 0; + uint64_t cur_ticks = 0, wait_time_ticks = 0; + struct dpaa_port *portal = (struct dpaa_port *)port; + struct rte_mbuf *mbuf; + + if (unlikely(!RTE_PER_LCORE(dpaa_io))) { + /* Affine current thread context to a qman portal */ + ret = rte_dpaa_portal_init((void *)0); + if (ret) { + DPAA_EVENTDEV_ERR("Unable to initialize portal"); + return ret; + } + } + + if (unlikely(!portal->is_port_linked)) { + /* + * Affine event queue for current thread context + * to a qman portal. 
+ */ + for (i = 0; i < portal->num_linked_evq; i++) { + ch_id = portal->evq_info[i].ch_id; + dpaa_eventq_portal_add(ch_id); + } + portal->is_port_linked = true; + } + + /* Check if there are atomic contexts to be released */ + i = 0; + while (DPAA_PER_LCORE_DQRR_SIZE) { + if (DPAA_PER_LCORE_DQRR_HELD & (1 << i)) { + qman_dca_index(i, 0); + mbuf = DPAA_PER_LCORE_DQRR_MBUF(i); + mbuf->seqn = DPAA_INVALID_MBUF_SEQN; + DPAA_PER_LCORE_DQRR_HELD &= ~(1 << i); + DPAA_PER_LCORE_DQRR_SIZE--; + } + i++; + } + DPAA_PER_LCORE_DQRR_HELD = 0; + + if (timeout_ticks) + wait_time_ticks = timeout_ticks; + else + wait_time_ticks = portal->timeout_us; + + wait_time_ticks += rte_get_timer_cycles(); + do { + /* Lets dequeue the frames */ + num_frames = qman_portal_dequeue(ev, nb_events, buffers); + if (irq) + irq = 0; + if (num_frames) + break; + cur_ticks = rte_get_timer_cycles(); + } while (cur_ticks < wait_time_ticks); + + return num_frames; +} + +static uint16_t +dpaa_event_dequeue(void *port, struct rte_event *ev, uint64_t timeout_ticks) +{ + return dpaa_event_dequeue_burst(port, ev, 1, timeout_ticks); +} + +static uint16_t +dpaa_event_dequeue_burst_intr(void *port, struct rte_event ev[], + uint16_t nb_events, uint64_t timeout_ticks) +{ + int ret; + u16 ch_id; + void *buffers[8]; + u32 num_frames, i, irq = 0; + uint64_t cur_ticks = 0, wait_time_ticks = 0; + struct dpaa_port *portal = (struct dpaa_port *)port; + struct rte_mbuf *mbuf; + + if (unlikely(!RTE_PER_LCORE(dpaa_io))) { + /* Affine current thread context to a qman portal */ + ret = rte_dpaa_portal_init((void *)0); + if (ret) { + DPAA_EVENTDEV_ERR("Unable to initialize portal"); + return ret; + } + } + + if (unlikely(!portal->is_port_linked)) { + /* + * Affine event queue for current thread context + * to a qman portal. 
+ */ + for (i = 0; i < portal->num_linked_evq; i++) { + ch_id = portal->evq_info[i].ch_id; + dpaa_eventq_portal_add(ch_id); + } + portal->is_port_linked = true; + } + + /* Check if there are atomic contexts to be released */ + i = 0; + while (DPAA_PER_LCORE_DQRR_SIZE) { + if (DPAA_PER_LCORE_DQRR_HELD & (1 << i)) { + qman_dca_index(i, 0); + mbuf = DPAA_PER_LCORE_DQRR_MBUF(i); + mbuf->seqn = DPAA_INVALID_MBUF_SEQN; + DPAA_PER_LCORE_DQRR_HELD &= ~(1 << i); + DPAA_PER_LCORE_DQRR_SIZE--; + } + i++; + } + DPAA_PER_LCORE_DQRR_HELD = 0; + + if (timeout_ticks) + wait_time_ticks = timeout_ticks; + else + wait_time_ticks = portal->timeout_us; + + do { + /* Lets dequeue the frames */ + num_frames = qman_portal_dequeue(ev, nb_events, buffers); + if (irq) + irq = 0; + if (num_frames) + break; + if (wait_time_ticks) { /* wait for time */ + if (dpaa_event_dequeue_wait(wait_time_ticks) > 0) { + irq = 1; + continue; + } + break; /* no event after waiting */ + } + cur_ticks = rte_get_timer_cycles(); + } while (cur_ticks < wait_time_ticks); + + return num_frames; +} + +static uint16_t +dpaa_event_dequeue_intr(void *port, + struct rte_event *ev, + uint64_t timeout_ticks) +{ + return dpaa_event_dequeue_burst_intr(port, ev, 1, timeout_ticks); +} + +static void +dpaa_event_dev_info_get(struct rte_eventdev *dev, + struct rte_event_dev_info *dev_info) +{ + EVENTDEV_INIT_FUNC_TRACE(); + + RTE_SET_USED(dev); + dev_info->driver_name = "event_dpaa1"; + dev_info->min_dequeue_timeout_ns = + DPAA_EVENT_MIN_DEQUEUE_TIMEOUT; + dev_info->max_dequeue_timeout_ns = + DPAA_EVENT_MAX_DEQUEUE_TIMEOUT; + dev_info->dequeue_timeout_ns = + DPAA_EVENT_PORT_DEQUEUE_TIMEOUT_NS; + dev_info->max_event_queues = + DPAA_EVENT_MAX_QUEUES; + dev_info->max_event_queue_flows = + DPAA_EVENT_MAX_QUEUE_FLOWS; + dev_info->max_event_queue_priority_levels = + DPAA_EVENT_MAX_QUEUE_PRIORITY_LEVELS; + dev_info->max_event_priority_levels = + DPAA_EVENT_MAX_EVENT_PRIORITY_LEVELS; + dev_info->max_event_ports = + DPAA_EVENT_MAX_EVENT_PORT; + dev_info->max_event_port_dequeue_depth = + DPAA_EVENT_MAX_PORT_DEQUEUE_DEPTH; + dev_info->max_event_port_enqueue_depth = + DPAA_EVENT_MAX_PORT_ENQUEUE_DEPTH; + /* + * TODO: Need to find out that how to fetch this info + * from kernel or somewhere else. 
+ */ + dev_info->max_num_events = + DPAA_EVENT_MAX_NUM_EVENTS; + dev_info->event_dev_cap = + RTE_EVENT_DEV_CAP_DISTRIBUTED_SCHED | + RTE_EVENT_DEV_CAP_BURST_MODE | + RTE_EVENT_DEV_CAP_MULTIPLE_QUEUE_PORT | + RTE_EVENT_DEV_CAP_NONSEQ_MODE; +} + +static int +dpaa_event_dev_configure(const struct rte_eventdev *dev) +{ + struct dpaa_eventdev *priv = dev->data->dev_private; + struct rte_event_dev_config *conf = &dev->data->dev_conf; + int ret, i; + uint32_t *ch_id; + + EVENTDEV_INIT_FUNC_TRACE(); + priv->dequeue_timeout_ns = conf->dequeue_timeout_ns; + priv->nb_events_limit = conf->nb_events_limit; + priv->nb_event_queues = conf->nb_event_queues; + priv->nb_event_ports = conf->nb_event_ports; + priv->nb_event_queue_flows = conf->nb_event_queue_flows; + priv->nb_event_port_dequeue_depth = conf->nb_event_port_dequeue_depth; + priv->nb_event_port_enqueue_depth = conf->nb_event_port_enqueue_depth; + priv->event_dev_cfg = conf->event_dev_cfg; + + ch_id = rte_malloc("dpaa-channels", + sizeof(uint32_t) * priv->nb_event_queues, + RTE_CACHE_LINE_SIZE); + if (ch_id == NULL) { + DPAA_EVENTDEV_ERR("Fail to allocate memory for dpaa channels\n"); + return -ENOMEM; + } + /* Create requested event queues within the given event device */ + ret = qman_alloc_pool_range(ch_id, priv->nb_event_queues, 1, 0); + if (ret < 0) { + DPAA_EVENTDEV_ERR("qman_alloc_pool_range %u, err =%d\n", + priv->nb_event_queues, ret); + rte_free(ch_id); + return ret; + } + for (i = 0; i < priv->nb_event_queues; i++) + priv->evq_info[i].ch_id = (u16)ch_id[i]; + + /* Lets prepare event ports */ + memset(&priv->ports[0], 0, + sizeof(struct dpaa_port) * priv->nb_event_ports); + + /* Check dequeue timeout method is per dequeue or global */ + if (priv->event_dev_cfg & RTE_EVENT_DEV_CFG_PER_DEQUEUE_TIMEOUT) { + /* + * Use timeout value as given in dequeue operation. + * So invalidating this timeout value. + */ + priv->dequeue_timeout_ns = 0; + + } else if (conf->dequeue_timeout_ns == 0) { + priv->dequeue_timeout_ns = DPAA_EVENT_PORT_DEQUEUE_TIMEOUT_NS; + } else { + priv->dequeue_timeout_ns = conf->dequeue_timeout_ns; + } + + for (i = 0; i < priv->nb_event_ports; i++) { + if (priv->intr_mode) { + priv->ports[i].timeout_us = + priv->dequeue_timeout_ns/1000; + } else { + uint64_t cycles_per_second; + + cycles_per_second = rte_get_timer_hz(); + priv->ports[i].timeout_us = + (priv->dequeue_timeout_ns * cycles_per_second) + / NS_PER_S; + } + } + + /* + * TODO: Currently portals are affined with threads. Maximum threads + * can be created equals to number of lcore. 
+ */ + rte_free(ch_id); + DPAA_EVENTDEV_INFO("Configured eventdev devid=%d", dev->data->dev_id); + + return 0; +} + +static int +dpaa_event_dev_start(struct rte_eventdev *dev) +{ + EVENTDEV_INIT_FUNC_TRACE(); + RTE_SET_USED(dev); + + return 0; +} + +static void +dpaa_event_dev_stop(struct rte_eventdev *dev) +{ + EVENTDEV_INIT_FUNC_TRACE(); + RTE_SET_USED(dev); +} + +static int +dpaa_event_dev_close(struct rte_eventdev *dev) +{ + EVENTDEV_INIT_FUNC_TRACE(); + RTE_SET_USED(dev); + + return 0; +} + +static void +dpaa_event_queue_def_conf(struct rte_eventdev *dev, uint8_t queue_id, + struct rte_event_queue_conf *queue_conf) +{ + EVENTDEV_INIT_FUNC_TRACE(); + + RTE_SET_USED(dev); + RTE_SET_USED(queue_id); + + memset(queue_conf, 0, sizeof(struct rte_event_queue_conf)); + queue_conf->nb_atomic_flows = DPAA_EVENT_QUEUE_ATOMIC_FLOWS; + queue_conf->schedule_type = RTE_SCHED_TYPE_PARALLEL; + queue_conf->priority = RTE_EVENT_DEV_PRIORITY_HIGHEST; +} + +static int +dpaa_event_queue_setup(struct rte_eventdev *dev, uint8_t queue_id, + const struct rte_event_queue_conf *queue_conf) +{ + struct dpaa_eventdev *priv = dev->data->dev_private; + struct dpaa_eventq *evq_info = &priv->evq_info[queue_id]; + + EVENTDEV_INIT_FUNC_TRACE(); + + switch (queue_conf->schedule_type) { + case RTE_SCHED_TYPE_PARALLEL: + case RTE_SCHED_TYPE_ATOMIC: + break; + case RTE_SCHED_TYPE_ORDERED: + DPAA_EVENTDEV_ERR("Schedule type is not supported."); + return -1; + } + evq_info->event_queue_cfg = queue_conf->event_queue_cfg; + evq_info->event_queue_id = queue_id; + + return 0; +} + +static void +dpaa_event_queue_release(struct rte_eventdev *dev, uint8_t queue_id) +{ + EVENTDEV_INIT_FUNC_TRACE(); + + RTE_SET_USED(dev); + RTE_SET_USED(queue_id); +} + +static void +dpaa_event_port_default_conf_get(struct rte_eventdev *dev, uint8_t port_id, + struct rte_event_port_conf *port_conf) +{ + EVENTDEV_INIT_FUNC_TRACE(); + + RTE_SET_USED(dev); + RTE_SET_USED(port_id); + + port_conf->new_event_threshold = DPAA_EVENT_MAX_NUM_EVENTS; + port_conf->dequeue_depth = DPAA_EVENT_MAX_PORT_DEQUEUE_DEPTH; + port_conf->enqueue_depth = DPAA_EVENT_MAX_PORT_ENQUEUE_DEPTH; +} + +static int +dpaa_event_port_setup(struct rte_eventdev *dev, uint8_t port_id, + const struct rte_event_port_conf *port_conf) +{ + struct dpaa_eventdev *eventdev = dev->data->dev_private; + + EVENTDEV_INIT_FUNC_TRACE(); + + RTE_SET_USED(port_conf); + dev->data->ports[port_id] = &eventdev->ports[port_id]; + + return 0; +} + +static void +dpaa_event_port_release(void *port) +{ + EVENTDEV_INIT_FUNC_TRACE(); + + RTE_SET_USED(port); +} + +static int +dpaa_event_port_link(struct rte_eventdev *dev, void *port, + const uint8_t queues[], const uint8_t priorities[], + uint16_t nb_links) +{ + struct dpaa_eventdev *priv = dev->data->dev_private; + struct dpaa_port *event_port = (struct dpaa_port *)port; + struct dpaa_eventq *event_queue; + uint8_t eventq_id; + int i; + + RTE_SET_USED(dev); + RTE_SET_USED(priorities); + + /* First check that input configuration are valid */ + for (i = 0; i < nb_links; i++) { + eventq_id = queues[i]; + event_queue = &priv->evq_info[eventq_id]; + if ((event_queue->event_queue_cfg + & RTE_EVENT_QUEUE_CFG_SINGLE_LINK) + && (event_queue->event_port)) { + return -EINVAL; + } + } + + for (i = 0; i < nb_links; i++) { + eventq_id = queues[i]; + event_queue = &priv->evq_info[eventq_id]; + event_port->evq_info[i].event_queue_id = eventq_id; + event_port->evq_info[i].ch_id = event_queue->ch_id; + event_queue->event_port = port; + } + + event_port->num_linked_evq = 
event_port->num_linked_evq + i; + + return (int)i; +} + +static int +dpaa_event_port_unlink(struct rte_eventdev *dev, void *port, + uint8_t queues[], uint16_t nb_links) +{ + int i; + uint8_t eventq_id; + struct dpaa_eventq *event_queue; + struct dpaa_eventdev *priv = dev->data->dev_private; + struct dpaa_port *event_port = (struct dpaa_port *)port; + + if (!event_port->num_linked_evq) + return nb_links; + + for (i = 0; i < nb_links; i++) { + eventq_id = queues[i]; + event_port->evq_info[eventq_id].event_queue_id = -1; + event_port->evq_info[eventq_id].ch_id = 0; + event_queue = &priv->evq_info[eventq_id]; + event_queue->event_port = NULL; + } + + if (event_port->num_linked_evq) + event_port->num_linked_evq = event_port->num_linked_evq - i; + + return (int)i; +} + +static int +dpaa_event_eth_rx_adapter_caps_get(const struct rte_eventdev *dev, + const struct rte_eth_dev *eth_dev, + uint32_t *caps) +{ + const char *ethdev_driver = eth_dev->device->driver->name; + + EVENTDEV_INIT_FUNC_TRACE(); + + RTE_SET_USED(dev); + + if (!strcmp(ethdev_driver, "net_dpaa")) + *caps = RTE_EVENT_ETH_RX_ADAPTER_DPAA_CAP; + else + *caps = RTE_EVENT_ETH_RX_ADAPTER_SW_CAP; + + return 0; +} + +static int +dpaa_event_eth_rx_adapter_queue_add( + const struct rte_eventdev *dev, + const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id, + const struct rte_event_eth_rx_adapter_queue_conf *queue_conf) +{ + struct dpaa_eventdev *eventdev = dev->data->dev_private; + uint8_t ev_qid = queue_conf->ev.queue_id; + u16 ch_id = eventdev->evq_info[ev_qid].ch_id; + struct dpaa_if *dpaa_intf = eth_dev->data->dev_private; + int ret, i; + + EVENTDEV_INIT_FUNC_TRACE(); + + if (rx_queue_id == -1) { + for (i = 0; i < dpaa_intf->nb_rx_queues; i++) { + ret = dpaa_eth_eventq_attach(eth_dev, i, ch_id, + queue_conf); + if (ret) { + DPAA_EVENTDEV_ERR( + "Event Queue attach failed:%d\n", ret); + goto detach_configured_queues; + } + } + return 0; + } + + ret = dpaa_eth_eventq_attach(eth_dev, rx_queue_id, ch_id, queue_conf); + if (ret) + DPAA_EVENTDEV_ERR("dpaa_eth_eventq_attach failed:%d\n", ret); + return ret; + +detach_configured_queues: + + for (i = (i - 1); i >= 0 ; i--) + dpaa_eth_eventq_detach(eth_dev, i); + + return ret; +} + +static int +dpaa_event_eth_rx_adapter_queue_del(const struct rte_eventdev *dev, + const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id) +{ + int ret, i; + struct dpaa_if *dpaa_intf = eth_dev->data->dev_private; + + EVENTDEV_INIT_FUNC_TRACE(); + + RTE_SET_USED(dev); + if (rx_queue_id == -1) { + for (i = 0; i < dpaa_intf->nb_rx_queues; i++) { + ret = dpaa_eth_eventq_detach(eth_dev, i); + if (ret) + DPAA_EVENTDEV_ERR( + "Event Queue detach failed:%d\n", ret); + } + + return 0; + } + + ret = dpaa_eth_eventq_detach(eth_dev, rx_queue_id); + if (ret) + DPAA_EVENTDEV_ERR("dpaa_eth_eventq_detach failed:%d\n", ret); + return ret; +} + +static int +dpaa_event_eth_rx_adapter_start(const struct rte_eventdev *dev, + const struct rte_eth_dev *eth_dev) +{ + EVENTDEV_INIT_FUNC_TRACE(); + + RTE_SET_USED(dev); + RTE_SET_USED(eth_dev); + + return 0; +} + +static int +dpaa_event_eth_rx_adapter_stop(const struct rte_eventdev *dev, + const struct rte_eth_dev *eth_dev) +{ + EVENTDEV_INIT_FUNC_TRACE(); + + RTE_SET_USED(dev); + RTE_SET_USED(eth_dev); + + return 0; +} + +static int +dpaa_eventdev_crypto_caps_get(const struct rte_eventdev *dev, + const struct rte_cryptodev *cdev, + uint32_t *caps) +{ + const char *name = cdev->data->name; + + EVENTDEV_INIT_FUNC_TRACE(); + + RTE_SET_USED(dev); + + if (!strncmp(name, "dpaa_sec-", 9)) + 
*caps = RTE_EVENT_CRYPTO_ADAPTER_DPAA_CAP; + else + return -1; + + return 0; +} + +static int +dpaa_eventdev_crypto_queue_add_all(const struct rte_eventdev *dev, + const struct rte_cryptodev *cryptodev, + const struct rte_event *ev) +{ + struct dpaa_eventdev *priv = dev->data->dev_private; + uint8_t ev_qid = ev->queue_id; + u16 ch_id = priv->evq_info[ev_qid].ch_id; + int i, ret; + + EVENTDEV_INIT_FUNC_TRACE(); + + for (i = 0; i < cryptodev->data->nb_queue_pairs; i++) { + ret = dpaa_sec_eventq_attach(cryptodev, i, + ch_id, ev); + if (ret) { + DPAA_EVENTDEV_ERR("dpaa_sec_eventq_attach failed: ret %d\n", + ret); + goto fail; + } + } + return 0; +fail: + for (i = (i - 1); i >= 0 ; i--) + dpaa_sec_eventq_detach(cryptodev, i); + + return ret; +} + +static int +dpaa_eventdev_crypto_queue_add(const struct rte_eventdev *dev, + const struct rte_cryptodev *cryptodev, + int32_t rx_queue_id, + const struct rte_event *ev) +{ + struct dpaa_eventdev *priv = dev->data->dev_private; + uint8_t ev_qid = ev->queue_id; + u16 ch_id = priv->evq_info[ev_qid].ch_id; + int ret; + + EVENTDEV_INIT_FUNC_TRACE(); + + if (rx_queue_id == -1) + return dpaa_eventdev_crypto_queue_add_all(dev, + cryptodev, ev); + + ret = dpaa_sec_eventq_attach(cryptodev, rx_queue_id, + ch_id, ev); + if (ret) { + DPAA_EVENTDEV_ERR( + "dpaa_sec_eventq_attach failed: ret: %d\n", ret); + return ret; + } + return 0; +} + +static int +dpaa_eventdev_crypto_queue_del_all(const struct rte_eventdev *dev, + const struct rte_cryptodev *cdev) +{ + int i, ret; + + EVENTDEV_INIT_FUNC_TRACE(); + + RTE_SET_USED(dev); + + for (i = 0; i < cdev->data->nb_queue_pairs; i++) { + ret = dpaa_sec_eventq_detach(cdev, i); + if (ret) { + DPAA_EVENTDEV_ERR( + "dpaa_sec_eventq_detach failed:ret %d\n", ret); + return ret; + } + } + + return 0; +} + +static int +dpaa_eventdev_crypto_queue_del(const struct rte_eventdev *dev, + const struct rte_cryptodev *cryptodev, + int32_t rx_queue_id) +{ + int ret; + + EVENTDEV_INIT_FUNC_TRACE(); + + if (rx_queue_id == -1) + return dpaa_eventdev_crypto_queue_del_all(dev, cryptodev); + + ret = dpaa_sec_eventq_detach(cryptodev, rx_queue_id); + if (ret) { + DPAA_EVENTDEV_ERR( + "dpaa_sec_eventq_detach failed: ret: %d\n", ret); + return ret; + } + + return 0; +} + +static int +dpaa_eventdev_crypto_start(const struct rte_eventdev *dev, + const struct rte_cryptodev *cryptodev) +{ + EVENTDEV_INIT_FUNC_TRACE(); + + RTE_SET_USED(dev); + RTE_SET_USED(cryptodev); + + return 0; +} + +static int +dpaa_eventdev_crypto_stop(const struct rte_eventdev *dev, + const struct rte_cryptodev *cryptodev) +{ + EVENTDEV_INIT_FUNC_TRACE(); + + RTE_SET_USED(dev); + RTE_SET_USED(cryptodev); + + return 0; +} + +static int +dpaa_eventdev_tx_adapter_create(uint8_t id, + const struct rte_eventdev *dev) +{ + RTE_SET_USED(id); + RTE_SET_USED(dev); + + /* Nothing to do. Simply return. 
*/ + return 0; +} + +static int +dpaa_eventdev_tx_adapter_caps(const struct rte_eventdev *dev, + const struct rte_eth_dev *eth_dev, + uint32_t *caps) +{ + RTE_SET_USED(dev); + RTE_SET_USED(eth_dev); + + *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT; + return 0; +} + +static uint16_t +dpaa_eventdev_txa_enqueue_same_dest(void *port, + struct rte_event ev[], + uint16_t nb_events) +{ + struct rte_mbuf *m[DPAA_EVENT_MAX_PORT_ENQUEUE_DEPTH], *m0; + uint8_t qid, i; + + RTE_SET_USED(port); + + m0 = (struct rte_mbuf *)ev[0].mbuf; + qid = rte_event_eth_tx_adapter_txq_get(m0); + + for (i = 0; i < nb_events; i++) + m[i] = (struct rte_mbuf *)ev[i].mbuf; + + return rte_eth_tx_burst(m0->port, qid, m, nb_events); +} + +static uint16_t +dpaa_eventdev_txa_enqueue(void *port, + struct rte_event ev[], + uint16_t nb_events) +{ + struct rte_mbuf *m = (struct rte_mbuf *)ev[0].mbuf; + uint8_t qid, i; + + RTE_SET_USED(port); + + for (i = 0; i < nb_events; i++) { + qid = rte_event_eth_tx_adapter_txq_get(m); + rte_eth_tx_burst(m->port, qid, &m, 1); + } + + return nb_events; +} + +static struct rte_eventdev_ops dpaa_eventdev_ops = { + .dev_infos_get = dpaa_event_dev_info_get, + .dev_configure = dpaa_event_dev_configure, + .dev_start = dpaa_event_dev_start, + .dev_stop = dpaa_event_dev_stop, + .dev_close = dpaa_event_dev_close, + .queue_def_conf = dpaa_event_queue_def_conf, + .queue_setup = dpaa_event_queue_setup, + .queue_release = dpaa_event_queue_release, + .port_def_conf = dpaa_event_port_default_conf_get, + .port_setup = dpaa_event_port_setup, + .port_release = dpaa_event_port_release, + .port_link = dpaa_event_port_link, + .port_unlink = dpaa_event_port_unlink, + .timeout_ticks = dpaa_event_dequeue_timeout_ticks, + .eth_rx_adapter_caps_get = dpaa_event_eth_rx_adapter_caps_get, + .eth_rx_adapter_queue_add = dpaa_event_eth_rx_adapter_queue_add, + .eth_rx_adapter_queue_del = dpaa_event_eth_rx_adapter_queue_del, + .eth_rx_adapter_start = dpaa_event_eth_rx_adapter_start, + .eth_rx_adapter_stop = dpaa_event_eth_rx_adapter_stop, + .eth_tx_adapter_caps_get = dpaa_eventdev_tx_adapter_caps, + .eth_tx_adapter_create = dpaa_eventdev_tx_adapter_create, + .crypto_adapter_caps_get = dpaa_eventdev_crypto_caps_get, + .crypto_adapter_queue_pair_add = dpaa_eventdev_crypto_queue_add, + .crypto_adapter_queue_pair_del = dpaa_eventdev_crypto_queue_del, + .crypto_adapter_start = dpaa_eventdev_crypto_start, + .crypto_adapter_stop = dpaa_eventdev_crypto_stop, +}; + +static int flag_check_handler(__rte_unused const char *key, + const char *value, __rte_unused void *opaque) +{ + if (strcmp(value, "1")) + return -1; + + return 0; +} + +static int +dpaa_event_check_flags(const char *params) +{ + struct rte_kvargs *kvlist; + + if (params == NULL || params[0] == '\0') + return 0; + + kvlist = rte_kvargs_parse(params, NULL); + if (kvlist == NULL) + return 0; + + if (!rte_kvargs_count(kvlist, DISABLE_INTR_MODE)) { + rte_kvargs_free(kvlist); + return 0; + } + /* INTR MODE is disabled when there's key-value pair: disable_intr = 1*/ + if (rte_kvargs_process(kvlist, DISABLE_INTR_MODE, + flag_check_handler, NULL) < 0) { + rte_kvargs_free(kvlist); + return 0; + } + rte_kvargs_free(kvlist); + + return 1; +} + +static int +dpaa_event_dev_create(const char *name, const char *params) +{ + struct rte_eventdev *eventdev; + struct dpaa_eventdev *priv; + + eventdev = rte_event_pmd_vdev_init(name, + sizeof(struct dpaa_eventdev), + rte_socket_id()); + if (eventdev == NULL) { + DPAA_EVENTDEV_ERR("Failed to create eventdev vdev %s", name); + goto fail; + } + 
priv = eventdev->data->dev_private; + + eventdev->dev_ops = &dpaa_eventdev_ops; + eventdev->enqueue = dpaa_event_enqueue; + eventdev->enqueue_burst = dpaa_event_enqueue_burst; + + if (dpaa_event_check_flags(params)) { + eventdev->dequeue = dpaa_event_dequeue; + eventdev->dequeue_burst = dpaa_event_dequeue_burst; + } else { + priv->intr_mode = 1; + eventdev->dev_ops->timeout_ticks = + dpaa_event_dequeue_timeout_ticks_intr; + eventdev->dequeue = dpaa_event_dequeue_intr; + eventdev->dequeue_burst = dpaa_event_dequeue_burst_intr; + } + eventdev->txa_enqueue = dpaa_eventdev_txa_enqueue; + eventdev->txa_enqueue_same_dest = dpaa_eventdev_txa_enqueue_same_dest; + + RTE_LOG(INFO, PMD, "%s eventdev added", name); + + /* For secondary processes, the primary has done all the work */ + if (rte_eal_process_type() != RTE_PROC_PRIMARY) + return 0; + + priv->max_event_queues = DPAA_EVENT_MAX_QUEUES; + + return 0; +fail: + return -EFAULT; +} + +static int +dpaa_event_dev_probe(struct rte_vdev_device *vdev) +{ + const char *name; + const char *params; + + name = rte_vdev_device_name(vdev); + DPAA_EVENTDEV_INFO("Initializing %s", name); + + params = rte_vdev_device_args(vdev); + + return dpaa_event_dev_create(name, params); +} + +static int +dpaa_event_dev_remove(struct rte_vdev_device *vdev) +{ + const char *name; + + name = rte_vdev_device_name(vdev); + DPAA_EVENTDEV_INFO("Closing %s", name); + + return rte_event_pmd_vdev_uninit(name); +} + +static struct rte_vdev_driver vdev_eventdev_dpaa_pmd = { + .probe = dpaa_event_dev_probe, + .remove = dpaa_event_dev_remove +}; + +RTE_PMD_REGISTER_VDEV(EVENTDEV_NAME_DPAA_PMD, vdev_eventdev_dpaa_pmd); +RTE_PMD_REGISTER_PARAM_STRING(EVENTDEV_NAME_DPAA_PMD, + DISABLE_INTR_MODE "=<int>"); +RTE_INIT(dpaa_event_init_log) +{ + dpaa_logtype_eventdev = rte_log_register("pmd.event.dpaa"); + if (dpaa_logtype_eventdev >= 0) + rte_log_set_level(dpaa_logtype_eventdev, RTE_LOG_NOTICE); +} diff --git a/src/spdk/dpdk/drivers/event/dpaa/dpaa_eventdev.h b/src/spdk/dpdk/drivers/event/dpaa/dpaa_eventdev.h new file mode 100644 index 000000000..0b3da83a4 --- /dev/null +++ b/src/spdk/dpdk/drivers/event/dpaa/dpaa_eventdev.h @@ -0,0 +1,95 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2017 NXP + */ + +#ifndef __DPAA_EVENTDEV_H__ +#define __DPAA_EVENTDEV_H__ + +#include <rte_eventdev_pmd.h> +#include <rte_eventdev_pmd_vdev.h> +#include <rte_atomic.h> +#include <rte_per_lcore.h> + +#define EVENTDEV_NAME_DPAA_PMD event_dpaa1 + +#define DPAA_EVENT_MAX_PORTS 4 +#define DPAA_EVENT_MAX_QUEUES 8 +#define DPAA_EVENT_MIN_DEQUEUE_TIMEOUT 1 +#define DPAA_EVENT_MAX_DEQUEUE_TIMEOUT (UINT32_MAX - 1) +#define DPAA_EVENT_MAX_QUEUE_FLOWS 2048 +#define DPAA_EVENT_MAX_QUEUE_PRIORITY_LEVELS 8 +#define DPAA_EVENT_MAX_EVENT_PRIORITY_LEVELS 0 +#define DPAA_EVENT_MAX_EVENT_PORT RTE_MIN(RTE_MAX_LCORE, INT8_MAX) +#define DPAA_EVENT_MAX_PORT_DEQUEUE_DEPTH 8 +#define DPAA_EVENT_PORT_DEQUEUE_TIMEOUT_NS 100000UL +#define DPAA_EVENT_PORT_DEQUEUE_TIMEOUT_INVALID ((uint64_t)-1) +#define DPAA_EVENT_MAX_PORT_ENQUEUE_DEPTH 1 +#define DPAA_EVENT_MAX_NUM_EVENTS (INT32_MAX - 1) + +#define DPAA_EVENT_DEV_CAP \ +do { \ + RTE_EVENT_DEV_CAP_DISTRIBUTED_SCHED | \ + RTE_EVENT_DEV_CAP_BURST_MODE; \ +} while (0) + +#define DPAA_EVENT_QUEUE_ATOMIC_FLOWS 2048 +#define DPAA_EVENT_QUEUE_ORDER_SEQUENCES 2048 + +#define RTE_EVENT_ETH_RX_ADAPTER_DPAA_CAP \ + (RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT | \ + RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ | \ + RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID) + +#define 
RTE_EVENT_CRYPTO_ADAPTER_DPAA_CAP \ + (RTE_EVENT_CRYPTO_ADAPTER_CAP_INTERNAL_PORT_OP_NEW | \ + RTE_EVENT_CRYPTO_ADAPTER_CAP_INTERNAL_PORT_QP_EV_BIND | \ + RTE_EVENT_CRYPTO_ADAPTER_CAP_SESSION_PRIVATE_DATA) + +struct dpaa_eventq { + /* Channel Id */ + uint16_t ch_id; + /* Configuration provided by the user */ + uint32_t event_queue_cfg; + uint32_t event_queue_id; + /* Event port */ + void *event_port; +}; + +struct dpaa_port { + struct dpaa_eventq evq_info[DPAA_EVENT_MAX_QUEUES]; + uint8_t num_linked_evq; + uint8_t is_port_linked; + uint64_t timeout_us; +}; + +struct dpaa_eventdev { + struct dpaa_eventq evq_info[DPAA_EVENT_MAX_QUEUES]; + struct dpaa_port ports[DPAA_EVENT_MAX_PORTS]; + uint32_t dequeue_timeout_ns; + uint32_t nb_events_limit; + uint8_t max_event_queues; + uint8_t nb_event_queues; + uint8_t nb_event_ports; + uint8_t intr_mode; + uint32_t nb_event_queue_flows; + uint32_t nb_event_port_dequeue_depth; + uint32_t nb_event_port_enqueue_depth; + uint32_t event_dev_cfg; +}; + +#define DPAA_EVENTDEV_LOG(level, fmt, args...) \ + rte_log(RTE_LOG_ ## level, dpaa_logtype_eventdev, "%s(): " fmt "\n", \ + __func__, ##args) + +#define EVENTDEV_INIT_FUNC_TRACE() DPAA_EVENTDEV_LOG(DEBUG, " >>") + +#define DPAA_EVENTDEV_DEBUG(fmt, args...) \ + DPAA_EVENTDEV_LOG(DEBUG, fmt, ## args) +#define DPAA_EVENTDEV_ERR(fmt, args...) \ + DPAA_EVENTDEV_LOG(ERR, fmt, ## args) +#define DPAA_EVENTDEV_INFO(fmt, args...) \ + DPAA_EVENTDEV_LOG(INFO, fmt, ## args) +#define DPAA_EVENTDEV_WARN(fmt, args...) \ + DPAA_EVENTDEV_LOG(WARNING, fmt, ## args) + +#endif /* __DPAA_EVENTDEV_H__ */ diff --git a/src/spdk/dpdk/drivers/event/dpaa/meson.build b/src/spdk/dpdk/drivers/event/dpaa/meson.build new file mode 100644 index 000000000..2f761f8e5 --- /dev/null +++ b/src/spdk/dpdk/drivers/event/dpaa/meson.build @@ -0,0 +1,11 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2018 NXP + +if not is_linux + build = false + reason = 'only supported on linux' +endif +deps += ['pmd_dpaa', 'pmd_dpaa_sec'] +sources = files('dpaa_eventdev.c') + +includes += include_directories('../../crypto/dpaa_sec/') diff --git a/src/spdk/dpdk/drivers/event/dpaa/rte_pmd_dpaa_event_version.map b/src/spdk/dpdk/drivers/event/dpaa/rte_pmd_dpaa_event_version.map new file mode 100644 index 000000000..f9f17e4f6 --- /dev/null +++ b/src/spdk/dpdk/drivers/event/dpaa/rte_pmd_dpaa_event_version.map @@ -0,0 +1,3 @@ +DPDK_20.0 { + local: *; +}; diff --git a/src/spdk/dpdk/drivers/event/dpaa2/Makefile b/src/spdk/dpdk/drivers/event/dpaa2/Makefile new file mode 100644 index 000000000..75cf197c5 --- /dev/null +++ b/src/spdk/dpdk/drivers/event/dpaa2/Makefile @@ -0,0 +1,41 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2017,2019 NXP +# + +include $(RTE_SDK)/mk/rte.vars.mk + +# +# library name +# +LIB = librte_pmd_dpaa2_event.a + +CFLAGS += $(WERROR_FLAGS) + +CFLAGS += -I$(RTE_SDK)/drivers/bus/fslmc +CFLAGS += -I$(RTE_SDK)/drivers/bus/fslmc/qbman/include +CFLAGS += -I$(RTE_SDK)/drivers/bus/fslmc/mc +CFLAGS += -I$(RTE_SDK)/drivers/bus/fslmc/portal +CFLAGS += -I$(RTE_SDK)/drivers/mempool/dpaa2 +CFLAGS += -I$(RTE_SDK)/drivers/event/dpaa2 +CFLAGS += -I$(RTE_SDK)/drivers/common/dpaax +LDLIBS += -lrte_eal -lrte_eventdev +LDLIBS += -lrte_common_dpaax +LDLIBS += -lrte_bus_fslmc -lrte_mempool_dpaa2 -lrte_pmd_dpaa2 +LDLIBS += -lrte_bus_vdev -lrte_mempool -lrte_mbuf -lrte_ethdev +CFLAGS += -I$(RTE_SDK)/drivers/net/dpaa2 +CFLAGS += -I$(RTE_SDK)/drivers/net/dpaa2/mc + +LDLIBS += -lrte_pmd_dpaa2_sec +CFLAGS += -I$(RTE_SDK)/drivers/crypto/dpaa2_sec + +# versioning export 
map +EXPORT_MAP := rte_pmd_dpaa2_event_version.map + +# +# all source are stored in SRCS-y +# +SRCS-$(CONFIG_RTE_LIBRTE_PMD_DPAA2_EVENTDEV) += dpaa2_hw_dpcon.c +SRCS-$(CONFIG_RTE_LIBRTE_PMD_DPAA2_EVENTDEV) += dpaa2_eventdev.c +SRCS-$(CONFIG_RTE_LIBRTE_PMD_DPAA2_EVENTDEV) += dpaa2_eventdev_selftest.c + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/src/spdk/dpdk/drivers/event/dpaa2/dpaa2_eventdev.c b/src/spdk/dpdk/drivers/event/dpaa2/dpaa2_eventdev.c new file mode 100644 index 000000000..a196ad4c6 --- /dev/null +++ b/src/spdk/dpdk/drivers/event/dpaa2/dpaa2_eventdev.c @@ -0,0 +1,1214 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2017,2019 NXP + */ + +#include <assert.h> +#include <stdio.h> +#include <stdbool.h> +#include <errno.h> +#include <stdint.h> +#include <string.h> +#include <sys/epoll.h> + +#include <rte_atomic.h> +#include <rte_byteorder.h> +#include <rte_common.h> +#include <rte_debug.h> +#include <rte_dev.h> +#include <rte_eal.h> +#include <rte_fslmc.h> +#include <rte_lcore.h> +#include <rte_log.h> +#include <rte_malloc.h> +#include <rte_memcpy.h> +#include <rte_memory.h> +#include <rte_pci.h> +#include <rte_bus_vdev.h> +#include <rte_ethdev_driver.h> +#include <rte_cryptodev.h> +#include <rte_event_eth_rx_adapter.h> +#include <rte_event_eth_tx_adapter.h> + +#include <fslmc_vfio.h> +#include <dpaa2_hw_pvt.h> +#include <dpaa2_hw_mempool.h> +#include <dpaa2_hw_dpio.h> +#include <dpaa2_ethdev.h> +#include <dpaa2_sec_event.h> +#include "dpaa2_eventdev.h" +#include "dpaa2_eventdev_logs.h" +#include <portal/dpaa2_hw_pvt.h> +#include <mc/fsl_dpci.h> + +/* Clarifications + * Evendev = SoC Instance + * Eventport = DPIO Instance + * Eventqueue = DPCON Instance + * 1 Eventdev can have N Eventqueue + * Soft Event Flow is DPCI Instance + */ + +/* Dynamic logging identified for mempool */ +int dpaa2_logtype_event; +#define DPAA2_EV_TX_RETRY_COUNT 10000 + +static uint16_t +dpaa2_eventdev_enqueue_burst(void *port, const struct rte_event ev[], + uint16_t nb_events) +{ + + struct dpaa2_port *dpaa2_portal = port; + struct dpaa2_dpio_dev *dpio_dev; + uint32_t queue_id = ev[0].queue_id; + struct dpaa2_eventq *evq_info; + uint32_t fqid, retry_count; + struct qbman_swp *swp; + struct qbman_fd fd_arr[MAX_TX_RING_SLOTS]; + uint32_t loop, frames_to_send; + struct qbman_eq_desc eqdesc[MAX_TX_RING_SLOTS]; + uint16_t num_tx = 0; + int i, n, ret; + uint8_t channel_index; + + if (unlikely(!DPAA2_PER_LCORE_DPIO)) { + /* Affine current thread context to a qman portal */ + ret = dpaa2_affine_qbman_swp(); + if (ret < 0) { + DPAA2_EVENTDEV_ERR( + "Failed to allocate IO portal, tid: %d\n", + rte_gettid()); + return 0; + } + } + /* todo - dpaa2_portal shall have dpio_dev - no per thread variable */ + dpio_dev = DPAA2_PER_LCORE_DPIO; + swp = DPAA2_PER_LCORE_PORTAL; + + if (likely(dpaa2_portal->is_port_linked)) + goto skip_linking; + + /* Create mapping between portal and channel to receive packets */ + for (i = 0; i < DPAA2_EVENT_MAX_QUEUES; i++) { + evq_info = &dpaa2_portal->evq_info[i]; + if (!evq_info->event_port) + continue; + + ret = dpio_add_static_dequeue_channel(dpio_dev->dpio, + CMD_PRI_LOW, + dpio_dev->token, + evq_info->dpcon->dpcon_id, + &channel_index); + if (ret < 0) { + DPAA2_EVENTDEV_ERR( + "Static dequeue config failed: err(%d)", ret); + goto err; + } + + qbman_swp_push_set(swp, channel_index, 1); + evq_info->dpcon->channel_index = channel_index; + } + dpaa2_portal->is_port_linked = true; + +skip_linking: + evq_info = &dpaa2_portal->evq_info[queue_id]; + + while (nb_events) { + 
frames_to_send = (nb_events > dpaa2_eqcr_size) ? + dpaa2_eqcr_size : nb_events; + + for (loop = 0; loop < frames_to_send; loop++) { + const struct rte_event *event = &ev[num_tx + loop]; + + if (event->sched_type != RTE_SCHED_TYPE_ATOMIC) + fqid = evq_info->dpci->rx_queue[ + DPAA2_EVENT_DPCI_PARALLEL_QUEUE].fqid; + else + fqid = evq_info->dpci->rx_queue[ + DPAA2_EVENT_DPCI_ATOMIC_QUEUE].fqid; + + /* Prepare enqueue descriptor */ + qbman_eq_desc_clear(&eqdesc[loop]); + qbman_eq_desc_set_fq(&eqdesc[loop], fqid); + qbman_eq_desc_set_no_orp(&eqdesc[loop], 0); + qbman_eq_desc_set_response(&eqdesc[loop], 0, 0); + + if (event->sched_type == RTE_SCHED_TYPE_ATOMIC + && event->mbuf->seqn) { + uint8_t dqrr_index = event->mbuf->seqn - 1; + + qbman_eq_desc_set_dca(&eqdesc[loop], 1, + dqrr_index, 0); + DPAA2_PER_LCORE_DQRR_SIZE--; + DPAA2_PER_LCORE_DQRR_HELD &= ~(1 << dqrr_index); + } + + memset(&fd_arr[loop], 0, sizeof(struct qbman_fd)); + + /* + * todo - need to align with hw context data + * to avoid copy + */ + struct rte_event *ev_temp = rte_malloc(NULL, + sizeof(struct rte_event), 0); + + if (!ev_temp) { + if (!loop) + return num_tx; + frames_to_send = loop; + DPAA2_EVENTDEV_ERR( + "Unable to allocate event object"); + goto send_partial; + } + rte_memcpy(ev_temp, event, sizeof(struct rte_event)); + DPAA2_SET_FD_ADDR((&fd_arr[loop]), (size_t)ev_temp); + DPAA2_SET_FD_LEN((&fd_arr[loop]), + sizeof(struct rte_event)); + } +send_partial: + loop = 0; + retry_count = 0; + while (loop < frames_to_send) { + ret = qbman_swp_enqueue_multiple_desc(swp, + &eqdesc[loop], &fd_arr[loop], + frames_to_send - loop); + if (unlikely(ret < 0)) { + retry_count++; + if (retry_count > DPAA2_EV_TX_RETRY_COUNT) { + num_tx += loop; + nb_events -= loop; + return num_tx + loop; + } + } else { + loop += ret; + retry_count = 0; + } + } + num_tx += loop; + nb_events -= loop; + } + + return num_tx; +err: + for (n = 0; n < i; n++) { + evq_info = &dpaa2_portal->evq_info[n]; + if (!evq_info->event_port) + continue; + qbman_swp_push_set(swp, evq_info->dpcon->channel_index, 0); + dpio_remove_static_dequeue_channel(dpio_dev->dpio, 0, + dpio_dev->token, + evq_info->dpcon->dpcon_id); + } + return 0; + +} + +static uint16_t +dpaa2_eventdev_enqueue(void *port, const struct rte_event *ev) +{ + return dpaa2_eventdev_enqueue_burst(port, ev, 1); +} + +static void dpaa2_eventdev_dequeue_wait(uint64_t timeout_ticks) +{ + struct epoll_event epoll_ev; + + qbman_swp_interrupt_clear_status(DPAA2_PER_LCORE_PORTAL, + QBMAN_SWP_INTERRUPT_DQRI); + + epoll_wait(DPAA2_PER_LCORE_DPIO->epoll_fd, + &epoll_ev, 1, timeout_ticks); +} + +static void dpaa2_eventdev_process_parallel(struct qbman_swp *swp, + const struct qbman_fd *fd, + const struct qbman_result *dq, + struct dpaa2_queue *rxq, + struct rte_event *ev) +{ + struct rte_event *ev_temp = + (struct rte_event *)(size_t)DPAA2_GET_FD_ADDR(fd); + + RTE_SET_USED(rxq); + + rte_memcpy(ev, ev_temp, sizeof(struct rte_event)); + rte_free(ev_temp); + + qbman_swp_dqrr_consume(swp, dq); +} + +static void dpaa2_eventdev_process_atomic(struct qbman_swp *swp, + const struct qbman_fd *fd, + const struct qbman_result *dq, + struct dpaa2_queue *rxq, + struct rte_event *ev) +{ + struct rte_event *ev_temp = + (struct rte_event *)(size_t)DPAA2_GET_FD_ADDR(fd); + uint8_t dqrr_index = qbman_get_dqrr_idx(dq); + + RTE_SET_USED(swp); + RTE_SET_USED(rxq); + + rte_memcpy(ev, ev_temp, sizeof(struct rte_event)); + rte_free(ev_temp); + ev->mbuf->seqn = dqrr_index + 1; + DPAA2_PER_LCORE_DQRR_SIZE++; + DPAA2_PER_LCORE_DQRR_HELD |= 1 << 
dqrr_index; + DPAA2_PER_LCORE_DQRR_MBUF(dqrr_index) = ev->mbuf; +} + +static uint16_t +dpaa2_eventdev_dequeue_burst(void *port, struct rte_event ev[], + uint16_t nb_events, uint64_t timeout_ticks) +{ + const struct qbman_result *dq; + struct dpaa2_dpio_dev *dpio_dev = NULL; + struct dpaa2_port *dpaa2_portal = port; + struct dpaa2_eventq *evq_info; + struct qbman_swp *swp; + const struct qbman_fd *fd; + struct dpaa2_queue *rxq; + int num_pkts = 0, ret, i = 0, n; + uint8_t channel_index; + + if (unlikely(!DPAA2_PER_LCORE_DPIO)) { + /* Affine current thread context to a qman portal */ + ret = dpaa2_affine_qbman_swp(); + if (ret < 0) { + DPAA2_EVENTDEV_ERR( + "Failed to allocate IO portal, tid: %d\n", + rte_gettid()); + return 0; + } + } + + dpio_dev = DPAA2_PER_LCORE_DPIO; + swp = DPAA2_PER_LCORE_PORTAL; + + if (likely(dpaa2_portal->is_port_linked)) + goto skip_linking; + + /* Create mapping between portal and channel to receive packets */ + for (i = 0; i < DPAA2_EVENT_MAX_QUEUES; i++) { + evq_info = &dpaa2_portal->evq_info[i]; + if (!evq_info->event_port) + continue; + + ret = dpio_add_static_dequeue_channel(dpio_dev->dpio, + CMD_PRI_LOW, + dpio_dev->token, + evq_info->dpcon->dpcon_id, + &channel_index); + if (ret < 0) { + DPAA2_EVENTDEV_ERR( + "Static dequeue config failed: err(%d)", ret); + goto err; + } + + qbman_swp_push_set(swp, channel_index, 1); + evq_info->dpcon->channel_index = channel_index; + } + dpaa2_portal->is_port_linked = true; + +skip_linking: + /* Check if there are atomic contexts to be released */ + while (DPAA2_PER_LCORE_DQRR_SIZE) { + if (DPAA2_PER_LCORE_DQRR_HELD & (1 << i)) { + qbman_swp_dqrr_idx_consume(swp, i); + DPAA2_PER_LCORE_DQRR_SIZE--; + DPAA2_PER_LCORE_DQRR_MBUF(i)->seqn = + DPAA2_INVALID_MBUF_SEQN; + } + i++; + } + DPAA2_PER_LCORE_DQRR_HELD = 0; + + do { + dq = qbman_swp_dqrr_next(swp); + if (!dq) { + if (!num_pkts && timeout_ticks) { + dpaa2_eventdev_dequeue_wait(timeout_ticks); + timeout_ticks = 0; + continue; + } + return num_pkts; + } + qbman_swp_prefetch_dqrr_next(swp); + + fd = qbman_result_DQ_fd(dq); + rxq = (struct dpaa2_queue *)(size_t)qbman_result_DQ_fqd_ctx(dq); + if (rxq) { + rxq->cb(swp, fd, dq, rxq, &ev[num_pkts]); + } else { + qbman_swp_dqrr_consume(swp, dq); + DPAA2_EVENTDEV_ERR("Null Return VQ received"); + return 0; + } + + num_pkts++; + } while (num_pkts < nb_events); + + return num_pkts; +err: + for (n = 0; n < i; n++) { + evq_info = &dpaa2_portal->evq_info[n]; + if (!evq_info->event_port) + continue; + + qbman_swp_push_set(swp, evq_info->dpcon->channel_index, 0); + dpio_remove_static_dequeue_channel(dpio_dev->dpio, 0, + dpio_dev->token, + evq_info->dpcon->dpcon_id); + } + return 0; +} + +static uint16_t +dpaa2_eventdev_dequeue(void *port, struct rte_event *ev, + uint64_t timeout_ticks) +{ + return dpaa2_eventdev_dequeue_burst(port, ev, 1, timeout_ticks); +} + +static void +dpaa2_eventdev_info_get(struct rte_eventdev *dev, + struct rte_event_dev_info *dev_info) +{ + struct dpaa2_eventdev *priv = dev->data->dev_private; + + EVENTDEV_INIT_FUNC_TRACE(); + + RTE_SET_USED(dev); + + memset(dev_info, 0, sizeof(struct rte_event_dev_info)); + dev_info->min_dequeue_timeout_ns = + DPAA2_EVENT_MIN_DEQUEUE_TIMEOUT; + dev_info->max_dequeue_timeout_ns = + DPAA2_EVENT_MAX_DEQUEUE_TIMEOUT; + dev_info->dequeue_timeout_ns = + DPAA2_EVENT_PORT_DEQUEUE_TIMEOUT_NS; + dev_info->max_event_queues = priv->max_event_queues; + dev_info->max_event_queue_flows = + DPAA2_EVENT_MAX_QUEUE_FLOWS; + dev_info->max_event_queue_priority_levels = + 
DPAA2_EVENT_MAX_QUEUE_PRIORITY_LEVELS; + dev_info->max_event_priority_levels = + DPAA2_EVENT_MAX_EVENT_PRIORITY_LEVELS; + dev_info->max_event_ports = rte_fslmc_get_device_count(DPAA2_IO); + /* we only support dpio up to number of cores */ + if (dev_info->max_event_ports > rte_lcore_count()) + dev_info->max_event_ports = rte_lcore_count(); + dev_info->max_event_port_dequeue_depth = + DPAA2_EVENT_MAX_PORT_DEQUEUE_DEPTH; + dev_info->max_event_port_enqueue_depth = + DPAA2_EVENT_MAX_PORT_ENQUEUE_DEPTH; + dev_info->max_num_events = DPAA2_EVENT_MAX_NUM_EVENTS; + dev_info->event_dev_cap = RTE_EVENT_DEV_CAP_DISTRIBUTED_SCHED | + RTE_EVENT_DEV_CAP_BURST_MODE| + RTE_EVENT_DEV_CAP_RUNTIME_PORT_LINK | + RTE_EVENT_DEV_CAP_MULTIPLE_QUEUE_PORT | + RTE_EVENT_DEV_CAP_NONSEQ_MODE; + +} + +static int +dpaa2_eventdev_configure(const struct rte_eventdev *dev) +{ + struct dpaa2_eventdev *priv = dev->data->dev_private; + struct rte_event_dev_config *conf = &dev->data->dev_conf; + + EVENTDEV_INIT_FUNC_TRACE(); + + priv->nb_event_queues = conf->nb_event_queues; + priv->nb_event_ports = conf->nb_event_ports; + priv->nb_event_queue_flows = conf->nb_event_queue_flows; + priv->nb_event_port_dequeue_depth = conf->nb_event_port_dequeue_depth; + priv->nb_event_port_enqueue_depth = conf->nb_event_port_enqueue_depth; + priv->event_dev_cfg = conf->event_dev_cfg; + + /* Check dequeue timeout method is per dequeue or global */ + if (priv->event_dev_cfg & RTE_EVENT_DEV_CFG_PER_DEQUEUE_TIMEOUT) { + /* + * Use timeout value as given in dequeue operation. + * So invalidating this timeout value. + */ + priv->dequeue_timeout_ns = 0; + + } else if (conf->dequeue_timeout_ns == 0) { + priv->dequeue_timeout_ns = DPAA2_EVENT_PORT_DEQUEUE_TIMEOUT_NS; + } else { + priv->dequeue_timeout_ns = conf->dequeue_timeout_ns; + } + + DPAA2_EVENTDEV_DEBUG("Configured eventdev devid=%d", + dev->data->dev_id); + return 0; +} + +static int +dpaa2_eventdev_start(struct rte_eventdev *dev) +{ + EVENTDEV_INIT_FUNC_TRACE(); + + RTE_SET_USED(dev); + + return 0; +} + +static void +dpaa2_eventdev_stop(struct rte_eventdev *dev) +{ + EVENTDEV_INIT_FUNC_TRACE(); + + RTE_SET_USED(dev); +} + +static int +dpaa2_eventdev_close(struct rte_eventdev *dev) +{ + EVENTDEV_INIT_FUNC_TRACE(); + + RTE_SET_USED(dev); + + return 0; +} + +static void +dpaa2_eventdev_queue_def_conf(struct rte_eventdev *dev, uint8_t queue_id, + struct rte_event_queue_conf *queue_conf) +{ + EVENTDEV_INIT_FUNC_TRACE(); + + RTE_SET_USED(dev); + RTE_SET_USED(queue_id); + + queue_conf->nb_atomic_flows = DPAA2_EVENT_QUEUE_ATOMIC_FLOWS; + queue_conf->nb_atomic_order_sequences = + DPAA2_EVENT_QUEUE_ORDER_SEQUENCES; + queue_conf->schedule_type = RTE_SCHED_TYPE_PARALLEL; + queue_conf->priority = RTE_EVENT_DEV_PRIORITY_NORMAL; +} + +static int +dpaa2_eventdev_queue_setup(struct rte_eventdev *dev, uint8_t queue_id, + const struct rte_event_queue_conf *queue_conf) +{ + struct dpaa2_eventdev *priv = dev->data->dev_private; + struct dpaa2_eventq *evq_info = &priv->evq_info[queue_id]; + + EVENTDEV_INIT_FUNC_TRACE(); + + switch (queue_conf->schedule_type) { + case RTE_SCHED_TYPE_PARALLEL: + case RTE_SCHED_TYPE_ATOMIC: + case RTE_SCHED_TYPE_ORDERED: + break; + default: + DPAA2_EVENTDEV_ERR("Schedule type is not supported."); + return -1; + } + evq_info->event_queue_cfg = queue_conf->event_queue_cfg; + evq_info->event_queue_id = queue_id; + + return 0; +} + +static void +dpaa2_eventdev_queue_release(struct rte_eventdev *dev, uint8_t queue_id) +{ + EVENTDEV_INIT_FUNC_TRACE(); + + RTE_SET_USED(dev); + 
RTE_SET_USED(queue_id); +} + +static void +dpaa2_eventdev_port_def_conf(struct rte_eventdev *dev, uint8_t port_id, + struct rte_event_port_conf *port_conf) +{ + EVENTDEV_INIT_FUNC_TRACE(); + + RTE_SET_USED(dev); + RTE_SET_USED(port_id); + + port_conf->new_event_threshold = + DPAA2_EVENT_MAX_NUM_EVENTS; + port_conf->dequeue_depth = + DPAA2_EVENT_MAX_PORT_DEQUEUE_DEPTH; + port_conf->enqueue_depth = + DPAA2_EVENT_MAX_PORT_ENQUEUE_DEPTH; + port_conf->disable_implicit_release = 0; +} + +static int +dpaa2_eventdev_port_setup(struct rte_eventdev *dev, uint8_t port_id, + const struct rte_event_port_conf *port_conf) +{ + char event_port_name[32]; + struct dpaa2_port *portal; + + EVENTDEV_INIT_FUNC_TRACE(); + + RTE_SET_USED(port_conf); + + sprintf(event_port_name, "event-port-%d", port_id); + portal = rte_malloc(event_port_name, sizeof(struct dpaa2_port), 0); + if (!portal) { + DPAA2_EVENTDEV_ERR("Memory allocation failure"); + return -ENOMEM; + } + + memset(portal, 0, sizeof(struct dpaa2_port)); + dev->data->ports[port_id] = portal; + return 0; +} + +static void +dpaa2_eventdev_port_release(void *port) +{ + struct dpaa2_port *portal = port; + + EVENTDEV_INIT_FUNC_TRACE(); + + /* TODO: Cleanup is required when ports are in linked state. */ + if (portal->is_port_linked) + DPAA2_EVENTDEV_WARN("Event port must be unlinked before release"); + + if (portal) + rte_free(portal); + + portal = NULL; +} + +static int +dpaa2_eventdev_port_link(struct rte_eventdev *dev, void *port, + const uint8_t queues[], const uint8_t priorities[], + uint16_t nb_links) +{ + struct dpaa2_eventdev *priv = dev->data->dev_private; + struct dpaa2_port *dpaa2_portal = port; + struct dpaa2_eventq *evq_info; + uint16_t i; + + EVENTDEV_INIT_FUNC_TRACE(); + + RTE_SET_USED(priorities); + + for (i = 0; i < nb_links; i++) { + evq_info = &priv->evq_info[queues[i]]; + memcpy(&dpaa2_portal->evq_info[queues[i]], evq_info, + sizeof(struct dpaa2_eventq)); + dpaa2_portal->evq_info[queues[i]].event_port = port; + dpaa2_portal->num_linked_evq++; + } + + return (int)nb_links; +} + +static int +dpaa2_eventdev_port_unlink(struct rte_eventdev *dev, void *port, + uint8_t queues[], uint16_t nb_unlinks) +{ + struct dpaa2_port *dpaa2_portal = port; + int i; + struct dpaa2_dpio_dev *dpio_dev = NULL; + struct dpaa2_eventq *evq_info; + struct qbman_swp *swp; + + EVENTDEV_INIT_FUNC_TRACE(); + + RTE_SET_USED(dev); + RTE_SET_USED(queues); + + for (i = 0; i < nb_unlinks; i++) { + evq_info = &dpaa2_portal->evq_info[queues[i]]; + + if (DPAA2_PER_LCORE_DPIO && evq_info->dpcon) { + /* todo dpaa2_portal shall have dpio_dev-no per lcore*/ + dpio_dev = DPAA2_PER_LCORE_DPIO; + swp = DPAA2_PER_LCORE_PORTAL; + + qbman_swp_push_set(swp, + evq_info->dpcon->channel_index, 0); + dpio_remove_static_dequeue_channel(dpio_dev->dpio, 0, + dpio_dev->token, + evq_info->dpcon->dpcon_id); + } + memset(evq_info, 0, sizeof(struct dpaa2_eventq)); + if (dpaa2_portal->num_linked_evq) + dpaa2_portal->num_linked_evq--; + } + + if (!dpaa2_portal->num_linked_evq) + dpaa2_portal->is_port_linked = false; + + return (int)nb_unlinks; +} + + +static int +dpaa2_eventdev_timeout_ticks(struct rte_eventdev *dev, uint64_t ns, + uint64_t *timeout_ticks) +{ + uint32_t scale = 1000*1000; + + EVENTDEV_INIT_FUNC_TRACE(); + + RTE_SET_USED(dev); + *timeout_ticks = ns / scale; + + return 0; +} + +static void +dpaa2_eventdev_dump(struct rte_eventdev *dev, FILE *f) +{ + EVENTDEV_INIT_FUNC_TRACE(); + + RTE_SET_USED(dev); + RTE_SET_USED(f); +} + +static int +dpaa2_eventdev_eth_caps_get(const struct rte_eventdev 
*dev, + const struct rte_eth_dev *eth_dev, + uint32_t *caps) +{ + const char *ethdev_driver = eth_dev->device->driver->name; + + EVENTDEV_INIT_FUNC_TRACE(); + + RTE_SET_USED(dev); + + if (!strcmp(ethdev_driver, "net_dpaa2")) + *caps = RTE_EVENT_ETH_RX_ADAPTER_DPAA2_CAP; + else + *caps = RTE_EVENT_ETH_RX_ADAPTER_SW_CAP; + + return 0; +} + +static int +dpaa2_eventdev_eth_queue_add_all(const struct rte_eventdev *dev, + const struct rte_eth_dev *eth_dev, + const struct rte_event_eth_rx_adapter_queue_conf *queue_conf) +{ + struct dpaa2_eventdev *priv = dev->data->dev_private; + uint8_t ev_qid = queue_conf->ev.queue_id; + struct dpaa2_dpcon_dev *dpcon = priv->evq_info[ev_qid].dpcon; + int i, ret; + + EVENTDEV_INIT_FUNC_TRACE(); + + for (i = 0; i < eth_dev->data->nb_rx_queues; i++) { + ret = dpaa2_eth_eventq_attach(eth_dev, i, + dpcon, queue_conf); + if (ret) { + DPAA2_EVENTDEV_ERR( + "Event queue attach failed: err(%d)", ret); + goto fail; + } + } + return 0; +fail: + for (i = (i - 1); i >= 0 ; i--) + dpaa2_eth_eventq_detach(eth_dev, i); + + return ret; +} + +static int +dpaa2_eventdev_eth_queue_add(const struct rte_eventdev *dev, + const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id, + const struct rte_event_eth_rx_adapter_queue_conf *queue_conf) +{ + struct dpaa2_eventdev *priv = dev->data->dev_private; + uint8_t ev_qid = queue_conf->ev.queue_id; + struct dpaa2_dpcon_dev *dpcon = priv->evq_info[ev_qid].dpcon; + int ret; + + EVENTDEV_INIT_FUNC_TRACE(); + + if (rx_queue_id == -1) + return dpaa2_eventdev_eth_queue_add_all(dev, + eth_dev, queue_conf); + + ret = dpaa2_eth_eventq_attach(eth_dev, rx_queue_id, + dpcon, queue_conf); + if (ret) { + DPAA2_EVENTDEV_ERR( + "Event queue attach failed: err(%d)", ret); + return ret; + } + return 0; +} + +static int +dpaa2_eventdev_eth_queue_del_all(const struct rte_eventdev *dev, + const struct rte_eth_dev *eth_dev) +{ + int i, ret; + + EVENTDEV_INIT_FUNC_TRACE(); + + RTE_SET_USED(dev); + + for (i = 0; i < eth_dev->data->nb_rx_queues; i++) { + ret = dpaa2_eth_eventq_detach(eth_dev, i); + if (ret) { + DPAA2_EVENTDEV_ERR( + "Event queue detach failed: err(%d)", ret); + return ret; + } + } + + return 0; +} + +static int +dpaa2_eventdev_eth_queue_del(const struct rte_eventdev *dev, + const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id) +{ + int ret; + + EVENTDEV_INIT_FUNC_TRACE(); + + if (rx_queue_id == -1) + return dpaa2_eventdev_eth_queue_del_all(dev, eth_dev); + + ret = dpaa2_eth_eventq_detach(eth_dev, rx_queue_id); + if (ret) { + DPAA2_EVENTDEV_ERR( + "Event queue detach failed: err(%d)", ret); + return ret; + } + + return 0; +} + +static int +dpaa2_eventdev_eth_start(const struct rte_eventdev *dev, + const struct rte_eth_dev *eth_dev) +{ + EVENTDEV_INIT_FUNC_TRACE(); + + RTE_SET_USED(dev); + RTE_SET_USED(eth_dev); + + return 0; +} + +static int +dpaa2_eventdev_eth_stop(const struct rte_eventdev *dev, + const struct rte_eth_dev *eth_dev) +{ + EVENTDEV_INIT_FUNC_TRACE(); + + RTE_SET_USED(dev); + RTE_SET_USED(eth_dev); + + return 0; +} + +static int +dpaa2_eventdev_crypto_caps_get(const struct rte_eventdev *dev, + const struct rte_cryptodev *cdev, + uint32_t *caps) +{ + const char *name = cdev->data->name; + + EVENTDEV_INIT_FUNC_TRACE(); + + RTE_SET_USED(dev); + + if (!strncmp(name, "dpsec-", 6)) + *caps = RTE_EVENT_CRYPTO_ADAPTER_DPAA2_CAP; + else + return -1; + + return 0; +} + +static int +dpaa2_eventdev_crypto_queue_add_all(const struct rte_eventdev *dev, + const struct rte_cryptodev *cryptodev, + const struct rte_event *ev) +{ + struct 
dpaa2_eventdev *priv = dev->data->dev_private; + uint8_t ev_qid = ev->queue_id; + struct dpaa2_dpcon_dev *dpcon = priv->evq_info[ev_qid].dpcon; + int i, ret; + + EVENTDEV_INIT_FUNC_TRACE(); + + for (i = 0; i < cryptodev->data->nb_queue_pairs; i++) { + ret = dpaa2_sec_eventq_attach(cryptodev, i, dpcon, ev); + if (ret) { + DPAA2_EVENTDEV_ERR("dpaa2_sec_eventq_attach failed: ret %d\n", + ret); + goto fail; + } + } + return 0; +fail: + for (i = (i - 1); i >= 0 ; i--) + dpaa2_sec_eventq_detach(cryptodev, i); + + return ret; +} + +static int +dpaa2_eventdev_crypto_queue_add(const struct rte_eventdev *dev, + const struct rte_cryptodev *cryptodev, + int32_t rx_queue_id, + const struct rte_event *ev) +{ + struct dpaa2_eventdev *priv = dev->data->dev_private; + uint8_t ev_qid = ev->queue_id; + struct dpaa2_dpcon_dev *dpcon = priv->evq_info[ev_qid].dpcon; + int ret; + + EVENTDEV_INIT_FUNC_TRACE(); + + if (rx_queue_id == -1) + return dpaa2_eventdev_crypto_queue_add_all(dev, + cryptodev, ev); + + ret = dpaa2_sec_eventq_attach(cryptodev, rx_queue_id, + dpcon, ev); + if (ret) { + DPAA2_EVENTDEV_ERR( + "dpaa2_sec_eventq_attach failed: ret: %d\n", ret); + return ret; + } + return 0; +} + +static int +dpaa2_eventdev_crypto_queue_del_all(const struct rte_eventdev *dev, + const struct rte_cryptodev *cdev) +{ + int i, ret; + + EVENTDEV_INIT_FUNC_TRACE(); + + RTE_SET_USED(dev); + + for (i = 0; i < cdev->data->nb_queue_pairs; i++) { + ret = dpaa2_sec_eventq_detach(cdev, i); + if (ret) { + DPAA2_EVENTDEV_ERR( + "dpaa2_sec_eventq_detach failed:ret %d\n", ret); + return ret; + } + } + + return 0; +} + +static int +dpaa2_eventdev_crypto_queue_del(const struct rte_eventdev *dev, + const struct rte_cryptodev *cryptodev, + int32_t rx_queue_id) +{ + int ret; + + EVENTDEV_INIT_FUNC_TRACE(); + + if (rx_queue_id == -1) + return dpaa2_eventdev_crypto_queue_del_all(dev, cryptodev); + + ret = dpaa2_sec_eventq_detach(cryptodev, rx_queue_id); + if (ret) { + DPAA2_EVENTDEV_ERR( + "dpaa2_sec_eventq_detach failed: ret: %d\n", ret); + return ret; + } + + return 0; +} + +static int +dpaa2_eventdev_crypto_start(const struct rte_eventdev *dev, + const struct rte_cryptodev *cryptodev) +{ + EVENTDEV_INIT_FUNC_TRACE(); + + RTE_SET_USED(dev); + RTE_SET_USED(cryptodev); + + return 0; +} + +static int +dpaa2_eventdev_crypto_stop(const struct rte_eventdev *dev, + const struct rte_cryptodev *cryptodev) +{ + EVENTDEV_INIT_FUNC_TRACE(); + + RTE_SET_USED(dev); + RTE_SET_USED(cryptodev); + + return 0; +} + +static int +dpaa2_eventdev_tx_adapter_create(uint8_t id, + const struct rte_eventdev *dev) +{ + RTE_SET_USED(id); + RTE_SET_USED(dev); + + /* Nothing to do. Simply return. 
*/ + return 0; +} + +static int +dpaa2_eventdev_tx_adapter_caps(const struct rte_eventdev *dev, + const struct rte_eth_dev *eth_dev, + uint32_t *caps) +{ + RTE_SET_USED(dev); + RTE_SET_USED(eth_dev); + + *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT; + return 0; +} + +static uint16_t +dpaa2_eventdev_txa_enqueue_same_dest(void *port, + struct rte_event ev[], + uint16_t nb_events) +{ + struct rte_mbuf *m[DPAA2_EVENT_MAX_PORT_ENQUEUE_DEPTH], *m0; + uint8_t qid, i; + + RTE_SET_USED(port); + + m0 = (struct rte_mbuf *)ev[0].mbuf; + qid = rte_event_eth_tx_adapter_txq_get(m0); + + for (i = 0; i < nb_events; i++) + m[i] = (struct rte_mbuf *)ev[i].mbuf; + + return rte_eth_tx_burst(m0->port, qid, m, nb_events); +} + +static uint16_t +dpaa2_eventdev_txa_enqueue(void *port, + struct rte_event ev[], + uint16_t nb_events) +{ + struct rte_mbuf *m = (struct rte_mbuf *)ev[0].mbuf; + uint8_t qid, i; + + RTE_SET_USED(port); + + for (i = 0; i < nb_events; i++) { + qid = rte_event_eth_tx_adapter_txq_get(m); + rte_eth_tx_burst(m->port, qid, &m, 1); + } + + return nb_events; +} + +static struct rte_eventdev_ops dpaa2_eventdev_ops = { + .dev_infos_get = dpaa2_eventdev_info_get, + .dev_configure = dpaa2_eventdev_configure, + .dev_start = dpaa2_eventdev_start, + .dev_stop = dpaa2_eventdev_stop, + .dev_close = dpaa2_eventdev_close, + .queue_def_conf = dpaa2_eventdev_queue_def_conf, + .queue_setup = dpaa2_eventdev_queue_setup, + .queue_release = dpaa2_eventdev_queue_release, + .port_def_conf = dpaa2_eventdev_port_def_conf, + .port_setup = dpaa2_eventdev_port_setup, + .port_release = dpaa2_eventdev_port_release, + .port_link = dpaa2_eventdev_port_link, + .port_unlink = dpaa2_eventdev_port_unlink, + .timeout_ticks = dpaa2_eventdev_timeout_ticks, + .dump = dpaa2_eventdev_dump, + .dev_selftest = test_eventdev_dpaa2, + .eth_rx_adapter_caps_get = dpaa2_eventdev_eth_caps_get, + .eth_rx_adapter_queue_add = dpaa2_eventdev_eth_queue_add, + .eth_rx_adapter_queue_del = dpaa2_eventdev_eth_queue_del, + .eth_rx_adapter_start = dpaa2_eventdev_eth_start, + .eth_rx_adapter_stop = dpaa2_eventdev_eth_stop, + .eth_tx_adapter_caps_get = dpaa2_eventdev_tx_adapter_caps, + .eth_tx_adapter_create = dpaa2_eventdev_tx_adapter_create, + .crypto_adapter_caps_get = dpaa2_eventdev_crypto_caps_get, + .crypto_adapter_queue_pair_add = dpaa2_eventdev_crypto_queue_add, + .crypto_adapter_queue_pair_del = dpaa2_eventdev_crypto_queue_del, + .crypto_adapter_start = dpaa2_eventdev_crypto_start, + .crypto_adapter_stop = dpaa2_eventdev_crypto_stop, +}; + +static int +dpaa2_eventdev_setup_dpci(struct dpaa2_dpci_dev *dpci_dev, + struct dpaa2_dpcon_dev *dpcon_dev) +{ + struct dpci_rx_queue_cfg rx_queue_cfg; + int ret, i; + + /*Do settings to get the frame on a DPCON object*/ + rx_queue_cfg.options = DPCI_QUEUE_OPT_DEST | + DPCI_QUEUE_OPT_USER_CTX; + rx_queue_cfg.dest_cfg.dest_type = DPCI_DEST_DPCON; + rx_queue_cfg.dest_cfg.dest_id = dpcon_dev->dpcon_id; + rx_queue_cfg.dest_cfg.priority = DPAA2_EVENT_DEFAULT_DPCI_PRIO; + + dpci_dev->rx_queue[DPAA2_EVENT_DPCI_PARALLEL_QUEUE].cb = + dpaa2_eventdev_process_parallel; + dpci_dev->rx_queue[DPAA2_EVENT_DPCI_ATOMIC_QUEUE].cb = + dpaa2_eventdev_process_atomic; + + for (i = 0 ; i < DPAA2_EVENT_DPCI_MAX_QUEUES; i++) { + rx_queue_cfg.user_ctx = (size_t)(&dpci_dev->rx_queue[i]); + ret = dpci_set_rx_queue(&dpci_dev->dpci, + CMD_PRI_LOW, + dpci_dev->token, i, + &rx_queue_cfg); + if (ret) { + DPAA2_EVENTDEV_ERR( + "DPCI Rx queue setup failed: err(%d)", + ret); + return ret; + } + } + return 0; +} + +static int 
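/*
 * The create path below pairs one DPCON (scheduling channel) with one DPCI
 * object per event queue, running dpaa2_eventdev_setup_dpci() on each pair
 * until either allocator runs out; priv->max_event_queues ends up equal to
 * the number of pairs obtained. The PMD itself is a vdev, so it is
 * instantiated either with an EAL argument such as --vdev=event_dpaa2 or
 * programmatically (names as used by the selftest further down):
 *
 *   rte_vdev_init("event_dpaa2", NULL);
 *   int dev_id = rte_event_dev_get_dev_id("event_dpaa2");
 */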
+dpaa2_eventdev_create(const char *name) +{ + struct rte_eventdev *eventdev; + struct dpaa2_eventdev *priv; + struct dpaa2_dpcon_dev *dpcon_dev = NULL; + struct dpaa2_dpci_dev *dpci_dev = NULL; + int ret; + + eventdev = rte_event_pmd_vdev_init(name, + sizeof(struct dpaa2_eventdev), + rte_socket_id()); + if (eventdev == NULL) { + DPAA2_EVENTDEV_ERR("Failed to create Event device %s", name); + goto fail; + } + + eventdev->dev_ops = &dpaa2_eventdev_ops; + eventdev->enqueue = dpaa2_eventdev_enqueue; + eventdev->enqueue_burst = dpaa2_eventdev_enqueue_burst; + eventdev->enqueue_new_burst = dpaa2_eventdev_enqueue_burst; + eventdev->enqueue_forward_burst = dpaa2_eventdev_enqueue_burst; + eventdev->dequeue = dpaa2_eventdev_dequeue; + eventdev->dequeue_burst = dpaa2_eventdev_dequeue_burst; + eventdev->txa_enqueue = dpaa2_eventdev_txa_enqueue; + eventdev->txa_enqueue_same_dest = dpaa2_eventdev_txa_enqueue_same_dest; + + /* For secondary processes, the primary has done all the work */ + if (rte_eal_process_type() != RTE_PROC_PRIMARY) + return 0; + + priv = eventdev->data->dev_private; + priv->max_event_queues = 0; + + do { + dpcon_dev = rte_dpaa2_alloc_dpcon_dev(); + if (!dpcon_dev) + break; + priv->evq_info[priv->max_event_queues].dpcon = dpcon_dev; + + dpci_dev = rte_dpaa2_alloc_dpci_dev(); + if (!dpci_dev) { + rte_dpaa2_free_dpcon_dev(dpcon_dev); + break; + } + priv->evq_info[priv->max_event_queues].dpci = dpci_dev; + + ret = dpaa2_eventdev_setup_dpci(dpci_dev, dpcon_dev); + if (ret) { + DPAA2_EVENTDEV_ERR( + "DPCI setup failed: err(%d)", ret); + return ret; + } + priv->max_event_queues++; + } while (dpcon_dev && dpci_dev); + + RTE_LOG(INFO, PMD, "%s eventdev created\n", name); + + return 0; +fail: + return -EFAULT; +} + +static int +dpaa2_eventdev_destroy(const char *name) +{ + struct rte_eventdev *eventdev; + struct dpaa2_eventdev *priv; + int i; + + eventdev = rte_event_pmd_get_named_dev(name); + if (eventdev == NULL) { + RTE_EDEV_LOG_ERR("eventdev with name %s not allocated", name); + return -1; + } + + /* For secondary processes, the primary has done all the work */ + if (rte_eal_process_type() != RTE_PROC_PRIMARY) + return 0; + + priv = eventdev->data->dev_private; + for (i = 0; i < priv->max_event_queues; i++) { + if (priv->evq_info[i].dpcon) + rte_dpaa2_free_dpcon_dev(priv->evq_info[i].dpcon); + + if (priv->evq_info[i].dpci) + rte_dpaa2_free_dpci_dev(priv->evq_info[i].dpci); + + } + priv->max_event_queues = 0; + + RTE_LOG(INFO, PMD, "%s eventdev cleaned\n", name); + return 0; +} + + +static int +dpaa2_eventdev_probe(struct rte_vdev_device *vdev) +{ + const char *name; + + name = rte_vdev_device_name(vdev); + DPAA2_EVENTDEV_INFO("Initializing %s", name); + return dpaa2_eventdev_create(name); +} + +static int +dpaa2_eventdev_remove(struct rte_vdev_device *vdev) +{ + const char *name; + + name = rte_vdev_device_name(vdev); + DPAA2_EVENTDEV_INFO("Closing %s", name); + + dpaa2_eventdev_destroy(name); + + return rte_event_pmd_vdev_uninit(name); +} + +static struct rte_vdev_driver vdev_eventdev_dpaa2_pmd = { + .probe = dpaa2_eventdev_probe, + .remove = dpaa2_eventdev_remove +}; + +RTE_PMD_REGISTER_VDEV(EVENTDEV_NAME_DPAA2_PMD, vdev_eventdev_dpaa2_pmd); + +RTE_INIT(dpaa2_eventdev_init_log) +{ + dpaa2_logtype_event = rte_log_register("pmd.event.dpaa2"); + if (dpaa2_logtype_event >= 0) + rte_log_set_level(dpaa2_logtype_event, RTE_LOG_NOTICE); +} diff --git a/src/spdk/dpdk/drivers/event/dpaa2/dpaa2_eventdev.h b/src/spdk/dpdk/drivers/event/dpaa2/dpaa2_eventdev.h new file mode 100644 index 
000000000..785e52032 --- /dev/null +++ b/src/spdk/dpdk/drivers/event/dpaa2/dpaa2_eventdev.h @@ -0,0 +1,93 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2017 NXP + */ + +#ifndef __DPAA2_EVENTDEV_H__ +#define __DPAA2_EVENTDEV_H__ + +#include <rte_eventdev_pmd.h> +#include <rte_eventdev_pmd_vdev.h> +#include <rte_atomic.h> +#include <mc/fsl_dpcon.h> +#include <mc/fsl_mc_sys.h> + +#define EVENTDEV_NAME_DPAA2_PMD event_dpaa2 + +#define DPAA2_EVENT_DEFAULT_DPCI_PRIO 0 + +#define DPAA2_EVENT_MAX_QUEUES 16 +#define DPAA2_EVENT_MIN_DEQUEUE_TIMEOUT 1 +#define DPAA2_EVENT_MAX_DEQUEUE_TIMEOUT (UINT32_MAX - 1) +#define DPAA2_EVENT_PORT_DEQUEUE_TIMEOUT_NS 100UL +#define DPAA2_EVENT_MAX_QUEUE_FLOWS 2048 +#define DPAA2_EVENT_MAX_QUEUE_PRIORITY_LEVELS 8 +#define DPAA2_EVENT_MAX_EVENT_PRIORITY_LEVELS 0 +#define DPAA2_EVENT_MAX_PORT_DEQUEUE_DEPTH 8 +#define DPAA2_EVENT_MAX_PORT_ENQUEUE_DEPTH 8 +#define DPAA2_EVENT_MAX_NUM_EVENTS (INT32_MAX - 1) + +#define DPAA2_EVENT_QUEUE_ATOMIC_FLOWS 2048 +#define DPAA2_EVENT_QUEUE_ORDER_SEQUENCES 2048 + +enum { + DPAA2_EVENT_DPCI_PARALLEL_QUEUE, + DPAA2_EVENT_DPCI_ATOMIC_QUEUE, + DPAA2_EVENT_DPCI_MAX_QUEUES +}; + +#define RTE_EVENT_ETH_RX_ADAPTER_DPAA2_CAP \ + (RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT | \ + RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ | \ + RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID | \ + RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT) + +/**< Crypto Rx adapter cap to return If the packet transfers from + * the cryptodev to eventdev with DPAA2 devices. + */ +#define RTE_EVENT_CRYPTO_ADAPTER_DPAA2_CAP \ + (RTE_EVENT_CRYPTO_ADAPTER_CAP_INTERNAL_PORT_OP_NEW | \ + RTE_EVENT_CRYPTO_ADAPTER_CAP_INTERNAL_PORT_QP_EV_BIND | \ + RTE_EVENT_CRYPTO_ADAPTER_CAP_SESSION_PRIVATE_DATA) + +/**< Ethernet Rx adapter cap to return If the packet transfers from + * the ethdev to eventdev with DPAA2 devices. + */ + +struct dpaa2_eventq { + /* DPcon device */ + struct dpaa2_dpcon_dev *dpcon; + /* Attached DPCI device */ + struct dpaa2_dpci_dev *dpci; + /* Mapped event port */ + struct dpaa2_io_portal_t *event_port; + /* Configuration provided by the user */ + uint32_t event_queue_cfg; + uint32_t event_queue_id; +}; + +struct dpaa2_port { + struct dpaa2_eventq evq_info[DPAA2_EVENT_MAX_QUEUES]; + uint8_t num_linked_evq; + uint8_t is_port_linked; + uint64_t timeout_us; +}; + +struct dpaa2_eventdev { + struct dpaa2_eventq evq_info[DPAA2_EVENT_MAX_QUEUES]; + uint32_t dequeue_timeout_ns; + uint8_t max_event_queues; + uint8_t nb_event_queues; + uint8_t nb_event_ports; + uint8_t resvd_1; + uint32_t nb_event_queue_flows; + uint32_t nb_event_port_dequeue_depth; + uint32_t nb_event_port_enqueue_depth; + uint32_t event_dev_cfg; +}; + +struct dpaa2_dpcon_dev *rte_dpaa2_alloc_dpcon_dev(void); +void rte_dpaa2_free_dpcon_dev(struct dpaa2_dpcon_dev *dpcon); + +int test_eventdev_dpaa2(void); + +#endif /* __DPAA2_EVENTDEV_H__ */ diff --git a/src/spdk/dpdk/drivers/event/dpaa2/dpaa2_eventdev_logs.h b/src/spdk/dpdk/drivers/event/dpaa2/dpaa2_eventdev_logs.h new file mode 100644 index 000000000..5da85c60f --- /dev/null +++ b/src/spdk/dpdk/drivers/event/dpaa2/dpaa2_eventdev_logs.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2018-2019 NXP + */ + +#ifndef _DPAA2_EVENTDEV_LOGS_H_ +#define _DPAA2_EVENTDEV_LOGS_H_ + +extern int dpaa2_logtype_event; + +#define DPAA2_EVENTDEV_LOG(level, fmt, args...) \ + rte_log(RTE_LOG_ ## level, dpaa2_logtype_event, "dpaa2_event: " \ + fmt "\n", ##args) + +#define DPAA2_EVENTDEV_DEBUG(fmt, args...) 
\ + rte_log(RTE_LOG_DEBUG, dpaa2_logtype_event, "dpaa2_event: %s(): " \ + fmt "\n", __func__, ##args) + +#define EVENTDEV_INIT_FUNC_TRACE() DPAA2_EVENTDEV_DEBUG(" >>") + +#define DPAA2_EVENTDEV_INFO(fmt, args...) \ + DPAA2_EVENTDEV_LOG(INFO, fmt, ## args) +#define DPAA2_EVENTDEV_ERR(fmt, args...) \ + DPAA2_EVENTDEV_LOG(ERR, fmt, ## args) +#define DPAA2_EVENTDEV_WARN(fmt, args...) \ + DPAA2_EVENTDEV_LOG(WARNING, fmt, ## args) + +/* DP Logs, toggled out at compile time if level lower than current level */ +#define DPAA2_EVENTDEV_DP_LOG(level, fmt, args...) \ + RTE_LOG_DP(level, PMD, fmt, ## args) + +#define DPAA2_EVENTDEV_DP_DEBUG(fmt, args...) \ + DPAA2_EVENTDEV_DP_LOG(DEBUG, fmt, ## args) +#define DPAA2_EVENTDEV_DP_INFO(fmt, args...) \ + DPAA2_EVENTDEV_DP_LOG(INFO, fmt, ## args) +#define DPAA2_EVENTDEV_DP_WARN(fmt, args...) \ + DPAA2_EVENTDEV_DP_LOG(WARNING, fmt, ## args) + +#define dpaa2_evdev_info(fmt, ...) DPAA2_EVENTDEV_LOG(INFO, fmt, ##__VA_ARGS__) +#define dpaa2_evdev_dbg(fmt, ...) DPAA2_EVENTDEV_LOG(DEBUG, fmt, ##__VA_ARGS__) +#define dpaa2_evdev_err(fmt, ...) DPAA2_EVENTDEV_LOG(ERR, fmt, ##__VA_ARGS__) +#define dpaa2_evdev__func_trace dpaa2_evdev_dbg +#define dpaa2_evdev_selftest dpaa2_evdev_info + +#endif /* _DPAA2_EVENTDEV_LOGS_H_ */ diff --git a/src/spdk/dpdk/drivers/event/dpaa2/dpaa2_eventdev_selftest.c b/src/spdk/dpdk/drivers/event/dpaa2/dpaa2_eventdev_selftest.c new file mode 100644 index 000000000..ba4f4bd23 --- /dev/null +++ b/src/spdk/dpdk/drivers/event/dpaa2/dpaa2_eventdev_selftest.c @@ -0,0 +1,833 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2018-2019 NXP + */ + +#include <rte_atomic.h> +#include <rte_common.h> +#include <rte_cycles.h> +#include <rte_debug.h> +#include <rte_eal.h> +#include <rte_ethdev.h> +#include <rte_eventdev.h> +#include <rte_hexdump.h> +#include <rte_mbuf.h> +#include <rte_malloc.h> +#include <rte_memcpy.h> +#include <rte_launch.h> +#include <rte_lcore.h> +#include <rte_per_lcore.h> +#include <rte_random.h> +#include <rte_bus_vdev.h> +#include <rte_test.h> + +#include "dpaa2_eventdev.h" +#include "dpaa2_eventdev_logs.h" + +#define MAX_PORTS 4 +#define NUM_PACKETS (1 << 18) +#define MAX_EVENTS 8 +#define DPAA2_TEST_RUN(setup, teardown, test) \ + dpaa2_test_run(setup, teardown, test, #test) + +static int total; +static int passed; +static int failed; +static int unsupported; + +static int evdev; +static struct rte_mempool *eventdev_test_mempool; + +struct event_attr { + uint32_t flow_id; + uint8_t event_type; + uint8_t sub_event_type; + uint8_t sched_type; + uint8_t queue; + uint8_t port; + uint8_t seq; +}; + +static uint32_t seqn_list_index; +static int seqn_list[NUM_PACKETS]; + +static void +seqn_list_init(void) +{ + RTE_BUILD_BUG_ON(NUM_PACKETS < MAX_EVENTS); + memset(seqn_list, 0, sizeof(seqn_list)); + seqn_list_index = 0; +} + +struct test_core_param { + rte_atomic32_t *total_events; + uint64_t dequeue_tmo_ticks; + uint8_t port; + uint8_t sched_type; +}; + +static int +testsuite_setup(void) +{ + const char *eventdev_name = "event_dpaa2"; + + evdev = rte_event_dev_get_dev_id(eventdev_name); + if (evdev < 0) { + dpaa2_evdev_dbg("%d: Eventdev %s not found - creating.", + __LINE__, eventdev_name); + if (rte_vdev_init(eventdev_name, NULL) < 0) { + dpaa2_evdev_err("Error creating eventdev %s", + eventdev_name); + return -1; + } + evdev = rte_event_dev_get_dev_id(eventdev_name); + if (evdev < 0) { + dpaa2_evdev_err("Error finding newly created eventdev"); + return -1; + } + } + + return 0; +} + +static void +testsuite_teardown(void) +{ + 
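/*
 * The suite in this file is normally reached through the eventdev selftest
 * hook, i.e. something like (device lookup assumed):
 *
 *   rte_event_dev_selftest(rte_event_dev_get_dev_id("event_dpaa2"));
 *
 * which dispatches to the .dev_selftest handler (test_eventdev_dpaa2)
 * registered in dpaa2_eventdev_ops.
 */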
rte_event_dev_close(evdev); +} + +static void +devconf_set_default_sane_values(struct rte_event_dev_config *dev_conf, + struct rte_event_dev_info *info) +{ + memset(dev_conf, 0, sizeof(struct rte_event_dev_config)); + dev_conf->dequeue_timeout_ns = info->min_dequeue_timeout_ns; + dev_conf->nb_event_ports = info->max_event_ports; + dev_conf->nb_event_queues = info->max_event_queues; + dev_conf->nb_event_queue_flows = info->max_event_queue_flows; + dev_conf->nb_event_port_dequeue_depth = + info->max_event_port_dequeue_depth; + dev_conf->nb_event_port_enqueue_depth = + info->max_event_port_enqueue_depth; + dev_conf->nb_event_port_enqueue_depth = + info->max_event_port_enqueue_depth; + dev_conf->nb_events_limit = + info->max_num_events; +} + +enum { + TEST_EVENTDEV_SETUP_DEFAULT, + TEST_EVENTDEV_SETUP_PRIORITY, + TEST_EVENTDEV_SETUP_DEQUEUE_TIMEOUT, +}; + +static int +_eventdev_setup(int mode) +{ + int i, ret; + struct rte_event_dev_config dev_conf; + struct rte_event_dev_info info; + const char *pool_name = "evdev_dpaa2_test_pool"; + + /* Create and destrory pool for each test case to make it standalone */ + eventdev_test_mempool = rte_pktmbuf_pool_create(pool_name, + MAX_EVENTS, + 0 /*MBUF_CACHE_SIZE*/, + 0, + 512, /* Use very small mbufs */ + rte_socket_id()); + if (!eventdev_test_mempool) { + dpaa2_evdev_err("ERROR creating mempool"); + return -1; + } + + ret = rte_event_dev_info_get(evdev, &info); + RTE_TEST_ASSERT_SUCCESS(ret, "Failed to get event dev info"); + RTE_TEST_ASSERT(info.max_num_events >= (int32_t)MAX_EVENTS, + "ERROR max_num_events=%d < max_events=%d", + info.max_num_events, MAX_EVENTS); + + devconf_set_default_sane_values(&dev_conf, &info); + if (mode == TEST_EVENTDEV_SETUP_DEQUEUE_TIMEOUT) + dev_conf.event_dev_cfg |= RTE_EVENT_DEV_CFG_PER_DEQUEUE_TIMEOUT; + + ret = rte_event_dev_configure(evdev, &dev_conf); + RTE_TEST_ASSERT_SUCCESS(ret, "Failed to configure eventdev"); + + uint32_t queue_count; + RTE_TEST_ASSERT_SUCCESS(rte_event_dev_attr_get(evdev, + RTE_EVENT_DEV_ATTR_QUEUE_COUNT, + &queue_count), "Queue count get failed"); + + if (mode == TEST_EVENTDEV_SETUP_PRIORITY) { + if (queue_count > 8) { + dpaa2_evdev_err( + "test expects the unique priority per queue"); + return -ENOTSUP; + } + + /* Configure event queues(0 to n) with + * RTE_EVENT_DEV_PRIORITY_HIGHEST to + * RTE_EVENT_DEV_PRIORITY_LOWEST + */ + uint8_t step = (RTE_EVENT_DEV_PRIORITY_LOWEST + 1) / + queue_count; + for (i = 0; i < (int)queue_count; i++) { + struct rte_event_queue_conf queue_conf; + + ret = rte_event_queue_default_conf_get(evdev, i, + &queue_conf); + RTE_TEST_ASSERT_SUCCESS(ret, "Failed to get def_conf%d", + i); + queue_conf.priority = i * step; + ret = rte_event_queue_setup(evdev, i, &queue_conf); + RTE_TEST_ASSERT_SUCCESS(ret, "Failed to setup queue=%d", + i); + } + + } else { + /* Configure event queues with default priority */ + for (i = 0; i < (int)queue_count; i++) { + ret = rte_event_queue_setup(evdev, i, NULL); + RTE_TEST_ASSERT_SUCCESS(ret, "Failed to setup queue=%d", + i); + } + } + /* Configure event ports */ + uint32_t port_count; + RTE_TEST_ASSERT_SUCCESS(rte_event_dev_attr_get(evdev, + RTE_EVENT_DEV_ATTR_PORT_COUNT, + &port_count), "Port count get failed"); + for (i = 0; i < (int)port_count; i++) { + ret = rte_event_port_setup(evdev, i, NULL); + RTE_TEST_ASSERT_SUCCESS(ret, "Failed to setup port=%d", i); + ret = rte_event_port_link(evdev, i, NULL, NULL, 0); + RTE_TEST_ASSERT(ret >= 0, "Failed to link all queues port=%d", + i); + } + + ret = rte_event_dev_start(evdev); + 
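/*
 * At this point the setup helper has walked the canonical eventdev bring-up
 * sequence: rte_event_dev_info_get() -> rte_event_dev_configure() ->
 * rte_event_queue_setup() for each queue (optionally with per-queue
 * priorities) -> rte_event_port_setup() and rte_event_port_link() for each
 * port -> rte_event_dev_start().
 */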
RTE_TEST_ASSERT_SUCCESS(ret, "Failed to start device"); + + return 0; +} + +static int +eventdev_setup(void) +{ + return _eventdev_setup(TEST_EVENTDEV_SETUP_DEFAULT); +} + +static void +eventdev_teardown(void) +{ + rte_event_dev_stop(evdev); + rte_mempool_free(eventdev_test_mempool); +} + +static void +update_event_and_validation_attr(struct rte_mbuf *m, struct rte_event *ev, + uint32_t flow_id, uint8_t event_type, + uint8_t sub_event_type, uint8_t sched_type, + uint8_t queue, uint8_t port, uint8_t seq) +{ + struct event_attr *attr; + + /* Store the event attributes in mbuf for future reference */ + attr = rte_pktmbuf_mtod(m, struct event_attr *); + attr->flow_id = flow_id; + attr->event_type = event_type; + attr->sub_event_type = sub_event_type; + attr->sched_type = sched_type; + attr->queue = queue; + attr->port = port; + attr->seq = seq; + + ev->flow_id = flow_id; + ev->sub_event_type = sub_event_type; + ev->event_type = event_type; + /* Inject the new event */ + ev->op = RTE_EVENT_OP_NEW; + ev->sched_type = sched_type; + ev->queue_id = queue; + ev->mbuf = m; +} + +static int +inject_events(uint32_t flow_id, uint8_t event_type, uint8_t sub_event_type, + uint8_t sched_type, uint8_t queue, uint8_t port, + unsigned int events) +{ + struct rte_mbuf *m; + unsigned int i; + + for (i = 0; i < events; i++) { + struct rte_event ev = {.event = 0, .u64 = 0}; + + m = rte_pktmbuf_alloc(eventdev_test_mempool); + RTE_TEST_ASSERT_NOT_NULL(m, "mempool alloc failed"); + + update_event_and_validation_attr(m, &ev, flow_id, event_type, + sub_event_type, sched_type, queue, port, i); + rte_event_enqueue_burst(evdev, port, &ev, 1); + } + return 0; +} + +static int +check_excess_events(uint8_t port) +{ + int i; + uint16_t valid_event; + struct rte_event ev; + + /* Check for excess events, try for a few times and exit */ + for (i = 0; i < 32; i++) { + valid_event = rte_event_dequeue_burst(evdev, port, &ev, 1, 0); + + RTE_TEST_ASSERT_SUCCESS(valid_event, + "Unexpected valid event=%d", ev.mbuf->seqn); + } + return 0; +} + +static int +generate_random_events(const unsigned int total_events) +{ + struct rte_event_dev_info info; + unsigned int i; + int ret; + + uint32_t queue_count; + RTE_TEST_ASSERT_SUCCESS(rte_event_dev_attr_get(evdev, + RTE_EVENT_DEV_ATTR_QUEUE_COUNT, + &queue_count), "Queue count get failed"); + + ret = rte_event_dev_info_get(evdev, &info); + RTE_TEST_ASSERT_SUCCESS(ret, "Failed to get event dev info"); + for (i = 0; i < total_events; i++) { + ret = inject_events( + rte_rand() % info.max_event_queue_flows /*flow_id */, + RTE_EVENT_TYPE_CPU /* event_type */, + rte_rand() % 256 /* sub_event_type */, + rte_rand() % (RTE_SCHED_TYPE_PARALLEL + 1), + rte_rand() % queue_count /* queue */, + 0 /* port */, + 1 /* events */); + if (ret) + return -1; + } + return ret; +} + + +static int +validate_event(struct rte_event *ev) +{ + struct event_attr *attr; + + attr = rte_pktmbuf_mtod(ev->mbuf, struct event_attr *); + RTE_TEST_ASSERT_EQUAL(attr->flow_id, ev->flow_id, + "flow_id mismatch enq=%d deq =%d", + attr->flow_id, ev->flow_id); + RTE_TEST_ASSERT_EQUAL(attr->event_type, ev->event_type, + "event_type mismatch enq=%d deq =%d", + attr->event_type, ev->event_type); + RTE_TEST_ASSERT_EQUAL(attr->sub_event_type, ev->sub_event_type, + "sub_event_type mismatch enq=%d deq =%d", + attr->sub_event_type, ev->sub_event_type); + RTE_TEST_ASSERT_EQUAL(attr->sched_type, ev->sched_type, + "sched_type mismatch enq=%d deq =%d", + attr->sched_type, ev->sched_type); + RTE_TEST_ASSERT_EQUAL(attr->queue, ev->queue_id, + "queue 
mismatch enq=%d deq =%d", + attr->queue, ev->queue_id); + return 0; +} + +typedef int (*validate_event_cb)(uint32_t index, uint8_t port, + struct rte_event *ev); + +static int +consume_events(uint8_t port, const uint32_t total_events, validate_event_cb fn) +{ + int ret; + uint16_t valid_event; + uint32_t events = 0, forward_progress_cnt = 0, index = 0; + struct rte_event ev; + + while (1) { + if (++forward_progress_cnt > UINT16_MAX) { + dpaa2_evdev_err("Detected deadlock"); + return -1; + } + + valid_event = rte_event_dequeue_burst(evdev, port, &ev, 1, 0); + if (!valid_event) + continue; + + forward_progress_cnt = 0; + ret = validate_event(&ev); + if (ret) + return -1; + + if (fn != NULL) { + ret = fn(index, port, &ev); + RTE_TEST_ASSERT_SUCCESS(ret, + "Failed to validate test specific event"); + } + + ++index; + + rte_pktmbuf_free(ev.mbuf); + if (++events >= total_events) + break; + } + + return check_excess_events(port); +} + +static int +validate_simple_enqdeq(uint32_t index, uint8_t port, struct rte_event *ev) +{ + struct event_attr *attr; + + attr = rte_pktmbuf_mtod(ev->mbuf, struct event_attr *); + + RTE_SET_USED(port); + RTE_TEST_ASSERT_EQUAL(index, attr->seq, + "index=%d != seqn=%d", index, attr->seq); + return 0; +} + +static int +test_simple_enqdeq(uint8_t sched_type) +{ + int ret; + + ret = inject_events(0 /*flow_id */, + RTE_EVENT_TYPE_CPU /* event_type */, + 0 /* sub_event_type */, + sched_type, + 0 /* queue */, + 0 /* port */, + MAX_EVENTS); + if (ret) + return -1; + + return consume_events(0 /* port */, MAX_EVENTS, validate_simple_enqdeq); +} + +static int +test_simple_enqdeq_atomic(void) +{ + return test_simple_enqdeq(RTE_SCHED_TYPE_ATOMIC); +} + +static int +test_simple_enqdeq_parallel(void) +{ + return test_simple_enqdeq(RTE_SCHED_TYPE_PARALLEL); +} + +/* + * Generate a prescribed number of events and spread them across available + * queues. 
On dequeue, using single event port(port 0) verify the enqueued + * event attributes + */ +static int +test_multi_queue_enq_single_port_deq(void) +{ + int ret; + + ret = generate_random_events(MAX_EVENTS); + if (ret) + return -1; + + return consume_events(0 /* port */, MAX_EVENTS, NULL); +} + +static int +worker_multi_port_fn(void *arg) +{ + struct test_core_param *param = arg; + struct rte_event ev; + uint16_t valid_event; + uint8_t port = param->port; + rte_atomic32_t *total_events = param->total_events; + int ret; + + while (rte_atomic32_read(total_events) > 0) { + valid_event = rte_event_dequeue_burst(evdev, port, &ev, 1, 0); + if (!valid_event) + continue; + + ret = validate_event(&ev); + RTE_TEST_ASSERT_SUCCESS(ret, "Failed to validate event"); + rte_pktmbuf_free(ev.mbuf); + rte_atomic32_sub(total_events, 1); + } + return 0; +} + +static int +wait_workers_to_join(int lcore, const rte_atomic32_t *count) +{ + uint64_t cycles, print_cycles; + + RTE_SET_USED(count); + + print_cycles = cycles = rte_get_timer_cycles(); + while (rte_eal_get_lcore_state(lcore) != FINISHED) { + uint64_t new_cycles = rte_get_timer_cycles(); + + if (new_cycles - print_cycles > rte_get_timer_hz()) { + dpaa2_evdev_dbg("\r%s: events %d", __func__, + rte_atomic32_read(count)); + print_cycles = new_cycles; + } + if (new_cycles - cycles > rte_get_timer_hz() * 10) { + dpaa2_evdev_info( + "%s: No schedules for seconds, deadlock (%d)", + __func__, + rte_atomic32_read(count)); + rte_event_dev_dump(evdev, stdout); + cycles = new_cycles; + return -1; + } + } + rte_eal_mp_wait_lcore(); + return 0; +} + + +static int +launch_workers_and_wait(int (*master_worker)(void *), + int (*slave_workers)(void *), uint32_t total_events, + uint8_t nb_workers, uint8_t sched_type) +{ + uint8_t port = 0; + int w_lcore; + int ret; + struct test_core_param *param; + rte_atomic32_t atomic_total_events; + uint64_t dequeue_tmo_ticks; + + if (!nb_workers) + return 0; + + rte_atomic32_set(&atomic_total_events, total_events); + seqn_list_init(); + + param = malloc(sizeof(struct test_core_param) * nb_workers); + if (!param) + return -1; + + ret = rte_event_dequeue_timeout_ticks(evdev, + rte_rand() % 10000000/* 10ms */, &dequeue_tmo_ticks); + if (ret) { + free(param); + return -1; + } + + param[0].total_events = &atomic_total_events; + param[0].sched_type = sched_type; + param[0].port = 0; + param[0].dequeue_tmo_ticks = dequeue_tmo_ticks; + rte_smp_wmb(); + + w_lcore = rte_get_next_lcore( + /* start core */ -1, + /* skip master */ 1, + /* wrap */ 0); + rte_eal_remote_launch(master_worker, ¶m[0], w_lcore); + + for (port = 1; port < nb_workers; port++) { + param[port].total_events = &atomic_total_events; + param[port].sched_type = sched_type; + param[port].port = port; + param[port].dequeue_tmo_ticks = dequeue_tmo_ticks; + rte_smp_wmb(); + w_lcore = rte_get_next_lcore(w_lcore, 1, 0); + rte_eal_remote_launch(slave_workers, ¶m[port], w_lcore); + } + + ret = wait_workers_to_join(w_lcore, &atomic_total_events); + free(param); + return ret; +} + +/* + * Generate a prescribed number of events and spread them across available + * queues. 
Dequeue the events through multiple ports and verify the enqueued + * event attributes + */ +static int +test_multi_queue_enq_multi_port_deq(void) +{ + const unsigned int total_events = MAX_EVENTS; + uint32_t nr_ports; + int ret; + + ret = generate_random_events(total_events); + if (ret) + return -1; + + RTE_TEST_ASSERT_SUCCESS(rte_event_dev_attr_get(evdev, + RTE_EVENT_DEV_ATTR_PORT_COUNT, + &nr_ports), "Port count get failed"); + nr_ports = RTE_MIN(nr_ports, rte_lcore_count() - 1); + + if (!nr_ports) { + dpaa2_evdev_err("%s: Not enough ports=%d or workers=%d", + __func__, nr_ports, rte_lcore_count() - 1); + return 0; + } + + return launch_workers_and_wait(worker_multi_port_fn, + worker_multi_port_fn, total_events, + nr_ports, 0xff /* invalid */); +} + +static +void flush(uint8_t dev_id, struct rte_event event, void *arg) +{ + unsigned int *count = arg; + + RTE_SET_USED(dev_id); + if (event.event_type == RTE_EVENT_TYPE_CPU) + *count = *count + 1; + +} + +static int +test_dev_stop_flush(void) +{ + unsigned int total_events = MAX_EVENTS, count = 0; + int ret; + + ret = generate_random_events(total_events); + if (ret) + return -1; + + ret = rte_event_dev_stop_flush_callback_register(evdev, flush, &count); + if (ret) + return -2; + rte_event_dev_stop(evdev); + ret = rte_event_dev_stop_flush_callback_register(evdev, NULL, NULL); + if (ret) + return -3; + RTE_TEST_ASSERT_EQUAL(total_events, count, + "count mismatch total_events=%d count=%d", + total_events, count); + return 0; +} + +static int +validate_queue_to_port_single_link(uint32_t index, uint8_t port, + struct rte_event *ev) +{ + RTE_SET_USED(index); + RTE_TEST_ASSERT_EQUAL(port, ev->queue_id, + "queue mismatch enq=%d deq =%d", + port, ev->queue_id); + return 0; +} + +/* + * Link queue x to port x and check correctness of link by checking + * queue_id == x on dequeue on the specific port x + */ +static int +test_queue_to_port_single_link(void) +{ + int i, nr_links, ret; + + uint32_t port_count; + + RTE_TEST_ASSERT_SUCCESS(rte_event_dev_attr_get(evdev, + RTE_EVENT_DEV_ATTR_PORT_COUNT, + &port_count), "Port count get failed"); + + /* Unlink all connections that created in eventdev_setup */ + for (i = 0; i < (int)port_count; i++) { + ret = rte_event_port_unlink(evdev, i, NULL, 0); + RTE_TEST_ASSERT(ret >= 0, + "Failed to unlink all queues port=%d", i); + } + + uint32_t queue_count; + + RTE_TEST_ASSERT_SUCCESS(rte_event_dev_attr_get(evdev, + RTE_EVENT_DEV_ATTR_QUEUE_COUNT, + &queue_count), "Queue count get failed"); + + nr_links = RTE_MIN(port_count, queue_count); + const unsigned int total_events = MAX_EVENTS / nr_links; + + /* Link queue x to port x and inject events to queue x through port x */ + for (i = 0; i < nr_links; i++) { + uint8_t queue = (uint8_t)i; + + ret = rte_event_port_link(evdev, i, &queue, NULL, 1); + RTE_TEST_ASSERT(ret == 1, "Failed to link queue to port %d", i); + + ret = inject_events( + 0x100 /*flow_id */, + RTE_EVENT_TYPE_CPU /* event_type */, + rte_rand() % 256 /* sub_event_type */, + rte_rand() % (RTE_SCHED_TYPE_PARALLEL + 1), + queue /* queue */, + i /* port */, + total_events /* events */); + if (ret) + return -1; + } + + /* Verify the events generated from correct queue */ + for (i = 0; i < nr_links; i++) { + ret = consume_events(i /* port */, total_events, + validate_queue_to_port_single_link); + if (ret) + return -1; + } + + return 0; +} + +static int +validate_queue_to_port_multi_link(uint32_t index, uint8_t port, + struct rte_event *ev) +{ + RTE_SET_USED(index); + RTE_TEST_ASSERT_EQUAL(port, (ev->queue_id & 
0x1), + "queue mismatch enq=%d deq =%d", + port, ev->queue_id); + return 0; +} + +/* + * Link all even number of queues to port 0 and all odd number of queues to + * port 1 and verify the link connection on dequeue + */ +static int +test_queue_to_port_multi_link(void) +{ + int ret, port0_events = 0, port1_events = 0; + uint8_t queue, port; + uint32_t nr_queues = 0; + uint32_t nr_ports = 0; + + RTE_TEST_ASSERT_SUCCESS(rte_event_dev_attr_get(evdev, + RTE_EVENT_DEV_ATTR_QUEUE_COUNT, + &nr_queues), "Queue count get failed"); + + RTE_TEST_ASSERT_SUCCESS(rte_event_dev_attr_get(evdev, + RTE_EVENT_DEV_ATTR_QUEUE_COUNT, + &nr_queues), "Queue count get failed"); + RTE_TEST_ASSERT_SUCCESS(rte_event_dev_attr_get(evdev, + RTE_EVENT_DEV_ATTR_PORT_COUNT, + &nr_ports), "Port count get failed"); + + if (nr_ports < 2) { + dpaa2_evdev_err("%s: Not enough ports to test ports=%d", + __func__, nr_ports); + return 0; + } + + /* Unlink all connections that created in eventdev_setup */ + for (port = 0; port < nr_ports; port++) { + ret = rte_event_port_unlink(evdev, port, NULL, 0); + RTE_TEST_ASSERT(ret >= 0, "Failed to unlink all queues port=%d", + port); + } + + const unsigned int total_events = MAX_EVENTS / nr_queues; + + /* Link all even number of queues to port0 and odd numbers to port 1*/ + for (queue = 0; queue < nr_queues; queue++) { + port = queue & 0x1; + ret = rte_event_port_link(evdev, port, &queue, NULL, 1); + RTE_TEST_ASSERT(ret == 1, "Failed to link queue=%d to port=%d", + queue, port); + + ret = inject_events( + 0x100 /*flow_id */, + RTE_EVENT_TYPE_CPU /* event_type */, + rte_rand() % 256 /* sub_event_type */, + rte_rand() % (RTE_SCHED_TYPE_PARALLEL + 1), + queue /* queue */, + port /* port */, + total_events /* events */); + if (ret) + return -1; + + if (port == 0) + port0_events += total_events; + else + port1_events += total_events; + } + + ret = consume_events(0 /* port */, port0_events, + validate_queue_to_port_multi_link); + if (ret) + return -1; + ret = consume_events(1 /* port */, port1_events, + validate_queue_to_port_multi_link); + if (ret) + return -1; + + return 0; +} + +static void dpaa2_test_run(int (*setup)(void), void (*tdown)(void), + int (*test)(void), const char *name) +{ + if (setup() < 0) { + RTE_LOG(INFO, PMD, "Error setting up test %s", name); + unsupported++; + } else { + if (test() < 0) { + failed++; + RTE_LOG(INFO, PMD, "%s Failed\n", name); + } else { + passed++; + RTE_LOG(INFO, PMD, "%s Passed", name); + } + } + + total++; + tdown(); +} + +int +test_eventdev_dpaa2(void) +{ + testsuite_setup(); + + DPAA2_TEST_RUN(eventdev_setup, eventdev_teardown, + test_simple_enqdeq_atomic); + DPAA2_TEST_RUN(eventdev_setup, eventdev_teardown, + test_simple_enqdeq_parallel); + DPAA2_TEST_RUN(eventdev_setup, eventdev_teardown, + test_multi_queue_enq_single_port_deq); + DPAA2_TEST_RUN(eventdev_setup, eventdev_teardown, + test_dev_stop_flush); + DPAA2_TEST_RUN(eventdev_setup, eventdev_teardown, + test_multi_queue_enq_multi_port_deq); + DPAA2_TEST_RUN(eventdev_setup, eventdev_teardown, + test_queue_to_port_single_link); + DPAA2_TEST_RUN(eventdev_setup, eventdev_teardown, + test_queue_to_port_multi_link); + + DPAA2_EVENTDEV_INFO("Total tests : %d", total); + DPAA2_EVENTDEV_INFO("Passed : %d", passed); + DPAA2_EVENTDEV_INFO("Failed : %d", failed); + DPAA2_EVENTDEV_INFO("Not supported : %d", unsupported); + + testsuite_teardown(); + + if (failed) + return -1; + + return 0; +} diff --git a/src/spdk/dpdk/drivers/event/dpaa2/dpaa2_hw_dpcon.c b/src/spdk/dpdk/drivers/event/dpaa2/dpaa2_hw_dpcon.c new 
file mode 100644 index 000000000..200b71640 --- /dev/null +++ b/src/spdk/dpdk/drivers/event/dpaa2/dpaa2_hw_dpcon.c @@ -0,0 +1,113 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * + * Copyright 2017 NXP + * + */ + +#include <unistd.h> +#include <stdio.h> +#include <sys/types.h> +#include <string.h> +#include <stdlib.h> +#include <fcntl.h> +#include <errno.h> + +#include <rte_malloc.h> +#include <rte_memcpy.h> +#include <rte_string_fns.h> +#include <rte_cycles.h> +#include <rte_kvargs.h> +#include <rte_dev.h> +#include <rte_ethdev_driver.h> + +#include <rte_fslmc.h> +#include <mc/fsl_dpcon.h> +#include <portal/dpaa2_hw_pvt.h> +#include "dpaa2_eventdev.h" +#include "dpaa2_eventdev_logs.h" + +TAILQ_HEAD(dpcon_dev_list, dpaa2_dpcon_dev); +static struct dpcon_dev_list dpcon_dev_list + = TAILQ_HEAD_INITIALIZER(dpcon_dev_list); /*!< DPCON device list */ + +static int +rte_dpaa2_create_dpcon_device(int dev_fd __rte_unused, + struct vfio_device_info *obj_info __rte_unused, + int dpcon_id) +{ + struct dpaa2_dpcon_dev *dpcon_node; + struct dpcon_attr attr; + int ret; + + /* Allocate DPAA2 dpcon handle */ + dpcon_node = rte_malloc(NULL, sizeof(struct dpaa2_dpcon_dev), 0); + if (!dpcon_node) { + DPAA2_EVENTDEV_ERR( + "Memory allocation failed for dpcon device"); + return -1; + } + + /* Open the dpcon object */ + dpcon_node->dpcon.regs = dpaa2_get_mcp_ptr(MC_PORTAL_INDEX); + ret = dpcon_open(&dpcon_node->dpcon, + CMD_PRI_LOW, dpcon_id, &dpcon_node->token); + if (ret) { + DPAA2_EVENTDEV_ERR("Unable to open dpcon device: err(%d)", + ret); + rte_free(dpcon_node); + return -1; + } + + /* Get the device attributes */ + ret = dpcon_get_attributes(&dpcon_node->dpcon, + CMD_PRI_LOW, dpcon_node->token, &attr); + if (ret != 0) { + DPAA2_EVENTDEV_ERR("dpcon attribute fetch failed: err(%d)", + ret); + rte_free(dpcon_node); + return -1; + } + + /* Updating device specific private information*/ + dpcon_node->qbman_ch_id = attr.qbman_ch_id; + dpcon_node->num_priorities = attr.num_priorities; + dpcon_node->dpcon_id = dpcon_id; + rte_atomic16_init(&dpcon_node->in_use); + + TAILQ_INSERT_TAIL(&dpcon_dev_list, dpcon_node, next); + + return 0; +} + +struct dpaa2_dpcon_dev *rte_dpaa2_alloc_dpcon_dev(void) +{ + struct dpaa2_dpcon_dev *dpcon_dev = NULL; + + /* Get DPCON dev handle from list using index */ + TAILQ_FOREACH(dpcon_dev, &dpcon_dev_list, next) { + if (dpcon_dev && rte_atomic16_test_and_set(&dpcon_dev->in_use)) + break; + } + + return dpcon_dev; +} + +void rte_dpaa2_free_dpcon_dev(struct dpaa2_dpcon_dev *dpcon) +{ + struct dpaa2_dpcon_dev *dpcon_dev = NULL; + + /* Match DPCON handle and mark it free */ + TAILQ_FOREACH(dpcon_dev, &dpcon_dev_list, next) { + if (dpcon_dev == dpcon) { + rte_atomic16_dec(&dpcon_dev->in_use); + return; + } + } +} + +static struct rte_dpaa2_object rte_dpaa2_dpcon_obj = { + .dev_type = DPAA2_CON, + .create = rte_dpaa2_create_dpcon_device, +}; + +RTE_PMD_REGISTER_DPAA2_OBJECT(dpcon, rte_dpaa2_dpcon_obj); diff --git a/src/spdk/dpdk/drivers/event/dpaa2/meson.build b/src/spdk/dpdk/drivers/event/dpaa2/meson.build new file mode 100644 index 000000000..71c8be3d6 --- /dev/null +++ b/src/spdk/dpdk/drivers/event/dpaa2/meson.build @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright 2018 NXP + +if not is_linux + build = false + reason = 'only supported on linux' +endif +deps += ['bus_vdev', 'pmd_dpaa2', 'pmd_dpaa2_sec'] +sources = files('dpaa2_hw_dpcon.c', + 'dpaa2_eventdev.c', + 'dpaa2_eventdev_selftest.c') + +includes += include_directories('../../crypto/dpaa2_sec/') diff --git 
a/src/spdk/dpdk/drivers/event/dpaa2/rte_pmd_dpaa2_event_version.map b/src/spdk/dpdk/drivers/event/dpaa2/rte_pmd_dpaa2_event_version.map new file mode 100644 index 000000000..f9f17e4f6 --- /dev/null +++ b/src/spdk/dpdk/drivers/event/dpaa2/rte_pmd_dpaa2_event_version.map @@ -0,0 +1,3 @@ +DPDK_20.0 { + local: *; +}; diff --git a/src/spdk/dpdk/drivers/event/dsw/Makefile b/src/spdk/dpdk/drivers/event/dsw/Makefile new file mode 100644 index 000000000..f6e7dda1f --- /dev/null +++ b/src/spdk/dpdk/drivers/event/dsw/Makefile @@ -0,0 +1,26 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2018 Ericsson AB + +include $(RTE_SDK)/mk/rte.vars.mk + +LIB = librte_pmd_dsw_event.a + +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) +ifneq ($(CONFIG_RTE_TOOLCHAIN_ICC),y) +CFLAGS += -Wno-format-nonliteral +endif + +LDLIBS += -lrte_eal +LDLIBS += -lrte_mbuf +LDLIBS += -lrte_mempool +LDLIBS += -lrte_ring +LDLIBS += -lrte_eventdev +LDLIBS += -lrte_bus_vdev + +EXPORT_MAP := rte_pmd_dsw_event_version.map + +SRCS-$(CONFIG_RTE_LIBRTE_PMD_DSW_EVENTDEV) += \ + dsw_evdev.c dsw_event.c dsw_xstats.c + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/src/spdk/dpdk/drivers/event/dsw/dsw_evdev.c b/src/spdk/dpdk/drivers/event/dsw/dsw_evdev.c new file mode 100644 index 000000000..e796975df --- /dev/null +++ b/src/spdk/dpdk/drivers/event/dsw/dsw_evdev.c @@ -0,0 +1,442 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Ericsson AB + */ + +#include <stdbool.h> + +#include <rte_cycles.h> +#include <rte_eventdev_pmd.h> +#include <rte_eventdev_pmd_vdev.h> +#include <rte_random.h> +#include <rte_ring_elem.h> + +#include "dsw_evdev.h" + +#define EVENTDEV_NAME_DSW_PMD event_dsw + +static int +dsw_port_setup(struct rte_eventdev *dev, uint8_t port_id, + const struct rte_event_port_conf *conf) +{ + struct dsw_evdev *dsw = dsw_pmd_priv(dev); + struct dsw_port *port; + struct rte_event_ring *in_ring; + struct rte_ring *ctl_in_ring; + char ring_name[RTE_RING_NAMESIZE]; + + port = &dsw->ports[port_id]; + + *port = (struct dsw_port) { + .id = port_id, + .dsw = dsw, + .dequeue_depth = conf->dequeue_depth, + .enqueue_depth = conf->enqueue_depth, + .new_event_threshold = conf->new_event_threshold + }; + + snprintf(ring_name, sizeof(ring_name), "dsw%d_p%u", dev->data->dev_id, + port_id); + + in_ring = rte_event_ring_create(ring_name, DSW_IN_RING_SIZE, + dev->data->socket_id, + RING_F_SC_DEQ|RING_F_EXACT_SZ); + + if (in_ring == NULL) + return -ENOMEM; + + snprintf(ring_name, sizeof(ring_name), "dswctl%d_p%u", + dev->data->dev_id, port_id); + + ctl_in_ring = rte_ring_create_elem(ring_name, + sizeof(struct dsw_ctl_msg), + DSW_CTL_IN_RING_SIZE, + dev->data->socket_id, + RING_F_SC_DEQ|RING_F_EXACT_SZ); + + if (ctl_in_ring == NULL) { + rte_event_ring_free(in_ring); + return -ENOMEM; + } + + port->in_ring = in_ring; + port->ctl_in_ring = ctl_in_ring; + + rte_atomic16_init(&port->load); + rte_atomic32_init(&port->immigration_load); + + port->load_update_interval = + (DSW_LOAD_UPDATE_INTERVAL * rte_get_timer_hz()) / US_PER_S; + + port->migration_interval = + (DSW_MIGRATION_INTERVAL * rte_get_timer_hz()) / US_PER_S; + + dev->data->ports[port_id] = port; + + return 0; +} + +static void +dsw_port_def_conf(struct rte_eventdev *dev __rte_unused, + uint8_t port_id __rte_unused, + struct rte_event_port_conf *port_conf) +{ + *port_conf = (struct rte_event_port_conf) { + .new_event_threshold = 1024, + .dequeue_depth = DSW_MAX_PORT_DEQUEUE_DEPTH / 4, + .enqueue_depth = DSW_MAX_PORT_ENQUEUE_DEPTH / 4 + }; +} + +static void +dsw_port_release(void *p) 
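/*
 * Each port owns the two single-consumer rings created in dsw_port_setup()
 * above: in_ring (an rte_event_ring) holds events scheduled to this port,
 * and ctl_in_ring carries fixed-size struct dsw_ctl_msg entries used by the
 * flow-migration protocol. Both are created with RING_F_EXACT_SZ, and
 * release simply frees them again.
 */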
+{ + struct dsw_port *port = p; + + rte_event_ring_free(port->in_ring); + rte_ring_free(port->ctl_in_ring); +} + +static int +dsw_queue_setup(struct rte_eventdev *dev, uint8_t queue_id, + const struct rte_event_queue_conf *conf) +{ + struct dsw_evdev *dsw = dsw_pmd_priv(dev); + struct dsw_queue *queue = &dsw->queues[queue_id]; + + if (RTE_EVENT_QUEUE_CFG_ALL_TYPES & conf->event_queue_cfg) + return -ENOTSUP; + + /* SINGLE_LINK is better off treated as TYPE_ATOMIC, since it + * avoid the "fake" TYPE_PARALLEL flow_id assignment. Since + * the queue will only have a single serving port, no + * migration will ever happen, so the extra TYPE_ATOMIC + * migration overhead is avoided. + */ + if (RTE_EVENT_QUEUE_CFG_SINGLE_LINK & conf->event_queue_cfg) + queue->schedule_type = RTE_SCHED_TYPE_ATOMIC; + else { + if (conf->schedule_type == RTE_SCHED_TYPE_ORDERED) + return -ENOTSUP; + /* atomic or parallel */ + queue->schedule_type = conf->schedule_type; + } + + queue->num_serving_ports = 0; + + return 0; +} + +static void +dsw_queue_def_conf(struct rte_eventdev *dev __rte_unused, + uint8_t queue_id __rte_unused, + struct rte_event_queue_conf *queue_conf) +{ + *queue_conf = (struct rte_event_queue_conf) { + .nb_atomic_flows = 4096, + .schedule_type = RTE_SCHED_TYPE_ATOMIC, + .priority = RTE_EVENT_DEV_PRIORITY_NORMAL + }; +} + +static void +dsw_queue_release(struct rte_eventdev *dev __rte_unused, + uint8_t queue_id __rte_unused) +{ +} + +static void +queue_add_port(struct dsw_queue *queue, uint16_t port_id) +{ + queue->serving_ports[queue->num_serving_ports] = port_id; + queue->num_serving_ports++; +} + +static bool +queue_remove_port(struct dsw_queue *queue, uint16_t port_id) +{ + uint16_t i; + + for (i = 0; i < queue->num_serving_ports; i++) + if (queue->serving_ports[i] == port_id) { + uint16_t last_idx = queue->num_serving_ports - 1; + if (i != last_idx) + queue->serving_ports[i] = + queue->serving_ports[last_idx]; + queue->num_serving_ports--; + return true; + } + return false; +} + +static int +dsw_port_link_unlink(struct rte_eventdev *dev, void *port, + const uint8_t queues[], uint16_t num, bool link) +{ + struct dsw_evdev *dsw = dsw_pmd_priv(dev); + struct dsw_port *p = port; + uint16_t i; + uint16_t count = 0; + + for (i = 0; i < num; i++) { + uint8_t qid = queues[i]; + struct dsw_queue *q = &dsw->queues[qid]; + if (link) { + queue_add_port(q, p->id); + count++; + } else { + bool removed = queue_remove_port(q, p->id); + if (removed) + count++; + } + } + + return count; +} + +static int +dsw_port_link(struct rte_eventdev *dev, void *port, const uint8_t queues[], + const uint8_t priorities[] __rte_unused, uint16_t num) +{ + return dsw_port_link_unlink(dev, port, queues, num, true); +} + +static int +dsw_port_unlink(struct rte_eventdev *dev, void *port, uint8_t queues[], + uint16_t num) +{ + return dsw_port_link_unlink(dev, port, queues, num, false); +} + +static void +dsw_info_get(struct rte_eventdev *dev __rte_unused, + struct rte_event_dev_info *info) +{ + *info = (struct rte_event_dev_info) { + .driver_name = DSW_PMD_NAME, + .max_event_queues = DSW_MAX_QUEUES, + .max_event_queue_flows = DSW_MAX_FLOWS, + .max_event_queue_priority_levels = 1, + .max_event_priority_levels = 1, + .max_event_ports = DSW_MAX_PORTS, + .max_event_port_dequeue_depth = DSW_MAX_PORT_DEQUEUE_DEPTH, + .max_event_port_enqueue_depth = DSW_MAX_PORT_ENQUEUE_DEPTH, + .max_num_events = DSW_MAX_EVENTS, + .event_dev_cap = RTE_EVENT_DEV_CAP_BURST_MODE| + RTE_EVENT_DEV_CAP_DISTRIBUTED_SCHED| + RTE_EVENT_DEV_CAP_NONSEQ_MODE| + 
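/* The RTE_EVENT_DEV_CAP_DISTRIBUTED_SCHED flag above reflects that dsw has
 * no central scheduler thread or service core: scheduling decisions,
 * including flow migration, happen inside the enqueue/dequeue calls made
 * on each port by its own lcore.
 */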
RTE_EVENT_DEV_CAP_MULTIPLE_QUEUE_PORT + }; +} + +static int +dsw_configure(const struct rte_eventdev *dev) +{ + struct dsw_evdev *dsw = dsw_pmd_priv(dev); + const struct rte_event_dev_config *conf = &dev->data->dev_conf; + int32_t min_max_in_flight; + + dsw->num_ports = conf->nb_event_ports; + dsw->num_queues = conf->nb_event_queues; + + /* Avoid a situation where consumer ports are holding all the + * credits, without making use of them. + */ + min_max_in_flight = conf->nb_event_ports * DSW_PORT_MAX_CREDITS; + + dsw->max_inflight = RTE_MAX(conf->nb_events_limit, min_max_in_flight); + + return 0; +} + + +static void +initial_flow_to_port_assignment(struct dsw_evdev *dsw) +{ + uint8_t queue_id; + for (queue_id = 0; queue_id < dsw->num_queues; queue_id++) { + struct dsw_queue *queue = &dsw->queues[queue_id]; + uint16_t flow_hash; + for (flow_hash = 0; flow_hash < DSW_MAX_FLOWS; flow_hash++) { + uint8_t port_idx = + rte_rand() % queue->num_serving_ports; + uint8_t port_id = + queue->serving_ports[port_idx]; + dsw->queues[queue_id].flow_to_port_map[flow_hash] = + port_id; + } + } +} + +static int +dsw_start(struct rte_eventdev *dev) +{ + struct dsw_evdev *dsw = dsw_pmd_priv(dev); + uint16_t i; + uint64_t now; + + rte_atomic32_init(&dsw->credits_on_loan); + + initial_flow_to_port_assignment(dsw); + + now = rte_get_timer_cycles(); + for (i = 0; i < dsw->num_ports; i++) { + dsw->ports[i].measurement_start = now; + dsw->ports[i].busy_start = now; + } + + return 0; +} + +static void +dsw_port_drain_buf(uint8_t dev_id, struct rte_event *buf, uint16_t buf_len, + eventdev_stop_flush_t flush, void *flush_arg) +{ + uint16_t i; + + for (i = 0; i < buf_len; i++) + flush(dev_id, buf[i], flush_arg); +} + +static void +dsw_port_drain_paused(uint8_t dev_id, struct dsw_port *port, + eventdev_stop_flush_t flush, void *flush_arg) +{ + dsw_port_drain_buf(dev_id, port->paused_events, port->paused_events_len, + flush, flush_arg); +} + +static void +dsw_port_drain_out(uint8_t dev_id, struct dsw_evdev *dsw, struct dsw_port *port, + eventdev_stop_flush_t flush, void *flush_arg) +{ + uint16_t dport_id; + + for (dport_id = 0; dport_id < dsw->num_ports; dport_id++) + if (dport_id != port->id) + dsw_port_drain_buf(dev_id, port->out_buffer[dport_id], + port->out_buffer_len[dport_id], + flush, flush_arg); +} + +static void +dsw_port_drain_in_ring(uint8_t dev_id, struct dsw_port *port, + eventdev_stop_flush_t flush, void *flush_arg) +{ + struct rte_event ev; + + while (rte_event_ring_dequeue_burst(port->in_ring, &ev, 1, NULL)) + flush(dev_id, ev, flush_arg); +} + +static void +dsw_drain(uint8_t dev_id, struct dsw_evdev *dsw, + eventdev_stop_flush_t flush, void *flush_arg) +{ + uint16_t port_id; + + if (flush == NULL) + return; + + for (port_id = 0; port_id < dsw->num_ports; port_id++) { + struct dsw_port *port = &dsw->ports[port_id]; + + dsw_port_drain_out(dev_id, dsw, port, flush, flush_arg); + dsw_port_drain_paused(dev_id, port, flush, flush_arg); + dsw_port_drain_in_ring(dev_id, port, flush, flush_arg); + } +} + +static void +dsw_stop(struct rte_eventdev *dev) +{ + struct dsw_evdev *dsw = dsw_pmd_priv(dev); + uint8_t dev_id; + eventdev_stop_flush_t flush; + void *flush_arg; + + dev_id = dev->data->dev_id; + flush = dev->dev_ops->dev_stop_flush; + flush_arg = dev->data->dev_stop_flush_arg; + + dsw_drain(dev_id, dsw, flush, flush_arg); +} + +static int +dsw_close(struct rte_eventdev *dev) +{ + struct dsw_evdev *dsw = dsw_pmd_priv(dev); + + dsw->num_ports = 0; + dsw->num_queues = 0; + + return 0; +} + +static struct 
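/*
 * Note that dsw_drain() above delivers leftover buffered and in-ring events
 * only if the application registered a device stop-flush callback before
 * calling rte_event_dev_stop(); with no callback the events are simply
 * discarded. A minimal sketch, assuming the events carry mbufs and with an
 * assumed callback name:
 *
 *   static void
 *   drop_cb(uint8_t dev_id, struct rte_event ev, void *arg)
 *   {
 *           RTE_SET_USED(dev_id);
 *           RTE_SET_USED(arg);
 *           rte_pktmbuf_free(ev.mbuf);
 *   }
 *
 *   rte_event_dev_stop_flush_callback_register(dev_id, drop_cb, NULL);
 */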
rte_eventdev_ops dsw_evdev_ops = { + .port_setup = dsw_port_setup, + .port_def_conf = dsw_port_def_conf, + .port_release = dsw_port_release, + .queue_setup = dsw_queue_setup, + .queue_def_conf = dsw_queue_def_conf, + .queue_release = dsw_queue_release, + .port_link = dsw_port_link, + .port_unlink = dsw_port_unlink, + .dev_infos_get = dsw_info_get, + .dev_configure = dsw_configure, + .dev_start = dsw_start, + .dev_stop = dsw_stop, + .dev_close = dsw_close, + .xstats_get = dsw_xstats_get, + .xstats_get_names = dsw_xstats_get_names, + .xstats_get_by_name = dsw_xstats_get_by_name +}; + +static int +dsw_probe(struct rte_vdev_device *vdev) +{ + const char *name; + struct rte_eventdev *dev; + struct dsw_evdev *dsw; + + name = rte_vdev_device_name(vdev); + + dev = rte_event_pmd_vdev_init(name, sizeof(struct dsw_evdev), + rte_socket_id()); + if (dev == NULL) + return -EFAULT; + + dev->dev_ops = &dsw_evdev_ops; + dev->enqueue = dsw_event_enqueue; + dev->enqueue_burst = dsw_event_enqueue_burst; + dev->enqueue_new_burst = dsw_event_enqueue_new_burst; + dev->enqueue_forward_burst = dsw_event_enqueue_forward_burst; + dev->dequeue = dsw_event_dequeue; + dev->dequeue_burst = dsw_event_dequeue_burst; + + if (rte_eal_process_type() != RTE_PROC_PRIMARY) + return 0; + + dsw = dev->data->dev_private; + dsw->data = dev->data; + + return 0; +} + +static int +dsw_remove(struct rte_vdev_device *vdev) +{ + const char *name; + + name = rte_vdev_device_name(vdev); + if (name == NULL) + return -EINVAL; + + return rte_event_pmd_vdev_uninit(name); +} + +static struct rte_vdev_driver evdev_dsw_pmd_drv = { + .probe = dsw_probe, + .remove = dsw_remove +}; + +RTE_PMD_REGISTER_VDEV(EVENTDEV_NAME_DSW_PMD, evdev_dsw_pmd_drv); diff --git a/src/spdk/dpdk/drivers/event/dsw/dsw_evdev.h b/src/spdk/dpdk/drivers/event/dsw/dsw_evdev.h new file mode 100644 index 000000000..6cb77cfc4 --- /dev/null +++ b/src/spdk/dpdk/drivers/event/dsw/dsw_evdev.h @@ -0,0 +1,298 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Ericsson AB + */ + +#ifndef _DSW_EVDEV_H_ +#define _DSW_EVDEV_H_ + +#include <rte_event_ring.h> +#include <rte_eventdev.h> + +#define DSW_PMD_NAME RTE_STR(event_dsw) + +#define DSW_MAX_PORTS (64) +#define DSW_MAX_PORT_DEQUEUE_DEPTH (128) +#define DSW_MAX_PORT_ENQUEUE_DEPTH (128) +#define DSW_MAX_PORT_OUT_BUFFER (32) + +#define DSW_MAX_QUEUES (16) + +#define DSW_MAX_EVENTS (16384) + +/* Multiple 24-bit flow ids will map to the same DSW-level flow. The + * number of DSW flows should be high enough make it unlikely that + * flow ids of several large flows hash to the same DSW-level flow. + * Such collisions will limit parallism and thus the number of cores + * that may be utilized. However, configuring a large number of DSW + * flows might potentially, depending on traffic and actual + * application flow id value range, result in each such DSW-level flow + * being very small. The effect of migrating such flows will be small, + * in terms amount of processing load redistributed. This will in turn + * reduce the load balancing speed, since flow migration rate has an + * upper limit. Code changes are required to allow > 32k DSW-level + * flows. 
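 * As a concrete reference point: with DSW_MAX_FLOWS_BITS set to 13 below,
 * there are 8192 DSW-level flows (mask 0x1fff), so the 2^24 possible
 * eventdev flow ids map roughly 2048-to-1 onto DSW-level flows.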
+ */ +#define DSW_MAX_FLOWS_BITS (13) +#define DSW_MAX_FLOWS (1<<(DSW_MAX_FLOWS_BITS)) +#define DSW_MAX_FLOWS_MASK (DSW_MAX_FLOWS-1) + +/* Eventdev RTE_SCHED_TYPE_PARALLEL doesn't have a concept of flows, + * but the 'dsw' scheduler (more or less) randomly assign flow id to + * events on parallel queues, to be able to reuse some of the + * migration mechanism and scheduling logic from + * RTE_SCHED_TYPE_ATOMIC. By moving one of the parallel "flows" from a + * particular port, the likely-hood of events being scheduled to this + * port is reduced, and thus a kind of statistical load balancing is + * achieved. + */ +#define DSW_PARALLEL_FLOWS (1024) + +/* 'Background tasks' are polling the control rings for * + * migration-related messages, or flush the output buffer (so + * buffered events doesn't linger too long). Shouldn't be too low, + * since the system won't benefit from the 'batching' effects from + * the output buffer, and shouldn't be too high, since it will make + * buffered events linger too long in case the port goes idle. + */ +#define DSW_MAX_PORT_OPS_PER_BG_TASK (128) + +/* Avoid making small 'loans' from the central in-flight event credit + * pool, to improve efficiency. + */ +#define DSW_MIN_CREDIT_LOAN (64) +#define DSW_PORT_MAX_CREDITS (2*DSW_MIN_CREDIT_LOAN) +#define DSW_PORT_MIN_CREDITS (DSW_MIN_CREDIT_LOAN) + +/* The rings are dimensioned so that all in-flight events can reside + * on any one of the port rings, to avoid the trouble of having to + * care about the case where there's no room on the destination port's + * input ring. + */ +#define DSW_IN_RING_SIZE (DSW_MAX_EVENTS) + +#define DSW_MAX_LOAD (INT16_MAX) +#define DSW_LOAD_FROM_PERCENT(x) ((int16_t)(((x)*DSW_MAX_LOAD)/100)) +#define DSW_LOAD_TO_PERCENT(x) ((100*x)/DSW_MAX_LOAD) + +/* The thought behind keeping the load update interval shorter than + * the migration interval is that the load from newly migrated flows + * should 'show up' on the load measurement before new migrations are + * considered. This is to avoid having too many flows, from too many + * source ports, to be migrated too quickly to a lightly loaded port - + * in particular since this might cause the system to oscillate. + */ +#define DSW_LOAD_UPDATE_INTERVAL (DSW_MIGRATION_INTERVAL/4) +#define DSW_OLD_LOAD_WEIGHT (1) + +/* The minimum time (in us) between two flow migrations. What puts an + * upper limit on the actual migration rate is primarily the pace in + * which the ports send and receive control messages, which in turn is + * largely a function of how much cycles are spent the processing of + * an event burst. + */ +#define DSW_MIGRATION_INTERVAL (1000) +#define DSW_MIN_SOURCE_LOAD_FOR_MIGRATION (DSW_LOAD_FROM_PERCENT(70)) +#define DSW_MAX_TARGET_LOAD_FOR_MIGRATION (DSW_LOAD_FROM_PERCENT(95)) +#define DSW_REBALANCE_THRESHOLD (DSW_LOAD_FROM_PERCENT(3)) + +#define DSW_MAX_EVENTS_RECORDED (128) + +#define DSW_MAX_FLOWS_PER_MIGRATION (8) + +/* Only one outstanding migration per port is allowed */ +#define DSW_MAX_PAUSED_FLOWS (DSW_MAX_PORTS*DSW_MAX_FLOWS_PER_MIGRATION) + +/* Enough room for paus request/confirm and unpaus request/confirm for + * all possible senders. + */ +#define DSW_CTL_IN_RING_SIZE ((DSW_MAX_PORTS-1)*4) + +/* With DSW_SORT_DEQUEUED enabled, the scheduler will, at the point of + * dequeue(), arrange events so that events with the same flow id on + * the same queue forms a back-to-back "burst", and also so that such + * bursts of different flow ids, but on the same queue, also come + * consecutively. 
All this in an attempt to improve data and + * instruction cache usage for the application, at the cost of a + * scheduler overhead increase. + */ + +/* #define DSW_SORT_DEQUEUED */ + +struct dsw_queue_flow { + uint8_t queue_id; + uint16_t flow_hash; +}; + +enum dsw_migration_state { + DSW_MIGRATION_STATE_IDLE, + DSW_MIGRATION_STATE_PAUSING, + DSW_MIGRATION_STATE_FORWARDING, + DSW_MIGRATION_STATE_UNPAUSING +}; + +struct dsw_port { + uint16_t id; + + /* Keeping a pointer here to avoid container_of() calls, which + * are expensive since they are very frequent and will result + * in an integer multiplication (since the port id is an index + * into the dsw_evdev port array). + */ + struct dsw_evdev *dsw; + + uint16_t dequeue_depth; + uint16_t enqueue_depth; + + int32_t inflight_credits; + + int32_t new_event_threshold; + + uint16_t pending_releases; + + uint16_t next_parallel_flow_id; + + uint16_t ops_since_bg_task; + + /* most recent 'background' processing */ + uint64_t last_bg; + + /* For port load measurement. */ + uint64_t next_load_update; + uint64_t load_update_interval; + uint64_t measurement_start; + uint64_t busy_start; + uint64_t busy_cycles; + uint64_t total_busy_cycles; + + /* For the ctl interface and flow migration mechanism. */ + uint64_t next_emigration; + uint64_t migration_interval; + enum dsw_migration_state migration_state; + + uint64_t emigration_start; + uint64_t emigrations; + uint64_t emigration_latency; + + uint8_t emigration_target_port_ids[DSW_MAX_FLOWS_PER_MIGRATION]; + struct dsw_queue_flow + emigration_target_qfs[DSW_MAX_FLOWS_PER_MIGRATION]; + uint8_t emigration_targets_len; + uint8_t cfm_cnt; + + uint64_t immigrations; + + uint16_t paused_flows_len; + struct dsw_queue_flow paused_flows[DSW_MAX_PAUSED_FLOWS]; + + /* In a very contrived worst case all inflight events can be + * laying around paused here. + */ + uint16_t paused_events_len; + struct rte_event paused_events[DSW_MAX_EVENTS]; + + uint16_t seen_events_len; + uint16_t seen_events_idx; + struct dsw_queue_flow seen_events[DSW_MAX_EVENTS_RECORDED]; + + uint64_t enqueue_calls; + uint64_t new_enqueued; + uint64_t forward_enqueued; + uint64_t release_enqueued; + uint64_t queue_enqueued[DSW_MAX_QUEUES]; + + uint64_t dequeue_calls; + uint64_t dequeued; + uint64_t queue_dequeued[DSW_MAX_QUEUES]; + + uint16_t out_buffer_len[DSW_MAX_PORTS]; + struct rte_event out_buffer[DSW_MAX_PORTS][DSW_MAX_PORT_OUT_BUFFER]; + + uint16_t in_buffer_len; + uint16_t in_buffer_start; + /* This buffer may contain events that were read up from the + * in_ring during the flow migration process. + */ + struct rte_event in_buffer[DSW_MAX_EVENTS]; + + struct rte_event_ring *in_ring __rte_cache_aligned; + + struct rte_ring *ctl_in_ring __rte_cache_aligned; + + /* Estimate of current port load. */ + rte_atomic16_t load __rte_cache_aligned; + /* Estimate of flows currently migrating to this port. 
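 * It is meant to be added on top of the measured load when other ports
 * pick an emigration target, so that a port already receiving several
 * migrating flows does not keep looking like the least loaded choice.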
*/ + rte_atomic32_t immigration_load __rte_cache_aligned; +} __rte_cache_aligned; + +struct dsw_queue { + uint8_t schedule_type; + uint8_t serving_ports[DSW_MAX_PORTS]; + uint16_t num_serving_ports; + + uint8_t flow_to_port_map[DSW_MAX_FLOWS] __rte_cache_aligned; +}; + +struct dsw_evdev { + struct rte_eventdev_data *data; + + struct dsw_port ports[DSW_MAX_PORTS]; + uint16_t num_ports; + struct dsw_queue queues[DSW_MAX_QUEUES]; + uint8_t num_queues; + int32_t max_inflight; + + rte_atomic32_t credits_on_loan __rte_cache_aligned; +}; + +#define DSW_CTL_PAUS_REQ (0) +#define DSW_CTL_UNPAUS_REQ (1) +#define DSW_CTL_CFM (2) + +struct dsw_ctl_msg { + uint8_t type; + uint8_t originating_port_id; + uint8_t qfs_len; + struct dsw_queue_flow qfs[DSW_MAX_FLOWS_PER_MIGRATION]; +} __rte_aligned(4); + +uint16_t dsw_event_enqueue(void *port, const struct rte_event *event); +uint16_t dsw_event_enqueue_burst(void *port, + const struct rte_event events[], + uint16_t events_len); +uint16_t dsw_event_enqueue_new_burst(void *port, + const struct rte_event events[], + uint16_t events_len); +uint16_t dsw_event_enqueue_forward_burst(void *port, + const struct rte_event events[], + uint16_t events_len); + +uint16_t dsw_event_dequeue(void *port, struct rte_event *ev, uint64_t wait); +uint16_t dsw_event_dequeue_burst(void *port, struct rte_event *events, + uint16_t num, uint64_t wait); + +int dsw_xstats_get_names(const struct rte_eventdev *dev, + enum rte_event_dev_xstats_mode mode, + uint8_t queue_port_id, + struct rte_event_dev_xstats_name *xstats_names, + unsigned int *ids, unsigned int size); +int dsw_xstats_get(const struct rte_eventdev *dev, + enum rte_event_dev_xstats_mode mode, uint8_t queue_port_id, + const unsigned int ids[], uint64_t values[], unsigned int n); +uint64_t dsw_xstats_get_by_name(const struct rte_eventdev *dev, + const char *name, unsigned int *id); + +static inline struct dsw_evdev * +dsw_pmd_priv(const struct rte_eventdev *eventdev) +{ + return eventdev->data->dev_private; +} + +#define DSW_LOG_DP(level, fmt, args...) \ + RTE_LOG_DP(level, EVENTDEV, "[%s] %s() line %u: " fmt, \ + DSW_PMD_NAME, \ + __func__, __LINE__, ## args) + +#define DSW_LOG_DP_PORT(level, port_id, fmt, args...) 
\ + DSW_LOG_DP(level, "<Port %d> " fmt, port_id, ## args) + +#endif diff --git a/src/spdk/dpdk/drivers/event/dsw/dsw_event.c b/src/spdk/dpdk/drivers/event/dsw/dsw_event.c new file mode 100644 index 000000000..611b36781 --- /dev/null +++ b/src/spdk/dpdk/drivers/event/dsw/dsw_event.c @@ -0,0 +1,1395 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Ericsson AB + */ + +#include "dsw_evdev.h" + +#ifdef DSW_SORT_DEQUEUED +#include "dsw_sort.h" +#endif + +#include <stdbool.h> +#include <string.h> + +#include <rte_atomic.h> +#include <rte_cycles.h> +#include <rte_memcpy.h> +#include <rte_random.h> + +static bool +dsw_port_acquire_credits(struct dsw_evdev *dsw, struct dsw_port *port, + int32_t credits) +{ + int32_t inflight_credits = port->inflight_credits; + int32_t missing_credits = credits - inflight_credits; + int32_t total_on_loan; + int32_t available; + int32_t acquired_credits; + int32_t new_total_on_loan; + + if (likely(missing_credits <= 0)) { + port->inflight_credits -= credits; + return true; + } + + total_on_loan = rte_atomic32_read(&dsw->credits_on_loan); + available = dsw->max_inflight - total_on_loan; + acquired_credits = RTE_MAX(missing_credits, DSW_PORT_MIN_CREDITS); + + if (available < acquired_credits) + return false; + + /* This is a race, no locks are involved, and thus some other + * thread can allocate tokens in between the check and the + * allocation. + */ + new_total_on_loan = rte_atomic32_add_return(&dsw->credits_on_loan, + acquired_credits); + + if (unlikely(new_total_on_loan > dsw->max_inflight)) { + /* Some other port took the last credits */ + rte_atomic32_sub(&dsw->credits_on_loan, acquired_credits); + return false; + } + + DSW_LOG_DP_PORT(DEBUG, port->id, "Acquired %d tokens from pool.\n", + acquired_credits); + + port->inflight_credits += acquired_credits; + port->inflight_credits -= credits; + + return true; +} + +static void +dsw_port_return_credits(struct dsw_evdev *dsw, struct dsw_port *port, + int32_t credits) +{ + port->inflight_credits += credits; + + if (unlikely(port->inflight_credits > DSW_PORT_MAX_CREDITS)) { + int32_t leave_credits = DSW_PORT_MIN_CREDITS; + int32_t return_credits = + port->inflight_credits - leave_credits; + + port->inflight_credits = leave_credits; + + rte_atomic32_sub(&dsw->credits_on_loan, return_credits); + + DSW_LOG_DP_PORT(DEBUG, port->id, + "Returned %d tokens to pool.\n", + return_credits); + } +} + +static void +dsw_port_enqueue_stats(struct dsw_port *port, uint16_t num_new, + uint16_t num_forward, uint16_t num_release) +{ + port->new_enqueued += num_new; + port->forward_enqueued += num_forward; + port->release_enqueued += num_release; +} + +static void +dsw_port_queue_enqueue_stats(struct dsw_port *source_port, uint8_t queue_id) +{ + source_port->queue_enqueued[queue_id]++; +} + +static void +dsw_port_dequeue_stats(struct dsw_port *port, uint16_t num) +{ + port->dequeued += num; +} + +static void +dsw_port_queue_dequeued_stats(struct dsw_port *source_port, uint8_t queue_id) +{ + source_port->queue_dequeued[queue_id]++; +} + +static void +dsw_port_load_record(struct dsw_port *port, unsigned int dequeued) +{ + if (dequeued > 0 && port->busy_start == 0) + /* work period begins */ + port->busy_start = rte_get_timer_cycles(); + else if (dequeued == 0 && port->busy_start > 0) { + /* work period ends */ + uint64_t work_period = + rte_get_timer_cycles() - port->busy_start; + port->busy_cycles += work_period; + port->busy_start = 0; + } +} + +static int16_t +dsw_port_load_close_period(struct dsw_port *port, uint64_t 
now) +{ + uint64_t passed = now - port->measurement_start; + uint64_t busy_cycles = port->busy_cycles; + + if (port->busy_start > 0) { + busy_cycles += (now - port->busy_start); + port->busy_start = now; + } + + int16_t load = (DSW_MAX_LOAD * busy_cycles) / passed; + + port->measurement_start = now; + port->busy_cycles = 0; + + port->total_busy_cycles += busy_cycles; + + return load; +} + +static void +dsw_port_load_update(struct dsw_port *port, uint64_t now) +{ + int16_t old_load; + int16_t period_load; + int16_t new_load; + + old_load = rte_atomic16_read(&port->load); + + period_load = dsw_port_load_close_period(port, now); + + new_load = (period_load + old_load*DSW_OLD_LOAD_WEIGHT) / + (DSW_OLD_LOAD_WEIGHT+1); + + rte_atomic16_set(&port->load, new_load); + + /* The load of the recently immigrated flows should hopefully + * be reflected the load estimate by now. + */ + rte_atomic32_set(&port->immigration_load, 0); +} + +static void +dsw_port_consider_load_update(struct dsw_port *port, uint64_t now) +{ + if (now < port->next_load_update) + return; + + port->next_load_update = now + port->load_update_interval; + + dsw_port_load_update(port, now); +} + +static void +dsw_port_ctl_enqueue(struct dsw_port *port, struct dsw_ctl_msg *msg) +{ + /* there's always room on the ring */ + while (rte_ring_enqueue_elem(port->ctl_in_ring, msg, sizeof(*msg)) != 0) + rte_pause(); +} + +static int +dsw_port_ctl_dequeue(struct dsw_port *port, struct dsw_ctl_msg *msg) +{ + return rte_ring_dequeue_elem(port->ctl_in_ring, msg, sizeof(*msg)); +} + +static void +dsw_port_ctl_broadcast(struct dsw_evdev *dsw, struct dsw_port *source_port, + uint8_t type, struct dsw_queue_flow *qfs, + uint8_t qfs_len) +{ + uint16_t port_id; + struct dsw_ctl_msg msg = { + .type = type, + .originating_port_id = source_port->id, + .qfs_len = qfs_len + }; + + memcpy(msg.qfs, qfs, sizeof(struct dsw_queue_flow) * qfs_len); + + for (port_id = 0; port_id < dsw->num_ports; port_id++) + if (port_id != source_port->id) + dsw_port_ctl_enqueue(&dsw->ports[port_id], &msg); +} + +static __rte_always_inline bool +dsw_is_queue_flow_in_ary(const struct dsw_queue_flow *qfs, uint16_t qfs_len, + uint8_t queue_id, uint16_t flow_hash) +{ + uint16_t i; + + for (i = 0; i < qfs_len; i++) + if (qfs[i].queue_id == queue_id && + qfs[i].flow_hash == flow_hash) + return true; + + return false; +} + +static __rte_always_inline bool +dsw_port_is_flow_paused(struct dsw_port *port, uint8_t queue_id, + uint16_t flow_hash) +{ + return dsw_is_queue_flow_in_ary(port->paused_flows, + port->paused_flows_len, + queue_id, flow_hash); +} + +static void +dsw_port_add_paused_flows(struct dsw_port *port, struct dsw_queue_flow *qfs, + uint8_t qfs_len) +{ + uint8_t i; + + for (i = 0; i < qfs_len; i++) { + struct dsw_queue_flow *qf = &qfs[i]; + + DSW_LOG_DP_PORT(DEBUG, port->id, + "Pausing queue_id %d flow_hash %d.\n", + qf->queue_id, qf->flow_hash); + + port->paused_flows[port->paused_flows_len] = *qf; + port->paused_flows_len++; + }; +} + +static void +dsw_port_remove_paused_flow(struct dsw_port *port, + struct dsw_queue_flow *target_qf) +{ + uint16_t i; + + for (i = 0; i < port->paused_flows_len; i++) { + struct dsw_queue_flow *qf = &port->paused_flows[i]; + + if (qf->queue_id == target_qf->queue_id && + qf->flow_hash == target_qf->flow_hash) { + uint16_t last_idx = port->paused_flows_len-1; + if (i != last_idx) + port->paused_flows[i] = + port->paused_flows[last_idx]; + port->paused_flows_len--; + break; + } + } +} + +static void +dsw_port_remove_paused_flows(struct dsw_port 
*port, + struct dsw_queue_flow *qfs, uint8_t qfs_len) +{ + uint8_t i; + + for (i = 0; i < qfs_len; i++) + dsw_port_remove_paused_flow(port, &qfs[i]); + +} + +static void +dsw_port_flush_out_buffers(struct dsw_evdev *dsw, struct dsw_port *source_port); + +static void +dsw_port_handle_pause_flows(struct dsw_evdev *dsw, struct dsw_port *port, + uint8_t originating_port_id, + struct dsw_queue_flow *paused_qfs, + uint8_t qfs_len) +{ + struct dsw_ctl_msg cfm = { + .type = DSW_CTL_CFM, + .originating_port_id = port->id + }; + + /* There might be already-scheduled events belonging to the + * paused flow in the output buffers. + */ + dsw_port_flush_out_buffers(dsw, port); + + dsw_port_add_paused_flows(port, paused_qfs, qfs_len); + + /* Make sure any stores to the original port's in_ring is seen + * before the ctl message. + */ + rte_smp_wmb(); + + dsw_port_ctl_enqueue(&dsw->ports[originating_port_id], &cfm); +} + +struct dsw_queue_flow_burst { + struct dsw_queue_flow queue_flow; + uint16_t count; +}; + +#define DSW_QF_TO_INT(_qf) \ + ((int)((((_qf)->queue_id)<<16)|((_qf)->flow_hash))) + +static inline int +dsw_cmp_qf(const void *v_qf_a, const void *v_qf_b) +{ + const struct dsw_queue_flow *qf_a = v_qf_a; + const struct dsw_queue_flow *qf_b = v_qf_b; + + return DSW_QF_TO_INT(qf_a) - DSW_QF_TO_INT(qf_b); +} + +static uint16_t +dsw_sort_qfs_to_bursts(struct dsw_queue_flow *qfs, uint16_t qfs_len, + struct dsw_queue_flow_burst *bursts) +{ + uint16_t i; + struct dsw_queue_flow_burst *current_burst = NULL; + uint16_t num_bursts = 0; + + /* We don't need the stable property, and the list is likely + * large enough for qsort() to outperform dsw_stable_sort(), + * so we use qsort() here. + */ + qsort(qfs, qfs_len, sizeof(qfs[0]), dsw_cmp_qf); + + /* arrange the (now-consecutive) events into bursts */ + for (i = 0; i < qfs_len; i++) { + if (i == 0 || + dsw_cmp_qf(&qfs[i], ¤t_burst->queue_flow) != 0) { + current_burst = &bursts[num_bursts]; + current_burst->queue_flow = qfs[i]; + current_burst->count = 0; + num_bursts++; + } + current_burst->count++; + } + + return num_bursts; +} + +static bool +dsw_retrieve_port_loads(struct dsw_evdev *dsw, int16_t *port_loads, + int16_t load_limit) +{ + bool below_limit = false; + uint16_t i; + + for (i = 0; i < dsw->num_ports; i++) { + int16_t measured_load = rte_atomic16_read(&dsw->ports[i].load); + int32_t immigration_load = + rte_atomic32_read(&dsw->ports[i].immigration_load); + int32_t load = measured_load + immigration_load; + + load = RTE_MIN(load, DSW_MAX_LOAD); + + if (load < load_limit) + below_limit = true; + port_loads[i] = load; + } + return below_limit; +} + +static int16_t +dsw_flow_load(uint16_t num_events, int16_t port_load) +{ + return ((int32_t)port_load * (int32_t)num_events) / + DSW_MAX_EVENTS_RECORDED; +} + +static int16_t +dsw_evaluate_migration(int16_t source_load, int16_t target_load, + int16_t flow_load) +{ + int32_t res_target_load; + int32_t imbalance; + + if (target_load > DSW_MAX_TARGET_LOAD_FOR_MIGRATION) + return -1; + + imbalance = source_load - target_load; + + if (imbalance < DSW_REBALANCE_THRESHOLD) + return -1; + + res_target_load = target_load + flow_load; + + /* If the estimated load of the target port will be higher + * than the source port's load, it doesn't make sense to move + * the flow. + */ + if (res_target_load > source_load) + return -1; + + /* The more idle the target will be, the better. This will + * make migration prefer moving smaller flows, and flows to + * lightly loaded ports. 
+ */ + return DSW_MAX_LOAD - res_target_load; +} + +static bool +dsw_is_serving_port(struct dsw_evdev *dsw, uint8_t port_id, uint8_t queue_id) +{ + struct dsw_queue *queue = &dsw->queues[queue_id]; + uint16_t i; + + for (i = 0; i < queue->num_serving_ports; i++) + if (queue->serving_ports[i] == port_id) + return true; + + return false; +} + +static bool +dsw_select_emigration_target(struct dsw_evdev *dsw, + struct dsw_queue_flow_burst *bursts, + uint16_t num_bursts, uint8_t source_port_id, + int16_t *port_loads, uint16_t num_ports, + uint8_t *target_port_ids, + struct dsw_queue_flow *target_qfs, + uint8_t *targets_len) +{ + int16_t source_port_load = port_loads[source_port_id]; + struct dsw_queue_flow *candidate_qf = NULL; + uint8_t candidate_port_id = 0; + int16_t candidate_weight = -1; + int16_t candidate_flow_load = -1; + uint16_t i; + + if (source_port_load < DSW_MIN_SOURCE_LOAD_FOR_MIGRATION) + return false; + + for (i = 0; i < num_bursts; i++) { + struct dsw_queue_flow_burst *burst = &bursts[i]; + struct dsw_queue_flow *qf = &burst->queue_flow; + int16_t flow_load; + uint16_t port_id; + + if (dsw_is_queue_flow_in_ary(target_qfs, *targets_len, + qf->queue_id, qf->flow_hash)) + continue; + + flow_load = dsw_flow_load(burst->count, source_port_load); + + for (port_id = 0; port_id < num_ports; port_id++) { + int16_t weight; + + if (port_id == source_port_id) + continue; + + if (!dsw_is_serving_port(dsw, port_id, qf->queue_id)) + continue; + + weight = dsw_evaluate_migration(source_port_load, + port_loads[port_id], + flow_load); + + if (weight > candidate_weight) { + candidate_qf = qf; + candidate_port_id = port_id; + candidate_weight = weight; + candidate_flow_load = flow_load; + } + } + } + + if (candidate_weight < 0) + return false; + + DSW_LOG_DP_PORT(DEBUG, source_port_id, "Selected queue_id %d " + "flow_hash %d (with flow load %d) for migration " + "to port %d.\n", candidate_qf->queue_id, + candidate_qf->flow_hash, + DSW_LOAD_TO_PERCENT(candidate_flow_load), + candidate_port_id); + + port_loads[candidate_port_id] += candidate_flow_load; + port_loads[source_port_id] -= candidate_flow_load; + + target_port_ids[*targets_len] = candidate_port_id; + target_qfs[*targets_len] = *candidate_qf; + (*targets_len)++; + + rte_atomic32_add(&dsw->ports[candidate_port_id].immigration_load, + candidate_flow_load); + + return true; +} + +static void +dsw_select_emigration_targets(struct dsw_evdev *dsw, + struct dsw_port *source_port, + struct dsw_queue_flow_burst *bursts, + uint16_t num_bursts, int16_t *port_loads) +{ + struct dsw_queue_flow *target_qfs = source_port->emigration_target_qfs; + uint8_t *target_port_ids = source_port->emigration_target_port_ids; + uint8_t *targets_len = &source_port->emigration_targets_len; + uint16_t i; + + for (i = 0; i < DSW_MAX_FLOWS_PER_MIGRATION; i++) { + bool found; + + found = dsw_select_emigration_target(dsw, bursts, num_bursts, + source_port->id, + port_loads, dsw->num_ports, + target_port_ids, + target_qfs, + targets_len); + if (!found) + break; + } + + if (*targets_len == 0) + DSW_LOG_DP_PORT(DEBUG, source_port->id, + "For the %d flows considered, no target port " + "was found.\n", num_bursts); +} + +static uint8_t +dsw_schedule(struct dsw_evdev *dsw, uint8_t queue_id, uint16_t flow_hash) +{ + struct dsw_queue *queue = &dsw->queues[queue_id]; + uint8_t port_id; + + if (queue->num_serving_ports > 1) + port_id = queue->flow_to_port_map[flow_hash]; + else + /* A single-link queue, or atomic/ordered/parallel but + * with just a single serving port. 
+ */ + port_id = queue->serving_ports[0]; + + DSW_LOG_DP(DEBUG, "Event with queue_id %d flow_hash %d is scheduled " + "to port %d.\n", queue_id, flow_hash, port_id); + + return port_id; +} + +static void +dsw_port_transmit_buffered(struct dsw_evdev *dsw, struct dsw_port *source_port, + uint8_t dest_port_id) +{ + struct dsw_port *dest_port = &(dsw->ports[dest_port_id]); + uint16_t *buffer_len = &source_port->out_buffer_len[dest_port_id]; + struct rte_event *buffer = source_port->out_buffer[dest_port_id]; + uint16_t enqueued = 0; + + if (*buffer_len == 0) + return; + + /* The rings are dimensioned to fit all in-flight events (even + * on a single ring), so looping will work. + */ + do { + enqueued += + rte_event_ring_enqueue_burst(dest_port->in_ring, + buffer+enqueued, + *buffer_len-enqueued, + NULL); + } while (unlikely(enqueued != *buffer_len)); + + (*buffer_len) = 0; +} + +static uint16_t +dsw_port_get_parallel_flow_id(struct dsw_port *port) +{ + uint16_t flow_id = port->next_parallel_flow_id; + + port->next_parallel_flow_id = + (port->next_parallel_flow_id + 1) % DSW_PARALLEL_FLOWS; + + return flow_id; +} + +static void +dsw_port_buffer_paused(struct dsw_port *port, + const struct rte_event *paused_event) +{ + port->paused_events[port->paused_events_len] = *paused_event; + port->paused_events_len++; +} + +static void +dsw_port_buffer_non_paused(struct dsw_evdev *dsw, struct dsw_port *source_port, + uint8_t dest_port_id, const struct rte_event *event) +{ + struct rte_event *buffer = source_port->out_buffer[dest_port_id]; + uint16_t *buffer_len = &source_port->out_buffer_len[dest_port_id]; + + if (*buffer_len == DSW_MAX_PORT_OUT_BUFFER) + dsw_port_transmit_buffered(dsw, source_port, dest_port_id); + + buffer[*buffer_len] = *event; + + (*buffer_len)++; +} + +#define DSW_FLOW_ID_BITS (24) +static uint16_t +dsw_flow_id_hash(uint32_t flow_id) +{ + uint16_t hash = 0; + uint16_t offset = 0; + + do { + hash ^= ((flow_id >> offset) & DSW_MAX_FLOWS_MASK); + offset += DSW_MAX_FLOWS_BITS; + } while (offset < DSW_FLOW_ID_BITS); + + return hash; +} + +static void +dsw_port_buffer_parallel(struct dsw_evdev *dsw, struct dsw_port *source_port, + struct rte_event event) +{ + uint8_t dest_port_id; + + event.flow_id = dsw_port_get_parallel_flow_id(source_port); + + dest_port_id = dsw_schedule(dsw, event.queue_id, + dsw_flow_id_hash(event.flow_id)); + + dsw_port_buffer_non_paused(dsw, source_port, dest_port_id, &event); +} + +static void +dsw_port_buffer_event(struct dsw_evdev *dsw, struct dsw_port *source_port, + const struct rte_event *event) +{ + uint16_t flow_hash; + uint8_t dest_port_id; + + if (unlikely(dsw->queues[event->queue_id].schedule_type == + RTE_SCHED_TYPE_PARALLEL)) { + dsw_port_buffer_parallel(dsw, source_port, *event); + return; + } + + flow_hash = dsw_flow_id_hash(event->flow_id); + + if (unlikely(dsw_port_is_flow_paused(source_port, event->queue_id, + flow_hash))) { + dsw_port_buffer_paused(source_port, event); + return; + } + + dest_port_id = dsw_schedule(dsw, event->queue_id, flow_hash); + + dsw_port_buffer_non_paused(dsw, source_port, dest_port_id, event); +} + +static void +dsw_port_flush_paused_events(struct dsw_evdev *dsw, + struct dsw_port *source_port, + const struct dsw_queue_flow *qf) +{ + uint16_t paused_events_len = source_port->paused_events_len; + struct rte_event paused_events[paused_events_len]; + uint8_t dest_port_id; + uint16_t i; + + if (paused_events_len == 0) + return; + + if (dsw_port_is_flow_paused(source_port, qf->queue_id, qf->flow_hash)) + return; + + 
rte_memcpy(paused_events, source_port->paused_events, + paused_events_len * sizeof(struct rte_event)); + + source_port->paused_events_len = 0; + + dest_port_id = dsw_schedule(dsw, qf->queue_id, qf->flow_hash); + + for (i = 0; i < paused_events_len; i++) { + struct rte_event *event = &paused_events[i]; + uint16_t flow_hash; + + flow_hash = dsw_flow_id_hash(event->flow_id); + + if (event->queue_id == qf->queue_id && + flow_hash == qf->flow_hash) + dsw_port_buffer_non_paused(dsw, source_port, + dest_port_id, event); + else + dsw_port_buffer_paused(source_port, event); + } +} + +static void +dsw_port_emigration_stats(struct dsw_port *port, uint8_t finished) +{ + uint64_t flow_migration_latency; + + flow_migration_latency = + (rte_get_timer_cycles() - port->emigration_start); + port->emigration_latency += (flow_migration_latency * finished); + port->emigrations += finished; +} + +static void +dsw_port_end_emigration(struct dsw_evdev *dsw, struct dsw_port *port, + uint8_t schedule_type) +{ + uint8_t i; + struct dsw_queue_flow left_qfs[DSW_MAX_FLOWS_PER_MIGRATION]; + uint8_t left_port_ids[DSW_MAX_FLOWS_PER_MIGRATION]; + uint8_t left_qfs_len = 0; + uint8_t finished; + + for (i = 0; i < port->emigration_targets_len; i++) { + struct dsw_queue_flow *qf = &port->emigration_target_qfs[i]; + uint8_t queue_id = qf->queue_id; + uint8_t queue_schedule_type = + dsw->queues[queue_id].schedule_type; + uint16_t flow_hash = qf->flow_hash; + + if (queue_schedule_type != schedule_type) { + left_port_ids[left_qfs_len] = + port->emigration_target_port_ids[i]; + left_qfs[left_qfs_len] = *qf; + left_qfs_len++; + continue; + } + + DSW_LOG_DP_PORT(DEBUG, port->id, "Migration completed for " + "queue_id %d flow_hash %d.\n", queue_id, + flow_hash); + + if (queue_schedule_type == RTE_SCHED_TYPE_ATOMIC) { + dsw_port_remove_paused_flow(port, qf); + dsw_port_flush_paused_events(dsw, port, qf); + } + } + + finished = port->emigration_targets_len - left_qfs_len; + + if (finished > 0) + dsw_port_emigration_stats(port, finished); + + for (i = 0; i < left_qfs_len; i++) { + port->emigration_target_port_ids[i] = left_port_ids[i]; + port->emigration_target_qfs[i] = left_qfs[i]; + } + port->emigration_targets_len = left_qfs_len; + + if (port->emigration_targets_len == 0) { + port->migration_state = DSW_MIGRATION_STATE_IDLE; + port->seen_events_len = 0; + } +} + +static void +dsw_port_move_parallel_flows(struct dsw_evdev *dsw, + struct dsw_port *source_port) +{ + uint8_t i; + + for (i = 0; i < source_port->emigration_targets_len; i++) { + struct dsw_queue_flow *qf = + &source_port->emigration_target_qfs[i]; + uint8_t queue_id = qf->queue_id; + + if (dsw->queues[queue_id].schedule_type == + RTE_SCHED_TYPE_PARALLEL) { + uint8_t dest_port_id = + source_port->emigration_target_port_ids[i]; + uint16_t flow_hash = qf->flow_hash; + + /* Single byte-sized stores are always atomic. 
*/ + dsw->queues[queue_id].flow_to_port_map[flow_hash] = + dest_port_id; + } + } + + rte_smp_wmb(); + + dsw_port_end_emigration(dsw, source_port, RTE_SCHED_TYPE_PARALLEL); +} + +static void +dsw_port_consider_emigration(struct dsw_evdev *dsw, + struct dsw_port *source_port, + uint64_t now) +{ + bool any_port_below_limit; + struct dsw_queue_flow *seen_events = source_port->seen_events; + uint16_t seen_events_len = source_port->seen_events_len; + struct dsw_queue_flow_burst bursts[DSW_MAX_EVENTS_RECORDED]; + uint16_t num_bursts; + int16_t source_port_load; + int16_t port_loads[dsw->num_ports]; + + if (now < source_port->next_emigration) + return; + + if (dsw->num_ports == 1) + return; + + if (seen_events_len < DSW_MAX_EVENTS_RECORDED) + return; + + DSW_LOG_DP_PORT(DEBUG, source_port->id, "Considering emigration.\n"); + + /* Randomize interval to avoid having all threads considering + * emigration at the same point in time, which might lead + * to all choosing the same target port. + */ + source_port->next_emigration = now + + source_port->migration_interval / 2 + + rte_rand() % source_port->migration_interval; + + if (source_port->migration_state != DSW_MIGRATION_STATE_IDLE) { + DSW_LOG_DP_PORT(DEBUG, source_port->id, + "Emigration already in progress.\n"); + return; + } + + /* For simplicity, avoid migration in the unlikely case there + * are still events to consume in the in_buffer (from the last + * emigration). + */ + if (source_port->in_buffer_len > 0) { + DSW_LOG_DP_PORT(DEBUG, source_port->id, "There are still " + "events in the input buffer.\n"); + return; + } + + source_port_load = rte_atomic16_read(&source_port->load); + if (source_port_load < DSW_MIN_SOURCE_LOAD_FOR_MIGRATION) { + DSW_LOG_DP_PORT(DEBUG, source_port->id, + "Load %d is below threshold level %d.\n", + DSW_LOAD_TO_PERCENT(source_port_load), + DSW_LOAD_TO_PERCENT(DSW_MIN_SOURCE_LOAD_FOR_MIGRATION)); + return; + } + + /* Avoid starting any expensive operations (sorting etc.), in + * case of a scenario with all ports above the load limit. + */ + any_port_below_limit = + dsw_retrieve_port_loads(dsw, port_loads, + DSW_MAX_TARGET_LOAD_FOR_MIGRATION); + if (!any_port_below_limit) { + DSW_LOG_DP_PORT(DEBUG, source_port->id, + "Candidate target ports are all too highly " + "loaded.\n"); + return; + } + + num_bursts = dsw_sort_qfs_to_bursts(seen_events, seen_events_len, + bursts); + + /* For non-big-little systems, there's no point in moving the + * only (known) flow. + */ + if (num_bursts < 2) { + DSW_LOG_DP_PORT(DEBUG, source_port->id, "Only a single flow " + "queue_id %d flow_hash %d has been seen.\n", + bursts[0].queue_flow.queue_id, + bursts[0].queue_flow.flow_hash); + return; + } + + dsw_select_emigration_targets(dsw, source_port, bursts, num_bursts, + port_loads); + + if (source_port->emigration_targets_len == 0) + return; + + source_port->migration_state = DSW_MIGRATION_STATE_PAUSING; + source_port->emigration_start = rte_get_timer_cycles(); + + /* No need to go through the whole pause procedure for + * parallel queues, since atomic/ordered semantics need not + * be maintained. + */ + dsw_port_move_parallel_flows(dsw, source_port); + + /* All flows were on PARALLEL queues. */ + if (source_port->migration_state == DSW_MIGRATION_STATE_IDLE) + return; + + /* There might be 'loopback' events already scheduled in the + * output buffers.
+ */ + dsw_port_flush_out_buffers(dsw, source_port); + + dsw_port_add_paused_flows(source_port, + source_port->emigration_target_qfs, + source_port->emigration_targets_len); + + dsw_port_ctl_broadcast(dsw, source_port, DSW_CTL_PAUS_REQ, + source_port->emigration_target_qfs, + source_port->emigration_targets_len); + source_port->cfm_cnt = 0; +} + +static void +dsw_port_flush_paused_events(struct dsw_evdev *dsw, + struct dsw_port *source_port, + const struct dsw_queue_flow *qf); + +static void +dsw_port_handle_unpause_flows(struct dsw_evdev *dsw, struct dsw_port *port, + uint8_t originating_port_id, + struct dsw_queue_flow *paused_qfs, + uint8_t qfs_len) +{ + uint16_t i; + struct dsw_ctl_msg cfm = { + .type = DSW_CTL_CFM, + .originating_port_id = port->id + }; + + dsw_port_remove_paused_flows(port, paused_qfs, qfs_len); + + rte_smp_rmb(); + + dsw_port_ctl_enqueue(&dsw->ports[originating_port_id], &cfm); + + for (i = 0; i < qfs_len; i++) { + struct dsw_queue_flow *qf = &paused_qfs[i]; + + if (dsw_schedule(dsw, qf->queue_id, qf->flow_hash) == port->id) + port->immigrations++; + + dsw_port_flush_paused_events(dsw, port, qf); + } +} + +#define FORWARD_BURST_SIZE (32) + +static void +dsw_port_forward_emigrated_flow(struct dsw_port *source_port, + struct rte_event_ring *dest_ring, + uint8_t queue_id, + uint16_t flow_hash) +{ + uint16_t events_left; + + /* Control ring message should been seen before the ring count + * is read on the port's in_ring. + */ + rte_smp_rmb(); + + events_left = rte_event_ring_count(source_port->in_ring); + + while (events_left > 0) { + uint16_t in_burst_size = + RTE_MIN(FORWARD_BURST_SIZE, events_left); + struct rte_event in_burst[in_burst_size]; + uint16_t in_len; + uint16_t i; + + in_len = rte_event_ring_dequeue_burst(source_port->in_ring, + in_burst, + in_burst_size, NULL); + /* No need to care about bursting forwarded events (to + * the destination port's in_ring), since migration + * doesn't happen very often, and also the majority of + * the dequeued events will likely *not* be forwarded. + */ + for (i = 0; i < in_len; i++) { + struct rte_event *e = &in_burst[i]; + if (e->queue_id == queue_id && + dsw_flow_id_hash(e->flow_id) == flow_hash) { + while (rte_event_ring_enqueue_burst(dest_ring, + e, 1, + NULL) != 1) + rte_pause(); + } else { + uint16_t last_idx = source_port->in_buffer_len; + source_port->in_buffer[last_idx] = *e; + source_port->in_buffer_len++; + } + } + + events_left -= in_len; + } +} + +static void +dsw_port_move_emigrating_flows(struct dsw_evdev *dsw, + struct dsw_port *source_port) +{ + uint8_t i; + + dsw_port_flush_out_buffers(dsw, source_port); + + rte_smp_wmb(); + + for (i = 0; i < source_port->emigration_targets_len; i++) { + struct dsw_queue_flow *qf = + &source_port->emigration_target_qfs[i]; + uint8_t dest_port_id = + source_port->emigration_target_port_ids[i]; + struct dsw_port *dest_port = &dsw->ports[dest_port_id]; + + dsw->queues[qf->queue_id].flow_to_port_map[qf->flow_hash] = + dest_port_id; + + dsw_port_forward_emigrated_flow(source_port, dest_port->in_ring, + qf->queue_id, qf->flow_hash); + } + + /* Flow table update and migration destination port's enqueues + * must be seen before the control message. 
+ */ + rte_smp_wmb(); + + dsw_port_ctl_broadcast(dsw, source_port, DSW_CTL_UNPAUS_REQ, + source_port->emigration_target_qfs, + source_port->emigration_targets_len); + source_port->cfm_cnt = 0; + source_port->migration_state = DSW_MIGRATION_STATE_UNPAUSING; +} + +static void +dsw_port_handle_confirm(struct dsw_evdev *dsw, struct dsw_port *port) +{ + port->cfm_cnt++; + + if (port->cfm_cnt == (dsw->num_ports-1)) { + switch (port->migration_state) { + case DSW_MIGRATION_STATE_PAUSING: + DSW_LOG_DP_PORT(DEBUG, port->id, "Going into forwarding " + "migration state.\n"); + port->migration_state = DSW_MIGRATION_STATE_FORWARDING; + break; + case DSW_MIGRATION_STATE_UNPAUSING: + dsw_port_end_emigration(dsw, port, + RTE_SCHED_TYPE_ATOMIC); + break; + default: + RTE_ASSERT(0); + break; + } + } +} + +static void +dsw_port_ctl_process(struct dsw_evdev *dsw, struct dsw_port *port) +{ + struct dsw_ctl_msg msg; + + if (dsw_port_ctl_dequeue(port, &msg) == 0) { + switch (msg.type) { + case DSW_CTL_PAUS_REQ: + dsw_port_handle_pause_flows(dsw, port, + msg.originating_port_id, + msg.qfs, msg.qfs_len); + break; + case DSW_CTL_UNPAUS_REQ: + dsw_port_handle_unpause_flows(dsw, port, + msg.originating_port_id, + msg.qfs, msg.qfs_len); + break; + case DSW_CTL_CFM: + dsw_port_handle_confirm(dsw, port); + break; + } + } +} + +static void +dsw_port_note_op(struct dsw_port *port, uint16_t num_events) +{ + /* To pull the control ring reasonbly often on busy ports, + * each dequeued/enqueued event is considered an 'op' too. + */ + port->ops_since_bg_task += (num_events+1); +} + +static void +dsw_port_bg_process(struct dsw_evdev *dsw, struct dsw_port *port) +{ + if (unlikely(port->migration_state == DSW_MIGRATION_STATE_FORWARDING && + port->pending_releases == 0)) + dsw_port_move_emigrating_flows(dsw, port); + + /* Polling the control ring is relatively inexpensive, and + * polling it often helps bringing down migration latency, so + * do this for every iteration. + */ + dsw_port_ctl_process(dsw, port); + + /* To avoid considering migration and flushing output buffers + * on every dequeue/enqueue call, the scheduler only performs + * such 'background' tasks every nth + * (i.e. DSW_MAX_PORT_OPS_PER_BG_TASK) operation. + */ + if (unlikely(port->ops_since_bg_task >= DSW_MAX_PORT_OPS_PER_BG_TASK)) { + uint64_t now; + + now = rte_get_timer_cycles(); + + port->last_bg = now; + + /* Logic to avoid having events linger in the output + * buffer too long. + */ + dsw_port_flush_out_buffers(dsw, port); + + dsw_port_consider_load_update(port, now); + + dsw_port_consider_emigration(dsw, port, now); + + port->ops_since_bg_task = 0; + } +} + +static void +dsw_port_flush_out_buffers(struct dsw_evdev *dsw, struct dsw_port *source_port) +{ + uint16_t dest_port_id; + + for (dest_port_id = 0; dest_port_id < dsw->num_ports; dest_port_id++) + dsw_port_transmit_buffered(dsw, source_port, dest_port_id); +} + +uint16_t +dsw_event_enqueue(void *port, const struct rte_event *ev) +{ + return dsw_event_enqueue_burst(port, ev, unlikely(ev == NULL) ? 
0 : 1); +} + +static __rte_always_inline uint16_t +dsw_event_enqueue_burst_generic(struct dsw_port *source_port, + const struct rte_event events[], + uint16_t events_len, bool op_types_known, + uint16_t num_new, uint16_t num_release, + uint16_t num_non_release) +{ + struct dsw_evdev *dsw = source_port->dsw; + bool enough_credits; + uint16_t i; + + DSW_LOG_DP_PORT(DEBUG, source_port->id, "Attempting to enqueue %d " + "events to port %d.\n", events_len, source_port->id); + + dsw_port_bg_process(dsw, source_port); + + /* XXX: For performance (=ring efficiency) reasons, the + * scheduler relies on internal non-ring buffers instead of + * immediately sending the event to the destination ring. For + * a producer that doesn't intend to produce or consume any + * more events, the scheduler provides a way to flush the + * buffer, by means of doing an enqueue of zero events. In + * addition, a port cannot be left "unattended" (e.g. unused) + * for long periods of time, since that would stall + * migration. Eventdev API extensions to provide a cleaner way + * to achieve both of these functions should be + * considered. + */ + if (unlikely(events_len == 0)) { + dsw_port_note_op(source_port, DSW_MAX_PORT_OPS_PER_BG_TASK); + dsw_port_flush_out_buffers(dsw, source_port); + return 0; + } + + dsw_port_note_op(source_port, events_len); + + if (!op_types_known) + for (i = 0; i < events_len; i++) { + switch (events[i].op) { + case RTE_EVENT_OP_RELEASE: + num_release++; + break; + case RTE_EVENT_OP_NEW: + num_new++; + /* Falls through. */ + default: + num_non_release++; + break; + } + } + + /* Technically, we could allow the non-new events up to the + * first new event in the array into the system, but for + * simplicity reasons, we deny the whole burst if the port is + * above the water mark.
+ */ + if (unlikely(num_new > 0 && rte_atomic32_read(&dsw->credits_on_loan) > + source_port->new_event_threshold)) + return 0; + + enough_credits = dsw_port_acquire_credits(dsw, source_port, + num_non_release); + if (unlikely(!enough_credits)) + return 0; + + source_port->pending_releases -= num_release; + + dsw_port_enqueue_stats(source_port, num_new, + num_non_release-num_new, num_release); + + for (i = 0; i < events_len; i++) { + const struct rte_event *event = &events[i]; + + if (likely(num_release == 0 || + event->op != RTE_EVENT_OP_RELEASE)) + dsw_port_buffer_event(dsw, source_port, event); + dsw_port_queue_enqueue_stats(source_port, event->queue_id); + } + + DSW_LOG_DP_PORT(DEBUG, source_port->id, "%d non-release events " + "accepted.\n", num_non_release); + + return (num_non_release + num_release); +} + +uint16_t +dsw_event_enqueue_burst(void *port, const struct rte_event events[], + uint16_t events_len) +{ + struct dsw_port *source_port = port; + + if (unlikely(events_len > source_port->enqueue_depth)) + events_len = source_port->enqueue_depth; + + return dsw_event_enqueue_burst_generic(source_port, events, + events_len, false, 0, 0, 0); +} + +uint16_t +dsw_event_enqueue_new_burst(void *port, const struct rte_event events[], + uint16_t events_len) +{ + struct dsw_port *source_port = port; + + if (unlikely(events_len > source_port->enqueue_depth)) + events_len = source_port->enqueue_depth; + + return dsw_event_enqueue_burst_generic(source_port, events, + events_len, true, events_len, + 0, events_len); +} + +uint16_t +dsw_event_enqueue_forward_burst(void *port, const struct rte_event events[], + uint16_t events_len) +{ + struct dsw_port *source_port = port; + + if (unlikely(events_len > source_port->enqueue_depth)) + events_len = source_port->enqueue_depth; + + return dsw_event_enqueue_burst_generic(source_port, events, + events_len, true, 0, 0, + events_len); +} + +uint16_t +dsw_event_dequeue(void *port, struct rte_event *events, uint64_t wait) +{ + return dsw_event_dequeue_burst(port, events, 1, wait); +} + +static void +dsw_port_record_seen_events(struct dsw_port *port, struct rte_event *events, + uint16_t num) +{ + uint16_t i; + + dsw_port_dequeue_stats(port, num); + + for (i = 0; i < num; i++) { + uint16_t l_idx = port->seen_events_idx; + struct dsw_queue_flow *qf = &port->seen_events[l_idx]; + struct rte_event *event = &events[i]; + qf->queue_id = event->queue_id; + qf->flow_hash = dsw_flow_id_hash(event->flow_id); + + port->seen_events_idx = (l_idx+1) % DSW_MAX_EVENTS_RECORDED; + + dsw_port_queue_dequeued_stats(port, event->queue_id); + } + + if (unlikely(port->seen_events_len != DSW_MAX_EVENTS_RECORDED)) + port->seen_events_len = + RTE_MIN(port->seen_events_len + num, + DSW_MAX_EVENTS_RECORDED); +} + +#ifdef DSW_SORT_DEQUEUED + +#define DSW_EVENT_TO_INT(_event) \ + ((int)((((_event)->queue_id)<<16)|((_event)->flow_id))) + +static inline int +dsw_cmp_event(const void *v_event_a, const void *v_event_b) +{ + const struct rte_event *event_a = v_event_a; + const struct rte_event *event_b = v_event_b; + + return DSW_EVENT_TO_INT(event_a) - DSW_EVENT_TO_INT(event_b); +} +#endif + +static uint16_t +dsw_port_dequeue_burst(struct dsw_port *port, struct rte_event *events, + uint16_t num) +{ + if (unlikely(port->in_buffer_len > 0)) { + uint16_t dequeued = RTE_MIN(num, port->in_buffer_len); + + rte_memcpy(events, &port->in_buffer[port->in_buffer_start], + dequeued * sizeof(struct rte_event)); + + port->in_buffer_start += dequeued; + port->in_buffer_len -= dequeued; + + if 
(port->in_buffer_len == 0) + port->in_buffer_start = 0; + + return dequeued; + } + + return rte_event_ring_dequeue_burst(port->in_ring, events, num, NULL); +} + +uint16_t +dsw_event_dequeue_burst(void *port, struct rte_event *events, uint16_t num, + uint64_t wait __rte_unused) +{ + struct dsw_port *source_port = port; + struct dsw_evdev *dsw = source_port->dsw; + uint16_t dequeued; + + source_port->pending_releases = 0; + + dsw_port_bg_process(dsw, source_port); + + if (unlikely(num > source_port->dequeue_depth)) + num = source_port->dequeue_depth; + + dequeued = dsw_port_dequeue_burst(source_port, events, num); + + source_port->pending_releases = dequeued; + + dsw_port_load_record(source_port, dequeued); + + dsw_port_note_op(source_port, dequeued); + + if (dequeued > 0) { + DSW_LOG_DP_PORT(DEBUG, source_port->id, "Dequeued %d events.\n", + dequeued); + + dsw_port_return_credits(dsw, source_port, dequeued); + + /* One potential optimization one might think of is to + * add a migration state (prior to 'pausing'), and + * only record seen events when the port is in this + * state (and transit to 'pausing' when enough events + * have been gathered). However, that schema doesn't + * seem to improve performance. + */ + dsw_port_record_seen_events(port, events, dequeued); + } else /* Zero-size dequeue means a likely idle port, and thus + * we can afford trading some efficiency for a slightly + * reduced event wall-time latency. + */ + dsw_port_flush_out_buffers(dsw, port); + +#ifdef DSW_SORT_DEQUEUED + dsw_stable_sort(events, dequeued, sizeof(events[0]), dsw_cmp_event); +#endif + + return dequeued; +} diff --git a/src/spdk/dpdk/drivers/event/dsw/dsw_sort.h b/src/spdk/dpdk/drivers/event/dsw/dsw_sort.h new file mode 100644 index 000000000..609767fdf --- /dev/null +++ b/src/spdk/dpdk/drivers/event/dsw/dsw_sort.h @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Ericsson AB + */ + +#ifndef _DSW_SORT_ +#define _DSW_SORT_ + +#include <string.h> + +#include <rte_common.h> + +#define DSW_ARY_ELEM_PTR(_ary, _idx, _elem_size) \ + RTE_PTR_ADD(_ary, (_idx) * (_elem_size)) + +#define DSW_ARY_ELEM_SWAP(_ary, _a_idx, _b_idx, _elem_size) \ + do { \ + char tmp[_elem_size]; \ + void *_a_ptr = DSW_ARY_ELEM_PTR(_ary, _a_idx, _elem_size); \ + void *_b_ptr = DSW_ARY_ELEM_PTR(_ary, _b_idx, _elem_size); \ + memcpy(tmp, _a_ptr, _elem_size); \ + memcpy(_a_ptr, _b_ptr, _elem_size); \ + memcpy(_b_ptr, tmp, _elem_size); \ + } while (0) + +static inline void +dsw_insertion_sort(void *ary, uint16_t len, uint16_t elem_size, + int (*cmp_fn)(const void *, const void *)) +{ + uint16_t i; + + for (i = 1; i < len; i++) { + uint16_t j; + for (j = i; j > 0 && + cmp_fn(DSW_ARY_ELEM_PTR(ary, j-1, elem_size), + DSW_ARY_ELEM_PTR(ary, j, elem_size)) > 0; + j--) + DSW_ARY_ELEM_SWAP(ary, j, j-1, elem_size); + } +} + +static inline void +dsw_stable_sort(void *ary, uint16_t len, uint16_t elem_size, + int (*cmp_fn)(const void *, const void *)) +{ + dsw_insertion_sort(ary, len, elem_size, cmp_fn); +} + +#endif diff --git a/src/spdk/dpdk/drivers/event/dsw/dsw_xstats.c b/src/spdk/dpdk/drivers/event/dsw/dsw_xstats.c new file mode 100644 index 000000000..e8e92183e --- /dev/null +++ b/src/spdk/dpdk/drivers/event/dsw/dsw_xstats.c @@ -0,0 +1,376 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018-2019 Ericsson AB + */ + +#include "dsw_evdev.h" + +#include <stdbool.h> +#include <string.h> + +#include <rte_debug.h> + +/* The high bits in the xstats id is used to store an additional + * parameter (beyond the 
queue or port id already in the xstats + * interface). + */ +#define DSW_XSTATS_ID_PARAM_BITS (8) +#define DSW_XSTATS_ID_STAT_BITS \ + (sizeof(unsigned int)*CHAR_BIT - DSW_XSTATS_ID_PARAM_BITS) +#define DSW_XSTATS_ID_STAT_MASK ((1 << DSW_XSTATS_ID_STAT_BITS) - 1) + +#define DSW_XSTATS_ID_GET_PARAM(id) \ + ((id)>>DSW_XSTATS_ID_STAT_BITS) + +#define DSW_XSTATS_ID_GET_STAT(id) \ + ((id) & DSW_XSTATS_ID_STAT_MASK) + +#define DSW_XSTATS_ID_CREATE(id, param_value) \ + (((param_value) << DSW_XSTATS_ID_STAT_BITS) | id) + +typedef +uint64_t (*dsw_xstats_dev_get_value_fn)(struct dsw_evdev *dsw); + +struct dsw_xstat_dev { + const char *name; + dsw_xstats_dev_get_value_fn get_value_fn; +}; + +typedef +uint64_t (*dsw_xstats_port_get_value_fn)(struct dsw_evdev *dsw, + uint8_t port_id, uint8_t queue_id); + +struct dsw_xstats_port { + const char *name_fmt; + dsw_xstats_port_get_value_fn get_value_fn; + bool per_queue; +}; + +static uint64_t +dsw_xstats_dev_credits_on_loan(struct dsw_evdev *dsw) +{ + return rte_atomic32_read(&dsw->credits_on_loan); +} + +static struct dsw_xstat_dev dsw_dev_xstats[] = { + { "dev_credits_on_loan", dsw_xstats_dev_credits_on_loan } +}; + +#define DSW_GEN_PORT_ACCESS_FN(_variable) \ + static uint64_t \ + dsw_xstats_port_get_ ## _variable(struct dsw_evdev *dsw, \ + uint8_t port_id, \ + uint8_t queue_id __rte_unused) \ + { \ + return dsw->ports[port_id]._variable; \ + } + +DSW_GEN_PORT_ACCESS_FN(new_enqueued) +DSW_GEN_PORT_ACCESS_FN(forward_enqueued) +DSW_GEN_PORT_ACCESS_FN(release_enqueued) + +static uint64_t +dsw_xstats_port_get_queue_enqueued(struct dsw_evdev *dsw, uint8_t port_id, + uint8_t queue_id) +{ + return dsw->ports[port_id].queue_enqueued[queue_id]; +} + +DSW_GEN_PORT_ACCESS_FN(dequeued) + +static uint64_t +dsw_xstats_port_get_queue_dequeued(struct dsw_evdev *dsw, uint8_t port_id, + uint8_t queue_id) +{ + return dsw->ports[port_id].queue_dequeued[queue_id]; +} + +DSW_GEN_PORT_ACCESS_FN(emigrations) +DSW_GEN_PORT_ACCESS_FN(immigrations) + +static uint64_t +dsw_xstats_port_get_migration_latency(struct dsw_evdev *dsw, uint8_t port_id, + uint8_t queue_id __rte_unused) +{ + uint64_t total_latency = dsw->ports[port_id].emigration_latency; + uint64_t num_emigrations = dsw->ports[port_id].emigrations; + + return num_emigrations > 0 ? total_latency / num_emigrations : 0; +} + +static uint64_t +dsw_xstats_port_get_event_proc_latency(struct dsw_evdev *dsw, uint8_t port_id, + uint8_t queue_id __rte_unused) +{ + uint64_t total_busy_cycles = + dsw->ports[port_id].total_busy_cycles; + uint64_t dequeued = + dsw->ports[port_id].dequeued; + + return dequeued > 0 ? 
total_busy_cycles / dequeued : 0; +} + +static uint64_t +dsw_xstats_port_get_busy_cycles(struct dsw_evdev *dsw, uint8_t port_id, + uint8_t queue_id __rte_unused) +{ + return dsw->ports[port_id].total_busy_cycles; +} + +DSW_GEN_PORT_ACCESS_FN(inflight_credits) + +DSW_GEN_PORT_ACCESS_FN(pending_releases) + +static uint64_t +dsw_xstats_port_get_load(struct dsw_evdev *dsw, uint8_t port_id, + uint8_t queue_id __rte_unused) +{ + int16_t load; + + load = rte_atomic16_read(&dsw->ports[port_id].load); + + return DSW_LOAD_TO_PERCENT(load); +} + +DSW_GEN_PORT_ACCESS_FN(last_bg) + +static struct dsw_xstats_port dsw_port_xstats[] = { + { "port_%u_new_enqueued", dsw_xstats_port_get_new_enqueued, + false }, + { "port_%u_forward_enqueued", dsw_xstats_port_get_forward_enqueued, + false }, + { "port_%u_release_enqueued", dsw_xstats_port_get_release_enqueued, + false }, + { "port_%u_queue_%u_enqueued", dsw_xstats_port_get_queue_enqueued, + true }, + { "port_%u_dequeued", dsw_xstats_port_get_dequeued, + false }, + { "port_%u_queue_%u_dequeued", dsw_xstats_port_get_queue_dequeued, + true }, + { "port_%u_emigrations", dsw_xstats_port_get_emigrations, + false }, + { "port_%u_migration_latency", dsw_xstats_port_get_migration_latency, + false }, + { "port_%u_immigrations", dsw_xstats_port_get_immigrations, + false }, + { "port_%u_event_proc_latency", dsw_xstats_port_get_event_proc_latency, + false }, + { "port_%u_busy_cycles", dsw_xstats_port_get_busy_cycles, + false }, + { "port_%u_inflight_credits", dsw_xstats_port_get_inflight_credits, + false }, + { "port_%u_pending_releases", dsw_xstats_port_get_pending_releases, + false }, + { "port_%u_load", dsw_xstats_port_get_load, + false }, + { "port_%u_last_bg", dsw_xstats_port_get_last_bg, + false } +}; + +typedef +void (*dsw_xstats_foreach_fn)(const char *xstats_name, + enum rte_event_dev_xstats_mode mode, + uint8_t queue_port_id, unsigned int xstats_id, + void *data); + +static void +dsw_xstats_dev_foreach(dsw_xstats_foreach_fn fn, void *fn_data) +{ + unsigned int i; + + for (i = 0; i < RTE_DIM(dsw_dev_xstats); i++) + fn(dsw_dev_xstats[i].name, RTE_EVENT_DEV_XSTATS_DEVICE, 0, + i, fn_data); +} + +static void +dsw_xstats_port_foreach(struct dsw_evdev *dsw, uint8_t port_id, + dsw_xstats_foreach_fn fn, void *fn_data) +{ + uint8_t queue_id; + unsigned int stat_idx; + + for (stat_idx = 0, queue_id = 0; + stat_idx < RTE_DIM(dsw_port_xstats);) { + struct dsw_xstats_port *xstat = &dsw_port_xstats[stat_idx]; + char xstats_name[RTE_EVENT_DEV_XSTATS_NAME_SIZE]; + unsigned int xstats_id; + + if (xstat->per_queue) { + xstats_id = DSW_XSTATS_ID_CREATE(stat_idx, queue_id); + snprintf(xstats_name, sizeof(xstats_name), + dsw_port_xstats[stat_idx].name_fmt, port_id, + queue_id); + queue_id++; + } else { + xstats_id = stat_idx; + snprintf(xstats_name, sizeof(xstats_name), + dsw_port_xstats[stat_idx].name_fmt, port_id); + } + + fn(xstats_name, RTE_EVENT_DEV_XSTATS_PORT, port_id, + xstats_id, fn_data); + + if (!(xstat->per_queue && queue_id < dsw->num_queues)) { + stat_idx++; + queue_id = 0; + } + } +} + +struct store_ctx { + struct rte_event_dev_xstats_name *names; + unsigned int *ids; + unsigned int count; + unsigned int capacity; +}; + +static void +dsw_xstats_store_stat(const char *xstats_name, + enum rte_event_dev_xstats_mode mode, + uint8_t queue_port_id, unsigned int xstats_id, + void *data) +{ + struct store_ctx *ctx = data; + + RTE_SET_USED(mode); + RTE_SET_USED(queue_port_id); + + if (ctx->count < ctx->capacity) { + strcpy(ctx->names[ctx->count].name, xstats_name); + 
ctx->ids[ctx->count] = xstats_id; + } + + ctx->count++; +} + +int +dsw_xstats_get_names(const struct rte_eventdev *dev, + enum rte_event_dev_xstats_mode mode, + uint8_t queue_port_id, + struct rte_event_dev_xstats_name *xstats_names, + unsigned int *ids, unsigned int capacity) +{ + struct dsw_evdev *dsw = dsw_pmd_priv(dev); + + struct store_ctx ctx = { + .names = xstats_names, + .ids = ids, + .capacity = capacity + }; + + switch (mode) { + case RTE_EVENT_DEV_XSTATS_DEVICE: + dsw_xstats_dev_foreach(dsw_xstats_store_stat, &ctx); + return ctx.count; + case RTE_EVENT_DEV_XSTATS_PORT: + dsw_xstats_port_foreach(dsw, queue_port_id, + dsw_xstats_store_stat, &ctx); + return ctx.count; + case RTE_EVENT_DEV_XSTATS_QUEUE: + return 0; + default: + RTE_ASSERT(false); + return -1; + } +} + +static int +dsw_xstats_dev_get(const struct rte_eventdev *dev, + const unsigned int ids[], uint64_t values[], unsigned int n) +{ + struct dsw_evdev *dsw = dsw_pmd_priv(dev); + unsigned int i; + + for (i = 0; i < n; i++) { + unsigned int id = ids[i]; + struct dsw_xstat_dev *xstat = &dsw_dev_xstats[id]; + values[i] = xstat->get_value_fn(dsw); + } + return n; +} + +static int +dsw_xstats_port_get(const struct rte_eventdev *dev, uint8_t port_id, + const unsigned int ids[], uint64_t values[], unsigned int n) +{ + struct dsw_evdev *dsw = dsw_pmd_priv(dev); + unsigned int i; + + for (i = 0; i < n; i++) { + unsigned int id = ids[i]; + unsigned int stat_idx = DSW_XSTATS_ID_GET_STAT(id); + struct dsw_xstats_port *xstat = &dsw_port_xstats[stat_idx]; + uint8_t queue_id = 0; + + if (xstat->per_queue) + queue_id = DSW_XSTATS_ID_GET_PARAM(id); + + values[i] = xstat->get_value_fn(dsw, port_id, queue_id); + } + return n; +} + +int +dsw_xstats_get(const struct rte_eventdev *dev, + enum rte_event_dev_xstats_mode mode, uint8_t queue_port_id, + const unsigned int ids[], uint64_t values[], unsigned int n) +{ + switch (mode) { + case RTE_EVENT_DEV_XSTATS_DEVICE: + return dsw_xstats_dev_get(dev, ids, values, n); + case RTE_EVENT_DEV_XSTATS_PORT: + return dsw_xstats_port_get(dev, queue_port_id, ids, values, n); + case RTE_EVENT_DEV_XSTATS_QUEUE: + return 0; + default: + RTE_ASSERT(false); + return -1; + } + return 0; +} + +struct find_ctx { + const struct rte_eventdev *dev; + const char *name; + unsigned int *id; + uint64_t value; +}; + +static void +dsw_xstats_find_stat(const char *xstats_name, + enum rte_event_dev_xstats_mode mode, + uint8_t queue_port_id, unsigned int xstats_id, + void *data) +{ + struct find_ctx *ctx = data; + + if (strcmp(ctx->name, xstats_name) == 0) { + if (ctx->id != NULL) + *ctx->id = xstats_id; + dsw_xstats_get(ctx->dev, mode, queue_port_id, &xstats_id, + &ctx->value, 1); + } +} + +uint64_t +dsw_xstats_get_by_name(const struct rte_eventdev *dev, const char *name, + unsigned int *id) +{ + struct dsw_evdev *dsw = dsw_pmd_priv(dev); + uint16_t port_id; + + struct find_ctx ctx = { + .dev = dev, + .name = name, + .id = id, + .value = -EINVAL + }; + + dsw_xstats_dev_foreach(dsw_xstats_find_stat, &ctx); + + for (port_id = 0; port_id < dsw->num_ports; port_id++) + dsw_xstats_port_foreach(dsw, port_id, dsw_xstats_find_stat, + &ctx); + + return ctx.value; +} diff --git a/src/spdk/dpdk/drivers/event/dsw/meson.build b/src/spdk/dpdk/drivers/event/dsw/meson.build new file mode 100644 index 000000000..60ab13d90 --- /dev/null +++ b/src/spdk/dpdk/drivers/event/dsw/meson.build @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2018 Ericsson AB + +deps += ['bus_vdev'] +if cc.has_argument('-Wno-format-nonliteral') 
+ cflags += '-Wno-format-nonliteral' +endif +sources = files('dsw_evdev.c', 'dsw_event.c', 'dsw_xstats.c') diff --git a/src/spdk/dpdk/drivers/event/dsw/rte_pmd_dsw_event_version.map b/src/spdk/dpdk/drivers/event/dsw/rte_pmd_dsw_event_version.map new file mode 100644 index 000000000..f9f17e4f6 --- /dev/null +++ b/src/spdk/dpdk/drivers/event/dsw/rte_pmd_dsw_event_version.map @@ -0,0 +1,3 @@ +DPDK_20.0 { + local: *; +}; diff --git a/src/spdk/dpdk/drivers/event/meson.build b/src/spdk/dpdk/drivers/event/meson.build new file mode 100644 index 000000000..50d30c53f --- /dev/null +++ b/src/spdk/dpdk/drivers/event/meson.build @@ -0,0 +1,11 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2017 Intel Corporation + +drivers = ['dpaa', 'dpaa2', 'octeontx2', 'opdl', 'skeleton', 'sw', 'dsw'] +if not (toolchain == 'gcc' and cc.version().version_compare('<4.8.6') and + dpdk_conf.has('RTE_ARCH_ARM64')) + drivers += 'octeontx' +endif +std_deps = ['eventdev', 'kvargs'] +config_flag_fmt = 'RTE_LIBRTE_@0@_EVENTDEV_PMD' +driver_name_fmt = 'rte_pmd_@0@_event' diff --git a/src/spdk/dpdk/drivers/event/octeontx/Makefile b/src/spdk/dpdk/drivers/event/octeontx/Makefile new file mode 100644 index 000000000..c1233e098 --- /dev/null +++ b/src/spdk/dpdk/drivers/event/octeontx/Makefile @@ -0,0 +1,51 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2017 Cavium, Inc +# + +include $(RTE_SDK)/mk/rte.vars.mk + +# +# library name +# +LIB = librte_pmd_octeontx_ssovf.a + +CFLAGS += $(WERROR_FLAGS) +CFLAGS += -I$(RTE_SDK)/drivers/common/octeontx/ +CFLAGS += -I$(RTE_SDK)/drivers/mempool/octeontx/ +CFLAGS += -I$(RTE_SDK)/drivers/net/octeontx/ + +LDLIBS += -lrte_eal -lrte_eventdev -lrte_common_octeontx -lrte_pmd_octeontx +LDLIBS += -lrte_bus_pci -lrte_mempool -lrte_mbuf -lrte_kvargs +LDLIBS += -lrte_bus_vdev -lrte_ethdev + +EXPORT_MAP := rte_pmd_octeontx_event_version.map + +# +# all source are stored in SRCS-y +# +SRCS-$(CONFIG_RTE_LIBRTE_PMD_OCTEONTX_SSOVF) += ssovf_worker.c +SRCS-$(CONFIG_RTE_LIBRTE_PMD_OCTEONTX_SSOVF) += ssovf_evdev.c +SRCS-$(CONFIG_RTE_LIBRTE_PMD_OCTEONTX_SSOVF) += ssovf_evdev_selftest.c +SRCS-$(CONFIG_RTE_LIBRTE_PMD_OCTEONTX_SSOVF) += ssovf_probe.c +SRCS-$(CONFIG_RTE_LIBRTE_PMD_OCTEONTX_SSOVF) += timvf_worker.c +SRCS-$(CONFIG_RTE_LIBRTE_PMD_OCTEONTX_SSOVF) += timvf_evdev.c +SRCS-$(CONFIG_RTE_LIBRTE_PMD_OCTEONTX_SSOVF) += timvf_probe.c + +ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y) +CFLAGS_ssovf_worker.o += -fno-prefetch-loop-arrays +CFLAGS_timvf_worker.o += -fno-prefetch-loop-arrays + +ifeq ($(shell test $(GCC_VERSION) -ge 46 && echo 1), 1) +CFLAGS_ssovf_worker.o += -Ofast +CFLAGS_timvf_worker.o += -Ofast +else +CFLAGS_ssovf_worker.o += -O3 -ffast-math +CFLAGS_timvf_worker.o += -O3 -ffast-math +endif + +else +CFLAGS_ssovf_worker.o += -Ofast +CFLAGS_timvf_worker.o += -Ofast +endif + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/src/spdk/dpdk/drivers/event/octeontx/meson.build b/src/spdk/dpdk/drivers/event/octeontx/meson.build new file mode 100644 index 000000000..2b74bb62d --- /dev/null +++ b/src/spdk/dpdk/drivers/event/octeontx/meson.build @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2017 Cavium, Inc + +sources = files('ssovf_worker.c', + 'ssovf_evdev.c', + 'ssovf_evdev_selftest.c', + 'ssovf_probe.c', + 'timvf_worker.c', + 'timvf_evdev.c', + 'timvf_probe.c' +) + +deps += ['common_octeontx', 'mempool_octeontx', 'bus_vdev', 'pmd_octeontx'] diff --git a/src/spdk/dpdk/drivers/event/octeontx/rte_pmd_octeontx_event_version.map 
b/src/spdk/dpdk/drivers/event/octeontx/rte_pmd_octeontx_event_version.map new file mode 100644 index 000000000..f9f17e4f6 --- /dev/null +++ b/src/spdk/dpdk/drivers/event/octeontx/rte_pmd_octeontx_event_version.map @@ -0,0 +1,3 @@ +DPDK_20.0 { + local: *; +}; diff --git a/src/spdk/dpdk/drivers/event/octeontx/ssovf_evdev.c b/src/spdk/dpdk/drivers/event/octeontx/ssovf_evdev.c new file mode 100644 index 000000000..1b1a5d939 --- /dev/null +++ b/src/spdk/dpdk/drivers/event/octeontx/ssovf_evdev.c @@ -0,0 +1,830 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017 Cavium, Inc + */ + +#include <inttypes.h> + +#include <rte_common.h> +#include <rte_debug.h> +#include <rte_dev.h> +#include <rte_eal.h> +#include <rte_ethdev_driver.h> +#include <rte_event_eth_rx_adapter.h> +#include <rte_kvargs.h> +#include <rte_lcore.h> +#include <rte_log.h> +#include <rte_malloc.h> +#include <rte_memory.h> +#include <rte_bus_vdev.h> + +#include "ssovf_evdev.h" +#include "timvf_evdev.h" + +int otx_logtype_ssovf; +static uint8_t timvf_enable_stats; + +RTE_INIT(otx_ssovf_init_log) +{ + otx_logtype_ssovf = rte_log_register("pmd.event.octeontx"); + if (otx_logtype_ssovf >= 0) + rte_log_set_level(otx_logtype_ssovf, RTE_LOG_NOTICE); +} + +/* SSOPF Mailbox messages */ + +struct ssovf_mbox_dev_info { + uint64_t min_deq_timeout_ns; + uint64_t max_deq_timeout_ns; + uint32_t max_num_events; +}; + +static int +ssovf_mbox_dev_info(struct ssovf_mbox_dev_info *info) +{ + struct octeontx_mbox_hdr hdr = {0}; + uint16_t len = sizeof(struct ssovf_mbox_dev_info); + + hdr.coproc = SSO_COPROC; + hdr.msg = SSO_GET_DEV_INFO; + hdr.vfid = 0; + + memset(info, 0, len); + return octeontx_mbox_send(&hdr, NULL, 0, info, len); +} + +struct ssovf_mbox_getwork_wait { + uint64_t wait_ns; +}; + +static int +ssovf_mbox_getwork_tmo_set(uint32_t timeout_ns) +{ + struct octeontx_mbox_hdr hdr = {0}; + struct ssovf_mbox_getwork_wait tmo_set; + uint16_t len = sizeof(struct ssovf_mbox_getwork_wait); + int ret; + + hdr.coproc = SSO_COPROC; + hdr.msg = SSO_SET_GETWORK_WAIT; + hdr.vfid = 0; + + tmo_set.wait_ns = timeout_ns; + ret = octeontx_mbox_send(&hdr, &tmo_set, len, NULL, 0); + if (ret) + ssovf_log_err("Failed to set getwork timeout(%d)", ret); + + return ret; +} + +struct ssovf_mbox_grp_pri { + uint8_t vhgrp_id; + uint8_t wgt_left; /* Read only */ + uint8_t weight; + uint8_t affinity; + uint8_t priority; +}; + +static int +ssovf_mbox_priority_set(uint8_t queue, uint8_t prio) +{ + struct octeontx_mbox_hdr hdr = {0}; + struct ssovf_mbox_grp_pri grp; + uint16_t len = sizeof(struct ssovf_mbox_grp_pri); + int ret; + + hdr.coproc = SSO_COPROC; + hdr.msg = SSO_GRP_SET_PRIORITY; + hdr.vfid = queue; + + grp.vhgrp_id = queue; + grp.weight = 0xff; + grp.affinity = 0xff; + grp.priority = prio / 32; /* Normalize to 0 to 7 */ + + ret = octeontx_mbox_send(&hdr, &grp, len, NULL, 0); + if (ret) + ssovf_log_err("Failed to set grp=%d prio=%d", queue, prio); + + return ret; +} + +struct ssovf_mbox_convert_ns_getworks_iter { + uint64_t wait_ns; + uint32_t getwork_iter;/* Get_work iterations for the given wait_ns */ +}; + +static int +ssovf_mbox_timeout_ticks(uint64_t ns, uint64_t *tmo_ticks) +{ + struct octeontx_mbox_hdr hdr = {0}; + struct ssovf_mbox_convert_ns_getworks_iter ns2iter; + uint16_t len = sizeof(ns2iter); + int ret; + + hdr.coproc = SSO_COPROC; + hdr.msg = SSO_CONVERT_NS_GETWORK_ITER; + hdr.vfid = 0; + + memset(&ns2iter, 0, len); + ns2iter.wait_ns = ns; + ret = octeontx_mbox_send(&hdr, &ns2iter, len, &ns2iter, len); + if (ret < 0 || (ret != len)) { + 
ssovf_log_err("Failed to get tmo ticks ns=%"PRId64"", ns); + return -EIO; + } + + *tmo_ticks = ns2iter.getwork_iter; + return 0; +} + +static void +ssovf_info_get(struct rte_eventdev *dev, struct rte_event_dev_info *dev_info) +{ + struct ssovf_evdev *edev = ssovf_pmd_priv(dev); + + dev_info->driver_name = RTE_STR(EVENTDEV_NAME_OCTEONTX_PMD); + dev_info->min_dequeue_timeout_ns = edev->min_deq_timeout_ns; + dev_info->max_dequeue_timeout_ns = edev->max_deq_timeout_ns; + dev_info->max_event_queues = edev->max_event_queues; + dev_info->max_event_queue_flows = (1ULL << 20); + dev_info->max_event_queue_priority_levels = 8; + dev_info->max_event_priority_levels = 1; + dev_info->max_event_ports = edev->max_event_ports; + dev_info->max_event_port_dequeue_depth = 1; + dev_info->max_event_port_enqueue_depth = 1; + dev_info->max_num_events = edev->max_num_events; + dev_info->event_dev_cap = RTE_EVENT_DEV_CAP_QUEUE_QOS | + RTE_EVENT_DEV_CAP_DISTRIBUTED_SCHED | + RTE_EVENT_DEV_CAP_QUEUE_ALL_TYPES| + RTE_EVENT_DEV_CAP_RUNTIME_PORT_LINK | + RTE_EVENT_DEV_CAP_MULTIPLE_QUEUE_PORT | + RTE_EVENT_DEV_CAP_NONSEQ_MODE; + +} + +static int +ssovf_configure(const struct rte_eventdev *dev) +{ + struct rte_event_dev_config *conf = &dev->data->dev_conf; + struct ssovf_evdev *edev = ssovf_pmd_priv(dev); + uint64_t deq_tmo_ns; + + ssovf_func_trace(); + deq_tmo_ns = conf->dequeue_timeout_ns; + if (deq_tmo_ns == 0) + deq_tmo_ns = edev->min_deq_timeout_ns; + + if (conf->event_dev_cfg & RTE_EVENT_DEV_CFG_PER_DEQUEUE_TIMEOUT) { + edev->is_timeout_deq = 1; + deq_tmo_ns = edev->min_deq_timeout_ns; + } + edev->nb_event_queues = conf->nb_event_queues; + edev->nb_event_ports = conf->nb_event_ports; + + return ssovf_mbox_getwork_tmo_set(deq_tmo_ns); +} + +static void +ssovf_queue_def_conf(struct rte_eventdev *dev, uint8_t queue_id, + struct rte_event_queue_conf *queue_conf) +{ + RTE_SET_USED(dev); + RTE_SET_USED(queue_id); + + queue_conf->nb_atomic_flows = (1ULL << 20); + queue_conf->nb_atomic_order_sequences = (1ULL << 20); + queue_conf->event_queue_cfg = RTE_EVENT_QUEUE_CFG_ALL_TYPES; + queue_conf->priority = RTE_EVENT_DEV_PRIORITY_NORMAL; +} + +static void +ssovf_queue_release(struct rte_eventdev *dev, uint8_t queue_id) +{ + RTE_SET_USED(dev); + RTE_SET_USED(queue_id); +} + +static int +ssovf_queue_setup(struct rte_eventdev *dev, uint8_t queue_id, + const struct rte_event_queue_conf *queue_conf) +{ + RTE_SET_USED(dev); + ssovf_func_trace("queue=%d prio=%d", queue_id, queue_conf->priority); + + return ssovf_mbox_priority_set(queue_id, queue_conf->priority); +} + +static void +ssovf_port_def_conf(struct rte_eventdev *dev, uint8_t port_id, + struct rte_event_port_conf *port_conf) +{ + struct ssovf_evdev *edev = ssovf_pmd_priv(dev); + + RTE_SET_USED(port_id); + port_conf->new_event_threshold = edev->max_num_events; + port_conf->dequeue_depth = 1; + port_conf->enqueue_depth = 1; + port_conf->disable_implicit_release = 0; +} + +static void +ssovf_port_release(void *port) +{ + rte_free(port); +} + +static int +ssovf_port_setup(struct rte_eventdev *dev, uint8_t port_id, + const struct rte_event_port_conf *port_conf) +{ + struct ssows *ws; + uint32_t reg_off; + uint8_t q; + struct ssovf_evdev *edev = ssovf_pmd_priv(dev); + + ssovf_func_trace("port=%d", port_id); + RTE_SET_USED(port_conf); + + /* Free memory prior to re-allocation if needed */ + if (dev->data->ports[port_id] != NULL) { + ssovf_port_release(dev->data->ports[port_id]); + dev->data->ports[port_id] = NULL; + } + + /* Allocate event port memory */ + ws = 
rte_zmalloc_socket("eventdev ssows", + sizeof(struct ssows), RTE_CACHE_LINE_SIZE, + dev->data->socket_id); + if (ws == NULL) { + ssovf_log_err("Failed to alloc memory for port=%d", port_id); + return -ENOMEM; + } + + ws->base = ssovf_bar(OCTEONTX_SSO_HWS, port_id, 0); + if (ws->base == NULL) { + rte_free(ws); + ssovf_log_err("Failed to get hws base addr port=%d", port_id); + return -EINVAL; + } + + reg_off = SSOW_VHWS_OP_GET_WORK0; + reg_off |= 1 << 4; /* Index_ggrp_mask (Use maskset zero) */ + reg_off |= 1 << 16; /* Wait */ + ws->getwork = ws->base + reg_off; + ws->port = port_id; + ws->lookup_mem = octeontx_fastpath_lookup_mem_get(); + + for (q = 0; q < edev->nb_event_queues; q++) { + ws->grps[q] = ssovf_bar(OCTEONTX_SSO_GROUP, q, 2); + if (ws->grps[q] == NULL) { + rte_free(ws); + ssovf_log_err("Failed to get grp%d base addr", q); + return -EINVAL; + } + } + + dev->data->ports[port_id] = ws; + ssovf_log_dbg("port=%d ws=%p", port_id, ws); + return 0; +} + +static int +ssovf_port_link(struct rte_eventdev *dev, void *port, const uint8_t queues[], + const uint8_t priorities[], uint16_t nb_links) +{ + uint16_t link; + uint64_t val; + struct ssows *ws = port; + + ssovf_func_trace("port=%d nb_links=%d", ws->port, nb_links); + RTE_SET_USED(dev); + RTE_SET_USED(priorities); + + for (link = 0; link < nb_links; link++) { + val = queues[link]; + val |= (1ULL << 24); /* Set membership */ + ssovf_write64(val, ws->base + SSOW_VHWS_GRPMSK_CHGX(0)); + } + return (int)nb_links; +} + +static int +ssovf_port_unlink(struct rte_eventdev *dev, void *port, uint8_t queues[], + uint16_t nb_unlinks) +{ + uint16_t unlink; + uint64_t val; + struct ssows *ws = port; + + ssovf_func_trace("port=%d nb_links=%d", ws->port, nb_unlinks); + RTE_SET_USED(dev); + + for (unlink = 0; unlink < nb_unlinks; unlink++) { + val = queues[unlink]; + val &= ~(1ULL << 24); /* Clear membership */ + ssovf_write64(val, ws->base + SSOW_VHWS_GRPMSK_CHGX(0)); + } + return (int)nb_unlinks; +} + +static int +ssovf_timeout_ticks(struct rte_eventdev *dev, uint64_t ns, uint64_t *tmo_ticks) +{ + RTE_SET_USED(dev); + + return ssovf_mbox_timeout_ticks(ns, tmo_ticks); +} + +static void +ssows_dump(struct ssows *ws, FILE *f) +{ + uint8_t *base = ws->base; + uint64_t val; + + fprintf(f, "\t---------------port%d---------------\n", ws->port); + val = ssovf_read64(base + SSOW_VHWS_TAG); + fprintf(f, "\ttag=0x%x tt=%d head=%d tail=%d grp=%d index=%d tail=%d\n", + (uint32_t)(val & 0xffffffff), (int)(val >> 32) & 0x3, + (int)(val >> 34) & 0x1, (int)(val >> 35) & 0x1, + (int)(val >> 36) & 0x3ff, (int)(val >> 48) & 0x3ff, + (int)(val >> 63) & 0x1); + + val = ssovf_read64(base + SSOW_VHWS_WQP); + fprintf(f, "\twqp=0x%"PRIx64"\n", val); + + val = ssovf_read64(base + SSOW_VHWS_LINKS); + fprintf(f, "\tindex=%d valid=%d revlink=%d tail=%d head=%d grp=%d\n", + (int)(val & 0x3ff), (int)(val >> 10) & 0x1, + (int)(val >> 11) & 0x3ff, (int)(val >> 26) & 0x1, + (int)(val >> 27) & 0x1, (int)(val >> 28) & 0x3ff); + + val = ssovf_read64(base + SSOW_VHWS_PENDTAG); + fprintf(f, "\tptag=0x%x ptt=%d pgwi=%d pdesc=%d pgw=%d pgww=%d ps=%d\n", + (uint32_t)(val & 0xffffffff), (int)(val >> 32) & 0x3, + (int)(val >> 56) & 0x1, (int)(val >> 58) & 0x1, + (int)(val >> 61) & 0x1, (int)(val >> 62) & 0x1, + (int)(val >> 63) & 0x1); + + val = ssovf_read64(base + SSOW_VHWS_PENDWQP); + fprintf(f, "\tpwqp=0x%"PRIx64"\n", val); +} + +static int +ssovf_eth_rx_adapter_caps_get(const struct rte_eventdev *dev, + const struct rte_eth_dev *eth_dev, uint32_t *caps) +{ + int ret; + RTE_SET_USED(dev); + + 
ret = strncmp(eth_dev->data->name, "eth_octeontx", 12); + if (ret) + *caps = RTE_EVENT_ETH_RX_ADAPTER_SW_CAP; + else + *caps = RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT; + + return 0; +} + +static int +ssovf_eth_rx_adapter_queue_add(const struct rte_eventdev *dev, + const struct rte_eth_dev *eth_dev, int32_t rx_queue_id, + const struct rte_event_eth_rx_adapter_queue_conf *queue_conf) +{ + int ret = 0; + const struct octeontx_nic *nic = eth_dev->data->dev_private; + struct ssovf_evdev *edev = ssovf_pmd_priv(dev); + pki_mod_qos_t pki_qos; + RTE_SET_USED(dev); + + ret = strncmp(eth_dev->data->name, "eth_octeontx", 12); + if (ret) + return -EINVAL; + + if (rx_queue_id >= 0) + return -EINVAL; + + if (queue_conf->ev.sched_type == RTE_SCHED_TYPE_PARALLEL) + return -ENOTSUP; + + memset(&pki_qos, 0, sizeof(pki_mod_qos_t)); + + pki_qos.port_type = 0; + pki_qos.index = 0; + pki_qos.mmask.f_tag_type = 1; + pki_qos.mmask.f_port_add = 1; + pki_qos.mmask.f_grp_ok = 1; + pki_qos.mmask.f_grp_bad = 1; + pki_qos.mmask.f_grptag_ok = 1; + pki_qos.mmask.f_grptag_bad = 1; + + pki_qos.qos_entry.tag_type = queue_conf->ev.sched_type; + pki_qos.qos_entry.port_add = 0; + pki_qos.qos_entry.ggrp_ok = queue_conf->ev.queue_id; + pki_qos.qos_entry.ggrp_bad = queue_conf->ev.queue_id; + pki_qos.qos_entry.grptag_bad = 0; + pki_qos.qos_entry.grptag_ok = 0; + + ret = octeontx_pki_port_modify_qos(nic->port_id, &pki_qos); + if (ret < 0) + ssovf_log_err("failed to modify QOS, port=%d, q=%d", + nic->port_id, queue_conf->ev.queue_id); + + edev->rx_offload_flags = nic->rx_offload_flags; + edev->tx_offload_flags = nic->tx_offload_flags; + return ret; +} + +static int +ssovf_eth_rx_adapter_queue_del(const struct rte_eventdev *dev, + const struct rte_eth_dev *eth_dev, int32_t rx_queue_id) +{ + int ret = 0; + const struct octeontx_nic *nic = eth_dev->data->dev_private; + pki_del_qos_t pki_qos; + RTE_SET_USED(dev); + + ret = strncmp(eth_dev->data->name, "eth_octeontx", 12); + if (ret) + return -EINVAL; + + pki_qos.port_type = 0; + pki_qos.index = 0; + memset(&pki_qos, 0, sizeof(pki_del_qos_t)); + ret = octeontx_pki_port_delete_qos(nic->port_id, &pki_qos); + if (ret < 0) + ssovf_log_err("Failed to delete QOS port=%d, q=%d", + nic->port_id, rx_queue_id); + return ret; +} + +static int +ssovf_eth_rx_adapter_start(const struct rte_eventdev *dev, + const struct rte_eth_dev *eth_dev) +{ + RTE_SET_USED(dev); + RTE_SET_USED(eth_dev); + + return 0; +} + + +static int +ssovf_eth_rx_adapter_stop(const struct rte_eventdev *dev, + const struct rte_eth_dev *eth_dev) +{ + RTE_SET_USED(dev); + RTE_SET_USED(eth_dev); + + return 0; +} + +static int +ssovf_eth_tx_adapter_caps_get(const struct rte_eventdev *dev, + const struct rte_eth_dev *eth_dev, uint32_t *caps) +{ + int ret; + RTE_SET_USED(dev); + + ret = strncmp(eth_dev->data->name, "eth_octeontx", 12); + if (ret) + *caps = 0; + else + *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT; + + return 0; +} + +static int +ssovf_eth_tx_adapter_create(uint8_t id, const struct rte_eventdev *dev) +{ + RTE_SET_USED(id); + RTE_SET_USED(dev); + return 0; +} + +static int +ssovf_eth_tx_adapter_free(uint8_t id, const struct rte_eventdev *dev) +{ + RTE_SET_USED(id); + RTE_SET_USED(dev); + return 0; +} + +static int +ssovf_eth_tx_adapter_queue_add(uint8_t id, const struct rte_eventdev *dev, + const struct rte_eth_dev *eth_dev, int32_t tx_queue_id) +{ + RTE_SET_USED(id); + RTE_SET_USED(dev); + RTE_SET_USED(eth_dev); + RTE_SET_USED(tx_queue_id); + return 0; +} + +static int +ssovf_eth_tx_adapter_queue_del(uint8_t id, 
const struct rte_eventdev *dev, + const struct rte_eth_dev *eth_dev, int32_t tx_queue_id) +{ + RTE_SET_USED(id); + RTE_SET_USED(dev); + RTE_SET_USED(eth_dev); + RTE_SET_USED(tx_queue_id); + return 0; +} + +static int +ssovf_eth_tx_adapter_start(uint8_t id, const struct rte_eventdev *dev) +{ + RTE_SET_USED(id); + RTE_SET_USED(dev); + return 0; +} + +static int +ssovf_eth_tx_adapter_stop(uint8_t id, const struct rte_eventdev *dev) +{ + RTE_SET_USED(id); + RTE_SET_USED(dev); + return 0; +} + + +static void +ssovf_dump(struct rte_eventdev *dev, FILE *f) +{ + struct ssovf_evdev *edev = ssovf_pmd_priv(dev); + uint8_t port; + + /* Dump SSOWVF debug registers */ + for (port = 0; port < edev->nb_event_ports; port++) + ssows_dump(dev->data->ports[port], f); +} + +static int +ssovf_start(struct rte_eventdev *dev) +{ + struct ssovf_evdev *edev = ssovf_pmd_priv(dev); + struct ssows *ws; + uint8_t *base; + uint8_t i; + + ssovf_func_trace(); + for (i = 0; i < edev->nb_event_ports; i++) { + ws = dev->data->ports[i]; + ssows_reset(ws); + ws->swtag_req = 0; + } + + for (i = 0; i < edev->nb_event_queues; i++) { + /* Consume all the events through HWS0 */ + ssows_flush_events(dev->data->ports[0], i, NULL, NULL); + + base = ssovf_bar(OCTEONTX_SSO_GROUP, i, 0); + base += SSO_VHGRP_QCTL; + ssovf_write64(1, base); /* Enable SSO group */ + } + + ssovf_fastpath_fns_set(dev); + return 0; +} + +static void +ssows_handle_event(void *arg, struct rte_event event) +{ + struct rte_eventdev *dev = arg; + + if (dev->dev_ops->dev_stop_flush != NULL) + dev->dev_ops->dev_stop_flush(dev->data->dev_id, event, + dev->data->dev_stop_flush_arg); +} + +static void +ssovf_stop(struct rte_eventdev *dev) +{ + struct ssovf_evdev *edev = ssovf_pmd_priv(dev); + struct ssows *ws; + uint8_t *base; + uint8_t i; + + ssovf_func_trace(); + for (i = 0; i < edev->nb_event_ports; i++) { + ws = dev->data->ports[i]; + ssows_reset(ws); + ws->swtag_req = 0; + } + + for (i = 0; i < edev->nb_event_queues; i++) { + /* Consume all the events through HWS0 */ + ssows_flush_events(dev->data->ports[0], i, + ssows_handle_event, dev); + + base = ssovf_bar(OCTEONTX_SSO_GROUP, i, 0); + base += SSO_VHGRP_QCTL; + ssovf_write64(0, base); /* Disable SSO group */ + } +} + +static int +ssovf_close(struct rte_eventdev *dev) +{ + struct ssovf_evdev *edev = ssovf_pmd_priv(dev); + uint8_t all_queues[RTE_EVENT_MAX_QUEUES_PER_DEV]; + uint8_t i; + + for (i = 0; i < edev->nb_event_queues; i++) + all_queues[i] = i; + + for (i = 0; i < edev->nb_event_ports; i++) + ssovf_port_unlink(dev, dev->data->ports[i], all_queues, + edev->nb_event_queues); + return 0; +} + +static int +ssovf_selftest(const char *key __rte_unused, const char *value, + void *opaque) +{ + int *flag = opaque; + *flag = !!atoi(value); + return 0; +} + +static int +ssovf_timvf_caps_get(const struct rte_eventdev *dev, uint64_t flags, + uint32_t *caps, const struct rte_event_timer_adapter_ops **ops) +{ + return timvf_timer_adapter_caps_get(dev, flags, caps, ops, + timvf_enable_stats); +} + +/* Initialize and register event driver with DPDK Application */ +static struct rte_eventdev_ops ssovf_ops = { + .dev_infos_get = ssovf_info_get, + .dev_configure = ssovf_configure, + .queue_def_conf = ssovf_queue_def_conf, + .queue_setup = ssovf_queue_setup, + .queue_release = ssovf_queue_release, + .port_def_conf = ssovf_port_def_conf, + .port_setup = ssovf_port_setup, + .port_release = ssovf_port_release, + .port_link = ssovf_port_link, + .port_unlink = ssovf_port_unlink, + .timeout_ticks = ssovf_timeout_ticks, + + 
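+ /* Ethernet Rx/Tx adapter hooks; the Rx side steers ingress traffic to SSO groups by reprogramming PKI QoS entries. */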
.eth_rx_adapter_caps_get = ssovf_eth_rx_adapter_caps_get, + .eth_rx_adapter_queue_add = ssovf_eth_rx_adapter_queue_add, + .eth_rx_adapter_queue_del = ssovf_eth_rx_adapter_queue_del, + .eth_rx_adapter_start = ssovf_eth_rx_adapter_start, + .eth_rx_adapter_stop = ssovf_eth_rx_adapter_stop, + + .eth_tx_adapter_caps_get = ssovf_eth_tx_adapter_caps_get, + .eth_tx_adapter_create = ssovf_eth_tx_adapter_create, + .eth_tx_adapter_free = ssovf_eth_tx_adapter_free, + .eth_tx_adapter_queue_add = ssovf_eth_tx_adapter_queue_add, + .eth_tx_adapter_queue_del = ssovf_eth_tx_adapter_queue_del, + .eth_tx_adapter_start = ssovf_eth_tx_adapter_start, + .eth_tx_adapter_stop = ssovf_eth_tx_adapter_stop, + + .timer_adapter_caps_get = ssovf_timvf_caps_get, + + .dev_selftest = test_eventdev_octeontx, + + .dump = ssovf_dump, + .dev_start = ssovf_start, + .dev_stop = ssovf_stop, + .dev_close = ssovf_close +}; + +static int +ssovf_vdev_probe(struct rte_vdev_device *vdev) +{ + struct ssovf_info oinfo; + struct ssovf_mbox_dev_info info; + struct ssovf_evdev *edev; + struct rte_eventdev *eventdev; + static int ssovf_init_once; + const char *name; + const char *params; + int ret; + int selftest = 0; + + static const char *const args[] = { + SSOVF_SELFTEST_ARG, + TIMVF_ENABLE_STATS_ARG, + NULL + }; + + name = rte_vdev_device_name(vdev); + /* More than one instance is not supported */ + if (ssovf_init_once) { + ssovf_log_err("Request to create >1 %s instance", name); + return -EINVAL; + } + + params = rte_vdev_device_args(vdev); + if (params != NULL && params[0] != '\0') { + struct rte_kvargs *kvlist = rte_kvargs_parse(params, args); + + if (!kvlist) { + ssovf_log_info( + "Ignoring unsupported params supplied '%s'", + name); + } else { + int ret = rte_kvargs_process(kvlist, + SSOVF_SELFTEST_ARG, + ssovf_selftest, &selftest); + if (ret != 0) { + ssovf_log_err("%s: Error in selftest", name); + rte_kvargs_free(kvlist); + return ret; + } + + ret = rte_kvargs_process(kvlist, + TIMVF_ENABLE_STATS_ARG, + ssovf_selftest, &timvf_enable_stats); + if (ret != 0) { + ssovf_log_err("%s: Error in timvf stats", name); + rte_kvargs_free(kvlist); + return ret; + } + } + + rte_kvargs_free(kvlist); + } + + eventdev = rte_event_pmd_vdev_init(name, sizeof(struct ssovf_evdev), + rte_socket_id()); + if (eventdev == NULL) { + ssovf_log_err("Failed to create eventdev vdev %s", name); + return -ENOMEM; + } + eventdev->dev_ops = &ssovf_ops; + + /* For secondary processes, the primary has done all the work */ + if (rte_eal_process_type() != RTE_PROC_PRIMARY) { + ssovf_fastpath_fns_set(eventdev); + return 0; + } + + octeontx_mbox_init(); + ret = ssovf_info(&oinfo); + if (ret) { + ssovf_log_err("Failed to probe and validate ssovfs %d", ret); + goto error; + } + + edev = ssovf_pmd_priv(eventdev); + edev->max_event_ports = oinfo.total_ssowvfs; + edev->max_event_queues = oinfo.total_ssovfs; + edev->is_timeout_deq = 0; + + ret = ssovf_mbox_dev_info(&info); + if (ret < 0 || ret != sizeof(struct ssovf_mbox_dev_info)) { + ssovf_log_err("Failed to get mbox devinfo %d", ret); + goto error; + } + + edev->min_deq_timeout_ns = info.min_deq_timeout_ns; + edev->max_deq_timeout_ns = info.max_deq_timeout_ns; + edev->max_num_events = info.max_num_events; + ssovf_log_dbg("min_deq_tmo=%"PRId64" max_deq_tmo=%"PRId64" max_evts=%d", + info.min_deq_timeout_ns, info.max_deq_timeout_ns, + info.max_num_events); + + if (!edev->max_event_ports || !edev->max_event_queues) { + ssovf_log_err("Not enough eventdev resource queues=%d ports=%d", + edev->max_event_queues, 
edev->max_event_ports); + ret = -ENODEV; + goto error; + } + + ssovf_log_info("Initializing %s domain=%d max_queues=%d max_ports=%d", + name, oinfo.domain, edev->max_event_queues, + edev->max_event_ports); + + ssovf_init_once = 1; + if (selftest) + test_eventdev_octeontx(); + return 0; + +error: + rte_event_pmd_vdev_uninit(name); + return ret; +} + +static int +ssovf_vdev_remove(struct rte_vdev_device *vdev) +{ + const char *name; + + name = rte_vdev_device_name(vdev); + ssovf_log_info("Closing %s", name); + return rte_event_pmd_vdev_uninit(name); +} + +static struct rte_vdev_driver vdev_ssovf_pmd = { + .probe = ssovf_vdev_probe, + .remove = ssovf_vdev_remove +}; + +RTE_PMD_REGISTER_VDEV(EVENTDEV_NAME_OCTEONTX_PMD, vdev_ssovf_pmd); diff --git a/src/spdk/dpdk/drivers/event/octeontx/ssovf_evdev.h b/src/spdk/dpdk/drivers/event/octeontx/ssovf_evdev.h new file mode 100644 index 000000000..aa5acf246 --- /dev/null +++ b/src/spdk/dpdk/drivers/event/octeontx/ssovf_evdev.h @@ -0,0 +1,188 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017 Cavium, Inc + */ + +#ifndef __SSOVF_EVDEV_H__ +#define __SSOVF_EVDEV_H__ + +#include <rte_event_eth_tx_adapter.h> +#include <rte_eventdev_pmd_vdev.h> +#include <rte_io.h> + +#include <octeontx_mbox.h> +#include <octeontx_ethdev.h> + +#include "octeontx_rxtx.h" + +#define SSO_RX_ADPTR_ENQ_FASTPATH_FUNC OCCTX_RX_FASTPATH_MODES +#define SSO_TX_ADPTR_ENQ_FASTPATH_FUNC OCCTX_TX_FASTPATH_MODES + +#define EVENTDEV_NAME_OCTEONTX_PMD event_octeontx + +#define SSOVF_LOG(level, fmt, args...) \ + rte_log(RTE_LOG_ ## level, otx_logtype_ssovf, \ + "[%s] %s() " fmt "\n", \ + RTE_STR(EVENTDEV_NAME_OCTEONTX_PMD), __func__, ## args) + +#define ssovf_log_info(fmt, ...) SSOVF_LOG(INFO, fmt, ##__VA_ARGS__) +#define ssovf_log_dbg(fmt, ...) SSOVF_LOG(DEBUG, fmt, ##__VA_ARGS__) +#define ssovf_log_err(fmt, ...) 
SSOVF_LOG(ERR, fmt, ##__VA_ARGS__)
+#define ssovf_func_trace ssovf_log_dbg
+#define ssovf_log_selftest ssovf_log_info
+
+#define SSO_MAX_VHGRP (64)
+#define SSO_MAX_VHWS (32)
+
+/* SSO VF register offsets */
+#define SSO_VHGRP_QCTL (0x010ULL)
+#define SSO_VHGRP_INT (0x100ULL)
+#define SSO_VHGRP_INT_W1S (0x108ULL)
+#define SSO_VHGRP_INT_ENA_W1S (0x110ULL)
+#define SSO_VHGRP_INT_ENA_W1C (0x118ULL)
+#define SSO_VHGRP_INT_THR (0x140ULL)
+#define SSO_VHGRP_INT_CNT (0x180ULL)
+#define SSO_VHGRP_XAQ_CNT (0x1B0ULL)
+#define SSO_VHGRP_AQ_CNT (0x1C0ULL)
+#define SSO_VHGRP_AQ_THR (0x1E0ULL)
+
+/* BAR2 */
+#define SSO_VHGRP_OP_ADD_WORK0 (0x00ULL)
+#define SSO_VHGRP_OP_ADD_WORK1 (0x08ULL)
+
+/* SSOW VF register offsets (BAR0) */
+#define SSOW_VHWS_GRPMSK_CHGX(x) (0x080ULL | ((x) << 3))
+#define SSOW_VHWS_TAG (0x300ULL)
+#define SSOW_VHWS_WQP (0x308ULL)
+#define SSOW_VHWS_LINKS (0x310ULL)
+#define SSOW_VHWS_PENDTAG (0x340ULL)
+#define SSOW_VHWS_PENDWQP (0x348ULL)
+#define SSOW_VHWS_SWTP (0x400ULL)
+#define SSOW_VHWS_OP_ALLOC_WE (0x410ULL)
+#define SSOW_VHWS_OP_UPD_WQP_GRP0 (0x440ULL)
+#define SSOW_VHWS_OP_UPD_WQP_GRP1 (0x448ULL)
+#define SSOW_VHWS_OP_SWTAG_UNTAG (0x490ULL)
+#define SSOW_VHWS_OP_SWTAG_CLR (0x820ULL)
+#define SSOW_VHWS_OP_DESCHED (0x860ULL)
+#define SSOW_VHWS_OP_DESCHED_NOSCH (0x870ULL)
+#define SSOW_VHWS_OP_SWTAG_DESCHED (0x8C0ULL)
+#define SSOW_VHWS_OP_SWTAG_NOSCHED (0x8D0ULL)
+#define SSOW_VHWS_OP_SWTP_SET (0xC20ULL)
+#define SSOW_VHWS_OP_SWTAG_NORM (0xC80ULL)
+#define SSOW_VHWS_OP_SWTAG_FULL0 (0xCA0UL)
+#define SSOW_VHWS_OP_SWTAG_FULL1 (0xCA8ULL)
+#define SSOW_VHWS_OP_CLR_NSCHED (0x10000ULL)
+#define SSOW_VHWS_OP_GET_WORK0 (0x80000ULL)
+#define SSOW_VHWS_OP_GET_WORK1 (0x80008ULL)
+
+/* Mailbox message constants */
+#define SSO_COPROC 0x2
+
+#define SSO_GETDOMAINCFG 0x1
+#define SSO_IDENTIFY 0x2
+#define SSO_GET_DEV_INFO 0x3
+#define SSO_GET_GETWORK_WAIT 0x4
+#define SSO_SET_GETWORK_WAIT 0x5
+#define SSO_CONVERT_NS_GETWORK_ITER 0x6
+#define SSO_GRP_GET_PRIORITY 0x7
+#define SSO_GRP_SET_PRIORITY 0x8
+
+#define SSOVF_SELFTEST_ARG ("selftest")
+
+/*
+ * In the Cavium OCTEON TX SoC, all accesses to the device registers are
+ * implicitly strongly ordered, so the relaxed version of the IO operations
+ * is safe to use without any IO memory barriers.
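+ * For example, ssovf_read64()/ssovf_write64() below map directly to
+ * rte_read64_relaxed()/rte_write64_relaxed() with no explicit barriers.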
+ */ +#define ssovf_read64 rte_read64_relaxed +#define ssovf_write64 rte_write64_relaxed + +/* ARM64 specific functions */ +#if defined(RTE_ARCH_ARM64) +#define ssovf_load_pair(val0, val1, addr) ({ \ + asm volatile( \ + "ldp %x[x0], %x[x1], [%x[p1]]" \ + :[x0]"=r"(val0), [x1]"=r"(val1) \ + :[p1]"r"(addr) \ + ); }) + +#define ssovf_store_pair(val0, val1, addr) ({ \ + asm volatile( \ + "stp %x[x0], %x[x1], [%x[p1]]" \ + ::[x0]"r"(val0), [x1]"r"(val1), [p1]"r"(addr) \ + ); }) +#else /* Un optimized functions for building on non arm64 arch */ + +#define ssovf_load_pair(val0, val1, addr) \ +do { \ + val0 = rte_read64(addr); \ + val1 = rte_read64(((uint8_t *)addr) + 8); \ +} while (0) + +#define ssovf_store_pair(val0, val1, addr) \ +do { \ + rte_write64(val0, addr); \ + rte_write64(val1, (((uint8_t *)addr) + 8)); \ +} while (0) +#endif + +struct ssovf_info { + uint16_t domain; /* Domain id */ + uint8_t total_ssovfs; /* Total sso groups available in domain */ + uint8_t total_ssowvfs;/* Total sso hws available in domain */ +}; + +enum ssovf_type { + OCTEONTX_SSO_GROUP, /* SSO group vf */ + OCTEONTX_SSO_HWS, /* SSO hardware workslot vf */ +}; + +struct ssovf_evdev { + OFFLOAD_FLAGS; /*Sequence should not be changed */ + uint8_t max_event_queues; + uint8_t max_event_ports; + uint8_t is_timeout_deq; + uint8_t nb_event_queues; + uint8_t nb_event_ports; + uint32_t min_deq_timeout_ns; + uint32_t max_deq_timeout_ns; + int32_t max_num_events; +} __rte_cache_aligned; + +/* Event port aka HWS */ +struct ssows { + uint8_t cur_tt; + uint8_t cur_grp; + uint8_t swtag_req; + uint8_t *base; + uint8_t *getwork; + uint8_t *grps[SSO_MAX_VHGRP]; + uint8_t port; + void *lookup_mem; +} __rte_cache_aligned; + +static inline struct ssovf_evdev * +ssovf_pmd_priv(const struct rte_eventdev *eventdev) +{ + return eventdev->data->dev_private; +} + +extern int otx_logtype_ssovf; + +uint16_t ssows_enq(void *port, const struct rte_event *ev); +uint16_t ssows_enq_burst(void *port, + const struct rte_event ev[], uint16_t nb_events); +uint16_t ssows_enq_new_burst(void *port, + const struct rte_event ev[], uint16_t nb_events); +uint16_t ssows_enq_fwd_burst(void *port, + const struct rte_event ev[], uint16_t nb_events); +typedef void (*ssows_handle_event_t)(void *arg, struct rte_event ev); +void ssows_flush_events(struct ssows *ws, uint8_t queue_id, + ssows_handle_event_t fn, void *arg); +void ssows_reset(struct ssows *ws); +int ssovf_info(struct ssovf_info *info); +void *ssovf_bar(enum ssovf_type, uint8_t id, uint8_t bar); +int test_eventdev_octeontx(void); +void ssovf_fastpath_fns_set(struct rte_eventdev *dev); +void *octeontx_fastpath_lookup_mem_get(void); + +#endif /* __SSOVF_EVDEV_H__ */ diff --git a/src/spdk/dpdk/drivers/event/octeontx/ssovf_evdev_selftest.c b/src/spdk/dpdk/drivers/event/octeontx/ssovf_evdev_selftest.c new file mode 100644 index 000000000..239362fcf --- /dev/null +++ b/src/spdk/dpdk/drivers/event/octeontx/ssovf_evdev_selftest.c @@ -0,0 +1,1523 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017 Cavium, Inc + */ + +#include <rte_atomic.h> +#include <rte_common.h> +#include <rte_cycles.h> +#include <rte_debug.h> +#include <rte_eal.h> +#include <rte_ethdev.h> +#include <rte_eventdev.h> +#include <rte_hexdump.h> +#include <rte_mbuf.h> +#include <rte_malloc.h> +#include <rte_memcpy.h> +#include <rte_launch.h> +#include <rte_lcore.h> +#include <rte_per_lcore.h> +#include <rte_random.h> +#include <rte_bus_vdev.h> +#include <rte_test.h> + +#include "ssovf_evdev.h" + +#define NUM_PACKETS (1 << 18) 
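+/*
+ * Note: NUM_PACKETS must be at least MAX_EVENTS; seqn_list_init() enforces
+ * this with RTE_BUILD_BUG_ON(). The suite is reached through the
+ * "selftest=1" vdev argument handled in ssovf_vdev_probe() or through the
+ * eventdev dev_selftest op.
+ */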
+#define MAX_EVENTS (16 * 1024) + +#define OCTEONTX_TEST_RUN(setup, teardown, test) \ + octeontx_test_run(setup, teardown, test, #test) + +static int total; +static int passed; +static int failed; +static int unsupported; + +static int evdev; +static struct rte_mempool *eventdev_test_mempool; + +struct event_attr { + uint32_t flow_id; + uint8_t event_type; + uint8_t sub_event_type; + uint8_t sched_type; + uint8_t queue; + uint8_t port; +}; + +static uint32_t seqn_list_index; +static int seqn_list[NUM_PACKETS]; + +static inline void +seqn_list_init(void) +{ + RTE_BUILD_BUG_ON(NUM_PACKETS < MAX_EVENTS); + memset(seqn_list, 0, sizeof(seqn_list)); + seqn_list_index = 0; +} + +static inline int +seqn_list_update(int val) +{ + if (seqn_list_index >= NUM_PACKETS) + return -1; + + seqn_list[seqn_list_index++] = val; + rte_smp_wmb(); + return 0; +} + +static inline int +seqn_list_check(int limit) +{ + int i; + + for (i = 0; i < limit; i++) { + if (seqn_list[i] != i) { + ssovf_log_dbg("Seqn mismatch %d %d", seqn_list[i], i); + return -1; + } + } + return 0; +} + +struct test_core_param { + rte_atomic32_t *total_events; + uint64_t dequeue_tmo_ticks; + uint8_t port; + uint8_t sched_type; +}; + +static int +testsuite_setup(void) +{ + const char *eventdev_name = "event_octeontx"; + + evdev = rte_event_dev_get_dev_id(eventdev_name); + if (evdev < 0) { + ssovf_log_dbg("%d: Eventdev %s not found - creating.", + __LINE__, eventdev_name); + if (rte_vdev_init(eventdev_name, NULL) < 0) { + ssovf_log_dbg("Error creating eventdev %s", + eventdev_name); + return -1; + } + evdev = rte_event_dev_get_dev_id(eventdev_name); + if (evdev < 0) { + ssovf_log_dbg("Error finding newly created eventdev"); + return -1; + } + } + + return 0; +} + +static void +testsuite_teardown(void) +{ + rte_event_dev_close(evdev); +} + +static inline void +devconf_set_default_sane_values(struct rte_event_dev_config *dev_conf, + struct rte_event_dev_info *info) +{ + memset(dev_conf, 0, sizeof(struct rte_event_dev_config)); + dev_conf->dequeue_timeout_ns = info->min_dequeue_timeout_ns; + dev_conf->nb_event_ports = info->max_event_ports; + dev_conf->nb_event_queues = info->max_event_queues; + dev_conf->nb_event_queue_flows = info->max_event_queue_flows; + dev_conf->nb_event_port_dequeue_depth = + info->max_event_port_dequeue_depth; + dev_conf->nb_event_port_enqueue_depth = + info->max_event_port_enqueue_depth; + dev_conf->nb_event_port_enqueue_depth = + info->max_event_port_enqueue_depth; + dev_conf->nb_events_limit = + info->max_num_events; +} + +enum { + TEST_EVENTDEV_SETUP_DEFAULT, + TEST_EVENTDEV_SETUP_PRIORITY, + TEST_EVENTDEV_SETUP_DEQUEUE_TIMEOUT, +}; + +static inline int +_eventdev_setup(int mode) +{ + int i, ret; + struct rte_event_dev_config dev_conf; + struct rte_event_dev_info info; + const char *pool_name = "evdev_octeontx_test_pool"; + + /* Create and destrory pool for each test case to make it standalone */ + eventdev_test_mempool = rte_pktmbuf_pool_create(pool_name, + MAX_EVENTS, + 0 /*MBUF_CACHE_SIZE*/, + 0, + 512, /* Use very small mbufs */ + rte_socket_id()); + if (!eventdev_test_mempool) { + ssovf_log_dbg("ERROR creating mempool"); + return -1; + } + + ret = rte_event_dev_info_get(evdev, &info); + RTE_TEST_ASSERT_SUCCESS(ret, "Failed to get event dev info"); + RTE_TEST_ASSERT(info.max_num_events >= (int32_t)MAX_EVENTS, + "ERROR max_num_events=%d < max_events=%d", + info.max_num_events, MAX_EVENTS); + + devconf_set_default_sane_values(&dev_conf, &info); + if (mode == TEST_EVENTDEV_SETUP_DEQUEUE_TIMEOUT) + 
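+ /* Per the eventdev API, this flag makes each dequeue call supply its own timeout instead of using the global dequeue_timeout_ns. */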
dev_conf.event_dev_cfg |= RTE_EVENT_DEV_CFG_PER_DEQUEUE_TIMEOUT; + + ret = rte_event_dev_configure(evdev, &dev_conf); + RTE_TEST_ASSERT_SUCCESS(ret, "Failed to configure eventdev"); + + uint32_t queue_count; + RTE_TEST_ASSERT_SUCCESS(rte_event_dev_attr_get(evdev, + RTE_EVENT_DEV_ATTR_QUEUE_COUNT, + &queue_count), "Queue count get failed"); + + if (mode == TEST_EVENTDEV_SETUP_PRIORITY) { + if (queue_count > 8) { + ssovf_log_dbg( + "test expects the unique priority per queue"); + return -ENOTSUP; + } + + /* Configure event queues(0 to n) with + * RTE_EVENT_DEV_PRIORITY_HIGHEST to + * RTE_EVENT_DEV_PRIORITY_LOWEST + */ + uint8_t step = (RTE_EVENT_DEV_PRIORITY_LOWEST + 1) / + queue_count; + for (i = 0; i < (int)queue_count; i++) { + struct rte_event_queue_conf queue_conf; + + ret = rte_event_queue_default_conf_get(evdev, i, + &queue_conf); + RTE_TEST_ASSERT_SUCCESS(ret, "Failed to get def_conf%d", + i); + queue_conf.priority = i * step; + ret = rte_event_queue_setup(evdev, i, &queue_conf); + RTE_TEST_ASSERT_SUCCESS(ret, "Failed to setup queue=%d", + i); + } + + } else { + /* Configure event queues with default priority */ + for (i = 0; i < (int)queue_count; i++) { + ret = rte_event_queue_setup(evdev, i, NULL); + RTE_TEST_ASSERT_SUCCESS(ret, "Failed to setup queue=%d", + i); + } + } + /* Configure event ports */ + uint32_t port_count; + RTE_TEST_ASSERT_SUCCESS(rte_event_dev_attr_get(evdev, + RTE_EVENT_DEV_ATTR_PORT_COUNT, + &port_count), "Port count get failed"); + for (i = 0; i < (int)port_count; i++) { + ret = rte_event_port_setup(evdev, i, NULL); + RTE_TEST_ASSERT_SUCCESS(ret, "Failed to setup port=%d", i); + ret = rte_event_port_link(evdev, i, NULL, NULL, 0); + RTE_TEST_ASSERT(ret >= 0, "Failed to link all queues port=%d", + i); + } + + ret = rte_event_dev_start(evdev); + RTE_TEST_ASSERT_SUCCESS(ret, "Failed to start device"); + + return 0; +} + +static inline int +eventdev_setup(void) +{ + return _eventdev_setup(TEST_EVENTDEV_SETUP_DEFAULT); +} + +static inline int +eventdev_setup_priority(void) +{ + return _eventdev_setup(TEST_EVENTDEV_SETUP_PRIORITY); +} + +static inline int +eventdev_setup_dequeue_timeout(void) +{ + return _eventdev_setup(TEST_EVENTDEV_SETUP_DEQUEUE_TIMEOUT); +} + +static inline void +eventdev_teardown(void) +{ + rte_event_dev_stop(evdev); + rte_mempool_free(eventdev_test_mempool); +} + +static inline void +update_event_and_validation_attr(struct rte_mbuf *m, struct rte_event *ev, + uint32_t flow_id, uint8_t event_type, + uint8_t sub_event_type, uint8_t sched_type, + uint8_t queue, uint8_t port) +{ + struct event_attr *attr; + + /* Store the event attributes in mbuf for future reference */ + attr = rte_pktmbuf_mtod(m, struct event_attr *); + attr->flow_id = flow_id; + attr->event_type = event_type; + attr->sub_event_type = sub_event_type; + attr->sched_type = sched_type; + attr->queue = queue; + attr->port = port; + + ev->flow_id = flow_id; + ev->sub_event_type = sub_event_type; + ev->event_type = event_type; + /* Inject the new event */ + ev->op = RTE_EVENT_OP_NEW; + ev->sched_type = sched_type; + ev->queue_id = queue; + ev->mbuf = m; +} + +static inline int +inject_events(uint32_t flow_id, uint8_t event_type, uint8_t sub_event_type, + uint8_t sched_type, uint8_t queue, uint8_t port, + unsigned int events) +{ + struct rte_mbuf *m; + unsigned int i; + + for (i = 0; i < events; i++) { + struct rte_event ev = {.event = 0, .u64 = 0}; + + m = rte_pktmbuf_alloc(eventdev_test_mempool); + RTE_TEST_ASSERT_NOT_NULL(m, "mempool alloc failed"); + + m->seqn = i; + 
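+ /* seqn records the injection order; validators such as validate_simple_enqdeq() compare it against the dequeue index. */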
update_event_and_validation_attr(m, &ev, flow_id, event_type, + sub_event_type, sched_type, queue, port); + rte_event_enqueue_burst(evdev, port, &ev, 1); + } + return 0; +} + +static inline int +check_excess_events(uint8_t port) +{ + int i; + uint16_t valid_event; + struct rte_event ev; + + /* Check for excess events, try for a few times and exit */ + for (i = 0; i < 32; i++) { + valid_event = rte_event_dequeue_burst(evdev, port, &ev, 1, 0); + + RTE_TEST_ASSERT_SUCCESS(valid_event, + "Unexpected valid event=%d", ev.mbuf->seqn); + } + return 0; +} + +static inline int +generate_random_events(const unsigned int total_events) +{ + struct rte_event_dev_info info; + unsigned int i; + int ret; + + uint32_t queue_count; + RTE_TEST_ASSERT_SUCCESS(rte_event_dev_attr_get(evdev, + RTE_EVENT_DEV_ATTR_QUEUE_COUNT, + &queue_count), "Queue count get failed"); + + ret = rte_event_dev_info_get(evdev, &info); + RTE_TEST_ASSERT_SUCCESS(ret, "Failed to get event dev info"); + for (i = 0; i < total_events; i++) { + ret = inject_events( + rte_rand() % info.max_event_queue_flows /*flow_id */, + RTE_EVENT_TYPE_CPU /* event_type */, + rte_rand() % 256 /* sub_event_type */, + rte_rand() % (RTE_SCHED_TYPE_PARALLEL + 1), + rte_rand() % queue_count /* queue */, + 0 /* port */, + 1 /* events */); + if (ret) + return -1; + } + return ret; +} + + +static inline int +validate_event(struct rte_event *ev) +{ + struct event_attr *attr; + + attr = rte_pktmbuf_mtod(ev->mbuf, struct event_attr *); + RTE_TEST_ASSERT_EQUAL(attr->flow_id, ev->flow_id, + "flow_id mismatch enq=%d deq =%d", + attr->flow_id, ev->flow_id); + RTE_TEST_ASSERT_EQUAL(attr->event_type, ev->event_type, + "event_type mismatch enq=%d deq =%d", + attr->event_type, ev->event_type); + RTE_TEST_ASSERT_EQUAL(attr->sub_event_type, ev->sub_event_type, + "sub_event_type mismatch enq=%d deq =%d", + attr->sub_event_type, ev->sub_event_type); + RTE_TEST_ASSERT_EQUAL(attr->sched_type, ev->sched_type, + "sched_type mismatch enq=%d deq =%d", + attr->sched_type, ev->sched_type); + RTE_TEST_ASSERT_EQUAL(attr->queue, ev->queue_id, + "queue mismatch enq=%d deq =%d", + attr->queue, ev->queue_id); + return 0; +} + +typedef int (*validate_event_cb)(uint32_t index, uint8_t port, + struct rte_event *ev); + +static inline int +consume_events(uint8_t port, const uint32_t total_events, validate_event_cb fn) +{ + int ret; + uint16_t valid_event; + uint32_t events = 0, forward_progress_cnt = 0, index = 0; + struct rte_event ev; + + while (1) { + if (++forward_progress_cnt > UINT16_MAX) { + ssovf_log_dbg("Detected deadlock"); + return -1; + } + + valid_event = rte_event_dequeue_burst(evdev, port, &ev, 1, 0); + if (!valid_event) + continue; + + forward_progress_cnt = 0; + ret = validate_event(&ev); + if (ret) + return -1; + + if (fn != NULL) { + ret = fn(index, port, &ev); + RTE_TEST_ASSERT_SUCCESS(ret, + "Failed to validate test specific event"); + } + + ++index; + + rte_pktmbuf_free(ev.mbuf); + if (++events >= total_events) + break; + } + + return check_excess_events(port); +} + +static int +validate_simple_enqdeq(uint32_t index, uint8_t port, struct rte_event *ev) +{ + RTE_SET_USED(port); + RTE_TEST_ASSERT_EQUAL(index, ev->mbuf->seqn, "index=%d != seqn=%d", + index, ev->mbuf->seqn); + return 0; +} + +static inline int +test_simple_enqdeq(uint8_t sched_type) +{ + int ret; + + ret = inject_events(0 /*flow_id */, + RTE_EVENT_TYPE_CPU /* event_type */, + 0 /* sub_event_type */, + sched_type, + 0 /* queue */, + 0 /* port */, + MAX_EVENTS); + if (ret) + return -1; + + return consume_events(0 
/* port */, MAX_EVENTS, validate_simple_enqdeq); +} + +static int +test_simple_enqdeq_ordered(void) +{ + return test_simple_enqdeq(RTE_SCHED_TYPE_ORDERED); +} + +static int +test_simple_enqdeq_atomic(void) +{ + return test_simple_enqdeq(RTE_SCHED_TYPE_ATOMIC); +} + +static int +test_simple_enqdeq_parallel(void) +{ + return test_simple_enqdeq(RTE_SCHED_TYPE_PARALLEL); +} + +/* + * Generate a prescribed number of events and spread them across available + * queues. On dequeue, using single event port(port 0) verify the enqueued + * event attributes + */ +static int +test_multi_queue_enq_single_port_deq(void) +{ + int ret; + + ret = generate_random_events(MAX_EVENTS); + if (ret) + return -1; + + return consume_events(0 /* port */, MAX_EVENTS, NULL); +} + +/* + * Inject 0..MAX_EVENTS events over 0..queue_count with modulus + * operation + * + * For example, Inject 32 events over 0..7 queues + * enqueue events 0, 8, 16, 24 in queue 0 + * enqueue events 1, 9, 17, 25 in queue 1 + * .. + * .. + * enqueue events 7, 15, 23, 31 in queue 7 + * + * On dequeue, Validate the events comes in 0,8,16,24,1,9,17,25..,7,15,23,31 + * order from queue0(highest priority) to queue7(lowest_priority) + */ +static int +validate_queue_priority(uint32_t index, uint8_t port, struct rte_event *ev) +{ + uint32_t queue_count; + RTE_TEST_ASSERT_SUCCESS(rte_event_dev_attr_get(evdev, + RTE_EVENT_DEV_ATTR_QUEUE_COUNT, + &queue_count), "Queue count get failed"); + uint32_t range = MAX_EVENTS / queue_count; + uint32_t expected_val = (index % range) * queue_count; + + expected_val += ev->queue_id; + RTE_SET_USED(port); + RTE_TEST_ASSERT_EQUAL(ev->mbuf->seqn, expected_val, + "seqn=%d index=%d expected=%d range=%d nb_queues=%d max_event=%d", + ev->mbuf->seqn, index, expected_val, range, + queue_count, MAX_EVENTS); + return 0; +} + +static int +test_multi_queue_priority(void) +{ + uint8_t queue; + struct rte_mbuf *m; + int i, max_evts_roundoff; + + /* See validate_queue_priority() comments for priority validate logic */ + uint32_t queue_count; + RTE_TEST_ASSERT_SUCCESS(rte_event_dev_attr_get(evdev, + RTE_EVENT_DEV_ATTR_QUEUE_COUNT, + &queue_count), "Queue count get failed"); + max_evts_roundoff = MAX_EVENTS / queue_count; + max_evts_roundoff *= queue_count; + + for (i = 0; i < max_evts_roundoff; i++) { + struct rte_event ev = {.event = 0, .u64 = 0}; + + m = rte_pktmbuf_alloc(eventdev_test_mempool); + RTE_TEST_ASSERT_NOT_NULL(m, "mempool alloc failed"); + + m->seqn = i; + queue = i % queue_count; + update_event_and_validation_attr(m, &ev, 0, RTE_EVENT_TYPE_CPU, + 0, RTE_SCHED_TYPE_PARALLEL, queue, 0); + rte_event_enqueue_burst(evdev, 0, &ev, 1); + } + + return consume_events(0, max_evts_roundoff, validate_queue_priority); +} + +static int +worker_multi_port_fn(void *arg) +{ + struct test_core_param *param = arg; + struct rte_event ev; + uint16_t valid_event; + uint8_t port = param->port; + rte_atomic32_t *total_events = param->total_events; + int ret; + + while (rte_atomic32_read(total_events) > 0) { + valid_event = rte_event_dequeue_burst(evdev, port, &ev, 1, 0); + if (!valid_event) + continue; + + ret = validate_event(&ev); + RTE_TEST_ASSERT_SUCCESS(ret, "Failed to validate event"); + rte_pktmbuf_free(ev.mbuf); + rte_atomic32_sub(total_events, 1); + } + return 0; +} + +static inline int +wait_workers_to_join(int lcore, const rte_atomic32_t *count) +{ + uint64_t cycles, print_cycles; + RTE_SET_USED(count); + + print_cycles = cycles = rte_get_timer_cycles(); + while (rte_eal_get_lcore_state(lcore) != FINISHED) { + uint64_t new_cycles 
= rte_get_timer_cycles(); + + if (new_cycles - print_cycles > rte_get_timer_hz()) { + ssovf_log_dbg("\r%s: events %d", __func__, + rte_atomic32_read(count)); + print_cycles = new_cycles; + } + if (new_cycles - cycles > rte_get_timer_hz() * 10) { + ssovf_log_dbg( + "%s: No schedules for seconds, deadlock (%d)", + __func__, + rte_atomic32_read(count)); + rte_event_dev_dump(evdev, stdout); + cycles = new_cycles; + return -1; + } + } + rte_eal_mp_wait_lcore(); + return 0; +} + + +static inline int +launch_workers_and_wait(int (*master_worker)(void *), + int (*slave_workers)(void *), uint32_t total_events, + uint8_t nb_workers, uint8_t sched_type) +{ + uint8_t port = 0; + int w_lcore; + int ret; + struct test_core_param *param; + rte_atomic32_t atomic_total_events; + uint64_t dequeue_tmo_ticks; + + if (!nb_workers) + return 0; + + rte_atomic32_set(&atomic_total_events, total_events); + seqn_list_init(); + + param = malloc(sizeof(struct test_core_param) * nb_workers); + if (!param) + return -1; + + ret = rte_event_dequeue_timeout_ticks(evdev, + rte_rand() % 10000000/* 10ms */, &dequeue_tmo_ticks); + if (ret) { + free(param); + return -1; + } + + param[0].total_events = &atomic_total_events; + param[0].sched_type = sched_type; + param[0].port = 0; + param[0].dequeue_tmo_ticks = dequeue_tmo_ticks; + rte_smp_wmb(); + + w_lcore = rte_get_next_lcore( + /* start core */ -1, + /* skip master */ 1, + /* wrap */ 0); + rte_eal_remote_launch(master_worker, ¶m[0], w_lcore); + + for (port = 1; port < nb_workers; port++) { + param[port].total_events = &atomic_total_events; + param[port].sched_type = sched_type; + param[port].port = port; + param[port].dequeue_tmo_ticks = dequeue_tmo_ticks; + rte_smp_wmb(); + w_lcore = rte_get_next_lcore(w_lcore, 1, 0); + rte_eal_remote_launch(slave_workers, ¶m[port], w_lcore); + } + + ret = wait_workers_to_join(w_lcore, &atomic_total_events); + free(param); + return ret; +} + +/* + * Generate a prescribed number of events and spread them across available + * queues. 
Dequeue the events through multiple ports and verify the enqueued + * event attributes + */ +static int +test_multi_queue_enq_multi_port_deq(void) +{ + const unsigned int total_events = MAX_EVENTS; + uint32_t nr_ports; + int ret; + + ret = generate_random_events(total_events); + if (ret) + return -1; + + RTE_TEST_ASSERT_SUCCESS(rte_event_dev_attr_get(evdev, + RTE_EVENT_DEV_ATTR_PORT_COUNT, + &nr_ports), "Port count get failed"); + nr_ports = RTE_MIN(nr_ports, rte_lcore_count() - 1); + + if (!nr_ports) { + ssovf_log_dbg("%s: Not enough ports=%d or workers=%d", __func__, + nr_ports, rte_lcore_count() - 1); + return 0; + } + + return launch_workers_and_wait(worker_multi_port_fn, + worker_multi_port_fn, total_events, + nr_ports, 0xff /* invalid */); +} + +static +void flush(uint8_t dev_id, struct rte_event event, void *arg) +{ + unsigned int *count = arg; + + RTE_SET_USED(dev_id); + if (event.event_type == RTE_EVENT_TYPE_CPU) + *count = *count + 1; + +} + +static int +test_dev_stop_flush(void) +{ + unsigned int total_events = MAX_EVENTS, count = 0; + int ret; + + ret = generate_random_events(total_events); + if (ret) + return -1; + + ret = rte_event_dev_stop_flush_callback_register(evdev, flush, &count); + if (ret) + return -2; + rte_event_dev_stop(evdev); + ret = rte_event_dev_stop_flush_callback_register(evdev, NULL, NULL); + if (ret) + return -3; + RTE_TEST_ASSERT_EQUAL(total_events, count, + "count mismatch total_events=%d count=%d", + total_events, count); + return 0; +} + +static int +validate_queue_to_port_single_link(uint32_t index, uint8_t port, + struct rte_event *ev) +{ + RTE_SET_USED(index); + RTE_TEST_ASSERT_EQUAL(port, ev->queue_id, + "queue mismatch enq=%d deq =%d", + port, ev->queue_id); + return 0; +} + +/* + * Link queue x to port x and check correctness of link by checking + * queue_id == x on dequeue on the specific port x + */ +static int +test_queue_to_port_single_link(void) +{ + int i, nr_links, ret; + + uint32_t port_count; + RTE_TEST_ASSERT_SUCCESS(rte_event_dev_attr_get(evdev, + RTE_EVENT_DEV_ATTR_PORT_COUNT, + &port_count), "Port count get failed"); + + /* Unlink all connections that created in eventdev_setup */ + for (i = 0; i < (int)port_count; i++) { + ret = rte_event_port_unlink(evdev, i, NULL, 0); + RTE_TEST_ASSERT(ret >= 0, + "Failed to unlink all queues port=%d", i); + } + + uint32_t queue_count; + RTE_TEST_ASSERT_SUCCESS(rte_event_dev_attr_get(evdev, + RTE_EVENT_DEV_ATTR_QUEUE_COUNT, + &queue_count), "Queue count get failed"); + + nr_links = RTE_MIN(port_count, queue_count); + const unsigned int total_events = MAX_EVENTS / nr_links; + + /* Link queue x to port x and inject events to queue x through port x */ + for (i = 0; i < nr_links; i++) { + uint8_t queue = (uint8_t)i; + + ret = rte_event_port_link(evdev, i, &queue, NULL, 1); + RTE_TEST_ASSERT(ret == 1, "Failed to link queue to port %d", i); + + ret = inject_events( + 0x100 /*flow_id */, + RTE_EVENT_TYPE_CPU /* event_type */, + rte_rand() % 256 /* sub_event_type */, + rte_rand() % (RTE_SCHED_TYPE_PARALLEL + 1), + queue /* queue */, + i /* port */, + total_events /* events */); + if (ret) + return -1; + } + + /* Verify the events generated from correct queue */ + for (i = 0; i < nr_links; i++) { + ret = consume_events(i /* port */, total_events, + validate_queue_to_port_single_link); + if (ret) + return -1; + } + + return 0; +} + +static int +validate_queue_to_port_multi_link(uint32_t index, uint8_t port, + struct rte_event *ev) +{ + RTE_SET_USED(index); + RTE_TEST_ASSERT_EQUAL(port, (ev->queue_id & 0x1), + 
"queue mismatch enq=%d deq =%d", + port, ev->queue_id); + return 0; +} + +/* + * Link all even number of queues to port 0 and all odd number of queues to + * port 1 and verify the link connection on dequeue + */ +static int +test_queue_to_port_multi_link(void) +{ + int ret, port0_events = 0, port1_events = 0; + uint8_t queue, port; + uint32_t nr_queues = 0; + uint32_t nr_ports = 0; + + RTE_TEST_ASSERT_SUCCESS(rte_event_dev_attr_get(evdev, + RTE_EVENT_DEV_ATTR_QUEUE_COUNT, + &nr_queues), "Queue count get failed"); + + RTE_TEST_ASSERT_SUCCESS(rte_event_dev_attr_get(evdev, + RTE_EVENT_DEV_ATTR_QUEUE_COUNT, + &nr_queues), "Queue count get failed"); + RTE_TEST_ASSERT_SUCCESS(rte_event_dev_attr_get(evdev, + RTE_EVENT_DEV_ATTR_PORT_COUNT, + &nr_ports), "Port count get failed"); + + if (nr_ports < 2) { + ssovf_log_dbg("%s: Not enough ports to test ports=%d", + __func__, nr_ports); + return 0; + } + + /* Unlink all connections that created in eventdev_setup */ + for (port = 0; port < nr_ports; port++) { + ret = rte_event_port_unlink(evdev, port, NULL, 0); + RTE_TEST_ASSERT(ret >= 0, "Failed to unlink all queues port=%d", + port); + } + + const unsigned int total_events = MAX_EVENTS / nr_queues; + + /* Link all even number of queues to port0 and odd numbers to port 1*/ + for (queue = 0; queue < nr_queues; queue++) { + port = queue & 0x1; + ret = rte_event_port_link(evdev, port, &queue, NULL, 1); + RTE_TEST_ASSERT(ret == 1, "Failed to link queue=%d to port=%d", + queue, port); + + ret = inject_events( + 0x100 /*flow_id */, + RTE_EVENT_TYPE_CPU /* event_type */, + rte_rand() % 256 /* sub_event_type */, + rte_rand() % (RTE_SCHED_TYPE_PARALLEL + 1), + queue /* queue */, + port /* port */, + total_events /* events */); + if (ret) + return -1; + + if (port == 0) + port0_events += total_events; + else + port1_events += total_events; + } + + ret = consume_events(0 /* port */, port0_events, + validate_queue_to_port_multi_link); + if (ret) + return -1; + ret = consume_events(1 /* port */, port1_events, + validate_queue_to_port_multi_link); + if (ret) + return -1; + + return 0; +} + +static int +worker_flow_based_pipeline(void *arg) +{ + struct test_core_param *param = arg; + struct rte_event ev; + uint16_t valid_event; + uint8_t port = param->port; + uint8_t new_sched_type = param->sched_type; + rte_atomic32_t *total_events = param->total_events; + uint64_t dequeue_tmo_ticks = param->dequeue_tmo_ticks; + + while (rte_atomic32_read(total_events) > 0) { + valid_event = rte_event_dequeue_burst(evdev, port, &ev, 1, + dequeue_tmo_ticks); + if (!valid_event) + continue; + + /* Events from stage 0 */ + if (ev.sub_event_type == 0) { + /* Move to atomic flow to maintain the ordering */ + ev.flow_id = 0x2; + ev.event_type = RTE_EVENT_TYPE_CPU; + ev.sub_event_type = 1; /* stage 1 */ + ev.sched_type = new_sched_type; + ev.op = RTE_EVENT_OP_FORWARD; + rte_event_enqueue_burst(evdev, port, &ev, 1); + } else if (ev.sub_event_type == 1) { /* Events from stage 1*/ + if (seqn_list_update(ev.mbuf->seqn) == 0) { + rte_pktmbuf_free(ev.mbuf); + rte_atomic32_sub(total_events, 1); + } else { + ssovf_log_dbg("Failed to update seqn_list"); + return -1; + } + } else { + ssovf_log_dbg("Invalid ev.sub_event_type = %d", + ev.sub_event_type); + return -1; + } + } + return 0; +} + +static int +test_multiport_flow_sched_type_test(uint8_t in_sched_type, + uint8_t out_sched_type) +{ + const unsigned int total_events = MAX_EVENTS; + uint32_t nr_ports; + int ret; + + RTE_TEST_ASSERT_SUCCESS(rte_event_dev_attr_get(evdev, + 
RTE_EVENT_DEV_ATTR_PORT_COUNT, + &nr_ports), "Port count get failed"); + nr_ports = RTE_MIN(nr_ports, rte_lcore_count() - 1); + + if (!nr_ports) { + ssovf_log_dbg("%s: Not enough ports=%d or workers=%d", __func__, + nr_ports, rte_lcore_count() - 1); + return 0; + } + + /* Injects events with m->seqn=0 to total_events */ + ret = inject_events( + 0x1 /*flow_id */, + RTE_EVENT_TYPE_CPU /* event_type */, + 0 /* sub_event_type (stage 0) */, + in_sched_type, + 0 /* queue */, + 0 /* port */, + total_events /* events */); + if (ret) + return -1; + + ret = launch_workers_and_wait(worker_flow_based_pipeline, + worker_flow_based_pipeline, + total_events, nr_ports, out_sched_type); + if (ret) + return -1; + + if (in_sched_type != RTE_SCHED_TYPE_PARALLEL && + out_sched_type == RTE_SCHED_TYPE_ATOMIC) { + /* Check the events order maintained or not */ + return seqn_list_check(total_events); + } + return 0; +} + + +/* Multi port ordered to atomic transaction */ +static int +test_multi_port_flow_ordered_to_atomic(void) +{ + /* Ingress event order test */ + return test_multiport_flow_sched_type_test(RTE_SCHED_TYPE_ORDERED, + RTE_SCHED_TYPE_ATOMIC); +} + +static int +test_multi_port_flow_ordered_to_ordered(void) +{ + return test_multiport_flow_sched_type_test(RTE_SCHED_TYPE_ORDERED, + RTE_SCHED_TYPE_ORDERED); +} + +static int +test_multi_port_flow_ordered_to_parallel(void) +{ + return test_multiport_flow_sched_type_test(RTE_SCHED_TYPE_ORDERED, + RTE_SCHED_TYPE_PARALLEL); +} + +static int +test_multi_port_flow_atomic_to_atomic(void) +{ + /* Ingress event order test */ + return test_multiport_flow_sched_type_test(RTE_SCHED_TYPE_ATOMIC, + RTE_SCHED_TYPE_ATOMIC); +} + +static int +test_multi_port_flow_atomic_to_ordered(void) +{ + return test_multiport_flow_sched_type_test(RTE_SCHED_TYPE_ATOMIC, + RTE_SCHED_TYPE_ORDERED); +} + +static int +test_multi_port_flow_atomic_to_parallel(void) +{ + return test_multiport_flow_sched_type_test(RTE_SCHED_TYPE_ATOMIC, + RTE_SCHED_TYPE_PARALLEL); +} + +static int +test_multi_port_flow_parallel_to_atomic(void) +{ + return test_multiport_flow_sched_type_test(RTE_SCHED_TYPE_PARALLEL, + RTE_SCHED_TYPE_ATOMIC); +} + +static int +test_multi_port_flow_parallel_to_ordered(void) +{ + return test_multiport_flow_sched_type_test(RTE_SCHED_TYPE_PARALLEL, + RTE_SCHED_TYPE_ORDERED); +} + +static int +test_multi_port_flow_parallel_to_parallel(void) +{ + return test_multiport_flow_sched_type_test(RTE_SCHED_TYPE_PARALLEL, + RTE_SCHED_TYPE_PARALLEL); +} + +static int +worker_group_based_pipeline(void *arg) +{ + struct test_core_param *param = arg; + struct rte_event ev; + uint16_t valid_event; + uint8_t port = param->port; + uint8_t new_sched_type = param->sched_type; + rte_atomic32_t *total_events = param->total_events; + uint64_t dequeue_tmo_ticks = param->dequeue_tmo_ticks; + + while (rte_atomic32_read(total_events) > 0) { + valid_event = rte_event_dequeue_burst(evdev, port, &ev, 1, + dequeue_tmo_ticks); + if (!valid_event) + continue; + + /* Events from stage 0(group 0) */ + if (ev.queue_id == 0) { + /* Move to atomic flow to maintain the ordering */ + ev.flow_id = 0x2; + ev.event_type = RTE_EVENT_TYPE_CPU; + ev.sched_type = new_sched_type; + ev.queue_id = 1; /* Stage 1*/ + ev.op = RTE_EVENT_OP_FORWARD; + rte_event_enqueue_burst(evdev, port, &ev, 1); + } else if (ev.queue_id == 1) { /* Events from stage 1(group 1)*/ + if (seqn_list_update(ev.mbuf->seqn) == 0) { + rte_pktmbuf_free(ev.mbuf); + rte_atomic32_sub(total_events, 1); + } else { + ssovf_log_dbg("Failed to update seqn_list"); + return 
-1; + } + } else { + ssovf_log_dbg("Invalid ev.queue_id = %d", ev.queue_id); + return -1; + } + } + + + return 0; +} + +static int +test_multiport_queue_sched_type_test(uint8_t in_sched_type, + uint8_t out_sched_type) +{ + const unsigned int total_events = MAX_EVENTS; + uint32_t nr_ports; + int ret; + + RTE_TEST_ASSERT_SUCCESS(rte_event_dev_attr_get(evdev, + RTE_EVENT_DEV_ATTR_PORT_COUNT, + &nr_ports), "Port count get failed"); + + nr_ports = RTE_MIN(nr_ports, rte_lcore_count() - 1); + + uint32_t queue_count; + RTE_TEST_ASSERT_SUCCESS(rte_event_dev_attr_get(evdev, + RTE_EVENT_DEV_ATTR_QUEUE_COUNT, + &queue_count), "Queue count get failed"); + if (queue_count < 2 || !nr_ports) { + ssovf_log_dbg("%s: Not enough queues=%d ports=%d or workers=%d", + __func__, queue_count, nr_ports, + rte_lcore_count() - 1); + return 0; + } + + /* Injects events with m->seqn=0 to total_events */ + ret = inject_events( + 0x1 /*flow_id */, + RTE_EVENT_TYPE_CPU /* event_type */, + 0 /* sub_event_type (stage 0) */, + in_sched_type, + 0 /* queue */, + 0 /* port */, + total_events /* events */); + if (ret) + return -1; + + ret = launch_workers_and_wait(worker_group_based_pipeline, + worker_group_based_pipeline, + total_events, nr_ports, out_sched_type); + if (ret) + return -1; + + if (in_sched_type != RTE_SCHED_TYPE_PARALLEL && + out_sched_type == RTE_SCHED_TYPE_ATOMIC) { + /* Check the events order maintained or not */ + return seqn_list_check(total_events); + } + return 0; +} + +static int +test_multi_port_queue_ordered_to_atomic(void) +{ + /* Ingress event order test */ + return test_multiport_queue_sched_type_test(RTE_SCHED_TYPE_ORDERED, + RTE_SCHED_TYPE_ATOMIC); +} + +static int +test_multi_port_queue_ordered_to_ordered(void) +{ + return test_multiport_queue_sched_type_test(RTE_SCHED_TYPE_ORDERED, + RTE_SCHED_TYPE_ORDERED); +} + +static int +test_multi_port_queue_ordered_to_parallel(void) +{ + return test_multiport_queue_sched_type_test(RTE_SCHED_TYPE_ORDERED, + RTE_SCHED_TYPE_PARALLEL); +} + +static int +test_multi_port_queue_atomic_to_atomic(void) +{ + /* Ingress event order test */ + return test_multiport_queue_sched_type_test(RTE_SCHED_TYPE_ATOMIC, + RTE_SCHED_TYPE_ATOMIC); +} + +static int +test_multi_port_queue_atomic_to_ordered(void) +{ + return test_multiport_queue_sched_type_test(RTE_SCHED_TYPE_ATOMIC, + RTE_SCHED_TYPE_ORDERED); +} + +static int +test_multi_port_queue_atomic_to_parallel(void) +{ + return test_multiport_queue_sched_type_test(RTE_SCHED_TYPE_ATOMIC, + RTE_SCHED_TYPE_PARALLEL); +} + +static int +test_multi_port_queue_parallel_to_atomic(void) +{ + return test_multiport_queue_sched_type_test(RTE_SCHED_TYPE_PARALLEL, + RTE_SCHED_TYPE_ATOMIC); +} + +static int +test_multi_port_queue_parallel_to_ordered(void) +{ + return test_multiport_queue_sched_type_test(RTE_SCHED_TYPE_PARALLEL, + RTE_SCHED_TYPE_ORDERED); +} + +static int +test_multi_port_queue_parallel_to_parallel(void) +{ + return test_multiport_queue_sched_type_test(RTE_SCHED_TYPE_PARALLEL, + RTE_SCHED_TYPE_PARALLEL); +} + +static int +worker_flow_based_pipeline_max_stages_rand_sched_type(void *arg) +{ + struct test_core_param *param = arg; + struct rte_event ev; + uint16_t valid_event; + uint8_t port = param->port; + rte_atomic32_t *total_events = param->total_events; + + while (rte_atomic32_read(total_events) > 0) { + valid_event = rte_event_dequeue_burst(evdev, port, &ev, 1, 0); + if (!valid_event) + continue; + + if (ev.sub_event_type == 255) { /* last stage */ + rte_pktmbuf_free(ev.mbuf); + rte_atomic32_sub(total_events, 1); + } else 
{ + ev.event_type = RTE_EVENT_TYPE_CPU; + ev.sub_event_type++; + ev.sched_type = + rte_rand() % (RTE_SCHED_TYPE_PARALLEL + 1); + ev.op = RTE_EVENT_OP_FORWARD; + rte_event_enqueue_burst(evdev, port, &ev, 1); + } + } + return 0; +} + +static int +launch_multi_port_max_stages_random_sched_type(int (*fn)(void *)) +{ + uint32_t nr_ports; + int ret; + + RTE_TEST_ASSERT_SUCCESS(rte_event_dev_attr_get(evdev, + RTE_EVENT_DEV_ATTR_PORT_COUNT, + &nr_ports), "Port count get failed"); + nr_ports = RTE_MIN(nr_ports, rte_lcore_count() - 1); + + if (!nr_ports) { + ssovf_log_dbg("%s: Not enough ports=%d or workers=%d", __func__, + nr_ports, rte_lcore_count() - 1); + return 0; + } + + /* Injects events with m->seqn=0 to total_events */ + ret = inject_events( + 0x1 /*flow_id */, + RTE_EVENT_TYPE_CPU /* event_type */, + 0 /* sub_event_type (stage 0) */, + rte_rand() % (RTE_SCHED_TYPE_PARALLEL + 1) /* sched_type */, + 0 /* queue */, + 0 /* port */, + MAX_EVENTS /* events */); + if (ret) + return -1; + + return launch_workers_and_wait(fn, fn, MAX_EVENTS, nr_ports, + 0xff /* invalid */); +} + +/* Flow based pipeline with maximum stages with random sched type */ +static int +test_multi_port_flow_max_stages_random_sched_type(void) +{ + return launch_multi_port_max_stages_random_sched_type( + worker_flow_based_pipeline_max_stages_rand_sched_type); +} + +static int +worker_queue_based_pipeline_max_stages_rand_sched_type(void *arg) +{ + struct test_core_param *param = arg; + struct rte_event ev; + uint16_t valid_event; + uint8_t port = param->port; + uint32_t queue_count; + RTE_TEST_ASSERT_SUCCESS(rte_event_dev_attr_get(evdev, + RTE_EVENT_DEV_ATTR_QUEUE_COUNT, + &queue_count), "Queue count get failed"); + uint8_t nr_queues = queue_count; + rte_atomic32_t *total_events = param->total_events; + + while (rte_atomic32_read(total_events) > 0) { + valid_event = rte_event_dequeue_burst(evdev, port, &ev, 1, 0); + if (!valid_event) + continue; + + if (ev.queue_id == nr_queues - 1) { /* last stage */ + rte_pktmbuf_free(ev.mbuf); + rte_atomic32_sub(total_events, 1); + } else { + ev.event_type = RTE_EVENT_TYPE_CPU; + ev.queue_id++; + ev.sched_type = + rte_rand() % (RTE_SCHED_TYPE_PARALLEL + 1); + ev.op = RTE_EVENT_OP_FORWARD; + rte_event_enqueue_burst(evdev, port, &ev, 1); + } + } + return 0; +} + +/* Queue based pipeline with maximum stages with random sched type */ +static int +test_multi_port_queue_max_stages_random_sched_type(void) +{ + return launch_multi_port_max_stages_random_sched_type( + worker_queue_based_pipeline_max_stages_rand_sched_type); +} + +static int +worker_mixed_pipeline_max_stages_rand_sched_type(void *arg) +{ + struct test_core_param *param = arg; + struct rte_event ev; + uint16_t valid_event; + uint8_t port = param->port; + uint32_t queue_count; + RTE_TEST_ASSERT_SUCCESS(rte_event_dev_attr_get(evdev, + RTE_EVENT_DEV_ATTR_QUEUE_COUNT, + &queue_count), "Queue count get failed"); + uint8_t nr_queues = queue_count; + rte_atomic32_t *total_events = param->total_events; + + while (rte_atomic32_read(total_events) > 0) { + valid_event = rte_event_dequeue_burst(evdev, port, &ev, 1, 0); + if (!valid_event) + continue; + + if (ev.queue_id == nr_queues - 1) { /* Last stage */ + rte_pktmbuf_free(ev.mbuf); + rte_atomic32_sub(total_events, 1); + } else { + ev.event_type = RTE_EVENT_TYPE_CPU; + ev.queue_id++; + ev.sub_event_type = rte_rand() % 256; + ev.sched_type = + rte_rand() % (RTE_SCHED_TYPE_PARALLEL + 1); + ev.op = RTE_EVENT_OP_FORWARD; + rte_event_enqueue_burst(evdev, port, &ev, 1); + } + } + return 0; +} + +/* 
Queue and flow based pipeline with maximum stages with random sched type */ +static int +test_multi_port_mixed_max_stages_random_sched_type(void) +{ + return launch_multi_port_max_stages_random_sched_type( + worker_mixed_pipeline_max_stages_rand_sched_type); +} + +static int +worker_ordered_flow_producer(void *arg) +{ + struct test_core_param *param = arg; + uint8_t port = param->port; + struct rte_mbuf *m; + int counter = 0; + + while (counter < NUM_PACKETS) { + m = rte_pktmbuf_alloc(eventdev_test_mempool); + if (m == NULL) + continue; + + m->seqn = counter++; + + struct rte_event ev = {.event = 0, .u64 = 0}; + + ev.flow_id = 0x1; /* Generate a fat flow */ + ev.sub_event_type = 0; + /* Inject the new event */ + ev.op = RTE_EVENT_OP_NEW; + ev.event_type = RTE_EVENT_TYPE_CPU; + ev.sched_type = RTE_SCHED_TYPE_ORDERED; + ev.queue_id = 0; + ev.mbuf = m; + rte_event_enqueue_burst(evdev, port, &ev, 1); + } + + return 0; +} + +static inline int +test_producer_consumer_ingress_order_test(int (*fn)(void *)) +{ + uint32_t nr_ports; + + RTE_TEST_ASSERT_SUCCESS(rte_event_dev_attr_get(evdev, + RTE_EVENT_DEV_ATTR_PORT_COUNT, + &nr_ports), "Port count get failed"); + nr_ports = RTE_MIN(nr_ports, rte_lcore_count() - 1); + + if (rte_lcore_count() < 3 || nr_ports < 2) { + ssovf_log_dbg("### Not enough cores for %s test.", __func__); + return 0; + } + + launch_workers_and_wait(worker_ordered_flow_producer, fn, + NUM_PACKETS, nr_ports, RTE_SCHED_TYPE_ATOMIC); + /* Check the events order maintained or not */ + return seqn_list_check(NUM_PACKETS); +} + +/* Flow based producer consumer ingress order test */ +static int +test_flow_producer_consumer_ingress_order_test(void) +{ + return test_producer_consumer_ingress_order_test( + worker_flow_based_pipeline); +} + +/* Queue based producer consumer ingress order test */ +static int +test_queue_producer_consumer_ingress_order_test(void) +{ + return test_producer_consumer_ingress_order_test( + worker_group_based_pipeline); +} + +static void octeontx_test_run(int (*setup)(void), void (*tdown)(void), + int (*test)(void), const char *name) +{ + if (setup() < 0) { + ssovf_log_selftest("Error setting up test %s", name); + unsupported++; + } else { + if (test() < 0) { + failed++; + ssovf_log_selftest("%s Failed", name); + } else { + passed++; + ssovf_log_selftest("%s Passed", name); + } + } + + total++; + tdown(); +} + +int +test_eventdev_octeontx(void) +{ + testsuite_setup(); + + OCTEONTX_TEST_RUN(eventdev_setup, eventdev_teardown, + test_simple_enqdeq_ordered); + OCTEONTX_TEST_RUN(eventdev_setup, eventdev_teardown, + test_simple_enqdeq_atomic); + OCTEONTX_TEST_RUN(eventdev_setup, eventdev_teardown, + test_simple_enqdeq_parallel); + OCTEONTX_TEST_RUN(eventdev_setup, eventdev_teardown, + test_multi_queue_enq_single_port_deq); + OCTEONTX_TEST_RUN(eventdev_setup, eventdev_teardown, + test_dev_stop_flush); + OCTEONTX_TEST_RUN(eventdev_setup, eventdev_teardown, + test_multi_queue_enq_multi_port_deq); + OCTEONTX_TEST_RUN(eventdev_setup, eventdev_teardown, + test_queue_to_port_single_link); + OCTEONTX_TEST_RUN(eventdev_setup, eventdev_teardown, + test_queue_to_port_multi_link); + OCTEONTX_TEST_RUN(eventdev_setup, eventdev_teardown, + test_multi_port_flow_ordered_to_atomic); + OCTEONTX_TEST_RUN(eventdev_setup, eventdev_teardown, + test_multi_port_flow_ordered_to_ordered); + OCTEONTX_TEST_RUN(eventdev_setup, eventdev_teardown, + test_multi_port_flow_ordered_to_parallel); + OCTEONTX_TEST_RUN(eventdev_setup, eventdev_teardown, + test_multi_port_flow_atomic_to_atomic); + 
OCTEONTX_TEST_RUN(eventdev_setup, eventdev_teardown, + test_multi_port_flow_atomic_to_ordered); + OCTEONTX_TEST_RUN(eventdev_setup, eventdev_teardown, + test_multi_port_flow_atomic_to_parallel); + OCTEONTX_TEST_RUN(eventdev_setup, eventdev_teardown, + test_multi_port_flow_parallel_to_atomic); + OCTEONTX_TEST_RUN(eventdev_setup, eventdev_teardown, + test_multi_port_flow_parallel_to_ordered); + OCTEONTX_TEST_RUN(eventdev_setup, eventdev_teardown, + test_multi_port_flow_parallel_to_parallel); + OCTEONTX_TEST_RUN(eventdev_setup, eventdev_teardown, + test_multi_port_queue_ordered_to_atomic); + OCTEONTX_TEST_RUN(eventdev_setup, eventdev_teardown, + test_multi_port_queue_ordered_to_ordered); + OCTEONTX_TEST_RUN(eventdev_setup, eventdev_teardown, + test_multi_port_queue_ordered_to_parallel); + OCTEONTX_TEST_RUN(eventdev_setup, eventdev_teardown, + test_multi_port_queue_atomic_to_atomic); + OCTEONTX_TEST_RUN(eventdev_setup, eventdev_teardown, + test_multi_port_queue_atomic_to_ordered); + OCTEONTX_TEST_RUN(eventdev_setup, eventdev_teardown, + test_multi_port_queue_atomic_to_parallel); + OCTEONTX_TEST_RUN(eventdev_setup, eventdev_teardown, + test_multi_port_queue_parallel_to_atomic); + OCTEONTX_TEST_RUN(eventdev_setup, eventdev_teardown, + test_multi_port_queue_parallel_to_ordered); + OCTEONTX_TEST_RUN(eventdev_setup, eventdev_teardown, + test_multi_port_queue_parallel_to_parallel); + OCTEONTX_TEST_RUN(eventdev_setup, eventdev_teardown, + test_multi_port_flow_max_stages_random_sched_type); + OCTEONTX_TEST_RUN(eventdev_setup, eventdev_teardown, + test_multi_port_queue_max_stages_random_sched_type); + OCTEONTX_TEST_RUN(eventdev_setup, eventdev_teardown, + test_multi_port_mixed_max_stages_random_sched_type); + OCTEONTX_TEST_RUN(eventdev_setup, eventdev_teardown, + test_flow_producer_consumer_ingress_order_test); + OCTEONTX_TEST_RUN(eventdev_setup, eventdev_teardown, + test_queue_producer_consumer_ingress_order_test); + OCTEONTX_TEST_RUN(eventdev_setup_priority, eventdev_teardown, + test_multi_queue_priority); + OCTEONTX_TEST_RUN(eventdev_setup_dequeue_timeout, eventdev_teardown, + test_multi_port_flow_ordered_to_atomic); + OCTEONTX_TEST_RUN(eventdev_setup_dequeue_timeout, eventdev_teardown, + test_multi_port_queue_ordered_to_atomic); + + ssovf_log_selftest("Total tests : %d", total); + ssovf_log_selftest("Passed : %d", passed); + ssovf_log_selftest("Failed : %d", failed); + ssovf_log_selftest("Not supported : %d", unsupported); + + testsuite_teardown(); + + if (failed) + return -1; + + return 0; +} diff --git a/src/spdk/dpdk/drivers/event/octeontx/ssovf_probe.c b/src/spdk/dpdk/drivers/event/octeontx/ssovf_probe.c new file mode 100644 index 000000000..4da7d1ae4 --- /dev/null +++ b/src/spdk/dpdk/drivers/event/octeontx/ssovf_probe.c @@ -0,0 +1,289 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017 Cavium, Inc + */ + +#include <rte_atomic.h> +#include <rte_common.h> +#include <rte_eal.h> +#include <rte_io.h> +#include <rte_pci.h> +#include <rte_bus_pci.h> + +#include "octeontx_mbox.h" +#include "ssovf_evdev.h" + +#define PCI_VENDOR_ID_CAVIUM 0x177D +#define PCI_DEVICE_ID_OCTEONTX_SSOGRP_VF 0xA04B +#define PCI_DEVICE_ID_OCTEONTX_SSOWS_VF 0xA04D + +#define SSO_MAX_VHGRP (64) +#define SSO_MAX_VHWS (32) + +struct ssovf_res { + uint16_t domain; + uint16_t vfid; + void *bar0; + void *bar2; +}; + +struct ssowvf_res { + uint16_t domain; + uint16_t vfid; + void *bar0; + void *bar2; + void *bar4; +}; + +struct ssowvf_identify { + uint16_t domain; + uint16_t vfid; +}; + +struct ssodev { + uint8_t 
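+	/*
+	 * Filled in at PCI probe time: one entry per SSO group VF (event
+	 * queue) and per SSOW VF (event port). ssovf_info() later checks
+	 * that the vfids are contiguous and that all VFs belong to the
+	 * same domain before the rest of the driver uses them.
+	 */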
total_ssovfs; + uint8_t total_ssowvfs; + struct ssovf_res grp[SSO_MAX_VHGRP]; + struct ssowvf_res hws[SSO_MAX_VHWS]; +}; + +static struct ssodev sdev; + +/* Interface functions */ +int +ssovf_info(struct ssovf_info *info) +{ + uint8_t i; + uint16_t domain; + + if (rte_eal_process_type() != RTE_PROC_PRIMARY || info == NULL) + return -EINVAL; + + if (sdev.total_ssovfs == 0 || sdev.total_ssowvfs == 0) + return -ENODEV; + + domain = sdev.grp[0].domain; + for (i = 0; i < sdev.total_ssovfs; i++) { + /* Check vfid's are contiguous and belong to same domain */ + if (sdev.grp[i].vfid != i || + sdev.grp[i].bar0 == NULL || + sdev.grp[i].domain != domain) { + mbox_log_err("GRP error, vfid=%d/%d domain=%d/%d %p", + i, sdev.grp[i].vfid, + domain, sdev.grp[i].domain, + sdev.grp[i].bar0); + return -EINVAL; + } + } + + for (i = 0; i < sdev.total_ssowvfs; i++) { + /* Check vfid's are contiguous and belong to same domain */ + if (sdev.hws[i].vfid != i || + sdev.hws[i].bar0 == NULL || + sdev.hws[i].domain != domain) { + mbox_log_err("HWS error, vfid=%d/%d domain=%d/%d %p", + i, sdev.hws[i].vfid, + domain, sdev.hws[i].domain, + sdev.hws[i].bar0); + return -EINVAL; + } + } + + info->domain = domain; + info->total_ssovfs = sdev.total_ssovfs; + info->total_ssowvfs = sdev.total_ssowvfs; + return 0; +} + +void* +ssovf_bar(enum ssovf_type type, uint8_t id, uint8_t bar) +{ + if (rte_eal_process_type() != RTE_PROC_PRIMARY || + type > OCTEONTX_SSO_HWS) + return NULL; + + if (type == OCTEONTX_SSO_GROUP) { + if (id >= sdev.total_ssovfs) + return NULL; + } else { + if (id >= sdev.total_ssowvfs) + return NULL; + } + + if (type == OCTEONTX_SSO_GROUP) { + switch (bar) { + case 0: + return sdev.grp[id].bar0; + case 2: + return sdev.grp[id].bar2; + default: + return NULL; + } + } else { + switch (bar) { + case 0: + return sdev.hws[id].bar0; + case 2: + return sdev.hws[id].bar2; + case 4: + return sdev.hws[id].bar4; + default: + return NULL; + } + } +} + +/* SSOWVF pcie device aka event port probe */ + +static int +ssowvf_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) +{ + uint16_t vfid; + struct ssowvf_res *res; + struct ssowvf_identify *id; + uint8_t *ram_mbox_base; + + RTE_SET_USED(pci_drv); + + /* For secondary processes, the primary has done all the work */ + if (rte_eal_process_type() != RTE_PROC_PRIMARY) + return 0; + + if (pci_dev->mem_resource[0].addr == NULL || + pci_dev->mem_resource[2].addr == NULL || + pci_dev->mem_resource[4].addr == NULL) { + mbox_log_err("Empty bars %p %p %p", + pci_dev->mem_resource[0].addr, + pci_dev->mem_resource[2].addr, + pci_dev->mem_resource[4].addr); + return -ENODEV; + } + + if (pci_dev->mem_resource[4].len != SSOW_BAR4_LEN) { + mbox_log_err("Bar4 len mismatch %d != %d", + SSOW_BAR4_LEN, (int)pci_dev->mem_resource[4].len); + return -EINVAL; + } + + id = pci_dev->mem_resource[4].addr; + vfid = id->vfid; + if (vfid >= SSO_MAX_VHWS) { + mbox_log_err("Invalid vfid(%d/%d)", vfid, SSO_MAX_VHWS); + return -EINVAL; + } + + res = &sdev.hws[vfid]; + res->vfid = vfid; + res->bar0 = pci_dev->mem_resource[0].addr; + res->bar2 = pci_dev->mem_resource[2].addr; + res->bar4 = pci_dev->mem_resource[4].addr; + res->domain = id->domain; + + sdev.total_ssowvfs++; + if (vfid == 0) { + ram_mbox_base = ssovf_bar(OCTEONTX_SSO_HWS, 0, 4); + if (octeontx_mbox_set_ram_mbox_base(ram_mbox_base, + res->domain)) { + mbox_log_err("Invalid Failed to set ram mbox base"); + return -EINVAL; + } + } + + rte_wmb(); + mbox_log_dbg("Domain=%d hws=%d total_ssowvfs=%d", res->domain, + res->vfid, 
sdev.total_ssowvfs); + return 0; +} + +static const struct rte_pci_id pci_ssowvf_map[] = { + { + RTE_PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, + PCI_DEVICE_ID_OCTEONTX_SSOWS_VF) + }, + { + .vendor_id = 0, + }, +}; + +static struct rte_pci_driver pci_ssowvf = { + .id_table = pci_ssowvf_map, + .drv_flags = RTE_PCI_DRV_NEED_MAPPING, + .probe = ssowvf_probe, +}; + +RTE_PMD_REGISTER_PCI(octeontx_ssowvf, pci_ssowvf); + +/* SSOVF pcie device aka event queue probe */ + +static int +ssovf_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) +{ + uint64_t val; + uint16_t vfid; + uint8_t *idreg; + struct ssovf_res *res; + uint8_t *reg; + + RTE_SET_USED(pci_drv); + + /* For secondary processes, the primary has done all the work */ + if (rte_eal_process_type() != RTE_PROC_PRIMARY) + return 0; + + if (pci_dev->mem_resource[0].addr == NULL || + pci_dev->mem_resource[2].addr == NULL) { + mbox_log_err("Empty bars %p %p", + pci_dev->mem_resource[0].addr, + pci_dev->mem_resource[2].addr); + return -ENODEV; + } + idreg = pci_dev->mem_resource[0].addr; + idreg += SSO_VHGRP_AQ_THR; + val = rte_read64(idreg); + + /* Write back the default value of aq_thr */ + rte_write64((1ULL << 33) - 1, idreg); + vfid = (val >> 16) & 0xffff; + if (vfid >= SSO_MAX_VHGRP) { + mbox_log_err("Invalid vfid (%d/%d)", vfid, SSO_MAX_VHGRP); + return -EINVAL; + } + + res = &sdev.grp[vfid]; + res->vfid = vfid; + res->bar0 = pci_dev->mem_resource[0].addr; + res->bar2 = pci_dev->mem_resource[2].addr; + res->domain = val & 0xffff; + + sdev.total_ssovfs++; + if (vfid == 0) { + reg = ssovf_bar(OCTEONTX_SSO_GROUP, 0, 0); + reg += SSO_VHGRP_PF_MBOX(1); + if (octeontx_mbox_set_reg(reg, res->domain)) { + mbox_log_err("Invalid Failed to set mbox_reg"); + return -EINVAL; + } + } + + rte_wmb(); + mbox_log_dbg("Domain=%d group=%d total_ssovfs=%d", res->domain, + res->vfid, sdev.total_ssovfs); + return 0; +} + +static const struct rte_pci_id pci_ssovf_map[] = { + { + RTE_PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, + PCI_DEVICE_ID_OCTEONTX_SSOGRP_VF) + }, + { + .vendor_id = 0, + }, +}; + +static struct rte_pci_driver pci_ssovf = { + .id_table = pci_ssovf_map, + .drv_flags = RTE_PCI_DRV_NEED_MAPPING, + .probe = ssovf_probe, +}; + +RTE_PMD_REGISTER_PCI(octeontx_ssovf, pci_ssovf); diff --git a/src/spdk/dpdk/drivers/event/octeontx/ssovf_worker.c b/src/spdk/dpdk/drivers/event/octeontx/ssovf_worker.c new file mode 100644 index 000000000..d2d5eea8f --- /dev/null +++ b/src/spdk/dpdk/drivers/event/octeontx/ssovf_worker.c @@ -0,0 +1,494 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017 Cavium, Inc + */ + +#include "ssovf_worker.h" + +static __rte_always_inline void +ssows_new_event(struct ssows *ws, const struct rte_event *ev) +{ + const uint64_t event_ptr = ev->u64; + const uint32_t tag = (uint32_t)ev->event; + const uint8_t new_tt = ev->sched_type; + const uint8_t grp = ev->queue_id; + + ssows_add_work(ws, event_ptr, tag, new_tt, grp); +} + +static __rte_always_inline void +ssows_fwd_swtag(struct ssows *ws, const struct rte_event *ev, const uint8_t grp) +{ + const uint8_t cur_tt = ws->cur_tt; + const uint8_t new_tt = ev->sched_type; + const uint32_t tag = (uint32_t)ev->event; + /* + * cur_tt/new_tt SSO_SYNC_ORDERED SSO_SYNC_ATOMIC SSO_SYNC_UNTAGGED + * + * SSO_SYNC_ORDERED norm norm untag + * SSO_SYNC_ATOMIC norm norm untag + * SSO_SYNC_UNTAGGED full full NOOP + */ + if (unlikely(cur_tt == SSO_SYNC_UNTAGGED)) { + if (new_tt != SSO_SYNC_UNTAGGED) { + ssows_swtag_full(ws, ev->u64, tag, + new_tt, grp); + } + } else { + if (likely(new_tt != 
SSO_SYNC_UNTAGGED)) + ssows_swtag_norm(ws, tag, new_tt); + else + ssows_swtag_untag(ws); + } + ws->swtag_req = 1; +} + +#define OCT_EVENT_TYPE_GRP_FWD (RTE_EVENT_TYPE_MAX - 1) + +static __rte_always_inline void +ssows_fwd_group(struct ssows *ws, const struct rte_event *ev, const uint8_t grp) +{ + const uint64_t event_ptr = ev->u64; + const uint32_t tag = (uint32_t)ev->event; + const uint8_t cur_tt = ws->cur_tt; + const uint8_t new_tt = ev->sched_type; + + if (cur_tt == SSO_SYNC_ORDERED) { + /* Create unique tag based on custom event type and new grp */ + uint32_t newtag = OCT_EVENT_TYPE_GRP_FWD << 28; + + newtag |= grp << 20; + newtag |= tag; + ssows_swtag_norm(ws, newtag, SSO_SYNC_ATOMIC); + rte_smp_wmb(); + ssows_swtag_wait(ws); + } else { + rte_smp_wmb(); + } + ssows_add_work(ws, event_ptr, tag, new_tt, grp); +} + +static __rte_always_inline void +ssows_forward_event(struct ssows *ws, const struct rte_event *ev) +{ + const uint8_t grp = ev->queue_id; + + /* Group hasn't changed, Use SWTAG to forward the event */ + if (ws->cur_grp == grp) + ssows_fwd_swtag(ws, ev, grp); + else + /* + * Group has been changed for group based work pipelining, + * Use deschedule/add_work operation to transfer the event to + * new group/core + */ + ssows_fwd_group(ws, ev, grp); +} + +static __rte_always_inline void +ssows_release_event(struct ssows *ws) +{ + if (likely(ws->cur_tt != SSO_SYNC_UNTAGGED)) + ssows_swtag_untag(ws); +} + +#define R(name, f2, f1, f0, flags) \ +static uint16_t __rte_noinline __rte_hot \ +ssows_deq_ ##name(void *port, struct rte_event *ev, uint64_t timeout_ticks) \ +{ \ + struct ssows *ws = port; \ + \ + RTE_SET_USED(timeout_ticks); \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + ssows_swtag_wait(ws); \ + return 1; \ + } else { \ + return ssows_get_work(ws, ev, flags); \ + } \ +} \ + \ +static uint16_t __rte_hot \ +ssows_deq_burst_ ##name(void *port, struct rte_event ev[], \ + uint16_t nb_events, uint64_t timeout_ticks) \ +{ \ + RTE_SET_USED(nb_events); \ + \ + return ssows_deq_ ##name(port, ev, timeout_ticks); \ +} \ + \ +static uint16_t __rte_hot \ +ssows_deq_timeout_ ##name(void *port, struct rte_event *ev, \ + uint64_t timeout_ticks) \ +{ \ + struct ssows *ws = port; \ + uint64_t iter; \ + uint16_t ret = 1; \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + ssows_swtag_wait(ws); \ + } else { \ + ret = ssows_get_work(ws, ev, flags); \ + for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) \ + ret = ssows_get_work(ws, ev, flags); \ + } \ + return ret; \ +} \ + \ +static uint16_t __rte_hot \ +ssows_deq_timeout_burst_ ##name(void *port, struct rte_event ev[], \ + uint16_t nb_events, uint64_t timeout_ticks) \ +{ \ + RTE_SET_USED(nb_events); \ + \ + return ssows_deq_timeout_ ##name(port, ev, timeout_ticks); \ +} + +SSO_RX_ADPTR_ENQ_FASTPATH_FUNC +#undef R + +__rte_always_inline uint16_t __rte_hot +ssows_enq(void *port, const struct rte_event *ev) +{ + struct ssows *ws = port; + uint16_t ret = 1; + + switch (ev->op) { + case RTE_EVENT_OP_NEW: + rte_smp_wmb(); + ssows_new_event(ws, ev); + break; + case RTE_EVENT_OP_FORWARD: + ssows_forward_event(ws, ev); + break; + case RTE_EVENT_OP_RELEASE: + ssows_release_event(ws); + break; + default: + ret = 0; + } + return ret; +} + +uint16_t __rte_hot +ssows_enq_burst(void *port, const struct rte_event ev[], uint16_t nb_events) +{ + RTE_SET_USED(nb_events); + return ssows_enq(port, ev); +} + +uint16_t __rte_hot +ssows_enq_new_burst(void *port, const struct rte_event ev[], uint16_t nb_events) +{ + uint16_t i; + struct ssows *ws = 
port; + + rte_smp_wmb(); + for (i = 0; i < nb_events; i++) + ssows_new_event(ws, &ev[i]); + + return nb_events; +} + +uint16_t __rte_hot +ssows_enq_fwd_burst(void *port, const struct rte_event ev[], uint16_t nb_events) +{ + struct ssows *ws = port; + RTE_SET_USED(nb_events); + + ssows_forward_event(ws, ev); + + return 1; +} + +void +ssows_flush_events(struct ssows *ws, uint8_t queue_id, + ssows_handle_event_t fn, void *arg) +{ + uint32_t reg_off; + struct rte_event ev; + uint64_t enable, aq_cnt = 1, cq_ds_cnt = 1; + uint64_t get_work0, get_work1; + uint64_t sched_type_queue; + uint8_t *base = ssovf_bar(OCTEONTX_SSO_GROUP, queue_id, 0); + + enable = ssovf_read64(base + SSO_VHGRP_QCTL); + if (!enable) + return; + + reg_off = SSOW_VHWS_OP_GET_WORK0; + reg_off |= 1 << 17; /* Grouped */ + reg_off |= 1 << 16; /* WAIT */ + reg_off |= queue_id << 4; /* INDEX_GGRP_MASK(group number) */ + while (aq_cnt || cq_ds_cnt) { + aq_cnt = ssovf_read64(base + SSO_VHGRP_AQ_CNT); + cq_ds_cnt = ssovf_read64(base + SSO_VHGRP_INT_CNT); + /* Extract cq and ds count */ + cq_ds_cnt &= 0x1FFF1FFF0000; + + ssovf_load_pair(get_work0, get_work1, ws->base + reg_off); + + sched_type_queue = (get_work0 >> 32) & 0xfff; + ws->cur_tt = sched_type_queue & 0x3; + ws->cur_grp = sched_type_queue >> 2; + sched_type_queue = sched_type_queue << 38; + ev.event = sched_type_queue | (get_work0 & 0xffffffff); + if (get_work1 && ev.event_type == RTE_EVENT_TYPE_ETHDEV) + ev.mbuf = ssovf_octeontx_wqe_to_pkt(get_work1, + (ev.event >> 20) & 0x7F, + OCCTX_RX_OFFLOAD_NONE | + OCCTX_RX_MULTI_SEG_F, + ws->lookup_mem); + else + ev.u64 = get_work1; + + if (fn != NULL && ev.u64 != 0) + fn(arg, ev); + } +} + +void +ssows_reset(struct ssows *ws) +{ + uint64_t tag; + uint64_t pend_tag; + uint8_t pend_tt; + uint8_t tt; + + tag = ssovf_read64(ws->base + SSOW_VHWS_TAG); + pend_tag = ssovf_read64(ws->base + SSOW_VHWS_PENDTAG); + + if (pend_tag & (1ULL << 63)) { /* Tagswitch pending */ + pend_tt = (pend_tag >> 32) & 0x3; + if (pend_tt == SSO_SYNC_ORDERED || pend_tt == SSO_SYNC_ATOMIC) + ssows_desched(ws); + } else { + tt = (tag >> 32) & 0x3; + if (tt == SSO_SYNC_ORDERED || tt == SSO_SYNC_ATOMIC) + ssows_swtag_untag(ws); + } +} + +static __rte_always_inline uint16_t +__sso_event_tx_adapter_enqueue(void *port, struct rte_event ev[], + uint16_t nb_events, uint64_t *cmd, + const uint16_t flag) +{ + uint16_t port_id; + uint16_t queue_id; + struct rte_mbuf *m; + struct rte_eth_dev *ethdev; + struct ssows *ws = port; + struct octeontx_txq *txq; + + switch (ev->sched_type) { + case SSO_SYNC_ORDERED: + ssows_swtag_norm(ws, ev->event, SSO_SYNC_ATOMIC); + rte_cio_wmb(); + ssows_swtag_wait(ws); + break; + case SSO_SYNC_UNTAGGED: + ssows_swtag_full(ws, ev->u64, ev->event, SSO_SYNC_ATOMIC, + ev->queue_id); + rte_cio_wmb(); + ssows_swtag_wait(ws); + break; + case SSO_SYNC_ATOMIC: + rte_cio_wmb(); + break; + } + + m = ev[0].mbuf; + port_id = m->port; + queue_id = rte_event_eth_tx_adapter_txq_get(m); + ethdev = &rte_eth_devices[port_id]; + txq = ethdev->data->tx_queues[queue_id]; + + return __octeontx_xmit_pkts(txq, &m, nb_events, cmd, flag); +} + +#define T(name, f3, f2, f1, f0, sz, flags) \ +static uint16_t __rte_noinline __rte_hot \ +sso_event_tx_adapter_enqueue_ ## name(void *port, struct rte_event ev[], \ + uint16_t nb_events) \ +{ \ + uint64_t cmd[sz]; \ + return __sso_event_tx_adapter_enqueue(port, ev, nb_events, cmd, \ + flags); \ +} + +SSO_TX_ADPTR_ENQ_FASTPATH_FUNC +#undef T + +void +ssovf_fastpath_fns_set(struct rte_eventdev *dev) +{ + struct ssovf_evdev *edev = 
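+	/*
+	 * Fastpath selection: the R()/T() macro lists expand one dequeue
+	 * and one Tx-adapter enqueue variant per offload combination, and
+	 * the tables below are indexed by individual flag bits, roughly
+	 *
+	 *   deq = tbl[!!(flags & VLAN_FLTR)][!!(flags & CSUM)][!!(flags & MSEG)];
+	 *
+	 * so each enabled combination gets a specialised worker function
+	 * with the unused checks compiled out.
+	 */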
ssovf_pmd_priv(dev); + + dev->enqueue = ssows_enq; + dev->enqueue_burst = ssows_enq_burst; + dev->enqueue_new_burst = ssows_enq_new_burst; + dev->enqueue_forward_burst = ssows_enq_fwd_burst; + + const event_tx_adapter_enqueue ssow_txa_enqueue[2][2][2][2] = { +#define T(name, f3, f2, f1, f0, sz, flags) \ + [f3][f2][f1][f0] = sso_event_tx_adapter_enqueue_ ##name, + +SSO_TX_ADPTR_ENQ_FASTPATH_FUNC +#undef T + }; + + dev->txa_enqueue = ssow_txa_enqueue + [!!(edev->tx_offload_flags & OCCTX_TX_OFFLOAD_MBUF_NOFF_F)] + [!!(edev->tx_offload_flags & OCCTX_TX_OFFLOAD_OL3_OL4_CSUM_F)] + [!!(edev->tx_offload_flags & OCCTX_TX_OFFLOAD_L3_L4_CSUM_F)] + [!!(edev->tx_offload_flags & OCCTX_TX_MULTI_SEG_F)]; + + dev->txa_enqueue_same_dest = dev->txa_enqueue; + + /* Assigning dequeue func pointers */ + const event_dequeue_t ssow_deq[2][2][2] = { +#define R(name, f2, f1, f0, flags) \ + [f2][f1][f0] = ssows_deq_ ##name, + +SSO_RX_ADPTR_ENQ_FASTPATH_FUNC +#undef R + }; + + dev->dequeue = ssow_deq + [!!(edev->rx_offload_flags & OCCTX_RX_VLAN_FLTR_F)] + [!!(edev->rx_offload_flags & OCCTX_RX_OFFLOAD_CSUM_F)] + [!!(edev->rx_offload_flags & OCCTX_RX_MULTI_SEG_F)]; + + const event_dequeue_burst_t ssow_deq_burst[2][2][2] = { +#define R(name, f2, f1, f0, flags) \ + [f2][f1][f0] = ssows_deq_burst_ ##name, + +SSO_RX_ADPTR_ENQ_FASTPATH_FUNC +#undef R + }; + + dev->dequeue_burst = ssow_deq_burst + [!!(edev->rx_offload_flags & OCCTX_RX_VLAN_FLTR_F)] + [!!(edev->rx_offload_flags & OCCTX_RX_OFFLOAD_CSUM_F)] + [!!(edev->rx_offload_flags & OCCTX_RX_MULTI_SEG_F)]; + + if (edev->is_timeout_deq) { + const event_dequeue_t ssow_deq_timeout[2][2][2] = { +#define R(name, f2, f1, f0, flags) \ + [f2][f1][f0] = ssows_deq_timeout_ ##name, + +SSO_RX_ADPTR_ENQ_FASTPATH_FUNC +#undef R + }; + + dev->dequeue = ssow_deq_timeout + [!!(edev->rx_offload_flags & OCCTX_RX_VLAN_FLTR_F)] + [!!(edev->rx_offload_flags & OCCTX_RX_OFFLOAD_CSUM_F)] + [!!(edev->rx_offload_flags & OCCTX_RX_MULTI_SEG_F)]; + + const event_dequeue_burst_t ssow_deq_timeout_burst[2][2][2] = { +#define R(name, f2, f1, f0, flags) \ + [f2][f1][f0] = ssows_deq_timeout_burst_ ##name, + +SSO_RX_ADPTR_ENQ_FASTPATH_FUNC +#undef R + }; + + dev->dequeue_burst = ssow_deq_timeout_burst + [!!(edev->rx_offload_flags & OCCTX_RX_VLAN_FLTR_F)] + [!!(edev->rx_offload_flags & OCCTX_RX_OFFLOAD_CSUM_F)] + [!!(edev->rx_offload_flags & OCCTX_RX_MULTI_SEG_F)]; + } +} + +static void +octeontx_create_rx_ol_flags_array(void *mem) +{ + uint16_t idx, errcode, errlev; + uint32_t val, *ol_flags; + + /* Skip ptype array memory */ + ol_flags = (uint32_t *)mem; + + for (idx = 0; idx < BIT(ERRCODE_ERRLEN_WIDTH); idx++) { + errcode = idx & 0xff; + errlev = (idx & 0x700) >> 8; + + val = PKT_RX_IP_CKSUM_UNKNOWN; + val |= PKT_RX_L4_CKSUM_UNKNOWN; + val |= PKT_RX_OUTER_L4_CKSUM_UNKNOWN; + + switch (errlev) { + case OCCTX_ERRLEV_RE: + if (errcode) { + val |= PKT_RX_IP_CKSUM_BAD; + val |= PKT_RX_L4_CKSUM_BAD; + } else { + val |= PKT_RX_IP_CKSUM_GOOD; + val |= PKT_RX_L4_CKSUM_GOOD; + } + break; + case OCCTX_ERRLEV_LC: + if (errcode == OCCTX_EC_IP4_CSUM) { + val |= PKT_RX_IP_CKSUM_BAD; + val |= PKT_RX_EIP_CKSUM_BAD; + } else { + val |= PKT_RX_IP_CKSUM_GOOD; + } + break; + case OCCTX_ERRLEV_LD: + /* Check if parsed packet is neither IPv4 or IPV6 */ + if (errcode == OCCTX_EC_IP4_NOT) + break; + val |= PKT_RX_IP_CKSUM_GOOD; + if (errcode == OCCTX_EC_L4_CSUM) + val |= PKT_RX_OUTER_L4_CKSUM_BAD; + else + val |= PKT_RX_L4_CKSUM_GOOD; + break; + case OCCTX_ERRLEV_LE: + if (errcode == OCCTX_EC_IP4_CSUM) + val |= PKT_RX_IP_CKSUM_BAD; 
+ else + val |= PKT_RX_IP_CKSUM_GOOD; + break; + case OCCTX_ERRLEV_LF: + /* Check if parsed packet is neither IPv4 or IPV6 */ + if (errcode == OCCTX_EC_IP4_NOT) + break; + val |= PKT_RX_IP_CKSUM_GOOD; + if (errcode == OCCTX_EC_L4_CSUM) + val |= PKT_RX_L4_CKSUM_BAD; + else + val |= PKT_RX_L4_CKSUM_GOOD; + break; + } + + ol_flags[idx] = val; + } +} + +void * +octeontx_fastpath_lookup_mem_get(void) +{ + const char name[] = OCCTX_FASTPATH_LOOKUP_MEM; + const struct rte_memzone *mz; + void *mem; + + mz = rte_memzone_lookup(name); + if (mz != NULL) + return mz->addr; + + /* Request for the first time */ + mz = rte_memzone_reserve_aligned(name, LOOKUP_ARRAY_SZ, + SOCKET_ID_ANY, 0, OCCTX_ALIGN); + if (mz != NULL) { + mem = mz->addr; + /* Form the rx ol_flags based on errcode */ + octeontx_create_rx_ol_flags_array(mem); + return mem; + } + return NULL; +} diff --git a/src/spdk/dpdk/drivers/event/octeontx/ssovf_worker.h b/src/spdk/dpdk/drivers/event/octeontx/ssovf_worker.h new file mode 100644 index 000000000..6b2fb9b81 --- /dev/null +++ b/src/spdk/dpdk/drivers/event/octeontx/ssovf_worker.h @@ -0,0 +1,254 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017 Cavium, Inc + */ + +#include <rte_common.h> +#include <rte_branch_prediction.h> + +#include <octeontx_mbox.h> + +#include "ssovf_evdev.h" +#include "octeontx_rxtx.h" + +/* Alignment */ +#define OCCTX_ALIGN 128 + +/* Fastpath lookup */ +#define OCCTX_FASTPATH_LOOKUP_MEM "octeontx_fastpath_lookup_mem" + +/* WQE's ERRCODE + ERRLEV (11 bits) */ +#define ERRCODE_ERRLEN_WIDTH 11 +#define ERR_ARRAY_SZ ((BIT(ERRCODE_ERRLEN_WIDTH)) *\ + sizeof(uint32_t)) + +#define LOOKUP_ARRAY_SZ (ERR_ARRAY_SZ) + +#define OCCTX_EC_IP4_NOT 0x41 +#define OCCTX_EC_IP4_CSUM 0x42 +#define OCCTX_EC_L4_CSUM 0x62 + +enum OCCTX_ERRLEV_E { + OCCTX_ERRLEV_RE = 0, + OCCTX_ERRLEV_LA = 1, + OCCTX_ERRLEV_LB = 2, + OCCTX_ERRLEV_LC = 3, + OCCTX_ERRLEV_LD = 4, + OCCTX_ERRLEV_LE = 5, + OCCTX_ERRLEV_LF = 6, + OCCTX_ERRLEV_LG = 7, +}; + +enum { + SSO_SYNC_ORDERED, + SSO_SYNC_ATOMIC, + SSO_SYNC_UNTAGGED, + SSO_SYNC_EMPTY +}; + +/* SSO Operations */ + +static __rte_always_inline uint32_t +ssovf_octeontx_rx_olflags_get(const void * const lookup_mem, const uint64_t in) +{ + const uint32_t * const ol_flags = (const uint32_t *)lookup_mem; + + return ol_flags[(in & 0x7ff)]; +} + +static __rte_always_inline void +ssovf_octeontx_wqe_xtract_mseg(octtx_wqe_t *wqe, + struct rte_mbuf *mbuf) +{ + octtx_pki_buflink_t *buflink; + rte_iova_t *iova_list; + uint8_t nb_segs; + uint64_t bytes_left = wqe->s.w1.len - wqe->s.w5.size; + + nb_segs = wqe->s.w0.bufs; + + buflink = (octtx_pki_buflink_t *)((uintptr_t)wqe->s.w3.addr - + sizeof(octtx_pki_buflink_t)); + + while (--nb_segs) { + iova_list = (rte_iova_t *)(uintptr_t)(buflink->w1.s.addr); + mbuf->next = (struct rte_mbuf *)(rte_iova_t *)(iova_list - 2) + - (OCTTX_PACKET_LATER_SKIP / 128); + mbuf = mbuf->next; + + mbuf->data_off = sizeof(octtx_pki_buflink_t); + + __mempool_check_cookies(mbuf->pool, (void **)&mbuf, 1, 1); + if (nb_segs == 1) + mbuf->data_len = bytes_left; + else + mbuf->data_len = buflink->w0.s.size; + + bytes_left = bytes_left - buflink->w0.s.size; + buflink = (octtx_pki_buflink_t *)(rte_iova_t *)(iova_list - 2); + + } +} + +static __rte_always_inline struct rte_mbuf * +ssovf_octeontx_wqe_to_pkt(uint64_t work, uint16_t port_info, + const uint16_t flag, const void *lookup_mem) +{ + struct rte_mbuf *mbuf; + octtx_wqe_t *wqe = (octtx_wqe_t *)(uintptr_t)work; + + /* Get mbuf from wqe */ + mbuf = (struct rte_mbuf *)((uintptr_t)wqe - 
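+	/*
+	 * The PKI hardware places the WQE a fixed OCTTX_PACKET_WQE_SKIP
+	 * bytes past the start of the mbuf header, so the mbuf is
+	 * recovered with plain pointer arithmetic instead of a lookup.
+	 */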
OCTTX_PACKET_WQE_SKIP); + rte_prefetch_non_temporal(mbuf); + mbuf->packet_type = + ptype_table[wqe->s.w2.lcty][wqe->s.w2.lety][wqe->s.w2.lfty]; + mbuf->data_off = RTE_PTR_DIFF(wqe->s.w3.addr, mbuf->buf_addr); + mbuf->ol_flags = 0; + mbuf->pkt_len = wqe->s.w1.len; + + if (!!(flag & OCCTX_RX_OFFLOAD_CSUM_F)) + mbuf->ol_flags = ssovf_octeontx_rx_olflags_get(lookup_mem, + wqe->w[2]); + + if (!!(flag & OCCTX_RX_MULTI_SEG_F)) { + mbuf->nb_segs = wqe->s.w0.bufs; + mbuf->data_len = wqe->s.w5.size; + ssovf_octeontx_wqe_xtract_mseg(wqe, mbuf); + } else { + mbuf->nb_segs = 1; + mbuf->data_len = mbuf->pkt_len; + } + + if (!!(flag & OCCTX_RX_VLAN_FLTR_F)) { + if (likely(wqe->s.w2.vv)) { + mbuf->ol_flags |= PKT_RX_VLAN; + mbuf->vlan_tci = + ntohs(*((uint16_t *)((char *)mbuf->buf_addr + + mbuf->data_off + wqe->s.w4.vlptr + 2))); + } + } + + mbuf->port = rte_octeontx_pchan_map[port_info >> 4][port_info & 0xF]; + rte_mbuf_refcnt_set(mbuf, 1); + + return mbuf; +} + +static __rte_always_inline void +ssovf_octeontx_wqe_free(uint64_t work) +{ + octtx_wqe_t *wqe = (octtx_wqe_t *)(uintptr_t)work; + uint8_t nb_segs = wqe->s.w0.bufs; + octtx_pki_buflink_t *buflink; + struct rte_mbuf *mbuf, *head; + rte_iova_t *iova_list; + + mbuf = (struct rte_mbuf *)((uintptr_t)wqe - OCTTX_PACKET_WQE_SKIP); + buflink = (octtx_pki_buflink_t *)((uintptr_t)wqe->s.w3.addr - + sizeof(octtx_pki_buflink_t)); + head = mbuf; + while (--nb_segs) { + iova_list = (rte_iova_t *)(uintptr_t)(buflink->w1.s.addr); + mbuf = (struct rte_mbuf *)(rte_iova_t *)(iova_list - 2) + - (OCTTX_PACKET_LATER_SKIP / 128); + + mbuf->next = NULL; + rte_pktmbuf_free(mbuf); + buflink = (octtx_pki_buflink_t *)(rte_iova_t *)(iova_list - 2); + } + rte_pktmbuf_free(head); +} + +static __rte_always_inline uint16_t +ssows_get_work(struct ssows *ws, struct rte_event *ev, const uint16_t flag) +{ + uint64_t get_work0, get_work1; + uint64_t sched_type_queue; + + ssovf_load_pair(get_work0, get_work1, ws->getwork); + + sched_type_queue = (get_work0 >> 32) & 0xfff; + ws->cur_tt = sched_type_queue & 0x3; + ws->cur_grp = sched_type_queue >> 2; + sched_type_queue = sched_type_queue << 38; + ev->event = sched_type_queue | (get_work0 & 0xffffffff); + + if (get_work1 && ev->event_type == RTE_EVENT_TYPE_ETHDEV) { + ev->mbuf = ssovf_octeontx_wqe_to_pkt(get_work1, + (ev->event >> 20) & 0x7F, flag, ws->lookup_mem); + } else if (unlikely((get_work0 & 0xFFFFFFFF) == 0xFFFFFFFF)) { + ssovf_octeontx_wqe_free(get_work1); + return 0; + } else { + ev->u64 = get_work1; + } + + return !!get_work1; +} + +static __rte_always_inline void +ssows_add_work(struct ssows *ws, const uint64_t event_ptr, const uint32_t tag, + const uint8_t new_tt, const uint8_t grp) +{ + uint64_t add_work0; + + add_work0 = tag | ((uint64_t)(new_tt) << 32); + ssovf_store_pair(add_work0, event_ptr, ws->grps[grp]); +} + +static __rte_always_inline void +ssows_swtag_full(struct ssows *ws, const uint64_t event_ptr, const uint32_t tag, + const uint8_t new_tt, const uint8_t grp) +{ + uint64_t swtag_full0; + + swtag_full0 = tag | ((uint64_t)(new_tt & 0x3) << 32) | + ((uint64_t)grp << 34); + ssovf_store_pair(swtag_full0, event_ptr, (ws->base + + SSOW_VHWS_OP_SWTAG_FULL0)); +} + +static __rte_always_inline void +ssows_swtag_desched(struct ssows *ws, uint32_t tag, uint8_t new_tt, uint8_t grp) +{ + uint64_t val; + + val = tag | ((uint64_t)(new_tt & 0x3) << 32) | ((uint64_t)grp << 34); + ssovf_write64(val, ws->base + SSOW_VHWS_OP_SWTAG_DESCHED); +} + +static __rte_always_inline void +ssows_swtag_norm(struct ssows *ws, uint32_t tag, 
uint8_t new_tt) +{ + uint64_t val; + + val = tag | ((uint64_t)(new_tt & 0x3) << 32); + ssovf_write64(val, ws->base + SSOW_VHWS_OP_SWTAG_NORM); +} + +static __rte_always_inline void +ssows_swtag_untag(struct ssows *ws) +{ + ssovf_write64(0, ws->base + SSOW_VHWS_OP_SWTAG_UNTAG); + ws->cur_tt = SSO_SYNC_UNTAGGED; +} + +static __rte_always_inline void +ssows_upd_wqp(struct ssows *ws, uint8_t grp, uint64_t event_ptr) +{ + ssovf_store_pair((uint64_t)grp << 34, event_ptr, (ws->base + + SSOW_VHWS_OP_UPD_WQP_GRP0)); +} + +static __rte_always_inline void +ssows_desched(struct ssows *ws) +{ + ssovf_write64(0, ws->base + SSOW_VHWS_OP_DESCHED); +} + +static __rte_always_inline void +ssows_swtag_wait(struct ssows *ws) +{ + /* Wait for the SWTAG/SWTAG_FULL operation */ + while (ssovf_read64(ws->base + SSOW_VHWS_SWTP)) + ; +} diff --git a/src/spdk/dpdk/drivers/event/octeontx/timvf_evdev.c b/src/spdk/dpdk/drivers/event/octeontx/timvf_evdev.c new file mode 100644 index 000000000..caa129087 --- /dev/null +++ b/src/spdk/dpdk/drivers/event/octeontx/timvf_evdev.c @@ -0,0 +1,405 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017 Cavium, Inc + */ + +#include "timvf_evdev.h" + +int otx_logtype_timvf; + +RTE_INIT(otx_timvf_init_log) +{ + otx_logtype_timvf = rte_log_register("pmd.event.octeontx.timer"); + if (otx_logtype_timvf >= 0) + rte_log_set_level(otx_logtype_timvf, RTE_LOG_NOTICE); +} + +struct __rte_packed timvf_mbox_dev_info { + uint64_t ring_active[4]; + uint64_t clk_freq; +}; + +/* Response messages */ +enum { + MBOX_RET_SUCCESS, + MBOX_RET_INVALID, + MBOX_RET_INTERNAL_ERR, +}; + +static int +timvf_mbox_dev_info_get(struct timvf_mbox_dev_info *info) +{ + struct octeontx_mbox_hdr hdr = {0}; + uint16_t len = sizeof(struct timvf_mbox_dev_info); + + hdr.coproc = TIM_COPROC; + hdr.msg = TIM_GET_DEV_INFO; + hdr.vfid = 0; /* TIM DEV is always 0. TIM RING ID changes. 
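+	 * Device-level mailbox requests always target vfid 0; per-ring
+	 * requests such as TIM_SET_RING_INFO and TIM_RING_START_CYC_GET
+	 * pass the ring id in hdr.vfid instead.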
*/ + + memset(info, 0, len); + return octeontx_mbox_send(&hdr, NULL, 0, info, len); +} + +static void +timvf_ring_info_get(const struct rte_event_timer_adapter *adptr, + struct rte_event_timer_adapter_info *adptr_info) +{ + struct timvf_ring *timr = adptr->data->adapter_priv; + adptr_info->max_tmo_ns = timr->max_tout; + adptr_info->min_resolution_ns = timr->tck_nsec; + rte_memcpy(&adptr_info->conf, &adptr->data->conf, + sizeof(struct rte_event_timer_adapter_conf)); +} + +static int +timvf_ring_conf_set(struct timvf_ctrl_reg *rctl, uint8_t ring_id) +{ + struct octeontx_mbox_hdr hdr = {0}; + uint16_t len = sizeof(struct timvf_ctrl_reg); + int ret; + + hdr.coproc = TIM_COPROC; + hdr.msg = TIM_SET_RING_INFO; + hdr.vfid = ring_id; + + ret = octeontx_mbox_send(&hdr, rctl, len, NULL, 0); + if (ret < 0 || hdr.res_code != MBOX_RET_SUCCESS) + return -EACCES; + return 0; +} + +static int +timvf_get_start_cyc(uint64_t *now, uint8_t ring_id) +{ + struct octeontx_mbox_hdr hdr = {0}; + + hdr.coproc = TIM_COPROC; + hdr.msg = TIM_RING_START_CYC_GET; + hdr.vfid = ring_id; + *now = 0; + return octeontx_mbox_send(&hdr, NULL, 0, now, sizeof(uint64_t)); +} + +static int +optimize_bucket_parameters(struct timvf_ring *timr) +{ + uint32_t hbkts; + uint32_t lbkts; + uint64_t tck_nsec; + + hbkts = rte_align32pow2(timr->nb_bkts); + tck_nsec = RTE_ALIGN_MUL_CEIL(timr->max_tout / (hbkts - 1), 10); + + if ((tck_nsec < 1000 || hbkts > TIM_MAX_BUCKETS)) + hbkts = 0; + + lbkts = rte_align32prevpow2(timr->nb_bkts); + tck_nsec = RTE_ALIGN_MUL_CEIL((timr->max_tout / (lbkts - 1)), 10); + + if ((tck_nsec < 1000 || hbkts > TIM_MAX_BUCKETS)) + lbkts = 0; + + if (!hbkts && !lbkts) + return 0; + + if (!hbkts) { + timr->nb_bkts = lbkts; + goto end; + } else if (!lbkts) { + timr->nb_bkts = hbkts; + goto end; + } + + timr->nb_bkts = (hbkts - timr->nb_bkts) < + (timr->nb_bkts - lbkts) ? hbkts : lbkts; +end: + timr->get_target_bkt = bkt_and; + timr->tck_nsec = RTE_ALIGN_MUL_CEIL((timr->max_tout / + (timr->nb_bkts - 1)), 10); + return 1; +} + +static int +timvf_ring_start(const struct rte_event_timer_adapter *adptr) +{ + int ret; + uint8_t use_fpa = 0; + uint64_t interval; + uintptr_t pool; + struct timvf_ctrl_reg rctrl; + struct timvf_mbox_dev_info dinfo; + struct timvf_ring *timr = adptr->data->adapter_priv; + + ret = timvf_mbox_dev_info_get(&dinfo); + if (ret < 0 || ret != sizeof(struct timvf_mbox_dev_info)) + return -EINVAL; + + /* Calculate the interval cycles according to clock source. */ + switch (timr->clk_src) { + case TIM_CLK_SRC_SCLK: + interval = NSEC2CLK(timr->tck_nsec, dinfo.clk_freq); + break; + case TIM_CLK_SRC_GPIO: + /* GPIO doesn't work on tck_nsec. 
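+		 * With an external GPIO clock the ring is advanced by the
+		 * clock source itself, so no SCLK-derived interval is
+		 * written to CTRL0 here.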
*/ + interval = 0; + break; + case TIM_CLK_SRC_GTI: + interval = NSEC2CLK(timr->tck_nsec, dinfo.clk_freq); + break; + case TIM_CLK_SRC_PTP: + interval = NSEC2CLK(timr->tck_nsec, dinfo.clk_freq); + break; + default: + timvf_log_err("Unsupported clock source configured %d", + timr->clk_src); + return -EINVAL; + } + + if (!strcmp(rte_mbuf_best_mempool_ops(), "octeontx_fpavf")) + use_fpa = 1; + + /*CTRL0 register.*/ + rctrl.rctrl0 = interval; + + /*CTRL1 register.*/ + rctrl.rctrl1 = (uint64_t)(timr->clk_src) << 51 | + 1ull << 48 /* LOCK_EN (Enable hw bucket lock mechanism) */ | + 1ull << 47 /* ENA */ | + 1ull << 44 /* ENA_LDWB */ | + (timr->nb_bkts - 1); + + rctrl.rctrl2 = (uint64_t)(TIM_CHUNK_SIZE / 16) << 40; + + if (use_fpa) { + pool = (uintptr_t)((struct rte_mempool *) + timr->chunk_pool)->pool_id; + ret = octeontx_fpa_bufpool_gaura(pool); + if (ret < 0) { + timvf_log_dbg("Unable to get gaura id"); + ret = -ENOMEM; + goto error; + } + timvf_write64((uint64_t)ret, + (uint8_t *)timr->vbar0 + TIM_VRING_AURA); + } else { + rctrl.rctrl1 |= 1ull << 43 /* ENA_DFB (Enable don't free) */; + } + + timvf_write64((uintptr_t)timr->bkt, + (uint8_t *)timr->vbar0 + TIM_VRING_BASE); + timvf_set_chunk_refill(timr, use_fpa); + if (timvf_ring_conf_set(&rctrl, timr->tim_ring_id)) { + ret = -EACCES; + goto error; + } + + if (timvf_get_start_cyc(&timr->ring_start_cyc, + timr->tim_ring_id) < 0) { + ret = -EACCES; + goto error; + } + timr->tck_int = NSEC2CLK(timr->tck_nsec, rte_get_timer_hz()); + timr->fast_div = rte_reciprocal_value_u64(timr->tck_int); + timvf_log_info("nb_bkts %d min_ns %"PRIu64" min_cyc %"PRIu64"" + " maxtmo %"PRIu64"\n", + timr->nb_bkts, timr->tck_nsec, interval, + timr->max_tout); + + return 0; +error: + rte_free(timr->bkt); + rte_mempool_free(timr->chunk_pool); + return ret; +} + +static int +timvf_ring_stop(const struct rte_event_timer_adapter *adptr) +{ + struct timvf_ring *timr = adptr->data->adapter_priv; + struct timvf_ctrl_reg rctrl = {0}; + rctrl.rctrl0 = timvf_read64((uint8_t *)timr->vbar0 + TIM_VRING_CTL0); + rctrl.rctrl1 = timvf_read64((uint8_t *)timr->vbar0 + TIM_VRING_CTL1); + rctrl.rctrl1 &= ~(1ull << 47); /* Disable */ + rctrl.rctrl2 = timvf_read64((uint8_t *)timr->vbar0 + TIM_VRING_CTL2); + + if (timvf_ring_conf_set(&rctrl, timr->tim_ring_id)) + return -EACCES; + return 0; +} + +static int +timvf_ring_create(struct rte_event_timer_adapter *adptr) +{ + char pool_name[25]; + int ret; + uint8_t tim_ring_id; + uint64_t nb_timers; + struct rte_event_timer_adapter_conf *rcfg = &adptr->data->conf; + struct timvf_ring *timr; + const char *mempool_ops; + unsigned int mp_flags = 0; + + tim_ring_id = timvf_get_ring(); + if (tim_ring_id == UINT8_MAX) + return -ENODEV; + + timr = rte_zmalloc("octeontx_timvf_priv", + sizeof(struct timvf_ring), 0); + if (timr == NULL) + return -ENOMEM; + + adptr->data->adapter_priv = timr; + /* Check config parameters. 
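+	 * Reject tick periods the ring cannot represent (zero or below
+	 * TIM_MIN_INTERVAL); all of the ring geometry derived below,
+	 * nb_bkts, nb_chunks and the chunk pool size, follows from
+	 * timer_tick_ns and max_tmo_ns.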
*/ + if ((rcfg->clk_src != RTE_EVENT_TIMER_ADAPTER_CPU_CLK) && + (!rcfg->timer_tick_ns || + rcfg->timer_tick_ns < TIM_MIN_INTERVAL)) { + timvf_log_err("Too low timer ticks"); + goto cfg_err; + } + + timr->clk_src = (int) rcfg->clk_src; + timr->tim_ring_id = tim_ring_id; + timr->tck_nsec = RTE_ALIGN_MUL_CEIL(rcfg->timer_tick_ns, 10); + timr->max_tout = rcfg->max_tmo_ns; + timr->nb_bkts = (timr->max_tout / timr->tck_nsec); + timr->vbar0 = timvf_bar(timr->tim_ring_id, 0); + timr->bkt_pos = (uint8_t *)timr->vbar0 + TIM_VRING_REL; + nb_timers = rcfg->nb_timers; + timr->get_target_bkt = bkt_mod; + + timr->nb_chunks = nb_timers / nb_chunk_slots; + + /* Try to optimize the bucket parameters. */ + if ((rcfg->flags & RTE_EVENT_TIMER_ADAPTER_F_ADJUST_RES) + && !rte_is_power_of_2(timr->nb_bkts)) { + if (optimize_bucket_parameters(timr)) { + timvf_log_info("Optimized configured values"); + timvf_log_dbg("nb_bkts : %"PRIu32"", timr->nb_bkts); + timvf_log_dbg("tck_nsec : %"PRIu64"", timr->tck_nsec); + } else + timvf_log_info("Failed to Optimize configured values"); + } + + if (rcfg->flags & RTE_EVENT_TIMER_ADAPTER_F_SP_PUT) { + mp_flags = MEMPOOL_F_SP_PUT | MEMPOOL_F_SC_GET; + timvf_log_info("Using single producer mode"); + } + + timr->bkt = rte_zmalloc("octeontx_timvf_bucket", + (timr->nb_bkts) * sizeof(struct tim_mem_bucket), + 0); + if (timr->bkt == NULL) + goto mem_err; + + snprintf(pool_name, sizeof(pool_name), "timvf_chunk_pool%d", + timr->tim_ring_id); + timr->chunk_pool = (void *)rte_mempool_create_empty(pool_name, + timr->nb_chunks, TIM_CHUNK_SIZE, 0, 0, rte_socket_id(), + mp_flags); + + if (!timr->chunk_pool) { + rte_free(timr->bkt); + timvf_log_err("Unable to create chunkpool."); + return -ENOMEM; + } + + mempool_ops = rte_mbuf_best_mempool_ops(); + ret = rte_mempool_set_ops_byname(timr->chunk_pool, + mempool_ops, NULL); + + if (ret != 0) { + timvf_log_err("Unable to set chunkpool ops."); + goto mem_err; + } + + ret = rte_mempool_populate_default(timr->chunk_pool); + if (ret < 0) { + timvf_log_err("Unable to set populate chunkpool."); + goto mem_err; + } + timvf_write64(0, (uint8_t *)timr->vbar0 + TIM_VRING_BASE); + timvf_write64(0, (uint8_t *)timr->vbar0 + TIM_VF_NRSPERR_INT); + timvf_write64(0, (uint8_t *)timr->vbar0 + TIM_VF_NRSPERR_INT_W1S); + timvf_write64(0x7, (uint8_t *)timr->vbar0 + TIM_VF_NRSPERR_ENA_W1C); + timvf_write64(0x7, (uint8_t *)timr->vbar0 + TIM_VF_NRSPERR_ENA_W1S); + + return 0; +mem_err: + rte_free(timr); + return -ENOMEM; +cfg_err: + rte_free(timr); + return -EINVAL; +} + +static int +timvf_ring_free(struct rte_event_timer_adapter *adptr) +{ + struct timvf_ring *timr = adptr->data->adapter_priv; + + rte_mempool_free(timr->chunk_pool); + rte_free(timr->bkt); + timvf_release_ring(timr->tim_ring_id); + rte_free(adptr->data->adapter_priv); + return 0; +} + +static int +timvf_stats_get(const struct rte_event_timer_adapter *adapter, + struct rte_event_timer_adapter_stats *stats) +{ + struct timvf_ring *timr = adapter->data->adapter_priv; + uint64_t bkt_cyc = rte_rdtsc() - timr->ring_start_cyc; + + stats->evtim_exp_count = timr->tim_arm_cnt; + stats->ev_enq_count = timr->tim_arm_cnt; + stats->adapter_tick_count = rte_reciprocal_divide_u64(bkt_cyc, + &timr->fast_div); + return 0; +} + +static int +timvf_stats_reset(const struct rte_event_timer_adapter *adapter) +{ + struct timvf_ring *timr = adapter->data->adapter_priv; + + timr->tim_arm_cnt = 0; + return 0; +} + +static struct rte_event_timer_adapter_ops timvf_ops = { + .init = timvf_ring_create, + .uninit = timvf_ring_free, + 
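+	/*
+	 * Only the control-path callbacks are set statically. The arm,
+	 * cancel and optional stats handlers are filled in at runtime by
+	 * timvf_timer_adapter_caps_get() below, based on the adapter's
+	 * single/multi producer flag and whether stats were requested.
+	 */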
.start = timvf_ring_start, + .stop = timvf_ring_stop, + .get_info = timvf_ring_info_get, +}; + +int +timvf_timer_adapter_caps_get(const struct rte_eventdev *dev, uint64_t flags, + uint32_t *caps, const struct rte_event_timer_adapter_ops **ops, + uint8_t enable_stats) +{ + RTE_SET_USED(dev); + + if (enable_stats) { + timvf_ops.stats_get = timvf_stats_get; + timvf_ops.stats_reset = timvf_stats_reset; + } + + if (flags & RTE_EVENT_TIMER_ADAPTER_F_SP_PUT) + timvf_ops.arm_burst = enable_stats ? + timvf_timer_arm_burst_sp_stats : + timvf_timer_arm_burst_sp; + else + timvf_ops.arm_burst = enable_stats ? + timvf_timer_arm_burst_mp_stats : + timvf_timer_arm_burst_mp; + + timvf_ops.arm_tmo_tick_burst = enable_stats ? + timvf_timer_arm_tmo_brst_stats : + timvf_timer_arm_tmo_brst; + timvf_ops.cancel_burst = timvf_timer_cancel_burst; + *caps = RTE_EVENT_TIMER_ADAPTER_CAP_INTERNAL_PORT; + *ops = &timvf_ops; + return 0; +} diff --git a/src/spdk/dpdk/drivers/event/octeontx/timvf_evdev.h b/src/spdk/dpdk/drivers/event/octeontx/timvf_evdev.h new file mode 100644 index 000000000..d0e5921db --- /dev/null +++ b/src/spdk/dpdk/drivers/event/octeontx/timvf_evdev.h @@ -0,0 +1,221 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017 Cavium, Inc + */ + +#ifndef __TIMVF_EVDEV_H__ +#define __TIMVF_EVDEV_H__ + +#include <rte_common.h> +#include <rte_cycles.h> +#include <rte_debug.h> +#include <rte_eal.h> +#include <rte_eventdev.h> +#include <rte_event_timer_adapter.h> +#include <rte_event_timer_adapter_pmd.h> +#include <rte_io.h> +#include <rte_lcore.h> +#include <rte_log.h> +#include <rte_malloc.h> +#include <rte_mbuf_pool_ops.h> +#include <rte_mempool.h> +#include <rte_memzone.h> +#include <rte_pci.h> +#include <rte_prefetch.h> +#include <rte_reciprocal.h> + +#include <octeontx_mbox.h> +#include <octeontx_fpavf.h> + +#define timvf_log(level, fmt, args...) \ + rte_log(RTE_LOG_ ## level, otx_logtype_timvf, \ + "[%s] %s() " fmt "\n", \ + RTE_STR(event_timer_octeontx), __func__, ## args) + +#define timvf_log_info(fmt, ...) timvf_log(INFO, fmt, ##__VA_ARGS__) +#define timvf_log_dbg(fmt, ...) timvf_log(DEBUG, fmt, ##__VA_ARGS__) +#define timvf_log_err(fmt, ...) 
timvf_log(ERR, fmt, ##__VA_ARGS__) +#define timvf_func_trace timvf_log_dbg + +#define TIM_COPROC (8) +#define TIM_GET_DEV_INFO (1) +#define TIM_GET_RING_INFO (2) +#define TIM_SET_RING_INFO (3) +#define TIM_RING_START_CYC_GET (4) + +#define TIM_MAX_RINGS (64) +#define TIM_DEV_PER_NODE (1) +#define TIM_VF_PER_DEV (64) +#define TIM_RING_PER_DEV (TIM_VF_PER_DEV) +#define TIM_RING_NODE_SHIFT (6) +#define TIM_RING_MASK ((TIM_RING_PER_DEV) - 1) +#define TIM_RING_INVALID (-1) + +#define TIM_MIN_INTERVAL (1E3) +#define TIM_MAX_INTERVAL ((1ull << 32) - 1) +#define TIM_MAX_BUCKETS (1ull << 20) +#define TIM_CHUNK_SIZE (4096) +#define TIM_MAX_CHUNKS_PER_BUCKET (1ull << 32) + +#define TIMVF_MAX_BURST (8) + +/* TIM VF Control/Status registers (CSRs): */ +/* VF_BAR0: */ +#define TIM_VF_NRSPERR_INT (0x0) +#define TIM_VF_NRSPERR_INT_W1S (0x8) +#define TIM_VF_NRSPERR_ENA_W1C (0x10) +#define TIM_VF_NRSPERR_ENA_W1S (0x18) +#define TIM_VRING_FR_RN_CYCLES (0x20) +#define TIM_VRING_FR_RN_GPIOS (0x28) +#define TIM_VRING_FR_RN_GTI (0x30) +#define TIM_VRING_FR_RN_PTP (0x38) +#define TIM_VRING_CTL0 (0x40) +#define TIM_VRING_CTL1 (0x50) +#define TIM_VRING_CTL2 (0x60) +#define TIM_VRING_BASE (0x100) +#define TIM_VRING_AURA (0x108) +#define TIM_VRING_REL (0x110) + +#define TIM_CTL1_W0_S_BUCKET 20 +#define TIM_CTL1_W0_M_BUCKET ((1ull << (40 - 20)) - 1) + +#define TIM_BUCKET_W1_S_NUM_ENTRIES (0) /*Shift*/ +#define TIM_BUCKET_W1_M_NUM_ENTRIES ((1ull << (32 - 0)) - 1) +#define TIM_BUCKET_W1_S_SBT (32) +#define TIM_BUCKET_W1_M_SBT ((1ull << (33 - 32)) - 1) +#define TIM_BUCKET_W1_S_HBT (33) +#define TIM_BUCKET_W1_M_HBT ((1ull << (34 - 33)) - 1) +#define TIM_BUCKET_W1_S_BSK (34) +#define TIM_BUCKET_W1_M_BSK ((1ull << (35 - 34)) - 1) +#define TIM_BUCKET_W1_S_LOCK (40) +#define TIM_BUCKET_W1_M_LOCK ((1ull << (48 - 40)) - 1) +#define TIM_BUCKET_W1_S_CHUNK_REMAINDER (48) +#define TIM_BUCKET_W1_M_CHUNK_REMAINDER ((1ull << (64 - 48)) - 1) + +#define TIM_BUCKET_SEMA \ + (TIM_BUCKET_CHUNK_REMAIN) + +#define TIM_BUCKET_CHUNK_REMAIN \ + (TIM_BUCKET_W1_M_CHUNK_REMAINDER << TIM_BUCKET_W1_S_CHUNK_REMAINDER) + +#define TIM_BUCKET_LOCK \ + (TIM_BUCKET_W1_M_LOCK << TIM_BUCKET_W1_S_LOCK) + +#define TIM_BUCKET_SEMA_WLOCK \ + (TIM_BUCKET_CHUNK_REMAIN | (1ull << TIM_BUCKET_W1_S_LOCK)) + +#define NSEC_PER_SEC 1E9 +#define NSEC2CLK(__ns, __freq) (((__ns) * (__freq)) / NSEC_PER_SEC) +#define CLK2NSEC(__clk, __freq) (((__clk) * NSEC_PER_SEC) / (__freq)) + +#define timvf_read64 rte_read64_relaxed +#define timvf_write64 rte_write64_relaxed + +#define TIMVF_ENABLE_STATS_ARG ("timvf_stats") + +extern int otx_logtype_timvf; +static const uint16_t nb_chunk_slots = (TIM_CHUNK_SIZE / 16) - 1; + +enum timvf_clk_src { + TIM_CLK_SRC_SCLK = RTE_EVENT_TIMER_ADAPTER_CPU_CLK, + TIM_CLK_SRC_GPIO = RTE_EVENT_TIMER_ADAPTER_EXT_CLK0, + TIM_CLK_SRC_GTI = RTE_EVENT_TIMER_ADAPTER_EXT_CLK1, + TIM_CLK_SRC_PTP = RTE_EVENT_TIMER_ADAPTER_EXT_CLK2, +}; + +/* TIM_MEM_BUCKET */ +struct tim_mem_bucket { + uint64_t first_chunk; + union { + uint64_t w1; + struct { + uint32_t nb_entry; + uint8_t sbt:1; + uint8_t hbt:1; + uint8_t bsk:1; + uint8_t rsvd:5; + uint8_t lock; + int16_t chunk_remainder; + }; + }; + uint64_t current_chunk; + uint64_t pad; +} __rte_packed __rte_aligned(8); + +struct tim_mem_entry { + uint64_t w0; + uint64_t wqe; +} __rte_packed; + +struct timvf_ctrl_reg { + uint64_t rctrl0; + uint64_t rctrl1; + uint64_t rctrl2; + uint8_t use_pmu; +} __rte_packed; + +struct timvf_ring; + +typedef uint32_t (*bkt_id)(const uint32_t bkt_tcks, const uint32_t nb_bkts); +typedef 
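+/*
+ * Chunk refill callback, chosen by timvf_set_chunk_refill(): either the
+ * octeontx FPA hardware pool or a plain rte_mempool supplies the next
+ * TIM_CHUNK_SIZE bucket chunk when the current one fills up.
+ */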
struct tim_mem_entry * (*refill_chunk)( + struct tim_mem_bucket * const bkt, + struct timvf_ring * const timr); + +struct timvf_ring { + bkt_id get_target_bkt; + refill_chunk refill_chunk; + struct rte_reciprocal_u64 fast_div; + uint64_t ring_start_cyc; + uint32_t nb_bkts; + struct tim_mem_bucket *bkt; + void *chunk_pool; + uint64_t tck_int; + volatile uint64_t tim_arm_cnt; + uint64_t tck_nsec; + void *vbar0; + void *bkt_pos; + uint64_t max_tout; + uint64_t nb_chunks; + enum timvf_clk_src clk_src; + uint16_t tim_ring_id; +} __rte_cache_aligned; + +static __rte_always_inline uint32_t +bkt_mod(const uint32_t rel_bkt, const uint32_t nb_bkts) +{ + return rel_bkt % nb_bkts; +} + +static __rte_always_inline uint32_t +bkt_and(uint32_t rel_bkt, uint32_t nb_bkts) +{ + return rel_bkt & (nb_bkts - 1); +} + +uint8_t timvf_get_ring(void); +void timvf_release_ring(uint8_t vfid); +void *timvf_bar(uint8_t id, uint8_t bar); +int timvf_timer_adapter_caps_get(const struct rte_eventdev *dev, uint64_t flags, + uint32_t *caps, const struct rte_event_timer_adapter_ops **ops, + uint8_t enable_stats); +uint16_t timvf_timer_cancel_burst(const struct rte_event_timer_adapter *adptr, + struct rte_event_timer **tim, const uint16_t nb_timers); +uint16_t timvf_timer_arm_burst_sp(const struct rte_event_timer_adapter *adptr, + struct rte_event_timer **tim, const uint16_t nb_timers); +uint16_t timvf_timer_arm_burst_sp_stats( + const struct rte_event_timer_adapter *adptr, + struct rte_event_timer **tim, const uint16_t nb_timers); +uint16_t timvf_timer_arm_burst_mp(const struct rte_event_timer_adapter *adptr, + struct rte_event_timer **tim, const uint16_t nb_timers); +uint16_t timvf_timer_arm_burst_mp_stats( + const struct rte_event_timer_adapter *adptr, + struct rte_event_timer **tim, const uint16_t nb_timers); +uint16_t timvf_timer_arm_tmo_brst(const struct rte_event_timer_adapter *adptr, + struct rte_event_timer **tim, const uint64_t timeout_tick, + const uint16_t nb_timers); +uint16_t timvf_timer_arm_tmo_brst_stats( + const struct rte_event_timer_adapter *adptr, + struct rte_event_timer **tim, const uint64_t timeout_tick, + const uint16_t nb_timers); +void timvf_set_chunk_refill(struct timvf_ring * const timr, uint8_t use_fpa); + +#endif /* __TIMVF_EVDEV_H__ */ diff --git a/src/spdk/dpdk/drivers/event/octeontx/timvf_probe.c b/src/spdk/dpdk/drivers/event/octeontx/timvf_probe.c new file mode 100644 index 000000000..59bba31e8 --- /dev/null +++ b/src/spdk/dpdk/drivers/event/octeontx/timvf_probe.c @@ -0,0 +1,165 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017 Cavium, Inc + */ + +#include <rte_eal.h> +#include <rte_io.h> +#include <rte_pci.h> +#include <rte_bus_pci.h> + +#include <octeontx_mbox.h> + +#include "ssovf_evdev.h" +#include "timvf_evdev.h" + +#ifndef PCI_VENDOR_ID_CAVIUM +#define PCI_VENDOR_ID_CAVIUM (0x177D) +#endif + +#define PCI_DEVICE_ID_OCTEONTX_TIM_VF (0xA051) +#define TIM_MAX_RINGS (64) + +struct timvf_res { + uint8_t in_use; + uint16_t domain; + uint16_t vfid; + void *bar0; + void *bar2; + void *bar4; +}; + +struct timdev { + uint8_t total_timvfs; + struct timvf_res rings[TIM_MAX_RINGS]; +}; + +static struct timdev tdev; + +uint8_t +timvf_get_ring(void) +{ + uint16_t global_domain = octeontx_get_global_domain(); + int i; + + for (i = 0; i < tdev.total_timvfs; i++) { + if (tdev.rings[i].domain != global_domain) + continue; + if (tdev.rings[i].in_use) + continue; + + tdev.rings[i].in_use = true; + return tdev.rings[i].vfid; + } + + return UINT8_MAX; +} + +void +timvf_release_ring(uint8_t 
tim_ring_id) +{ + uint16_t global_domain = octeontx_get_global_domain(); + int i; + + for (i = 0; i < tdev.total_timvfs; i++) { + if (tdev.rings[i].domain != global_domain) + continue; + if (tdev.rings[i].vfid == tim_ring_id) + tdev.rings[i].in_use = false; + } +} + +void* +timvf_bar(uint8_t vfid, uint8_t bar) +{ + uint16_t global_domain = octeontx_get_global_domain(); + struct timvf_res *res = NULL; + int i; + + if (rte_eal_process_type() != RTE_PROC_PRIMARY) + return NULL; + + for (i = 0; i < tdev.total_timvfs; i++) { + if (tdev.rings[i].domain != global_domain) + continue; + if (tdev.rings[i].vfid == vfid) + res = &tdev.rings[i]; + + } + + if (res == NULL) + return NULL; + + switch (bar) { + case 0: + return res->bar0; + case 4: + return res->bar4; + default: + return NULL; + } +} + +static int +timvf_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) +{ + uint64_t val; + uint16_t vfid; + struct timvf_res *res; + + RTE_SET_USED(pci_drv); + + /* For secondary processes, the primary has done all the work */ + if (rte_eal_process_type() != RTE_PROC_PRIMARY) + return 0; + + if (pci_dev->mem_resource[0].addr == NULL || + pci_dev->mem_resource[4].addr == NULL) { + timvf_log_err("Empty bars %p %p", + pci_dev->mem_resource[0].addr, + pci_dev->mem_resource[4].addr); + return -ENODEV; + } + + val = rte_read64((uint8_t *)pci_dev->mem_resource[0].addr + + 0x100 /* TIM_VRINGX_BASE */); + vfid = (val >> 23) & 0xff; + if (vfid >= TIM_MAX_RINGS) { + timvf_log_err("Invalid vfid(%d/%d)", vfid, TIM_MAX_RINGS); + return -EINVAL; + } + + res = &tdev.rings[tdev.total_timvfs]; + res->vfid = vfid; + res->bar0 = pci_dev->mem_resource[0].addr; + res->bar2 = pci_dev->mem_resource[2].addr; + res->bar4 = pci_dev->mem_resource[4].addr; + res->domain = (val >> 7) & 0xffff; + res->in_use = false; + tdev.total_timvfs++; + rte_wmb(); + + timvf_log_dbg("Domain=%d VFid=%d bar0 %p total_timvfs=%d", res->domain, + res->vfid, pci_dev->mem_resource[0].addr, + tdev.total_timvfs); + return 0; +} + + +static const struct rte_pci_id pci_timvf_map[] = { + { + RTE_PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, + PCI_DEVICE_ID_OCTEONTX_TIM_VF) + }, + { + .vendor_id = 0, + }, +}; + +static struct rte_pci_driver pci_timvf = { + .id_table = pci_timvf_map, + .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_NEED_IOVA_AS_VA, + .probe = timvf_probe, + .remove = NULL, +}; + +RTE_PMD_REGISTER_PCI(octeontx_timvf, pci_timvf); diff --git a/src/spdk/dpdk/drivers/event/octeontx/timvf_worker.c b/src/spdk/dpdk/drivers/event/octeontx/timvf_worker.c new file mode 100644 index 000000000..50790e199 --- /dev/null +++ b/src/spdk/dpdk/drivers/event/octeontx/timvf_worker.c @@ -0,0 +1,199 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017 Cavium, Inc + */ + +#include "timvf_worker.h" + +static inline int +timvf_timer_reg_checks(const struct timvf_ring * const timr, + struct rte_event_timer * const tim) +{ + if (unlikely(tim->state)) { + tim->state = RTE_EVENT_TIMER_ERROR; + rte_errno = EALREADY; + goto fail; + } + + if (unlikely(!tim->timeout_ticks || + tim->timeout_ticks >= timr->nb_bkts)) { + tim->state = tim->timeout_ticks ? 
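+			/*
+			 * timeout_ticks == 0 is reported as TOOEARLY, a
+			 * timeout at or beyond one full ring traversal
+			 * (nb_bkts buckets) as TOOLATE.
+			 */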
RTE_EVENT_TIMER_ERROR_TOOLATE + : RTE_EVENT_TIMER_ERROR_TOOEARLY; + rte_errno = EINVAL; + goto fail; + } + + return 0; +fail: + return -EINVAL; +} + +static inline void +timvf_format_event(const struct rte_event_timer * const tim, + struct tim_mem_entry * const entry) +{ + entry->w0 = (tim->ev.event & 0xFFC000000000) >> 6 | + (tim->ev.event & 0xFFFFFFFFF); + entry->wqe = tim->ev.u64; +} + +uint16_t +timvf_timer_cancel_burst(const struct rte_event_timer_adapter *adptr, + struct rte_event_timer **tim, const uint16_t nb_timers) +{ + RTE_SET_USED(adptr); + int ret; + uint16_t index; + + for (index = 0; index < nb_timers; index++) { + if (tim[index]->state == RTE_EVENT_TIMER_CANCELED) { + rte_errno = EALREADY; + break; + } + + if (tim[index]->state != RTE_EVENT_TIMER_ARMED) { + rte_errno = EINVAL; + break; + } + ret = timvf_rem_entry(tim[index]); + if (ret) { + rte_errno = -ret; + break; + } + } + return index; +} + +uint16_t +timvf_timer_arm_burst_sp(const struct rte_event_timer_adapter *adptr, + struct rte_event_timer **tim, const uint16_t nb_timers) +{ + int ret; + uint16_t index; + struct tim_mem_entry entry; + struct timvf_ring *timr = adptr->data->adapter_priv; + for (index = 0; index < nb_timers; index++) { + if (timvf_timer_reg_checks(timr, tim[index])) + break; + + timvf_format_event(tim[index], &entry); + ret = timvf_add_entry_sp(timr, tim[index]->timeout_ticks, + tim[index], &entry); + if (unlikely(ret)) { + rte_errno = -ret; + break; + } + } + + return index; +} + +uint16_t +timvf_timer_arm_burst_sp_stats(const struct rte_event_timer_adapter *adptr, + struct rte_event_timer **tim, const uint16_t nb_timers) +{ + uint16_t ret; + struct timvf_ring *timr = adptr->data->adapter_priv; + + ret = timvf_timer_arm_burst_sp(adptr, tim, nb_timers); + timr->tim_arm_cnt += ret; + + return ret; +} + +uint16_t +timvf_timer_arm_burst_mp(const struct rte_event_timer_adapter *adptr, + struct rte_event_timer **tim, const uint16_t nb_timers) +{ + int ret; + uint16_t index; + struct tim_mem_entry entry; + struct timvf_ring *timr = adptr->data->adapter_priv; + for (index = 0; index < nb_timers; index++) { + if (timvf_timer_reg_checks(timr, tim[index])) + break; + timvf_format_event(tim[index], &entry); + ret = timvf_add_entry_mp(timr, tim[index]->timeout_ticks, + tim[index], &entry); + if (unlikely(ret)) { + rte_errno = -ret; + break; + } + } + + return index; +} + +uint16_t +timvf_timer_arm_burst_mp_stats(const struct rte_event_timer_adapter *adptr, + struct rte_event_timer **tim, const uint16_t nb_timers) +{ + uint16_t ret; + struct timvf_ring *timr = adptr->data->adapter_priv; + + ret = timvf_timer_arm_burst_mp(adptr, tim, nb_timers); + timr->tim_arm_cnt += ret; + + return ret; +} + +uint16_t +timvf_timer_arm_tmo_brst(const struct rte_event_timer_adapter *adptr, + struct rte_event_timer **tim, const uint64_t timeout_tick, + const uint16_t nb_timers) +{ + int ret; + uint16_t set_timers = 0; + uint16_t idx; + uint16_t arr_idx = 0; + struct timvf_ring *timr = adptr->data->adapter_priv; + struct tim_mem_entry entry[TIMVF_MAX_BURST] __rte_cache_aligned; + + if (unlikely(!timeout_tick || timeout_tick >= timr->nb_bkts)) { + const enum rte_event_timer_state state = timeout_tick ? 
+ RTE_EVENT_TIMER_ERROR_TOOLATE : + RTE_EVENT_TIMER_ERROR_TOOEARLY; + for (idx = 0; idx < nb_timers; idx++) + tim[idx]->state = state; + rte_errno = EINVAL; + return 0; + } + + while (arr_idx < nb_timers) { + for (idx = 0; idx < TIMVF_MAX_BURST && (arr_idx < nb_timers); + idx++, arr_idx++) { + timvf_format_event(tim[arr_idx], &entry[idx]); + } + ret = timvf_add_entry_brst(timr, timeout_tick, &tim[set_timers], + entry, idx); + set_timers += ret; + if (ret != idx) + break; + } + + return set_timers; +} + + +uint16_t +timvf_timer_arm_tmo_brst_stats(const struct rte_event_timer_adapter *adptr, + struct rte_event_timer **tim, const uint64_t timeout_tick, + const uint16_t nb_timers) +{ + uint16_t set_timers; + struct timvf_ring *timr = adptr->data->adapter_priv; + + set_timers = timvf_timer_arm_tmo_brst(adptr, tim, timeout_tick, + nb_timers); + timr->tim_arm_cnt += set_timers; + + return set_timers; +} + +void +timvf_set_chunk_refill(struct timvf_ring * const timr, uint8_t use_fpa) +{ + if (use_fpa) + timr->refill_chunk = timvf_refill_chunk_fpa; + else + timr->refill_chunk = timvf_refill_chunk_generic; +} diff --git a/src/spdk/dpdk/drivers/event/octeontx/timvf_worker.h b/src/spdk/dpdk/drivers/event/octeontx/timvf_worker.h new file mode 100644 index 000000000..dede1a4a4 --- /dev/null +++ b/src/spdk/dpdk/drivers/event/octeontx/timvf_worker.h @@ -0,0 +1,443 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017 Cavium, Inc + */ + +#include <rte_common.h> +#include <rte_branch_prediction.h> + +#include "timvf_evdev.h" + +static inline int16_t +timr_bkt_fetch_rem(uint64_t w1) +{ + return (w1 >> TIM_BUCKET_W1_S_CHUNK_REMAINDER) & + TIM_BUCKET_W1_M_CHUNK_REMAINDER; +} + +static inline int16_t +timr_bkt_get_rem(struct tim_mem_bucket *bktp) +{ + return __atomic_load_n(&bktp->chunk_remainder, + __ATOMIC_ACQUIRE); +} + +static inline void +timr_bkt_set_rem(struct tim_mem_bucket *bktp, uint16_t v) +{ + __atomic_store_n(&bktp->chunk_remainder, v, + __ATOMIC_RELEASE); +} + +static inline void +timr_bkt_sub_rem(struct tim_mem_bucket *bktp, uint16_t v) +{ + __atomic_fetch_sub(&bktp->chunk_remainder, v, + __ATOMIC_RELEASE); +} + +static inline uint8_t +timr_bkt_get_sbt(uint64_t w1) +{ + return (w1 >> TIM_BUCKET_W1_S_SBT) & TIM_BUCKET_W1_M_SBT; +} + +static inline uint64_t +timr_bkt_set_sbt(struct tim_mem_bucket *bktp) +{ + const uint64_t v = TIM_BUCKET_W1_M_SBT << TIM_BUCKET_W1_S_SBT; + return __atomic_fetch_or(&bktp->w1, v, __ATOMIC_ACQ_REL); +} + +static inline uint64_t +timr_bkt_clr_sbt(struct tim_mem_bucket *bktp) +{ + const uint64_t v = ~(TIM_BUCKET_W1_M_SBT << TIM_BUCKET_W1_S_SBT); + return __atomic_fetch_and(&bktp->w1, v, __ATOMIC_ACQ_REL); +} + +static inline uint8_t +timr_bkt_get_shbt(uint64_t w1) +{ + return ((w1 >> TIM_BUCKET_W1_S_HBT) & TIM_BUCKET_W1_M_HBT) | + ((w1 >> TIM_BUCKET_W1_S_SBT) & TIM_BUCKET_W1_M_SBT); +} + +static inline uint8_t +timr_bkt_get_hbt(uint64_t w1) +{ + return (w1 >> TIM_BUCKET_W1_S_HBT) & TIM_BUCKET_W1_M_HBT; +} + +static inline uint8_t +timr_bkt_get_bsk(uint64_t w1) +{ + return (w1 >> TIM_BUCKET_W1_S_BSK) & TIM_BUCKET_W1_M_BSK; +} + +static inline uint64_t +timr_bkt_clr_bsk(struct tim_mem_bucket *bktp) +{ + /*Clear everything except lock. 
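The w1 word also packs the chunk remainder, state bits and entry count, so this single atomic AND resets the bucket while leaving concurrent lock holders intact.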
*/ + const uint64_t v = TIM_BUCKET_W1_M_LOCK << TIM_BUCKET_W1_S_LOCK; + return __atomic_fetch_and(&bktp->w1, v, __ATOMIC_ACQ_REL); +} + +static inline uint64_t +timr_bkt_fetch_sema_lock(struct tim_mem_bucket *bktp) +{ + return __atomic_fetch_add(&bktp->w1, TIM_BUCKET_SEMA_WLOCK, + __ATOMIC_ACQ_REL); +} + +static inline uint64_t +timr_bkt_fetch_sema(struct tim_mem_bucket *bktp) +{ + return __atomic_fetch_add(&bktp->w1, TIM_BUCKET_SEMA, + __ATOMIC_RELAXED); +} + +static inline uint64_t +timr_bkt_inc_lock(struct tim_mem_bucket *bktp) +{ + const uint64_t v = 1ull << TIM_BUCKET_W1_S_LOCK; + return __atomic_fetch_add(&bktp->w1, v, __ATOMIC_ACQ_REL); +} + +static inline void +timr_bkt_dec_lock(struct tim_mem_bucket *bktp) +{ + __atomic_add_fetch(&bktp->lock, 0xff, __ATOMIC_ACQ_REL); +} + +static inline uint32_t +timr_bkt_get_nent(uint64_t w1) +{ + return (w1 >> TIM_BUCKET_W1_S_NUM_ENTRIES) & + TIM_BUCKET_W1_M_NUM_ENTRIES; +} + +static inline void +timr_bkt_inc_nent(struct tim_mem_bucket *bktp) +{ + __atomic_add_fetch(&bktp->nb_entry, 1, __ATOMIC_RELAXED); +} + +static inline void +timr_bkt_add_nent(struct tim_mem_bucket *bktp, uint32_t v) +{ + __atomic_add_fetch(&bktp->nb_entry, v, __ATOMIC_RELAXED); +} + +static inline uint64_t +timr_bkt_clr_nent(struct tim_mem_bucket *bktp) +{ + const uint64_t v = ~(TIM_BUCKET_W1_M_NUM_ENTRIES << + TIM_BUCKET_W1_S_NUM_ENTRIES); + return __atomic_and_fetch(&bktp->w1, v, __ATOMIC_ACQ_REL); +} + +static inline struct tim_mem_entry * +timr_clr_bkt(struct timvf_ring * const timr, struct tim_mem_bucket * const bkt) +{ + struct tim_mem_entry *chunk; + struct tim_mem_entry *pnext; + chunk = ((struct tim_mem_entry *)(uintptr_t)bkt->first_chunk); + chunk = (struct tim_mem_entry *)(uintptr_t)(chunk + nb_chunk_slots)->w0; + + while (chunk) { + pnext = (struct tim_mem_entry *)(uintptr_t) + ((chunk + nb_chunk_slots)->w0); + rte_mempool_put(timr->chunk_pool, chunk); + chunk = pnext; + } + return (struct tim_mem_entry *)(uintptr_t)bkt->first_chunk; +} + +static inline int +timvf_rem_entry(struct rte_event_timer *tim) +{ + uint64_t lock_sema; + struct tim_mem_entry *entry; + struct tim_mem_bucket *bkt; + if (tim->impl_opaque[1] == 0 || + tim->impl_opaque[0] == 0) + return -ENOENT; + + entry = (struct tim_mem_entry *)(uintptr_t)tim->impl_opaque[0]; + if (entry->wqe != tim->ev.u64) { + tim->impl_opaque[1] = tim->impl_opaque[0] = 0; + return -ENOENT; + } + bkt = (struct tim_mem_bucket *)(uintptr_t)tim->impl_opaque[1]; + lock_sema = timr_bkt_inc_lock(bkt); + if (timr_bkt_get_shbt(lock_sema) + || !timr_bkt_get_nent(lock_sema)) { + timr_bkt_dec_lock(bkt); + tim->impl_opaque[1] = tim->impl_opaque[0] = 0; + return -ENOENT; + } + + entry->w0 = entry->wqe = 0; + timr_bkt_dec_lock(bkt); + + tim->state = RTE_EVENT_TIMER_CANCELED; + tim->impl_opaque[1] = tim->impl_opaque[0] = 0; + return 0; +} + +static inline struct tim_mem_entry * +timvf_refill_chunk_generic(struct tim_mem_bucket * const bkt, + struct timvf_ring * const timr) +{ + struct tim_mem_entry *chunk; + + if (bkt->nb_entry || !bkt->first_chunk) { + if (unlikely(rte_mempool_get(timr->chunk_pool, + (void **)&chunk))) { + return NULL; + } + if (bkt->nb_entry) { + *(uint64_t *)(((struct tim_mem_entry *)(uintptr_t) + bkt->current_chunk) + + nb_chunk_slots) = + (uintptr_t) chunk; + } else { + bkt->first_chunk = (uintptr_t) chunk; + } + } else { + chunk = timr_clr_bkt(timr, bkt); + bkt->first_chunk = (uintptr_t)chunk; + } + *(uint64_t *)(chunk + nb_chunk_slots) = 0; + + return chunk; +} + +static inline struct tim_mem_entry * 
+timvf_refill_chunk_fpa(struct tim_mem_bucket * const bkt, + struct timvf_ring * const timr) +{ + struct tim_mem_entry *chunk; + + if (unlikely(rte_mempool_get(timr->chunk_pool, (void **)&chunk))) + return NULL; + + *(uint64_t *)(chunk + nb_chunk_slots) = 0; + if (bkt->nb_entry) { + *(uint64_t *)(((struct tim_mem_entry *)(uintptr_t) + bkt->current_chunk) + + nb_chunk_slots) = + (uintptr_t) chunk; + } else { + bkt->first_chunk = (uintptr_t) chunk; + } + + return chunk; +} + +static inline struct tim_mem_bucket * +timvf_get_target_bucket(struct timvf_ring * const timr, const uint32_t rel_bkt) +{ + const uint64_t bkt_cyc = rte_rdtsc() - timr->ring_start_cyc; + const uint32_t bucket = rte_reciprocal_divide_u64(bkt_cyc, + &timr->fast_div) + rel_bkt; + const uint32_t tbkt_id = timr->get_target_bkt(bucket, + timr->nb_bkts); + return &timr->bkt[tbkt_id]; +} + +/* Single producer functions. */ +static inline int +timvf_add_entry_sp(struct timvf_ring * const timr, const uint32_t rel_bkt, + struct rte_event_timer * const tim, + const struct tim_mem_entry * const pent) +{ + int16_t rem; + uint64_t lock_sema; + struct tim_mem_bucket *bkt; + struct tim_mem_entry *chunk; + + + bkt = timvf_get_target_bucket(timr, rel_bkt); +__retry: + /*Get Bucket sema*/ + lock_sema = timr_bkt_fetch_sema(bkt); + /* Bucket related checks. */ + if (unlikely(timr_bkt_get_hbt(lock_sema))) + goto __retry; + + /* Insert the work. */ + rem = timr_bkt_fetch_rem(lock_sema); + + if (!rem) { + chunk = timr->refill_chunk(bkt, timr); + if (unlikely(chunk == NULL)) { + timr_bkt_set_rem(bkt, 0); + tim->impl_opaque[0] = tim->impl_opaque[1] = 0; + tim->state = RTE_EVENT_TIMER_ERROR; + return -ENOMEM; + } + bkt->current_chunk = (uintptr_t) chunk; + timr_bkt_set_rem(bkt, nb_chunk_slots - 1); + } else { + chunk = (struct tim_mem_entry *)(uintptr_t)bkt->current_chunk; + chunk += nb_chunk_slots - rem; + } + /* Copy work entry. */ + *chunk = *pent; + timr_bkt_inc_nent(bkt); + + tim->impl_opaque[0] = (uintptr_t)chunk; + tim->impl_opaque[1] = (uintptr_t)bkt; + tim->state = RTE_EVENT_TIMER_ARMED; + return 0; +} + +/* Multi producer functions. */ +static inline int +timvf_add_entry_mp(struct timvf_ring * const timr, const uint32_t rel_bkt, + struct rte_event_timer * const tim, + const struct tim_mem_entry * const pent) +{ + int16_t rem; + uint64_t lock_sema; + struct tim_mem_bucket *bkt; + struct tim_mem_entry *chunk; + +__retry: + bkt = timvf_get_target_bucket(timr, rel_bkt); + /* Bucket related checks. */ + /*Get Bucket sema*/ + lock_sema = timr_bkt_fetch_sema_lock(bkt); + if (unlikely(timr_bkt_get_shbt(lock_sema))) { + timr_bkt_dec_lock(bkt); + goto __retry; + } + + rem = timr_bkt_fetch_rem(lock_sema); + + if (rem < 0) { + /* goto diff bucket. */ + timr_bkt_dec_lock(bkt); + goto __retry; + } else if (!rem) { + /*Only one thread can be here*/ + chunk = timr->refill_chunk(bkt, timr); + if (unlikely(chunk == NULL)) { + timr_bkt_set_rem(bkt, 0); + timr_bkt_dec_lock(bkt); + tim->impl_opaque[0] = tim->impl_opaque[1] = 0; + tim->state = RTE_EVENT_TIMER_ERROR; + return -ENOMEM; + } + bkt->current_chunk = (uintptr_t) chunk; + timr_bkt_set_rem(bkt, nb_chunk_slots - 1); + } else { + chunk = (struct tim_mem_entry *)(uintptr_t)bkt->current_chunk; + chunk += nb_chunk_slots - rem; + } + /* Copy work entry. 
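Each producer reserved a distinct slot through the semaphore fetch-add above, so the copy itself needs no extra synchronisation; the bucket lock taken along with the semaphore is dropped as soon as the entry count has been bumped.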
*/ + *chunk = *pent; + timr_bkt_inc_nent(bkt); + timr_bkt_dec_lock(bkt); + + tim->impl_opaque[0] = (uintptr_t)chunk; + tim->impl_opaque[1] = (uintptr_t)bkt; + tim->state = RTE_EVENT_TIMER_ARMED; + return 0; +} + +static inline uint16_t +timvf_cpy_wrk(uint16_t index, uint16_t cpy_lmt, + struct tim_mem_entry *chunk, + struct rte_event_timer ** const tim, + const struct tim_mem_entry * const ents, + const struct tim_mem_bucket * const bkt) +{ + for (; index < cpy_lmt; index++) { + *chunk = *(ents + index); + tim[index]->impl_opaque[0] = (uintptr_t)chunk++; + tim[index]->impl_opaque[1] = (uintptr_t)bkt; + tim[index]->state = RTE_EVENT_TIMER_ARMED; + } + + return index; +} + +/* Burst mode functions */ +static inline int +timvf_add_entry_brst(struct timvf_ring * const timr, const uint16_t rel_bkt, + struct rte_event_timer ** const tim, + const struct tim_mem_entry *ents, + const uint16_t nb_timers) +{ + int16_t rem; + int16_t crem; + uint8_t lock_cnt; + uint16_t index = 0; + uint16_t chunk_remainder; + uint64_t lock_sema; + struct tim_mem_bucket *bkt; + struct tim_mem_entry *chunk; + +__retry: + bkt = timvf_get_target_bucket(timr, rel_bkt); + + /* Only one thread beyond this. */ + lock_sema = timr_bkt_inc_lock(bkt); + lock_cnt = (uint8_t) + ((lock_sema >> TIM_BUCKET_W1_S_LOCK) & TIM_BUCKET_W1_M_LOCK); + + if (lock_cnt) { + timr_bkt_dec_lock(bkt); + goto __retry; + } + + /* Bucket related checks. */ + if (unlikely(timr_bkt_get_hbt(lock_sema))) { + timr_bkt_dec_lock(bkt); + goto __retry; + } + + chunk_remainder = timr_bkt_fetch_rem(lock_sema); + rem = chunk_remainder - nb_timers; + if (rem < 0) { + crem = nb_chunk_slots - chunk_remainder; + if (chunk_remainder && crem) { + chunk = ((struct tim_mem_entry *) + (uintptr_t)bkt->current_chunk) + crem; + + index = timvf_cpy_wrk(index, chunk_remainder, + chunk, tim, ents, bkt); + timr_bkt_sub_rem(bkt, chunk_remainder); + timr_bkt_add_nent(bkt, chunk_remainder); + } + rem = nb_timers - chunk_remainder; + ents = ents + chunk_remainder; + + chunk = timr->refill_chunk(bkt, timr); + if (unlikely(chunk == NULL)) { + timr_bkt_dec_lock(bkt); + rte_errno = ENOMEM; + tim[index]->state = RTE_EVENT_TIMER_ERROR; + return crem; + } + *(uint64_t *)(chunk + nb_chunk_slots) = 0; + bkt->current_chunk = (uintptr_t) chunk; + + index = timvf_cpy_wrk(index, nb_timers, chunk, tim, ents, bkt); + timr_bkt_set_rem(bkt, nb_chunk_slots - rem); + timr_bkt_add_nent(bkt, rem); + } else { + chunk = (struct tim_mem_entry *)(uintptr_t)bkt->current_chunk; + chunk += (nb_chunk_slots - chunk_remainder); + + index = timvf_cpy_wrk(index, nb_timers, + chunk, tim, ents, bkt); + timr_bkt_sub_rem(bkt, nb_timers); + timr_bkt_add_nent(bkt, nb_timers); + } + + timr_bkt_dec_lock(bkt); + return nb_timers; +} diff --git a/src/spdk/dpdk/drivers/event/octeontx2/Makefile b/src/spdk/dpdk/drivers/event/octeontx2/Makefile new file mode 100644 index 000000000..9d67b00c6 --- /dev/null +++ b/src/spdk/dpdk/drivers/event/octeontx2/Makefile @@ -0,0 +1,47 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(C) 2019 Marvell International Ltd. 
+# + +include $(RTE_SDK)/mk/rte.vars.mk + +# +# library name +# +LIB = librte_pmd_octeontx2_event.a + +CFLAGS += $(WERROR_FLAGS) +CFLAGS += -I$(RTE_SDK)/drivers/common/octeontx2 +CFLAGS += -I$(RTE_SDK)/drivers/crypto/octeontx2 +CFLAGS += -I$(RTE_SDK)/drivers/mempool/octeontx2 +CFLAGS += -I$(RTE_SDK)/drivers/event/octeontx2 +CFLAGS += -I$(RTE_SDK)/drivers/net/octeontx2 +CFLAGS += -O3 + +ifneq ($(CONFIG_RTE_ARCH_64),y) +CFLAGS += -Wno-int-to-pointer-cast +CFLAGS += -Wno-pointer-to-int-cast +ifeq ($(CONFIG_RTE_TOOLCHAIN_ICC),y) +CFLAGS += -diag-disable 2259 +endif +endif + +EXPORT_MAP := rte_pmd_octeontx2_event_version.map + +# +# all source are stored in SRCS-y +# + +SRCS-$(CONFIG_RTE_LIBRTE_PMD_OCTEONTX2_EVENTDEV) += otx2_worker_dual.c +SRCS-$(CONFIG_RTE_LIBRTE_PMD_OCTEONTX2_EVENTDEV) += otx2_worker.c +SRCS-$(CONFIG_RTE_LIBRTE_PMD_OCTEONTX2_EVENTDEV) += otx2_tim_worker.c +SRCS-$(CONFIG_RTE_LIBRTE_PMD_OCTEONTX2_EVENTDEV) += otx2_evdev.c +SRCS-$(CONFIG_RTE_LIBRTE_PMD_OCTEONTX2_EVENTDEV) += otx2_evdev_adptr.c +SRCS-$(CONFIG_RTE_LIBRTE_PMD_OCTEONTX2_EVENTDEV) += otx2_tim_evdev.c +SRCS-$(CONFIG_RTE_LIBRTE_PMD_OCTEONTX2_EVENTDEV) += otx2_evdev_selftest.c +SRCS-$(CONFIG_RTE_LIBRTE_PMD_OCTEONTX2_EVENTDEV) += otx2_evdev_irq.c + +LDLIBS += -lrte_eal -lrte_bus_pci -lrte_pci -lrte_kvargs +LDLIBS += -lrte_mempool -lrte_eventdev -lrte_mbuf -lrte_ethdev +LDLIBS += -lrte_common_octeontx2 -lrte_mempool_octeontx2 + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/src/spdk/dpdk/drivers/event/octeontx2/meson.build b/src/spdk/dpdk/drivers/event/octeontx2/meson.build new file mode 100644 index 000000000..0ade51cec --- /dev/null +++ b/src/spdk/dpdk/drivers/event/octeontx2/meson.build @@ -0,0 +1,29 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(C) 2019 Marvell International Ltd. +# + +sources = files('otx2_worker.c', + 'otx2_worker_dual.c', + 'otx2_evdev.c', + 'otx2_evdev_adptr.c', + 'otx2_evdev_irq.c', + 'otx2_evdev_selftest.c', + 'otx2_tim_evdev.c', + 'otx2_tim_worker.c' + ) + +extra_flags = [] +# This integrated controller runs only on a arm64 machine, remove 32bit warnings +if not dpdk_conf.get('RTE_ARCH_64') + extra_flags += ['-Wno-int-to-pointer-cast', '-Wno-pointer-to-int-cast'] +endif + +foreach flag: extra_flags + if cc.has_argument(flag) + cflags += flag + endif +endforeach + +deps += ['bus_pci', 'common_octeontx2', 'mempool_octeontx2', 'pmd_octeontx2'] + +includes += include_directories('../../crypto/octeontx2') diff --git a/src/spdk/dpdk/drivers/event/octeontx2/otx2_evdev.c b/src/spdk/dpdk/drivers/event/octeontx2/otx2_evdev.c new file mode 100644 index 000000000..630073de5 --- /dev/null +++ b/src/spdk/dpdk/drivers/event/octeontx2/otx2_evdev.c @@ -0,0 +1,1825 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2019 Marvell International Ltd. + */ + +#include <inttypes.h> + +#include <rte_bus_pci.h> +#include <rte_common.h> +#include <rte_eal.h> +#include <rte_eventdev_pmd_pci.h> +#include <rte_kvargs.h> +#include <rte_mbuf_pool_ops.h> +#include <rte_pci.h> + +#include "otx2_evdev_stats.h" +#include "otx2_evdev.h" +#include "otx2_irq.h" +#include "otx2_tim_evdev.h" + +static inline int +sso_get_msix_offsets(const struct rte_eventdev *event_dev) +{ + struct otx2_sso_evdev *dev = sso_pmd_priv(event_dev); + uint8_t nb_ports = dev->nb_event_ports * (dev->dual_ws ? 
2 : 1); + struct otx2_mbox *mbox = dev->mbox; + struct msix_offset_rsp *msix_rsp; + int i, rc; + + /* Get SSO and SSOW MSIX vector offsets */ + otx2_mbox_alloc_msg_msix_offset(mbox); + rc = otx2_mbox_process_msg(mbox, (void *)&msix_rsp); + + for (i = 0; i < nb_ports; i++) + dev->ssow_msixoff[i] = msix_rsp->ssow_msixoff[i]; + + for (i = 0; i < dev->nb_event_queues; i++) + dev->sso_msixoff[i] = msix_rsp->sso_msixoff[i]; + + return rc; +} + +void +sso_fastpath_fns_set(struct rte_eventdev *event_dev) +{ + struct otx2_sso_evdev *dev = sso_pmd_priv(event_dev); + /* Single WS modes */ + const event_dequeue_t ssogws_deq[2][2][2][2][2][2][2] = { +#define R(name, f6, f5, f4, f3, f2, f1, f0, flags) \ + [f6][f5][f4][f3][f2][f1][f0] = otx2_ssogws_deq_ ##name, +SSO_RX_ADPTR_ENQ_FASTPATH_FUNC +#undef R + }; + + const event_dequeue_burst_t ssogws_deq_burst[2][2][2][2][2][2][2] = { +#define R(name, f6, f5, f4, f3, f2, f1, f0, flags) \ + [f6][f5][f4][f3][f2][f1][f0] = otx2_ssogws_deq_burst_ ##name, +SSO_RX_ADPTR_ENQ_FASTPATH_FUNC +#undef R + }; + + const event_dequeue_t ssogws_deq_timeout[2][2][2][2][2][2][2] = { +#define R(name, f6, f5, f4, f3, f2, f1, f0, flags) \ + [f6][f5][f4][f3][f2][f1][f0] = otx2_ssogws_deq_timeout_ ##name, +SSO_RX_ADPTR_ENQ_FASTPATH_FUNC +#undef R + }; + + const event_dequeue_burst_t + ssogws_deq_timeout_burst[2][2][2][2][2][2][2] = { +#define R(name, f6, f5, f4, f3, f2, f1, f0, flags) \ + [f6][f5][f4][f3][f2][f1][f0] = \ + otx2_ssogws_deq_timeout_burst_ ##name, +SSO_RX_ADPTR_ENQ_FASTPATH_FUNC +#undef R + }; + + const event_dequeue_t ssogws_deq_seg[2][2][2][2][2][2][2] = { +#define R(name, f6, f5, f4, f3, f2, f1, f0, flags) \ + [f6][f5][f4][f3][f2][f1][f0] = otx2_ssogws_deq_seg_ ##name, +SSO_RX_ADPTR_ENQ_FASTPATH_FUNC +#undef R + }; + + const event_dequeue_burst_t + ssogws_deq_seg_burst[2][2][2][2][2][2][2] = { +#define R(name, f6, f5, f4, f3, f2, f1, f0, flags) \ + [f6][f5][f4][f3][f2][f1][f0] = \ + otx2_ssogws_deq_seg_burst_ ##name, +SSO_RX_ADPTR_ENQ_FASTPATH_FUNC +#undef R + }; + + const event_dequeue_t ssogws_deq_seg_timeout[2][2][2][2][2][2][2] = { +#define R(name, f6, f5, f4, f3, f2, f1, f0, flags) \ + [f6][f5][f4][f3][f2][f1][f0] = \ + otx2_ssogws_deq_seg_timeout_ ##name, +SSO_RX_ADPTR_ENQ_FASTPATH_FUNC +#undef R + }; + + const event_dequeue_burst_t + ssogws_deq_seg_timeout_burst[2][2][2][2][2][2][2] = { +#define R(name, f6, f5, f4, f3, f2, f1, f0, flags) \ + [f6][f5][f4][f3][f2][f1][f0] = \ + otx2_ssogws_deq_seg_timeout_burst_ ##name, +SSO_RX_ADPTR_ENQ_FASTPATH_FUNC +#undef R + }; + + + /* Dual WS modes */ + const event_dequeue_t ssogws_dual_deq[2][2][2][2][2][2][2] = { +#define R(name, f6, f5, f4, f3, f2, f1, f0, flags) \ + [f6][f5][f4][f3][f2][f1][f0] = otx2_ssogws_dual_deq_ ##name, +SSO_RX_ADPTR_ENQ_FASTPATH_FUNC +#undef R + }; + + const event_dequeue_burst_t + ssogws_dual_deq_burst[2][2][2][2][2][2][2] = { +#define R(name, f6, f5, f4, f3, f2, f1, f0, flags) \ + [f6][f5][f4][f3][f2][f1][f0] = \ + otx2_ssogws_dual_deq_burst_ ##name, +SSO_RX_ADPTR_ENQ_FASTPATH_FUNC +#undef R + }; + + const event_dequeue_t ssogws_dual_deq_timeout[2][2][2][2][2][2][2] = { +#define R(name, f6, f5, f4, f3, f2, f1, f0, flags) \ + [f6][f5][f4][f3][f2][f1][f0] = \ + otx2_ssogws_dual_deq_timeout_ ##name, +SSO_RX_ADPTR_ENQ_FASTPATH_FUNC +#undef R + }; + + const event_dequeue_burst_t + ssogws_dual_deq_timeout_burst[2][2][2][2][2][2][2] = { +#define R(name, f6, f5, f4, f3, f2, f1, f0, flags) \ + [f6][f5][f4][f3][f2][f1][f0] = \ + otx2_ssogws_dual_deq_timeout_burst_ ##name, 
+SSO_RX_ADPTR_ENQ_FASTPATH_FUNC +#undef R + }; + + const event_dequeue_t ssogws_dual_deq_seg[2][2][2][2][2][2][2] = { +#define R(name, f6, f5, f4, f3, f2, f1, f0, flags) \ + [f6][f5][f4][f3][f2][f1][f0] = otx2_ssogws_dual_deq_seg_ ##name, +SSO_RX_ADPTR_ENQ_FASTPATH_FUNC +#undef R + }; + + const event_dequeue_burst_t + ssogws_dual_deq_seg_burst[2][2][2][2][2][2][2] = { +#define R(name, f6, f5, f4, f3, f2, f1, f0, flags) \ + [f6][f5][f4][f3][f2][f1][f0] = \ + otx2_ssogws_dual_deq_seg_burst_ ##name, +SSO_RX_ADPTR_ENQ_FASTPATH_FUNC +#undef R + }; + + const event_dequeue_t + ssogws_dual_deq_seg_timeout[2][2][2][2][2][2][2] = { +#define R(name, f6, f5, f4, f3, f2, f1, f0, flags) \ + [f6][f5][f4][f3][f2][f1][f0] = \ + otx2_ssogws_dual_deq_seg_timeout_ ##name, +SSO_RX_ADPTR_ENQ_FASTPATH_FUNC +#undef R + }; + + const event_dequeue_burst_t + ssogws_dual_deq_seg_timeout_burst[2][2][2][2][2][2][2] = { +#define R(name, f6, f5, f4, f3, f2, f1, f0, flags) \ + [f6][f5][f4][f3][f2][f1][f0] = \ + otx2_ssogws_dual_deq_seg_timeout_burst_ ##name, +SSO_RX_ADPTR_ENQ_FASTPATH_FUNC +#undef R + }; + + /* Tx modes */ + const event_tx_adapter_enqueue + ssogws_tx_adptr_enq[2][2][2][2][2][2][2] = { +#define T(name, f6, f5, f4, f3, f2, f1, f0, sz, flags) \ + [f6][f5][f4][f3][f2][f1][f0] = \ + otx2_ssogws_tx_adptr_enq_ ## name, +SSO_TX_ADPTR_ENQ_FASTPATH_FUNC +#undef T + }; + + const event_tx_adapter_enqueue + ssogws_tx_adptr_enq_seg[2][2][2][2][2][2][2] = { +#define T(name, f6, f5, f4, f3, f2, f1, f0, sz, flags) \ + [f6][f5][f4][f3][f2][f1][f0] = \ + otx2_ssogws_tx_adptr_enq_seg_ ## name, +SSO_TX_ADPTR_ENQ_FASTPATH_FUNC +#undef T + }; + + const event_tx_adapter_enqueue + ssogws_dual_tx_adptr_enq[2][2][2][2][2][2][2] = { +#define T(name, f6, f5, f4, f3, f2, f1, f0, sz, flags) \ + [f6][f5][f4][f3][f2][f1][f0] = \ + otx2_ssogws_dual_tx_adptr_enq_ ## name, +SSO_TX_ADPTR_ENQ_FASTPATH_FUNC +#undef T + }; + + const event_tx_adapter_enqueue + ssogws_dual_tx_adptr_enq_seg[2][2][2][2][2][2][2] = { +#define T(name, f6, f5, f4, f3, f2, f1, f0, sz, flags) \ + [f6][f5][f4][f3][f2][f1][f0] = \ + otx2_ssogws_dual_tx_adptr_enq_seg_ ## name, +SSO_TX_ADPTR_ENQ_FASTPATH_FUNC +#undef T + }; + + event_dev->enqueue = otx2_ssogws_enq; + event_dev->enqueue_burst = otx2_ssogws_enq_burst; + event_dev->enqueue_new_burst = otx2_ssogws_enq_new_burst; + event_dev->enqueue_forward_burst = otx2_ssogws_enq_fwd_burst; + if (dev->rx_offloads & NIX_RX_MULTI_SEG_F) { + event_dev->dequeue = ssogws_deq_seg + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_SECURITY_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = ssogws_deq_seg_burst + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_SECURITY_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + if (dev->is_timeout_deq) { + event_dev->dequeue = ssogws_deq_seg_timeout + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_SECURITY_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)] 
+ [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = + ssogws_deq_seg_timeout_burst + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_SECURITY_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + } + } else { + event_dev->dequeue = ssogws_deq + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_SECURITY_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = ssogws_deq_burst + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_SECURITY_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + if (dev->is_timeout_deq) { + event_dev->dequeue = ssogws_deq_timeout + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_SECURITY_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = + ssogws_deq_timeout_burst + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_SECURITY_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + } + } + + if (dev->tx_offloads & NIX_TX_MULTI_SEG_F) { + /* [SEC] [TSMP] [MBUF_NOFF] [VLAN] [OL3_L4_CSUM] [L3_L4_CSUM] */ + event_dev->txa_enqueue = ssogws_tx_adptr_enq_seg + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_SECURITY_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)]; + } else { + event_dev->txa_enqueue = ssogws_tx_adptr_enq + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_SECURITY_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)]; + } + + if (dev->dual_ws) { + event_dev->enqueue = otx2_ssogws_dual_enq; + event_dev->enqueue_burst = otx2_ssogws_dual_enq_burst; + event_dev->enqueue_new_burst = + otx2_ssogws_dual_enq_new_burst; + event_dev->enqueue_forward_burst = + otx2_ssogws_dual_enq_fwd_burst; + + if (dev->rx_offloads & 
NIX_RX_MULTI_SEG_F) { + event_dev->dequeue = ssogws_dual_deq_seg + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_SECURITY_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = ssogws_dual_deq_seg_burst + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_SECURITY_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + if (dev->is_timeout_deq) { + event_dev->dequeue = + ssogws_dual_deq_seg_timeout + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_SECURITY_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = + ssogws_dual_deq_seg_timeout_burst + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_SECURITY_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_RSS_F)]; + } + } else { + event_dev->dequeue = ssogws_dual_deq + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_SECURITY_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = ssogws_dual_deq_burst + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_SECURITY_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)]; + if (dev->is_timeout_deq) { + event_dev->dequeue = + ssogws_dual_deq_timeout + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_SECURITY_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_RSS_F)]; + event_dev->dequeue_burst = + ssogws_dual_deq_timeout_burst + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_SECURITY_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_TSTAMP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_MARK_UPDATE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_VLAN_STRIP_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_CHECKSUM_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_PTYPE_F)] + [!!(dev->rx_offloads & + NIX_RX_OFFLOAD_RSS_F)]; + } + } + + 
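/* Pick the dual-workslot Tx adapter enqueue routine from the Tx offload flags. */
+ 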
if (dev->tx_offloads & NIX_TX_MULTI_SEG_F) { + /* [SEC] [TSMP] [MBUF_NOFF] [VLAN] [OL3_L4_CSUM] [L3_L4_CSUM] */ + event_dev->txa_enqueue = ssogws_dual_tx_adptr_enq_seg + [!!(dev->tx_offloads & + NIX_TX_OFFLOAD_SECURITY_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)] + [!!(dev->tx_offloads & + NIX_TX_OFFLOAD_MBUF_NOFF_F)] + [!!(dev->tx_offloads & + NIX_TX_OFFLOAD_VLAN_QINQ_F)] + [!!(dev->tx_offloads & + NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)] + [!!(dev->tx_offloads & + NIX_TX_OFFLOAD_L3_L4_CSUM_F)]; + } else { + event_dev->txa_enqueue = ssogws_dual_tx_adptr_enq + [!!(dev->tx_offloads & + NIX_TX_OFFLOAD_SECURITY_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)] + [!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)] + [!!(dev->tx_offloads & + NIX_TX_OFFLOAD_MBUF_NOFF_F)] + [!!(dev->tx_offloads & + NIX_TX_OFFLOAD_VLAN_QINQ_F)] + [!!(dev->tx_offloads & + NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)] + [!!(dev->tx_offloads & + NIX_TX_OFFLOAD_L3_L4_CSUM_F)]; + } + } + + event_dev->txa_enqueue_same_dest = event_dev->txa_enqueue; + rte_mb(); +} + +static void +otx2_sso_info_get(struct rte_eventdev *event_dev, + struct rte_event_dev_info *dev_info) +{ + struct otx2_sso_evdev *dev = sso_pmd_priv(event_dev); + + dev_info->driver_name = RTE_STR(EVENTDEV_NAME_OCTEONTX2_PMD); + dev_info->min_dequeue_timeout_ns = dev->min_dequeue_timeout_ns; + dev_info->max_dequeue_timeout_ns = dev->max_dequeue_timeout_ns; + dev_info->max_event_queues = dev->max_event_queues; + dev_info->max_event_queue_flows = (1ULL << 20); + dev_info->max_event_queue_priority_levels = 8; + dev_info->max_event_priority_levels = 1; + dev_info->max_event_ports = dev->max_event_ports; + dev_info->max_event_port_dequeue_depth = 1; + dev_info->max_event_port_enqueue_depth = 1; + dev_info->max_num_events = dev->max_num_events; + dev_info->event_dev_cap = RTE_EVENT_DEV_CAP_QUEUE_QOS | + RTE_EVENT_DEV_CAP_DISTRIBUTED_SCHED | + RTE_EVENT_DEV_CAP_QUEUE_ALL_TYPES | + RTE_EVENT_DEV_CAP_RUNTIME_PORT_LINK | + RTE_EVENT_DEV_CAP_MULTIPLE_QUEUE_PORT | + RTE_EVENT_DEV_CAP_NONSEQ_MODE; +} + +static void +sso_port_link_modify(struct otx2_ssogws *ws, uint8_t queue, uint8_t enable) +{ + uintptr_t base = OTX2_SSOW_GET_BASE_ADDR(ws->getwrk_op); + uint64_t val; + + val = queue; + val |= 0ULL << 12; /* SET 0 */ + val |= 0x8000800080000000; /* Dont modify rest of the masks */ + val |= (uint64_t)enable << 14; /* Enable/Disable Membership. 
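Bit 14 of the GRPMSK_CHG value adds or removes the group id carried in the low bits, while the constant ORed in above marks the remaining mask sets as unmodified.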
*/ + + otx2_write64(val, base + SSOW_LF_GWS_GRPMSK_CHG); +} + +static int +otx2_sso_port_link(struct rte_eventdev *event_dev, void *port, + const uint8_t queues[], const uint8_t priorities[], + uint16_t nb_links) +{ + struct otx2_sso_evdev *dev = sso_pmd_priv(event_dev); + uint8_t port_id = 0; + uint16_t link; + + RTE_SET_USED(priorities); + for (link = 0; link < nb_links; link++) { + if (dev->dual_ws) { + struct otx2_ssogws_dual *ws = port; + + port_id = ws->port; + sso_port_link_modify((struct otx2_ssogws *) + &ws->ws_state[0], queues[link], true); + sso_port_link_modify((struct otx2_ssogws *) + &ws->ws_state[1], queues[link], true); + } else { + struct otx2_ssogws *ws = port; + + port_id = ws->port; + sso_port_link_modify(ws, queues[link], true); + } + } + sso_func_trace("Port=%d nb_links=%d", port_id, nb_links); + + return (int)nb_links; +} + +static int +otx2_sso_port_unlink(struct rte_eventdev *event_dev, void *port, + uint8_t queues[], uint16_t nb_unlinks) +{ + struct otx2_sso_evdev *dev = sso_pmd_priv(event_dev); + uint8_t port_id = 0; + uint16_t unlink; + + for (unlink = 0; unlink < nb_unlinks; unlink++) { + if (dev->dual_ws) { + struct otx2_ssogws_dual *ws = port; + + port_id = ws->port; + sso_port_link_modify((struct otx2_ssogws *) + &ws->ws_state[0], queues[unlink], + false); + sso_port_link_modify((struct otx2_ssogws *) + &ws->ws_state[1], queues[unlink], + false); + } else { + struct otx2_ssogws *ws = port; + + port_id = ws->port; + sso_port_link_modify(ws, queues[unlink], false); + } + } + sso_func_trace("Port=%d nb_unlinks=%d", port_id, nb_unlinks); + + return (int)nb_unlinks; +} + +static int +sso_hw_lf_cfg(struct otx2_mbox *mbox, enum otx2_sso_lf_type type, + uint16_t nb_lf, uint8_t attach) +{ + if (attach) { + struct rsrc_attach_req *req; + + req = otx2_mbox_alloc_msg_attach_resources(mbox); + switch (type) { + case SSO_LF_GGRP: + req->sso = nb_lf; + break; + case SSO_LF_GWS: + req->ssow = nb_lf; + break; + default: + return -EINVAL; + } + req->modify = true; + if (otx2_mbox_process(mbox) < 0) + return -EIO; + } else { + struct rsrc_detach_req *req; + + req = otx2_mbox_alloc_msg_detach_resources(mbox); + switch (type) { + case SSO_LF_GGRP: + req->sso = true; + break; + case SSO_LF_GWS: + req->ssow = true; + break; + default: + return -EINVAL; + } + req->partial = true; + if (otx2_mbox_process(mbox) < 0) + return -EIO; + } + + return 0; +} + +static int +sso_lf_cfg(struct otx2_sso_evdev *dev, struct otx2_mbox *mbox, + enum otx2_sso_lf_type type, uint16_t nb_lf, uint8_t alloc) +{ + void *rsp; + int rc; + + if (alloc) { + switch (type) { + case SSO_LF_GGRP: + { + struct sso_lf_alloc_req *req_ggrp; + req_ggrp = otx2_mbox_alloc_msg_sso_lf_alloc(mbox); + req_ggrp->hwgrps = nb_lf; + } + break; + case SSO_LF_GWS: + { + struct ssow_lf_alloc_req *req_hws; + req_hws = otx2_mbox_alloc_msg_ssow_lf_alloc(mbox); + req_hws->hws = nb_lf; + } + break; + default: + return -EINVAL; + } + } else { + switch (type) { + case SSO_LF_GGRP: + { + struct sso_lf_free_req *req_ggrp; + req_ggrp = otx2_mbox_alloc_msg_sso_lf_free(mbox); + req_ggrp->hwgrps = nb_lf; + } + break; + case SSO_LF_GWS: + { + struct ssow_lf_free_req *req_hws; + req_hws = otx2_mbox_alloc_msg_ssow_lf_free(mbox); + req_hws->hws = nb_lf; + } + break; + default: + return -EINVAL; + } + } + + rc = otx2_mbox_process_msg_tmo(mbox, (void **)&rsp, ~0); + if (rc < 0) + return rc; + + if (alloc && type == SSO_LF_GGRP) { + struct sso_lf_alloc_rsp *rsp_ggrp = rsp; + + dev->xaq_buf_size = rsp_ggrp->xaq_buf_size; + dev->xae_waes = 
rsp_ggrp->xaq_wq_entries; + dev->iue = rsp_ggrp->in_unit_entries; + } + + return 0; +} + +static void +otx2_sso_port_release(void *port) +{ + rte_free(port); +} + +static void +otx2_sso_queue_release(struct rte_eventdev *event_dev, uint8_t queue_id) +{ + RTE_SET_USED(event_dev); + RTE_SET_USED(queue_id); +} + +static void +sso_clr_links(const struct rte_eventdev *event_dev) +{ + struct otx2_sso_evdev *dev = sso_pmd_priv(event_dev); + int i, j; + + for (i = 0; i < dev->nb_event_ports; i++) { + if (dev->dual_ws) { + struct otx2_ssogws_dual *ws; + + ws = event_dev->data->ports[i]; + for (j = 0; j < dev->nb_event_queues; j++) { + sso_port_link_modify((struct otx2_ssogws *) + &ws->ws_state[0], j, false); + sso_port_link_modify((struct otx2_ssogws *) + &ws->ws_state[1], j, false); + } + } else { + struct otx2_ssogws *ws; + + ws = event_dev->data->ports[i]; + for (j = 0; j < dev->nb_event_queues; j++) + sso_port_link_modify(ws, j, false); + } + } +} + +static void +sso_set_port_ops(struct otx2_ssogws *ws, uintptr_t base) +{ + ws->tag_op = base + SSOW_LF_GWS_TAG; + ws->wqp_op = base + SSOW_LF_GWS_WQP; + ws->getwrk_op = base + SSOW_LF_GWS_OP_GET_WORK; + ws->swtp_op = base + SSOW_LF_GWS_SWTP; + ws->swtag_norm_op = base + SSOW_LF_GWS_OP_SWTAG_NORM; + ws->swtag_desched_op = base + SSOW_LF_GWS_OP_SWTAG_DESCHED; +} + +static int +sso_configure_dual_ports(const struct rte_eventdev *event_dev) +{ + struct otx2_sso_evdev *dev = sso_pmd_priv(event_dev); + struct otx2_mbox *mbox = dev->mbox; + uint8_t vws = 0; + uint8_t nb_lf; + int i, rc; + + otx2_sso_dbg("Configuring event ports %d", dev->nb_event_ports); + + nb_lf = dev->nb_event_ports * 2; + /* Ask AF to attach required LFs. */ + rc = sso_hw_lf_cfg(mbox, SSO_LF_GWS, nb_lf, true); + if (rc < 0) { + otx2_err("Failed to attach SSO GWS LF"); + return -ENODEV; + } + + if (sso_lf_cfg(dev, mbox, SSO_LF_GWS, nb_lf, true) < 0) { + sso_hw_lf_cfg(mbox, SSO_LF_GWS, nb_lf, false); + otx2_err("Failed to init SSO GWS LF"); + return -ENODEV; + } + + for (i = 0; i < dev->nb_event_ports; i++) { + struct otx2_ssogws_dual *ws; + uintptr_t base; + + /* Free memory prior to re-allocation if needed */ + if (event_dev->data->ports[i] != NULL) { + ws = event_dev->data->ports[i]; + rte_free(ws); + ws = NULL; + } + + /* Allocate event port memory */ + ws = rte_zmalloc_socket("otx2_sso_ws", + sizeof(struct otx2_ssogws_dual), + RTE_CACHE_LINE_SIZE, + event_dev->data->socket_id); + if (ws == NULL) { + otx2_err("Failed to alloc memory for port=%d", i); + rc = -ENOMEM; + break; + } + + ws->port = i; + base = dev->bar2 + (RVU_BLOCK_ADDR_SSOW << 20 | vws << 12); + sso_set_port_ops((struct otx2_ssogws *)&ws->ws_state[0], base); + vws++; + + base = dev->bar2 + (RVU_BLOCK_ADDR_SSOW << 20 | vws << 12); + sso_set_port_ops((struct otx2_ssogws *)&ws->ws_state[1], base); + vws++; + + event_dev->data->ports[i] = ws; + } + + if (rc < 0) { + sso_lf_cfg(dev, mbox, SSO_LF_GWS, nb_lf, false); + sso_hw_lf_cfg(mbox, SSO_LF_GWS, nb_lf, false); + } + + return rc; +} + +static int +sso_configure_ports(const struct rte_eventdev *event_dev) +{ + struct otx2_sso_evdev *dev = sso_pmd_priv(event_dev); + struct otx2_mbox *mbox = dev->mbox; + uint8_t nb_lf; + int i, rc; + + otx2_sso_dbg("Configuring event ports %d", dev->nb_event_ports); + + nb_lf = dev->nb_event_ports; + /* Ask AF to attach required LFs. 
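One SSOW LF per event port is requested here; the admin function (AF) provisions them over the mailbox before they are initialised below.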
*/ + rc = sso_hw_lf_cfg(mbox, SSO_LF_GWS, nb_lf, true); + if (rc < 0) { + otx2_err("Failed to attach SSO GWS LF"); + return -ENODEV; + } + + if (sso_lf_cfg(dev, mbox, SSO_LF_GWS, nb_lf, true) < 0) { + sso_hw_lf_cfg(mbox, SSO_LF_GWS, nb_lf, false); + otx2_err("Failed to init SSO GWS LF"); + return -ENODEV; + } + + for (i = 0; i < nb_lf; i++) { + struct otx2_ssogws *ws; + uintptr_t base; + + /* Free memory prior to re-allocation if needed */ + if (event_dev->data->ports[i] != NULL) { + ws = event_dev->data->ports[i]; + rte_free(ws); + ws = NULL; + } + + /* Allocate event port memory */ + ws = rte_zmalloc_socket("otx2_sso_ws", + sizeof(struct otx2_ssogws), + RTE_CACHE_LINE_SIZE, + event_dev->data->socket_id); + if (ws == NULL) { + otx2_err("Failed to alloc memory for port=%d", i); + rc = -ENOMEM; + break; + } + + ws->port = i; + base = dev->bar2 + (RVU_BLOCK_ADDR_SSOW << 20 | i << 12); + sso_set_port_ops(ws, base); + + event_dev->data->ports[i] = ws; + } + + if (rc < 0) { + sso_lf_cfg(dev, mbox, SSO_LF_GWS, nb_lf, false); + sso_hw_lf_cfg(mbox, SSO_LF_GWS, nb_lf, false); + } + + return rc; +} + +static int +sso_configure_queues(const struct rte_eventdev *event_dev) +{ + struct otx2_sso_evdev *dev = sso_pmd_priv(event_dev); + struct otx2_mbox *mbox = dev->mbox; + uint8_t nb_lf; + int rc; + + otx2_sso_dbg("Configuring event queues %d", dev->nb_event_queues); + + nb_lf = dev->nb_event_queues; + /* Ask AF to attach required LFs. */ + rc = sso_hw_lf_cfg(mbox, SSO_LF_GGRP, nb_lf, true); + if (rc < 0) { + otx2_err("Failed to attach SSO GGRP LF"); + return -ENODEV; + } + + if (sso_lf_cfg(dev, mbox, SSO_LF_GGRP, nb_lf, true) < 0) { + sso_hw_lf_cfg(mbox, SSO_LF_GGRP, nb_lf, false); + otx2_err("Failed to init SSO GGRP LF"); + return -ENODEV; + } + + return rc; +} + +static int +sso_xaq_allocate(struct otx2_sso_evdev *dev) +{ + const struct rte_memzone *mz; + struct npa_aura_s *aura; + static int reconfig_cnt; + char pool_name[RTE_MEMZONE_NAMESIZE]; + uint32_t xaq_cnt; + int rc; + + if (dev->xaq_pool) + rte_mempool_free(dev->xaq_pool); + + /* + * Allocate memory for Add work backpressure. + */ + mz = rte_memzone_lookup(OTX2_SSO_FC_NAME); + if (mz == NULL) + mz = rte_memzone_reserve_aligned(OTX2_SSO_FC_NAME, + OTX2_ALIGN + + sizeof(struct npa_aura_s), + rte_socket_id(), + RTE_MEMZONE_IOVA_CONTIG, + OTX2_ALIGN); + if (mz == NULL) { + otx2_err("Failed to allocate mem for fcmem"); + return -ENOMEM; + } + + dev->fc_iova = mz->iova; + dev->fc_mem = mz->addr; + + aura = (struct npa_aura_s *)((uintptr_t)dev->fc_mem + OTX2_ALIGN); + memset(aura, 0, sizeof(struct npa_aura_s)); + + aura->fc_ena = 1; + aura->fc_addr = dev->fc_iova; + aura->fc_hyst_bits = 0; /* Store count on all updates */ + + /* Taken from HRM 14.3.3(4) */ + xaq_cnt = dev->nb_event_queues * OTX2_SSO_XAQ_CACHE_CNT; + if (dev->xae_cnt) + xaq_cnt += dev->xae_cnt / dev->xae_waes; + else if (dev->adptr_xae_cnt) + xaq_cnt += (dev->adptr_xae_cnt / dev->xae_waes) + + (OTX2_SSO_XAQ_SLACK * dev->nb_event_queues); + else + xaq_cnt += (dev->iue / dev->xae_waes) + + (OTX2_SSO_XAQ_SLACK * dev->nb_event_queues); + + otx2_sso_dbg("Configuring %d xaq buffers", xaq_cnt); + /* Setup XAQ based on number of nb queues. 
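The count computed above gives each event queue OTX2_SSO_XAQ_CACHE_CNT buffers plus a share sized from the configured XAE count or, failing that, from the adapter/in-unit entry estimates with OTX2_SSO_XAQ_SLACK headroom per queue.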
*/ + snprintf(pool_name, 30, "otx2_xaq_buf_pool_%d", reconfig_cnt); + dev->xaq_pool = (void *)rte_mempool_create_empty(pool_name, + xaq_cnt, dev->xaq_buf_size, 0, 0, + rte_socket_id(), 0); + + if (dev->xaq_pool == NULL) { + otx2_err("Unable to create empty mempool."); + rte_memzone_free(mz); + return -ENOMEM; + } + + rc = rte_mempool_set_ops_byname(dev->xaq_pool, + rte_mbuf_platform_mempool_ops(), aura); + if (rc != 0) { + otx2_err("Unable to set xaqpool ops."); + goto alloc_fail; + } + + rc = rte_mempool_populate_default(dev->xaq_pool); + if (rc < 0) { + otx2_err("Unable to set populate xaqpool."); + goto alloc_fail; + } + reconfig_cnt++; + /* When SW does addwork (enqueue) check if there is space in XAQ by + * comparing fc_addr above against the xaq_lmt calculated below. + * There should be a minimum headroom (OTX2_SSO_XAQ_SLACK / 2) for SSO + * to request XAQ to cache them even before enqueue is called. + */ + dev->xaq_lmt = xaq_cnt - (OTX2_SSO_XAQ_SLACK / 2 * + dev->nb_event_queues); + dev->nb_xaq_cfg = xaq_cnt; + + return 0; +alloc_fail: + rte_mempool_free(dev->xaq_pool); + rte_memzone_free(mz); + return rc; +} + +static int +sso_ggrp_alloc_xaq(struct otx2_sso_evdev *dev) +{ + struct otx2_mbox *mbox = dev->mbox; + struct sso_hw_setconfig *req; + + otx2_sso_dbg("Configuring XAQ for GGRPs"); + req = otx2_mbox_alloc_msg_sso_hw_setconfig(mbox); + req->npa_pf_func = otx2_npa_pf_func_get(); + req->npa_aura_id = npa_lf_aura_handle_to_aura(dev->xaq_pool->pool_id); + req->hwgrps = dev->nb_event_queues; + + return otx2_mbox_process(mbox); +} + +static void +sso_lf_teardown(struct otx2_sso_evdev *dev, + enum otx2_sso_lf_type lf_type) +{ + uint8_t nb_lf; + + switch (lf_type) { + case SSO_LF_GGRP: + nb_lf = dev->nb_event_queues; + break; + case SSO_LF_GWS: + nb_lf = dev->nb_event_ports; + nb_lf *= dev->dual_ws ? 2 : 1; + break; + default: + return; + } + + sso_lf_cfg(dev, dev->mbox, lf_type, nb_lf, false); + sso_hw_lf_cfg(dev->mbox, lf_type, nb_lf, false); +} + +static int +otx2_sso_configure(const struct rte_eventdev *event_dev) +{ + struct rte_event_dev_config *conf = &event_dev->data->dev_conf; + struct otx2_sso_evdev *dev = sso_pmd_priv(event_dev); + uint32_t deq_tmo_ns; + int rc; + + sso_func_trace(); + deq_tmo_ns = conf->dequeue_timeout_ns; + + if (deq_tmo_ns == 0) + deq_tmo_ns = dev->min_dequeue_timeout_ns; + + if (deq_tmo_ns < dev->min_dequeue_timeout_ns || + deq_tmo_ns > dev->max_dequeue_timeout_ns) { + otx2_err("Unsupported dequeue timeout requested"); + return -EINVAL; + } + + if (conf->event_dev_cfg & RTE_EVENT_DEV_CFG_PER_DEQUEUE_TIMEOUT) + dev->is_timeout_deq = 1; + + dev->deq_tmo_ns = deq_tmo_ns; + + if (conf->nb_event_ports > dev->max_event_ports || + conf->nb_event_queues > dev->max_event_queues) { + otx2_err("Unsupported event queues/ports requested"); + return -EINVAL; + } + + if (conf->nb_event_port_dequeue_depth > 1) { + otx2_err("Unsupported event port deq depth requested"); + return -EINVAL; + } + + if (conf->nb_event_port_enqueue_depth > 1) { + otx2_err("Unsupported event port enq depth requested"); + return -EINVAL; + } + + if (dev->configured) + sso_unregister_irqs(event_dev); + + if (dev->nb_event_queues) { + /* Finit any previous queues. */ + sso_lf_teardown(dev, SSO_LF_GGRP); + } + if (dev->nb_event_ports) { + /* Finit any previous ports. 
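i.e. tear down the GWS LFs left over from an earlier configuration before the ports are re-created below.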
*/ + sso_lf_teardown(dev, SSO_LF_GWS); + } + + dev->nb_event_queues = conf->nb_event_queues; + dev->nb_event_ports = conf->nb_event_ports; + + if (dev->dual_ws) + rc = sso_configure_dual_ports(event_dev); + else + rc = sso_configure_ports(event_dev); + + if (rc < 0) { + otx2_err("Failed to configure event ports"); + return -ENODEV; + } + + if (sso_configure_queues(event_dev) < 0) { + otx2_err("Failed to configure event queues"); + rc = -ENODEV; + goto teardown_hws; + } + + if (sso_xaq_allocate(dev) < 0) { + rc = -ENOMEM; + goto teardown_hwggrp; + } + + /* Clear any prior port-queue mapping. */ + sso_clr_links(event_dev); + rc = sso_ggrp_alloc_xaq(dev); + if (rc < 0) { + otx2_err("Failed to alloc xaq to ggrp %d", rc); + goto teardown_hwggrp; + } + + rc = sso_get_msix_offsets(event_dev); + if (rc < 0) { + otx2_err("Failed to get msix offsets %d", rc); + goto teardown_hwggrp; + } + + rc = sso_register_irqs(event_dev); + if (rc < 0) { + otx2_err("Failed to register irq %d", rc); + goto teardown_hwggrp; + } + + dev->configured = 1; + rte_mb(); + + return 0; +teardown_hwggrp: + sso_lf_teardown(dev, SSO_LF_GGRP); +teardown_hws: + sso_lf_teardown(dev, SSO_LF_GWS); + dev->nb_event_queues = 0; + dev->nb_event_ports = 0; + dev->configured = 0; + return rc; +} + +static void +otx2_sso_queue_def_conf(struct rte_eventdev *event_dev, uint8_t queue_id, + struct rte_event_queue_conf *queue_conf) +{ + RTE_SET_USED(event_dev); + RTE_SET_USED(queue_id); + + queue_conf->nb_atomic_flows = (1ULL << 20); + queue_conf->nb_atomic_order_sequences = (1ULL << 20); + queue_conf->event_queue_cfg = RTE_EVENT_QUEUE_CFG_ALL_TYPES; + queue_conf->priority = RTE_EVENT_DEV_PRIORITY_NORMAL; +} + +static int +otx2_sso_queue_setup(struct rte_eventdev *event_dev, uint8_t queue_id, + const struct rte_event_queue_conf *queue_conf) +{ + struct otx2_sso_evdev *dev = sso_pmd_priv(event_dev); + struct otx2_mbox *mbox = dev->mbox; + struct sso_grp_priority *req; + int rc; + + sso_func_trace("Queue=%d prio=%d", queue_id, queue_conf->priority); + + req = otx2_mbox_alloc_msg_sso_grp_set_priority(dev->mbox); + req->grp = queue_id; + req->weight = 0xFF; + req->affinity = 0xFF; + /* Normalize <0-255> to <0-7> */ + req->priority = queue_conf->priority / 32; + + rc = otx2_mbox_process(mbox); + if (rc < 0) { + otx2_err("Failed to set priority queue=%d", queue_id); + return rc; + } + + return 0; +} + +static void +otx2_sso_port_def_conf(struct rte_eventdev *event_dev, uint8_t port_id, + struct rte_event_port_conf *port_conf) +{ + struct otx2_sso_evdev *dev = sso_pmd_priv(event_dev); + + RTE_SET_USED(port_id); + port_conf->new_event_threshold = dev->max_num_events; + port_conf->dequeue_depth = 1; + port_conf->enqueue_depth = 1; +} + +static int +otx2_sso_port_setup(struct rte_eventdev *event_dev, uint8_t port_id, + const struct rte_event_port_conf *port_conf) +{ + struct otx2_sso_evdev *dev = sso_pmd_priv(event_dev); + uintptr_t grps_base[OTX2_SSO_MAX_VHGRP] = {0}; + uint64_t val; + uint16_t q; + + sso_func_trace("Port=%d", port_id); + RTE_SET_USED(port_conf); + + if (event_dev->data->ports[port_id] == NULL) { + otx2_err("Invalid port Id %d", port_id); + return -EINVAL; + } + + for (q = 0; q < dev->nb_event_queues; q++) { + grps_base[q] = dev->bar2 + (RVU_BLOCK_ADDR_SSO << 20 | q << 12); + if (grps_base[q] == 0) { + otx2_err("Failed to get grp[%d] base addr", q); + return -EINVAL; + } + } + + /* Set get_work timeout for HWS */ + val = NSEC2USEC(dev->deq_tmo_ns) - 1; + + if (dev->dual_ws) { + struct otx2_ssogws_dual *ws = 
event_dev->data->ports[port_id]; + + rte_memcpy(ws->grps_base, grps_base, + sizeof(uintptr_t) * OTX2_SSO_MAX_VHGRP); + ws->fc_mem = dev->fc_mem; + ws->xaq_lmt = dev->xaq_lmt; + ws->tstamp = dev->tstamp; + otx2_write64(val, OTX2_SSOW_GET_BASE_ADDR( + ws->ws_state[0].getwrk_op) + SSOW_LF_GWS_NW_TIM); + otx2_write64(val, OTX2_SSOW_GET_BASE_ADDR( + ws->ws_state[1].getwrk_op) + SSOW_LF_GWS_NW_TIM); + } else { + struct otx2_ssogws *ws = event_dev->data->ports[port_id]; + uintptr_t base = OTX2_SSOW_GET_BASE_ADDR(ws->getwrk_op); + + rte_memcpy(ws->grps_base, grps_base, + sizeof(uintptr_t) * OTX2_SSO_MAX_VHGRP); + ws->fc_mem = dev->fc_mem; + ws->xaq_lmt = dev->xaq_lmt; + ws->tstamp = dev->tstamp; + otx2_write64(val, base + SSOW_LF_GWS_NW_TIM); + } + + otx2_sso_dbg("Port=%d ws=%p", port_id, event_dev->data->ports[port_id]); + + return 0; +} + +static int +otx2_sso_timeout_ticks(struct rte_eventdev *event_dev, uint64_t ns, + uint64_t *tmo_ticks) +{ + RTE_SET_USED(event_dev); + *tmo_ticks = NSEC2TICK(ns, rte_get_timer_hz()); + + return 0; +} + +static void +ssogws_dump(struct otx2_ssogws *ws, FILE *f) +{ + uintptr_t base = OTX2_SSOW_GET_BASE_ADDR(ws->getwrk_op); + + fprintf(f, "SSOW_LF_GWS Base addr 0x%" PRIx64 "\n", (uint64_t)base); + fprintf(f, "SSOW_LF_GWS_LINKS 0x%" PRIx64 "\n", + otx2_read64(base + SSOW_LF_GWS_LINKS)); + fprintf(f, "SSOW_LF_GWS_PENDWQP 0x%" PRIx64 "\n", + otx2_read64(base + SSOW_LF_GWS_PENDWQP)); + fprintf(f, "SSOW_LF_GWS_PENDSTATE 0x%" PRIx64 "\n", + otx2_read64(base + SSOW_LF_GWS_PENDSTATE)); + fprintf(f, "SSOW_LF_GWS_NW_TIM 0x%" PRIx64 "\n", + otx2_read64(base + SSOW_LF_GWS_NW_TIM)); + fprintf(f, "SSOW_LF_GWS_TAG 0x%" PRIx64 "\n", + otx2_read64(base + SSOW_LF_GWS_TAG)); + fprintf(f, "SSOW_LF_GWS_WQP 0x%" PRIx64 "\n", + otx2_read64(base + SSOW_LF_GWS_TAG)); + fprintf(f, "SSOW_LF_GWS_SWTP 0x%" PRIx64 "\n", + otx2_read64(base + SSOW_LF_GWS_SWTP)); + fprintf(f, "SSOW_LF_GWS_PENDTAG 0x%" PRIx64 "\n", + otx2_read64(base + SSOW_LF_GWS_PENDTAG)); +} + +static void +ssoggrp_dump(uintptr_t base, FILE *f) +{ + fprintf(f, "SSO_LF_GGRP Base addr 0x%" PRIx64 "\n", (uint64_t)base); + fprintf(f, "SSO_LF_GGRP_QCTL 0x%" PRIx64 "\n", + otx2_read64(base + SSO_LF_GGRP_QCTL)); + fprintf(f, "SSO_LF_GGRP_XAQ_CNT 0x%" PRIx64 "\n", + otx2_read64(base + SSO_LF_GGRP_XAQ_CNT)); + fprintf(f, "SSO_LF_GGRP_INT_THR 0x%" PRIx64 "\n", + otx2_read64(base + SSO_LF_GGRP_INT_THR)); + fprintf(f, "SSO_LF_GGRP_INT_CNT 0x%" PRIX64 "\n", + otx2_read64(base + SSO_LF_GGRP_INT_CNT)); + fprintf(f, "SSO_LF_GGRP_AQ_CNT 0x%" PRIX64 "\n", + otx2_read64(base + SSO_LF_GGRP_AQ_CNT)); + fprintf(f, "SSO_LF_GGRP_AQ_THR 0x%" PRIX64 "\n", + otx2_read64(base + SSO_LF_GGRP_AQ_THR)); + fprintf(f, "SSO_LF_GGRP_MISC_CNT 0x%" PRIx64 "\n", + otx2_read64(base + SSO_LF_GGRP_MISC_CNT)); +} + +static void +otx2_sso_dump(struct rte_eventdev *event_dev, FILE *f) +{ + struct otx2_sso_evdev *dev = sso_pmd_priv(event_dev); + uint8_t queue; + uint8_t port; + + fprintf(f, "[%s] SSO running in [%s] mode\n", __func__, dev->dual_ws ? 
+ "dual_ws" : "single_ws"); + /* Dump SSOW registers */ + for (port = 0; port < dev->nb_event_ports; port++) { + if (dev->dual_ws) { + struct otx2_ssogws_dual *ws = + event_dev->data->ports[port]; + + fprintf(f, "[%s] SSO dual workslot[%d] vws[%d] dump\n", + __func__, port, 0); + ssogws_dump((struct otx2_ssogws *)&ws->ws_state[0], f); + fprintf(f, "[%s]SSO dual workslot[%d] vws[%d] dump\n", + __func__, port, 1); + ssogws_dump((struct otx2_ssogws *)&ws->ws_state[1], f); + } else { + fprintf(f, "[%s]SSO single workslot[%d] dump\n", + __func__, port); + ssogws_dump(event_dev->data->ports[port], f); + } + } + + /* Dump SSO registers */ + for (queue = 0; queue < dev->nb_event_queues; queue++) { + fprintf(f, "[%s]SSO group[%d] dump\n", __func__, queue); + if (dev->dual_ws) { + struct otx2_ssogws_dual *ws = event_dev->data->ports[0]; + ssoggrp_dump(ws->grps_base[queue], f); + } else { + struct otx2_ssogws *ws = event_dev->data->ports[0]; + ssoggrp_dump(ws->grps_base[queue], f); + } + } +} + +static void +otx2_handle_event(void *arg, struct rte_event event) +{ + struct rte_eventdev *event_dev = arg; + + if (event_dev->dev_ops->dev_stop_flush != NULL) + event_dev->dev_ops->dev_stop_flush(event_dev->data->dev_id, + event, event_dev->data->dev_stop_flush_arg); +} + +static void +sso_qos_cfg(struct rte_eventdev *event_dev) +{ + struct otx2_sso_evdev *dev = sso_pmd_priv(event_dev); + struct sso_grp_qos_cfg *req; + uint16_t i; + + for (i = 0; i < dev->qos_queue_cnt; i++) { + uint8_t xaq_prcnt = dev->qos_parse_data[i].xaq_prcnt; + uint8_t iaq_prcnt = dev->qos_parse_data[i].iaq_prcnt; + uint8_t taq_prcnt = dev->qos_parse_data[i].taq_prcnt; + + if (dev->qos_parse_data[i].queue >= dev->nb_event_queues) + continue; + + req = otx2_mbox_alloc_msg_sso_grp_qos_config(dev->mbox); + req->xaq_limit = (dev->nb_xaq_cfg * + (xaq_prcnt ? xaq_prcnt : 100)) / 100; + req->taq_thr = (SSO_HWGRP_IAQ_MAX_THR_MASK * + (iaq_prcnt ? iaq_prcnt : 100)) / 100; + req->iaq_thr = (SSO_HWGRP_TAQ_MAX_THR_MASK * + (taq_prcnt ? 
taq_prcnt : 100)) / 100; + } + + if (dev->qos_queue_cnt) + otx2_mbox_process(dev->mbox); +} + +static void +sso_cleanup(struct rte_eventdev *event_dev, uint8_t enable) +{ + struct otx2_sso_evdev *dev = sso_pmd_priv(event_dev); + uint16_t i; + + for (i = 0; i < dev->nb_event_ports; i++) { + if (dev->dual_ws) { + struct otx2_ssogws_dual *ws; + + ws = event_dev->data->ports[i]; + ssogws_reset((struct otx2_ssogws *)&ws->ws_state[0]); + ssogws_reset((struct otx2_ssogws *)&ws->ws_state[1]); + ws->swtag_req = 0; + ws->vws = 0; + ws->ws_state[0].cur_grp = 0; + ws->ws_state[0].cur_tt = SSO_SYNC_EMPTY; + ws->ws_state[1].cur_grp = 0; + ws->ws_state[1].cur_tt = SSO_SYNC_EMPTY; + } else { + struct otx2_ssogws *ws; + + ws = event_dev->data->ports[i]; + ssogws_reset(ws); + ws->swtag_req = 0; + ws->cur_grp = 0; + ws->cur_tt = SSO_SYNC_EMPTY; + } + } + + rte_mb(); + if (dev->dual_ws) { + struct otx2_ssogws_dual *ws = event_dev->data->ports[0]; + struct otx2_ssogws temp_ws; + + memcpy(&temp_ws, &ws->ws_state[0], + sizeof(struct otx2_ssogws_state)); + for (i = 0; i < dev->nb_event_queues; i++) { + /* Consume all the events through HWS0 */ + ssogws_flush_events(&temp_ws, i, ws->grps_base[i], + otx2_handle_event, event_dev); + /* Enable/Disable SSO GGRP */ + otx2_write64(enable, ws->grps_base[i] + + SSO_LF_GGRP_QCTL); + } + ws->ws_state[0].cur_grp = 0; + ws->ws_state[0].cur_tt = SSO_SYNC_EMPTY; + } else { + struct otx2_ssogws *ws = event_dev->data->ports[0]; + + for (i = 0; i < dev->nb_event_queues; i++) { + /* Consume all the events through HWS0 */ + ssogws_flush_events(ws, i, ws->grps_base[i], + otx2_handle_event, event_dev); + /* Enable/Disable SSO GGRP */ + otx2_write64(enable, ws->grps_base[i] + + SSO_LF_GGRP_QCTL); + } + ws->cur_grp = 0; + ws->cur_tt = SSO_SYNC_EMPTY; + } + + /* reset SSO GWS cache */ + otx2_mbox_alloc_msg_sso_ws_cache_inv(dev->mbox); + otx2_mbox_process(dev->mbox); +} + +int +sso_xae_reconfigure(struct rte_eventdev *event_dev) +{ + struct otx2_sso_evdev *dev = sso_pmd_priv(event_dev); + struct rte_mempool *prev_xaq_pool; + int rc = 0; + + if (event_dev->data->dev_started) + sso_cleanup(event_dev, 0); + + prev_xaq_pool = dev->xaq_pool; + dev->xaq_pool = NULL; + rc = sso_xaq_allocate(dev); + if (rc < 0) { + otx2_err("Failed to alloc xaq pool %d", rc); + rte_mempool_free(prev_xaq_pool); + return rc; + } + rc = sso_ggrp_alloc_xaq(dev); + if (rc < 0) { + otx2_err("Failed to alloc xaq to ggrp %d", rc); + rte_mempool_free(prev_xaq_pool); + return rc; + } + + rte_mempool_free(prev_xaq_pool); + rte_mb(); + if (event_dev->data->dev_started) + sso_cleanup(event_dev, 1); + + return 0; +} + +static int +otx2_sso_start(struct rte_eventdev *event_dev) +{ + sso_func_trace(); + sso_qos_cfg(event_dev); + sso_cleanup(event_dev, 1); + sso_fastpath_fns_set(event_dev); + + return 0; +} + +static void +otx2_sso_stop(struct rte_eventdev *event_dev) +{ + sso_func_trace(); + sso_cleanup(event_dev, 0); + rte_mb(); +} + +static int +otx2_sso_close(struct rte_eventdev *event_dev) +{ + struct otx2_sso_evdev *dev = sso_pmd_priv(event_dev); + uint8_t all_queues[RTE_EVENT_MAX_QUEUES_PER_DEV]; + uint16_t i; + + if (!dev->configured) + return 0; + + sso_unregister_irqs(event_dev); + + for (i = 0; i < dev->nb_event_queues; i++) + all_queues[i] = i; + + for (i = 0; i < dev->nb_event_ports; i++) + otx2_sso_port_unlink(event_dev, event_dev->data->ports[i], + all_queues, dev->nb_event_queues); + + sso_lf_teardown(dev, SSO_LF_GGRP); + sso_lf_teardown(dev, SSO_LF_GWS); + dev->nb_event_ports = 0; + dev->nb_event_queues = 0; + 
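/* Release the XAQ pool and the flow-control memzone set up at configure time. */
+ 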
rte_mempool_free(dev->xaq_pool); + rte_memzone_free(rte_memzone_lookup(OTX2_SSO_FC_NAME)); + + return 0; +} + +/* Initialize and register event driver with DPDK Application */ +static struct rte_eventdev_ops otx2_sso_ops = { + .dev_infos_get = otx2_sso_info_get, + .dev_configure = otx2_sso_configure, + .queue_def_conf = otx2_sso_queue_def_conf, + .queue_setup = otx2_sso_queue_setup, + .queue_release = otx2_sso_queue_release, + .port_def_conf = otx2_sso_port_def_conf, + .port_setup = otx2_sso_port_setup, + .port_release = otx2_sso_port_release, + .port_link = otx2_sso_port_link, + .port_unlink = otx2_sso_port_unlink, + .timeout_ticks = otx2_sso_timeout_ticks, + + .eth_rx_adapter_caps_get = otx2_sso_rx_adapter_caps_get, + .eth_rx_adapter_queue_add = otx2_sso_rx_adapter_queue_add, + .eth_rx_adapter_queue_del = otx2_sso_rx_adapter_queue_del, + .eth_rx_adapter_start = otx2_sso_rx_adapter_start, + .eth_rx_adapter_stop = otx2_sso_rx_adapter_stop, + + .eth_tx_adapter_caps_get = otx2_sso_tx_adapter_caps_get, + .eth_tx_adapter_queue_add = otx2_sso_tx_adapter_queue_add, + .eth_tx_adapter_queue_del = otx2_sso_tx_adapter_queue_del, + + .timer_adapter_caps_get = otx2_tim_caps_get, + + .xstats_get = otx2_sso_xstats_get, + .xstats_reset = otx2_sso_xstats_reset, + .xstats_get_names = otx2_sso_xstats_get_names, + + .dump = otx2_sso_dump, + .dev_start = otx2_sso_start, + .dev_stop = otx2_sso_stop, + .dev_close = otx2_sso_close, + .dev_selftest = otx2_sso_selftest, +}; + +#define OTX2_SSO_XAE_CNT "xae_cnt" +#define OTX2_SSO_SINGLE_WS "single_ws" +#define OTX2_SSO_GGRP_QOS "qos" +#define OTX2_SSO_SELFTEST "selftest" + +static void +parse_queue_param(char *value, void *opaque) +{ + struct otx2_sso_qos queue_qos = {0}; + uint8_t *val = (uint8_t *)&queue_qos; + struct otx2_sso_evdev *dev = opaque; + char *tok = strtok(value, "-"); + struct otx2_sso_qos *old_ptr; + + if (!strlen(value)) + return; + + while (tok != NULL) { + *val = atoi(tok); + tok = strtok(NULL, "-"); + val++; + } + + if (val != (&queue_qos.iaq_prcnt + 1)) { + otx2_err("Invalid QoS parameter expected [Qx-XAQ-TAQ-IAQ]"); + return; + } + + dev->qos_queue_cnt++; + old_ptr = dev->qos_parse_data; + dev->qos_parse_data = rte_realloc(dev->qos_parse_data, + sizeof(struct otx2_sso_qos) * + dev->qos_queue_cnt, 0); + if (dev->qos_parse_data == NULL) { + dev->qos_parse_data = old_ptr; + dev->qos_queue_cnt--; + return; + } + dev->qos_parse_data[dev->qos_queue_cnt - 1] = queue_qos; +} + +static void +parse_qos_list(const char *value, void *opaque) +{ + char *s = strdup(value); + char *start = NULL; + char *end = NULL; + char *f = s; + + while (*s) { + if (*s == '[') + start = s; + else if (*s == ']') + end = s; + + if (start && start < end) { + *end = 0; + parse_queue_param(start + 1, opaque); + s = end; + start = end; + } + s++; + } + + free(f); +} + +static int +parse_sso_kvargs_dict(const char *key, const char *value, void *opaque) +{ + RTE_SET_USED(key); + + /* Dict format [Qx-XAQ-TAQ-IAQ][Qz-XAQ-TAQ-IAQ] use '-' cause ',' + * isn't allowed. Everything is expressed in percentages, 0 represents + * default. 
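+ * For example, qos=[1-50-50-50] limits event queue 1 to 50% of the XAQ, + * TAQ and IAQ resources; several [..] groups may be given back to back. 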
+ */ + parse_qos_list(value, opaque); + + return 0; +} + +static void +sso_parse_devargs(struct otx2_sso_evdev *dev, struct rte_devargs *devargs) +{ + struct rte_kvargs *kvlist; + uint8_t single_ws = 0; + + if (devargs == NULL) + return; + kvlist = rte_kvargs_parse(devargs->args, NULL); + if (kvlist == NULL) + return; + + rte_kvargs_process(kvlist, OTX2_SSO_SELFTEST, &parse_kvargs_flag, + &dev->selftest); + rte_kvargs_process(kvlist, OTX2_SSO_XAE_CNT, &parse_kvargs_value, + &dev->xae_cnt); + rte_kvargs_process(kvlist, OTX2_SSO_SINGLE_WS, &parse_kvargs_flag, + &single_ws); + rte_kvargs_process(kvlist, OTX2_SSO_GGRP_QOS, &parse_sso_kvargs_dict, + dev); + otx2_parse_common_devargs(kvlist); + dev->dual_ws = !single_ws; + rte_kvargs_free(kvlist); +} + +static int +otx2_sso_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) +{ + return rte_event_pmd_pci_probe(pci_drv, pci_dev, + sizeof(struct otx2_sso_evdev), + otx2_sso_init); +} + +static int +otx2_sso_remove(struct rte_pci_device *pci_dev) +{ + return rte_event_pmd_pci_remove(pci_dev, otx2_sso_fini); +} + +static const struct rte_pci_id pci_sso_map[] = { + { + RTE_PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, + PCI_DEVID_OCTEONTX2_RVU_SSO_TIM_PF) + }, + { + .vendor_id = 0, + }, +}; + +static struct rte_pci_driver pci_sso = { + .id_table = pci_sso_map, + .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_NEED_IOVA_AS_VA, + .probe = otx2_sso_probe, + .remove = otx2_sso_remove, +}; + +int +otx2_sso_init(struct rte_eventdev *event_dev) +{ + struct free_rsrcs_rsp *rsrc_cnt; + struct rte_pci_device *pci_dev; + struct otx2_sso_evdev *dev; + int rc; + + event_dev->dev_ops = &otx2_sso_ops; + /* For secondary processes, the primary has done all the work */ + if (rte_eal_process_type() != RTE_PROC_PRIMARY) { + sso_fastpath_fns_set(event_dev); + return 0; + } + + dev = sso_pmd_priv(event_dev); + + pci_dev = container_of(event_dev->dev, struct rte_pci_device, device); + + /* Initialize the base otx2_dev object */ + rc = otx2_dev_init(pci_dev, dev); + if (rc < 0) { + otx2_err("Failed to initialize otx2_dev rc=%d", rc); + goto error; + } + + /* Get SSO and SSOW MSIX rsrc cnt */ + otx2_mbox_alloc_msg_free_rsrc_cnt(dev->mbox); + rc = otx2_mbox_process_msg(dev->mbox, (void *)&rsrc_cnt); + if (rc < 0) { + otx2_err("Unable to get free rsrc count"); + goto otx2_dev_uninit; + } + otx2_sso_dbg("SSO %d SSOW %d NPA %d provisioned", rsrc_cnt->sso, + rsrc_cnt->ssow, rsrc_cnt->npa); + + dev->max_event_ports = RTE_MIN(rsrc_cnt->ssow, OTX2_SSO_MAX_VHWS); + dev->max_event_queues = RTE_MIN(rsrc_cnt->sso, OTX2_SSO_MAX_VHGRP); + /* Grab the NPA LF if required */ + rc = otx2_npa_lf_init(pci_dev, dev); + if (rc < 0) { + otx2_err("Unable to init NPA lf. 
It might not be provisioned"); + goto otx2_dev_uninit; + } + + dev->drv_inited = true; + dev->is_timeout_deq = 0; + dev->min_dequeue_timeout_ns = USEC2NSEC(1); + dev->max_dequeue_timeout_ns = USEC2NSEC(0x3FF); + dev->max_num_events = -1; + dev->nb_event_queues = 0; + dev->nb_event_ports = 0; + + if (!dev->max_event_ports || !dev->max_event_queues) { + otx2_err("Not enough eventdev resource queues=%d ports=%d", + dev->max_event_queues, dev->max_event_ports); + rc = -ENODEV; + goto otx2_npa_lf_uninit; + } + + dev->dual_ws = 1; + sso_parse_devargs(dev, pci_dev->device.devargs); + if (dev->dual_ws) { + otx2_sso_dbg("Using dual workslot mode"); + dev->max_event_ports = dev->max_event_ports / 2; + } else { + otx2_sso_dbg("Using single workslot mode"); + } + + otx2_sso_pf_func_set(dev->pf_func); + otx2_sso_dbg("Initializing %s max_queues=%d max_ports=%d", + event_dev->data->name, dev->max_event_queues, + dev->max_event_ports); + if (dev->selftest) { + event_dev->dev->driver = &pci_sso.driver; + event_dev->dev_ops->dev_selftest(); + } + + otx2_tim_init(pci_dev, (struct otx2_dev *)dev); + + return 0; + +otx2_npa_lf_uninit: + otx2_npa_lf_fini(); +otx2_dev_uninit: + otx2_dev_fini(pci_dev, dev); +error: + return rc; +} + +int +otx2_sso_fini(struct rte_eventdev *event_dev) +{ + struct otx2_sso_evdev *dev = sso_pmd_priv(event_dev); + struct rte_pci_device *pci_dev; + + /* For secondary processes, nothing to be done */ + if (rte_eal_process_type() != RTE_PROC_PRIMARY) + return 0; + + pci_dev = container_of(event_dev->dev, struct rte_pci_device, device); + + if (!dev->drv_inited) + goto dev_fini; + + dev->drv_inited = false; + otx2_npa_lf_fini(); + +dev_fini: + if (otx2_npa_lf_active(dev)) { + otx2_info("Common resource in use by other devices"); + return -EAGAIN; + } + + otx2_tim_fini(); + otx2_dev_fini(pci_dev, dev); + + return 0; +} + +RTE_PMD_REGISTER_PCI(event_octeontx2, pci_sso); +RTE_PMD_REGISTER_PCI_TABLE(event_octeontx2, pci_sso_map); +RTE_PMD_REGISTER_KMOD_DEP(event_octeontx2, "vfio-pci"); +RTE_PMD_REGISTER_PARAM_STRING(event_octeontx2, OTX2_SSO_XAE_CNT "=<int>" + OTX2_SSO_SINGLE_WS "=1" + OTX2_SSO_GGRP_QOS "=<string>" + OTX2_SSO_SELFTEST "=1" + OTX2_NPA_LOCK_MASK "=<1-65535>"); diff --git a/src/spdk/dpdk/drivers/event/octeontx2/otx2_evdev.h b/src/spdk/dpdk/drivers/event/octeontx2/otx2_evdev.h new file mode 100644 index 000000000..3b477820f --- /dev/null +++ b/src/spdk/dpdk/drivers/event/octeontx2/otx2_evdev.h @@ -0,0 +1,400 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2019 Marvell International Ltd. 
+ */ + +#ifndef __OTX2_EVDEV_H__ +#define __OTX2_EVDEV_H__ + +#include <rte_eventdev.h> +#include <rte_eventdev_pmd.h> +#include <rte_event_eth_rx_adapter.h> +#include <rte_event_eth_tx_adapter.h> + +#include "otx2_common.h" +#include "otx2_dev.h" +#include "otx2_ethdev.h" +#include "otx2_mempool.h" +#include "otx2_tim_evdev.h" + +#define EVENTDEV_NAME_OCTEONTX2_PMD event_octeontx2 + +#define sso_func_trace otx2_sso_dbg + +#define OTX2_SSO_MAX_VHGRP RTE_EVENT_MAX_QUEUES_PER_DEV +#define OTX2_SSO_MAX_VHWS (UINT8_MAX) +#define OTX2_SSO_FC_NAME "otx2_evdev_xaq_fc" +#define OTX2_SSO_SQB_LIMIT (0x180) +#define OTX2_SSO_XAQ_SLACK (8) +#define OTX2_SSO_XAQ_CACHE_CNT (0x7) +#define OTX2_SSO_WQE_SG_PTR (9) + +/* SSO LF register offsets (BAR2) */ +#define SSO_LF_GGRP_OP_ADD_WORK0 (0x0ull) +#define SSO_LF_GGRP_OP_ADD_WORK1 (0x8ull) + +#define SSO_LF_GGRP_QCTL (0x20ull) +#define SSO_LF_GGRP_EXE_DIS (0x80ull) +#define SSO_LF_GGRP_INT (0x100ull) +#define SSO_LF_GGRP_INT_W1S (0x108ull) +#define SSO_LF_GGRP_INT_ENA_W1S (0x110ull) +#define SSO_LF_GGRP_INT_ENA_W1C (0x118ull) +#define SSO_LF_GGRP_INT_THR (0x140ull) +#define SSO_LF_GGRP_INT_CNT (0x180ull) +#define SSO_LF_GGRP_XAQ_CNT (0x1b0ull) +#define SSO_LF_GGRP_AQ_CNT (0x1c0ull) +#define SSO_LF_GGRP_AQ_THR (0x1e0ull) +#define SSO_LF_GGRP_MISC_CNT (0x200ull) + +/* SSOW LF register offsets (BAR2) */ +#define SSOW_LF_GWS_LINKS (0x10ull) +#define SSOW_LF_GWS_PENDWQP (0x40ull) +#define SSOW_LF_GWS_PENDSTATE (0x50ull) +#define SSOW_LF_GWS_NW_TIM (0x70ull) +#define SSOW_LF_GWS_GRPMSK_CHG (0x80ull) +#define SSOW_LF_GWS_INT (0x100ull) +#define SSOW_LF_GWS_INT_W1S (0x108ull) +#define SSOW_LF_GWS_INT_ENA_W1S (0x110ull) +#define SSOW_LF_GWS_INT_ENA_W1C (0x118ull) +#define SSOW_LF_GWS_TAG (0x200ull) +#define SSOW_LF_GWS_WQP (0x210ull) +#define SSOW_LF_GWS_SWTP (0x220ull) +#define SSOW_LF_GWS_PENDTAG (0x230ull) +#define SSOW_LF_GWS_OP_ALLOC_WE (0x400ull) +#define SSOW_LF_GWS_OP_GET_WORK (0x600ull) +#define SSOW_LF_GWS_OP_SWTAG_FLUSH (0x800ull) +#define SSOW_LF_GWS_OP_SWTAG_UNTAG (0x810ull) +#define SSOW_LF_GWS_OP_SWTP_CLR (0x820ull) +#define SSOW_LF_GWS_OP_UPD_WQP_GRP0 (0x830ull) +#define SSOW_LF_GWS_OP_UPD_WQP_GRP1 (0x838ull) +#define SSOW_LF_GWS_OP_DESCHED (0x880ull) +#define SSOW_LF_GWS_OP_DESCHED_NOSCH (0x8c0ull) +#define SSOW_LF_GWS_OP_SWTAG_DESCHED (0x980ull) +#define SSOW_LF_GWS_OP_SWTAG_NOSCHED (0x9c0ull) +#define SSOW_LF_GWS_OP_CLR_NSCHED0 (0xa00ull) +#define SSOW_LF_GWS_OP_CLR_NSCHED1 (0xa08ull) +#define SSOW_LF_GWS_OP_SWTP_SET (0xc00ull) +#define SSOW_LF_GWS_OP_SWTAG_NORM (0xc10ull) +#define SSOW_LF_GWS_OP_SWTAG_FULL0 (0xc20ull) +#define SSOW_LF_GWS_OP_SWTAG_FULL1 (0xc28ull) +#define SSOW_LF_GWS_OP_GWC_INVAL (0xe00ull) + +#define OTX2_SSOW_GET_BASE_ADDR(_GW) ((_GW) - SSOW_LF_GWS_OP_GET_WORK) + +#define NSEC2USEC(__ns) ((__ns) / 1E3) +#define USEC2NSEC(__us) ((__us) * 1E3) +#define NSEC2TICK(__ns, __freq) (((__ns) * (__freq)) / 1E9) +#define TICK2NSEC(__tck, __freq) (((__tck) * 1E9) / (__freq)) + +enum otx2_sso_lf_type { + SSO_LF_GGRP, + SSO_LF_GWS +}; + +union otx2_sso_event { + uint64_t get_work0; + struct { + uint32_t flow_id:20; + uint32_t sub_event_type:8; + uint32_t event_type:4; + uint8_t op:2; + uint8_t rsvd:4; + uint8_t sched_type:2; + uint8_t queue_id; + uint8_t priority; + uint8_t impl_opaque; + }; +} __rte_aligned(64); + +enum { + SSO_SYNC_ORDERED, + SSO_SYNC_ATOMIC, + SSO_SYNC_UNTAGGED, + SSO_SYNC_EMPTY +}; + +struct otx2_sso_qos { + uint8_t queue; + uint8_t xaq_prcnt; + uint8_t taq_prcnt; + uint8_t iaq_prcnt; +}; + +struct otx2_sso_evdev { + 
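/* Eventdev private data; sso_pmd_priv() returns a pointer to this struct */ + 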
OTX2_DEV; /* Base class */ + uint8_t max_event_queues; + uint8_t max_event_ports; + uint8_t is_timeout_deq; + uint8_t nb_event_queues; + uint8_t nb_event_ports; + uint8_t configured; + uint32_t deq_tmo_ns; + uint32_t min_dequeue_timeout_ns; + uint32_t max_dequeue_timeout_ns; + int32_t max_num_events; + uint64_t *fc_mem; + uint64_t xaq_lmt; + uint64_t nb_xaq_cfg; + rte_iova_t fc_iova; + struct rte_mempool *xaq_pool; + uint64_t rx_offloads; + uint64_t tx_offloads; + uint64_t adptr_xae_cnt; + uint16_t rx_adptr_pool_cnt; + uint64_t *rx_adptr_pools; + uint16_t tim_adptr_ring_cnt; + uint16_t *timer_adptr_rings; + uint64_t *timer_adptr_sz; + /* Dev args */ + uint8_t dual_ws; + uint8_t selftest; + uint32_t xae_cnt; + uint8_t qos_queue_cnt; + struct otx2_sso_qos *qos_parse_data; + /* HW const */ + uint32_t xae_waes; + uint32_t xaq_buf_size; + uint32_t iue; + /* MSIX offsets */ + uint16_t sso_msixoff[OTX2_SSO_MAX_VHGRP]; + uint16_t ssow_msixoff[OTX2_SSO_MAX_VHWS]; + /* PTP timestamp */ + struct otx2_timesync_info *tstamp; +} __rte_cache_aligned; + +#define OTX2_SSOGWS_OPS \ + /* WS ops */ \ + uintptr_t getwrk_op; \ + uintptr_t tag_op; \ + uintptr_t wqp_op; \ + uintptr_t swtp_op; \ + uintptr_t swtag_norm_op; \ + uintptr_t swtag_desched_op; \ + uint8_t cur_tt; \ + uint8_t cur_grp + +/* Event port aka GWS */ +struct otx2_ssogws { + /* Get Work Fastpath data */ + OTX2_SSOGWS_OPS; + uint8_t swtag_req; + void *lookup_mem; + uint8_t port; + /* Add Work Fastpath data */ + uint64_t xaq_lmt __rte_cache_aligned; + uint64_t *fc_mem; + uintptr_t grps_base[OTX2_SSO_MAX_VHGRP]; + /* PTP timestamp */ + struct otx2_timesync_info *tstamp; +} __rte_cache_aligned; + +struct otx2_ssogws_state { + OTX2_SSOGWS_OPS; +}; + +struct otx2_ssogws_dual { + /* Get Work Fastpath data */ + struct otx2_ssogws_state ws_state[2]; /* Ping and Pong */ + uint8_t swtag_req; + uint8_t vws; /* Ping pong bit */ + void *lookup_mem; + uint8_t port; + /* Add Work Fastpath data */ + uint64_t xaq_lmt __rte_cache_aligned; + uint64_t *fc_mem; + uintptr_t grps_base[OTX2_SSO_MAX_VHGRP]; + /* PTP timestamp */ + struct otx2_timesync_info *tstamp; +} __rte_cache_aligned; + +static inline struct otx2_sso_evdev * +sso_pmd_priv(const struct rte_eventdev *event_dev) +{ + return event_dev->data->dev_private; +} + +static const union mbuf_initializer mbuf_init = { + .fields = { + .data_off = RTE_PKTMBUF_HEADROOM, + .refcnt = 1, + .nb_segs = 1, + .port = 0 + } +}; + +static __rte_always_inline void +otx2_wqe_to_mbuf(uint64_t get_work1, const uint64_t mbuf, uint8_t port_id, + const uint32_t tag, const uint32_t flags, + const void * const lookup_mem) +{ + struct nix_wqe_hdr_s *wqe = (struct nix_wqe_hdr_s *)get_work1; + uint64_t val = mbuf_init.value | (uint64_t)port_id << 48; + + if (flags & NIX_RX_OFFLOAD_TSTAMP_F) + val |= NIX_TIMESYNC_RX_OFFSET; + + otx2_nix_cqe_to_mbuf((struct nix_cqe_hdr_s *)wqe, tag, + (struct rte_mbuf *)mbuf, lookup_mem, + val, flags); + +} + +static inline int +parse_kvargs_flag(const char *key, const char *value, void *opaque) +{ + RTE_SET_USED(key); + + *(uint8_t *)opaque = !!atoi(value); + return 0; +} + +static inline int +parse_kvargs_value(const char *key, const char *value, void *opaque) +{ + RTE_SET_USED(key); + + *(uint32_t *)opaque = (uint32_t)atoi(value); + return 0; +} + +#define SSO_RX_ADPTR_ENQ_FASTPATH_FUNC NIX_RX_FASTPATH_MODES +#define SSO_TX_ADPTR_ENQ_FASTPATH_FUNC NIX_TX_FASTPATH_MODES + +/* Single WS API's */ +uint16_t otx2_ssogws_enq(void *port, const struct rte_event *ev); +uint16_t otx2_ssogws_enq_burst(void *port, 
const struct rte_event ev[], + uint16_t nb_events); +uint16_t otx2_ssogws_enq_new_burst(void *port, const struct rte_event ev[], + uint16_t nb_events); +uint16_t otx2_ssogws_enq_fwd_burst(void *port, const struct rte_event ev[], + uint16_t nb_events); + +/* Dual WS API's */ +uint16_t otx2_ssogws_dual_enq(void *port, const struct rte_event *ev); +uint16_t otx2_ssogws_dual_enq_burst(void *port, const struct rte_event ev[], + uint16_t nb_events); +uint16_t otx2_ssogws_dual_enq_new_burst(void *port, const struct rte_event ev[], + uint16_t nb_events); +uint16_t otx2_ssogws_dual_enq_fwd_burst(void *port, const struct rte_event ev[], + uint16_t nb_events); + +/* Auto generated API's */ +#define R(name, f6, f5, f4, f3, f2, f1, f0, flags) \ +uint16_t otx2_ssogws_deq_ ##name(void *port, struct rte_event *ev, \ + uint64_t timeout_ticks); \ +uint16_t otx2_ssogws_deq_burst_ ##name(void *port, struct rte_event ev[], \ + uint16_t nb_events, \ + uint64_t timeout_ticks); \ +uint16_t otx2_ssogws_deq_timeout_ ##name(void *port, \ + struct rte_event *ev, \ + uint64_t timeout_ticks); \ +uint16_t otx2_ssogws_deq_timeout_burst_ ##name(void *port, \ + struct rte_event ev[], \ + uint16_t nb_events, \ + uint64_t timeout_ticks); \ +uint16_t otx2_ssogws_deq_seg_ ##name(void *port, struct rte_event *ev, \ + uint64_t timeout_ticks); \ +uint16_t otx2_ssogws_deq_seg_burst_ ##name(void *port, \ + struct rte_event ev[], \ + uint16_t nb_events, \ + uint64_t timeout_ticks); \ +uint16_t otx2_ssogws_deq_seg_timeout_ ##name(void *port, \ + struct rte_event *ev, \ + uint64_t timeout_ticks); \ +uint16_t otx2_ssogws_deq_seg_timeout_burst_ ##name(void *port, \ + struct rte_event ev[], \ + uint16_t nb_events, \ + uint64_t timeout_ticks); \ + \ +uint16_t otx2_ssogws_dual_deq_ ##name(void *port, struct rte_event *ev, \ + uint64_t timeout_ticks); \ +uint16_t otx2_ssogws_dual_deq_burst_ ##name(void *port, \ + struct rte_event ev[], \ + uint16_t nb_events, \ + uint64_t timeout_ticks); \ +uint16_t otx2_ssogws_dual_deq_timeout_ ##name(void *port, \ + struct rte_event *ev, \ + uint64_t timeout_ticks); \ +uint16_t otx2_ssogws_dual_deq_timeout_burst_ ##name(void *port, \ + struct rte_event ev[], \ + uint16_t nb_events, \ + uint64_t timeout_ticks); \ +uint16_t otx2_ssogws_dual_deq_seg_ ##name(void *port, struct rte_event *ev, \ + uint64_t timeout_ticks); \ +uint16_t otx2_ssogws_dual_deq_seg_burst_ ##name(void *port, \ + struct rte_event ev[], \ + uint16_t nb_events, \ + uint64_t timeout_ticks); \ +uint16_t otx2_ssogws_dual_deq_seg_timeout_ ##name(void *port, \ + struct rte_event *ev, \ + uint64_t timeout_ticks); \ +uint16_t otx2_ssogws_dual_deq_seg_timeout_burst_ ##name(void *port, \ + struct rte_event ev[], \ + uint16_t nb_events, \ + uint64_t timeout_ticks);\ + +SSO_RX_ADPTR_ENQ_FASTPATH_FUNC +#undef R + +#define T(name, f6, f5, f4, f3, f2, f1, f0, sz, flags) \ +uint16_t otx2_ssogws_tx_adptr_enq_ ## name(void *port, struct rte_event ev[],\ + uint16_t nb_events); \ +uint16_t otx2_ssogws_tx_adptr_enq_seg_ ## name(void *port, \ + struct rte_event ev[], \ + uint16_t nb_events); \ +uint16_t otx2_ssogws_dual_tx_adptr_enq_ ## name(void *port, \ + struct rte_event ev[], \ + uint16_t nb_events); \ +uint16_t otx2_ssogws_dual_tx_adptr_enq_seg_ ## name(void *port, \ + struct rte_event ev[], \ + uint16_t nb_events); \ + +SSO_TX_ADPTR_ENQ_FASTPATH_FUNC +#undef T + +void sso_updt_xae_cnt(struct otx2_sso_evdev *dev, void *data, + uint32_t event_type); +int sso_xae_reconfigure(struct rte_eventdev *event_dev); +void sso_fastpath_fns_set(struct rte_eventdev 
*event_dev); + +int otx2_sso_rx_adapter_caps_get(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + uint32_t *caps); +int otx2_sso_rx_adapter_queue_add(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id, + const struct rte_event_eth_rx_adapter_queue_conf *queue_conf); +int otx2_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id); +int otx2_sso_rx_adapter_start(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev); +int otx2_sso_rx_adapter_stop(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev); +int otx2_sso_tx_adapter_caps_get(const struct rte_eventdev *dev, + const struct rte_eth_dev *eth_dev, + uint32_t *caps); +int otx2_sso_tx_adapter_queue_add(uint8_t id, + const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t tx_queue_id); + +int otx2_sso_tx_adapter_queue_del(uint8_t id, + const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t tx_queue_id); + +/* Clean up API's */ +typedef void (*otx2_handle_event_t)(void *arg, struct rte_event ev); +void ssogws_flush_events(struct otx2_ssogws *ws, uint8_t queue_id, + uintptr_t base, otx2_handle_event_t fn, void *arg); +void ssogws_reset(struct otx2_ssogws *ws); +/* Selftest */ +int otx2_sso_selftest(void); +/* Init and Fini API's */ +int otx2_sso_init(struct rte_eventdev *event_dev); +int otx2_sso_fini(struct rte_eventdev *event_dev); +/* IRQ handlers */ +int sso_register_irqs(const struct rte_eventdev *event_dev); +void sso_unregister_irqs(const struct rte_eventdev *event_dev); + +#endif /* __OTX2_EVDEV_H__ */ diff --git a/src/spdk/dpdk/drivers/event/octeontx2/otx2_evdev_adptr.c b/src/spdk/dpdk/drivers/event/octeontx2/otx2_evdev_adptr.c new file mode 100644 index 000000000..8bdcfa3ea --- /dev/null +++ b/src/spdk/dpdk/drivers/event/octeontx2/otx2_evdev_adptr.c @@ -0,0 +1,492 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2019 Marvell International Ltd. 
+ */ + +#include "otx2_evdev.h" + +int +otx2_sso_rx_adapter_caps_get(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, uint32_t *caps) +{ + int rc; + + RTE_SET_USED(event_dev); + rc = strncmp(eth_dev->device->driver->name, "net_octeontx2", 13); + if (rc) + *caps = RTE_EVENT_ETH_RX_ADAPTER_SW_CAP; + else + *caps = RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT | + RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ; + + return 0; +} + +static inline int +sso_rxq_enable(struct otx2_eth_dev *dev, uint16_t qid, uint8_t tt, uint8_t ggrp, + uint16_t eth_port_id) +{ + struct otx2_mbox *mbox = dev->mbox; + struct nix_aq_enq_req *aq; + int rc; + + aq = otx2_mbox_alloc_msg_nix_aq_enq(mbox); + aq->qidx = qid; + aq->ctype = NIX_AQ_CTYPE_CQ; + aq->op = NIX_AQ_INSTOP_WRITE; + + aq->cq.ena = 0; + aq->cq.caching = 0; + + otx2_mbox_memset(&aq->cq_mask, 0, sizeof(struct nix_cq_ctx_s)); + aq->cq_mask.ena = ~(aq->cq_mask.ena); + aq->cq_mask.caching = ~(aq->cq_mask.caching); + + rc = otx2_mbox_process(mbox); + if (rc < 0) { + otx2_err("Failed to disable cq context"); + goto fail; + } + + aq = otx2_mbox_alloc_msg_nix_aq_enq(mbox); + aq->qidx = qid; + aq->ctype = NIX_AQ_CTYPE_RQ; + aq->op = NIX_AQ_INSTOP_WRITE; + + aq->rq.sso_ena = 1; + aq->rq.sso_tt = tt; + aq->rq.sso_grp = ggrp; + aq->rq.ena_wqwd = 1; + /* Mbuf Header generation : + * > FIRST_SKIP is a super set of WQE_SKIP, dont modify first skip as + * it already has data related to mbuf size, headroom, private area. + * > Using WQE_SKIP we can directly assign + * mbuf = wqe - sizeof(struct mbuf); + * so that mbuf header will not have unpredicted values while headroom + * and private data starts at the beginning of wqe_data. + */ + aq->rq.wqe_skip = 1; + aq->rq.wqe_caching = 1; + aq->rq.spb_ena = 0; + aq->rq.flow_tagw = 20; /* 20-bits */ + + /* Flow Tag calculation : + * + * rq_tag <31:24> = good/bad_tag<8:0>; + * rq_tag <23:0> = [ltag] + * + * flow_tag_mask<31:0> = (1 << flow_tagw) - 1; <31:20> + * tag<31:0> = (~flow_tag_mask & rq_tag) | (flow_tag_mask & flow_tag); + * + * Setup : + * ltag<23:0> = (eth_port_id & 0xF) << 20; + * good/bad_tag<8:0> = + * ((eth_port_id >> 4) & 0xF) | (RTE_EVENT_TYPE_ETHDEV << 4); + * + * TAG<31:0> on getwork = <31:28>(RTE_EVENT_TYPE_ETHDEV) | + * <27:20> (eth_port_id) | <20:0> [TAG] + */ + + aq->rq.ltag = (eth_port_id & 0xF) << 20; + aq->rq.good_utag = ((eth_port_id >> 4) & 0xF) | + (RTE_EVENT_TYPE_ETHDEV << 4); + aq->rq.bad_utag = aq->rq.good_utag; + + aq->rq.ena = 0; /* Don't enable RQ yet */ + aq->rq.pb_caching = 0x2; /* First cache aligned block to LLC */ + aq->rq.xqe_imm_size = 0; /* No pkt data copy to CQE */ + + otx2_mbox_memset(&aq->rq_mask, 0, sizeof(struct nix_rq_ctx_s)); + /* mask the bits to write. 
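Only the context fields whose mask bits are set below are updated by this AQ write; the rest of the RQ context is left untouched. 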
*/ + aq->rq_mask.sso_ena = ~(aq->rq_mask.sso_ena); + aq->rq_mask.sso_tt = ~(aq->rq_mask.sso_tt); + aq->rq_mask.sso_grp = ~(aq->rq_mask.sso_grp); + aq->rq_mask.ena_wqwd = ~(aq->rq_mask.ena_wqwd); + aq->rq_mask.wqe_skip = ~(aq->rq_mask.wqe_skip); + aq->rq_mask.wqe_caching = ~(aq->rq_mask.wqe_caching); + aq->rq_mask.spb_ena = ~(aq->rq_mask.spb_ena); + aq->rq_mask.flow_tagw = ~(aq->rq_mask.flow_tagw); + aq->rq_mask.ltag = ~(aq->rq_mask.ltag); + aq->rq_mask.good_utag = ~(aq->rq_mask.good_utag); + aq->rq_mask.bad_utag = ~(aq->rq_mask.bad_utag); + aq->rq_mask.ena = ~(aq->rq_mask.ena); + aq->rq_mask.pb_caching = ~(aq->rq_mask.pb_caching); + aq->rq_mask.xqe_imm_size = ~(aq->rq_mask.xqe_imm_size); + + rc = otx2_mbox_process(mbox); + if (rc < 0) { + otx2_err("Failed to init rx adapter context"); + goto fail; + } + + return 0; +fail: + return rc; +} + +static inline int +sso_rxq_disable(struct otx2_eth_dev *dev, uint16_t qid) +{ + struct otx2_mbox *mbox = dev->mbox; + struct nix_aq_enq_req *aq; + int rc; + + aq = otx2_mbox_alloc_msg_nix_aq_enq(mbox); + aq->qidx = qid; + aq->ctype = NIX_AQ_CTYPE_CQ; + aq->op = NIX_AQ_INSTOP_WRITE; + + aq->cq.ena = 1; + aq->cq.caching = 1; + + otx2_mbox_memset(&aq->cq_mask, 0, sizeof(struct nix_cq_ctx_s)); + aq->cq_mask.ena = ~(aq->cq_mask.ena); + aq->cq_mask.caching = ~(aq->cq_mask.caching); + + rc = otx2_mbox_process(mbox); + if (rc < 0) { + otx2_err("Failed to enable cq context"); + goto fail; + } + + aq = otx2_mbox_alloc_msg_nix_aq_enq(mbox); + aq->qidx = qid; + aq->ctype = NIX_AQ_CTYPE_RQ; + aq->op = NIX_AQ_INSTOP_WRITE; + + aq->rq.sso_ena = 0; + aq->rq.sso_tt = SSO_TT_UNTAGGED; + aq->rq.sso_grp = 0; + aq->rq.ena_wqwd = 0; + aq->rq.wqe_caching = 0; + aq->rq.wqe_skip = 0; + aq->rq.spb_ena = 0; + aq->rq.flow_tagw = 0x20; + aq->rq.ltag = 0; + aq->rq.good_utag = 0; + aq->rq.bad_utag = 0; + aq->rq.ena = 1; + aq->rq.pb_caching = 0x2; /* First cache aligned block to LLC */ + aq->rq.xqe_imm_size = 0; /* No pkt data copy to CQE */ + + otx2_mbox_memset(&aq->rq_mask, 0, sizeof(struct nix_rq_ctx_s)); + /* mask the bits to write. 
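As in sso_rxq_enable(), only the masked fields are rewritten; here they restore the RQ to its normal CQ based (non-SSO) configuration. 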
*/ + aq->rq_mask.sso_ena = ~(aq->rq_mask.sso_ena); + aq->rq_mask.sso_tt = ~(aq->rq_mask.sso_tt); + aq->rq_mask.sso_grp = ~(aq->rq_mask.sso_grp); + aq->rq_mask.ena_wqwd = ~(aq->rq_mask.ena_wqwd); + aq->rq_mask.wqe_caching = ~(aq->rq_mask.wqe_caching); + aq->rq_mask.wqe_skip = ~(aq->rq_mask.wqe_skip); + aq->rq_mask.spb_ena = ~(aq->rq_mask.spb_ena); + aq->rq_mask.flow_tagw = ~(aq->rq_mask.flow_tagw); + aq->rq_mask.ltag = ~(aq->rq_mask.ltag); + aq->rq_mask.good_utag = ~(aq->rq_mask.good_utag); + aq->rq_mask.bad_utag = ~(aq->rq_mask.bad_utag); + aq->rq_mask.ena = ~(aq->rq_mask.ena); + aq->rq_mask.pb_caching = ~(aq->rq_mask.pb_caching); + aq->rq_mask.xqe_imm_size = ~(aq->rq_mask.xqe_imm_size); + + rc = otx2_mbox_process(mbox); + if (rc < 0) { + otx2_err("Failed to clear rx adapter context"); + goto fail; + } + + return 0; +fail: + return rc; +} + +void +sso_updt_xae_cnt(struct otx2_sso_evdev *dev, void *data, uint32_t event_type) +{ + int i; + + switch (event_type) { + case RTE_EVENT_TYPE_ETHDEV: + { + struct otx2_eth_rxq *rxq = data; + uint64_t *old_ptr; + + for (i = 0; i < dev->rx_adptr_pool_cnt; i++) { + if ((uint64_t)rxq->pool == dev->rx_adptr_pools[i]) + return; + } + + dev->rx_adptr_pool_cnt++; + old_ptr = dev->rx_adptr_pools; + dev->rx_adptr_pools = rte_realloc(dev->rx_adptr_pools, + sizeof(uint64_t) * + dev->rx_adptr_pool_cnt, 0); + if (dev->rx_adptr_pools == NULL) { + dev->adptr_xae_cnt += rxq->pool->size; + dev->rx_adptr_pools = old_ptr; + dev->rx_adptr_pool_cnt--; + return; + } + dev->rx_adptr_pools[dev->rx_adptr_pool_cnt - 1] = + (uint64_t)rxq->pool; + + dev->adptr_xae_cnt += rxq->pool->size; + break; + } + case RTE_EVENT_TYPE_TIMER: + { + struct otx2_tim_ring *timr = data; + uint16_t *old_ring_ptr; + uint64_t *old_sz_ptr; + + for (i = 0; i < dev->tim_adptr_ring_cnt; i++) { + if (timr->ring_id != dev->timer_adptr_rings[i]) + continue; + if (timr->nb_timers == dev->timer_adptr_sz[i]) + return; + dev->adptr_xae_cnt -= dev->timer_adptr_sz[i]; + dev->adptr_xae_cnt += timr->nb_timers; + dev->timer_adptr_sz[i] = timr->nb_timers; + + return; + } + + dev->tim_adptr_ring_cnt++; + old_ring_ptr = dev->timer_adptr_rings; + old_sz_ptr = dev->timer_adptr_sz; + + dev->timer_adptr_rings = rte_realloc(dev->timer_adptr_rings, + sizeof(uint16_t) * + dev->tim_adptr_ring_cnt, + 0); + if (dev->timer_adptr_rings == NULL) { + dev->adptr_xae_cnt += timr->nb_timers; + dev->timer_adptr_rings = old_ring_ptr; + dev->tim_adptr_ring_cnt--; + return; + } + + dev->timer_adptr_sz = rte_realloc(dev->timer_adptr_sz, + sizeof(uint64_t) * + dev->tim_adptr_ring_cnt, + 0); + + if (dev->timer_adptr_sz == NULL) { + dev->adptr_xae_cnt += timr->nb_timers; + dev->timer_adptr_sz = old_sz_ptr; + dev->tim_adptr_ring_cnt--; + return; + } + + dev->timer_adptr_rings[dev->tim_adptr_ring_cnt - 1] = + timr->ring_id; + dev->timer_adptr_sz[dev->tim_adptr_ring_cnt - 1] = + timr->nb_timers; + + dev->adptr_xae_cnt += timr->nb_timers; + break; + } + default: + break; + } +} + +static inline void +sso_updt_lookup_mem(const struct rte_eventdev *event_dev, void *lookup_mem) +{ + struct otx2_sso_evdev *dev = sso_pmd_priv(event_dev); + int i; + + for (i = 0; i < dev->nb_event_ports; i++) { + if (dev->dual_ws) { + struct otx2_ssogws_dual *ws = event_dev->data->ports[i]; + + ws->lookup_mem = lookup_mem; + } else { + struct otx2_ssogws *ws = event_dev->data->ports[i]; + + ws->lookup_mem = lookup_mem; + } + } +} + +int +otx2_sso_rx_adapter_queue_add(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id, + 
const struct rte_event_eth_rx_adapter_queue_conf *queue_conf) +{ + struct otx2_eth_dev *otx2_eth_dev = eth_dev->data->dev_private; + struct otx2_sso_evdev *dev = sso_pmd_priv(event_dev); + uint16_t port = eth_dev->data->port_id; + struct otx2_eth_rxq *rxq; + int i, rc; + + rc = strncmp(eth_dev->device->driver->name, "net_octeontx2", 13); + if (rc) + return -EINVAL; + + if (rx_queue_id < 0) { + for (i = 0 ; i < eth_dev->data->nb_rx_queues; i++) { + rxq = eth_dev->data->rx_queues[i]; + sso_updt_xae_cnt(dev, rxq, RTE_EVENT_TYPE_ETHDEV); + rc = sso_xae_reconfigure((struct rte_eventdev *) + (uintptr_t)event_dev); + rc |= sso_rxq_enable(otx2_eth_dev, i, + queue_conf->ev.sched_type, + queue_conf->ev.queue_id, port); + } + rxq = eth_dev->data->rx_queues[0]; + sso_updt_lookup_mem(event_dev, rxq->lookup_mem); + } else { + rxq = eth_dev->data->rx_queues[rx_queue_id]; + sso_updt_xae_cnt(dev, rxq, RTE_EVENT_TYPE_ETHDEV); + rc = sso_xae_reconfigure((struct rte_eventdev *) + (uintptr_t)event_dev); + rc |= sso_rxq_enable(otx2_eth_dev, (uint16_t)rx_queue_id, + queue_conf->ev.sched_type, + queue_conf->ev.queue_id, port); + sso_updt_lookup_mem(event_dev, rxq->lookup_mem); + } + + if (rc < 0) { + otx2_err("Failed to configure Rx adapter port=%d, q=%d", port, + queue_conf->ev.queue_id); + return rc; + } + + dev->rx_offloads |= otx2_eth_dev->rx_offload_flags; + dev->tstamp = &otx2_eth_dev->tstamp; + sso_fastpath_fns_set((struct rte_eventdev *)(uintptr_t)event_dev); + + return 0; +} + +int +otx2_sso_rx_adapter_queue_del(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t rx_queue_id) +{ + struct otx2_eth_dev *dev = eth_dev->data->dev_private; + int i, rc; + + RTE_SET_USED(event_dev); + rc = strncmp(eth_dev->device->driver->name, "net_octeontx2", 13); + if (rc) + return -EINVAL; + + if (rx_queue_id < 0) { + for (i = 0 ; i < eth_dev->data->nb_rx_queues; i++) + rc = sso_rxq_disable(dev, i); + } else { + rc = sso_rxq_disable(dev, (uint16_t)rx_queue_id); + } + + if (rc < 0) + otx2_err("Failed to clear Rx adapter config port=%d, q=%d", + eth_dev->data->port_id, rx_queue_id); + + return rc; +} + +int +otx2_sso_rx_adapter_start(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev) +{ + RTE_SET_USED(event_dev); + RTE_SET_USED(eth_dev); + + return 0; +} + +int +otx2_sso_rx_adapter_stop(const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev) +{ + RTE_SET_USED(event_dev); + RTE_SET_USED(eth_dev); + + return 0; +} + +int +otx2_sso_tx_adapter_caps_get(const struct rte_eventdev *dev, + const struct rte_eth_dev *eth_dev, uint32_t *caps) +{ + int ret; + + RTE_SET_USED(dev); + ret = strncmp(eth_dev->device->driver->name, "net_octeontx2,", 13); + if (ret) + *caps = 0; + else + *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT; + + return 0; +} + +static int +sso_sqb_aura_limit_edit(struct rte_mempool *mp, uint16_t nb_sqb_bufs) +{ + struct otx2_npa_lf *npa_lf = otx2_intra_dev_get_cfg()->npa_lf; + struct npa_aq_enq_req *aura_req; + + aura_req = otx2_mbox_alloc_msg_npa_aq_enq(npa_lf->mbox); + aura_req->aura_id = npa_lf_aura_handle_to_aura(mp->pool_id); + aura_req->ctype = NPA_AQ_CTYPE_AURA; + aura_req->op = NPA_AQ_INSTOP_WRITE; + + aura_req->aura.limit = nb_sqb_bufs; + aura_req->aura_mask.limit = ~(aura_req->aura_mask.limit); + + return otx2_mbox_process(npa_lf->mbox); +} + +int +otx2_sso_tx_adapter_queue_add(uint8_t id, const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t tx_queue_id) +{ + struct otx2_eth_dev *otx2_eth_dev = 
eth_dev->data->dev_private; + struct otx2_sso_evdev *dev = sso_pmd_priv(event_dev); + struct otx2_eth_txq *txq; + int i; + + RTE_SET_USED(id); + if (tx_queue_id < 0) { + for (i = 0 ; i < eth_dev->data->nb_tx_queues; i++) { + txq = eth_dev->data->tx_queues[i]; + sso_sqb_aura_limit_edit(txq->sqb_pool, + OTX2_SSO_SQB_LIMIT); + } + } else { + txq = eth_dev->data->tx_queues[tx_queue_id]; + sso_sqb_aura_limit_edit(txq->sqb_pool, OTX2_SSO_SQB_LIMIT); + } + + dev->tx_offloads |= otx2_eth_dev->tx_offload_flags; + sso_fastpath_fns_set((struct rte_eventdev *)(uintptr_t)event_dev); + + return 0; +} + +int +otx2_sso_tx_adapter_queue_del(uint8_t id, const struct rte_eventdev *event_dev, + const struct rte_eth_dev *eth_dev, + int32_t tx_queue_id) +{ + struct otx2_eth_txq *txq; + int i; + + RTE_SET_USED(id); + RTE_SET_USED(eth_dev); + RTE_SET_USED(event_dev); + if (tx_queue_id < 0) { + for (i = 0 ; i < eth_dev->data->nb_tx_queues; i++) { + txq = eth_dev->data->tx_queues[i]; + sso_sqb_aura_limit_edit(txq->sqb_pool, + txq->nb_sqb_bufs); + } + } else { + txq = eth_dev->data->tx_queues[tx_queue_id]; + sso_sqb_aura_limit_edit(txq->sqb_pool, txq->nb_sqb_bufs); + } + + return 0; +} diff --git a/src/spdk/dpdk/drivers/event/octeontx2/otx2_evdev_irq.c b/src/spdk/dpdk/drivers/event/octeontx2/otx2_evdev_irq.c new file mode 100644 index 000000000..a2033646e --- /dev/null +++ b/src/spdk/dpdk/drivers/event/octeontx2/otx2_evdev_irq.c @@ -0,0 +1,272 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2019 Marvell International Ltd. + */ + +#include "otx2_evdev.h" +#include "otx2_tim_evdev.h" + +static void +sso_lf_irq(void *param) +{ + uintptr_t base = (uintptr_t)param; + uint64_t intr; + uint8_t ggrp; + + ggrp = (base >> 12) & 0xFF; + + intr = otx2_read64(base + SSO_LF_GGRP_INT); + if (intr == 0) + return; + + otx2_err("GGRP %d GGRP_INT=0x%" PRIx64 "", ggrp, intr); + + /* Clear interrupt */ + otx2_write64(intr, base + SSO_LF_GGRP_INT); +} + +static int +sso_lf_register_irq(const struct rte_eventdev *event_dev, uint16_t ggrp_msixoff, + uintptr_t base) +{ + struct rte_pci_device *pci_dev = RTE_DEV_TO_PCI(event_dev->dev); + struct rte_intr_handle *handle = &pci_dev->intr_handle; + int rc, vec; + + vec = ggrp_msixoff + SSO_LF_INT_VEC_GRP; + + /* Clear err interrupt */ + otx2_write64(~0ull, base + SSO_LF_GGRP_INT_ENA_W1C); + /* Set used interrupt vectors */ + rc = otx2_register_irq(handle, sso_lf_irq, (void *)base, vec); + /* Enable hw interrupt */ + otx2_write64(~0ull, base + SSO_LF_GGRP_INT_ENA_W1S); + + return rc; +} + +static void +ssow_lf_irq(void *param) +{ + uintptr_t base = (uintptr_t)param; + uint8_t gws = (base >> 12) & 0xFF; + uint64_t intr; + + intr = otx2_read64(base + SSOW_LF_GWS_INT); + if (intr == 0) + return; + + otx2_err("GWS %d GWS_INT=0x%" PRIx64 "", gws, intr); + + /* Clear interrupt */ + otx2_write64(intr, base + SSOW_LF_GWS_INT); +} + +static int +ssow_lf_register_irq(const struct rte_eventdev *event_dev, uint16_t gws_msixoff, + uintptr_t base) +{ + struct rte_pci_device *pci_dev = RTE_DEV_TO_PCI(event_dev->dev); + struct rte_intr_handle *handle = &pci_dev->intr_handle; + int rc, vec; + + vec = gws_msixoff + SSOW_LF_INT_VEC_IOP; + + /* Clear err interrupt */ + otx2_write64(~0ull, base + SSOW_LF_GWS_INT_ENA_W1C); + /* Set used interrupt vectors */ + rc = otx2_register_irq(handle, ssow_lf_irq, (void *)base, vec); + /* Enable hw interrupt */ + otx2_write64(~0ull, base + SSOW_LF_GWS_INT_ENA_W1S); + + return rc; +} + +static void +sso_lf_unregister_irq(const struct rte_eventdev *event_dev, + 
uint16_t ggrp_msixoff, uintptr_t base) +{ + struct rte_pci_device *pci_dev = RTE_DEV_TO_PCI(event_dev->dev); + struct rte_intr_handle *handle = &pci_dev->intr_handle; + int vec; + + vec = ggrp_msixoff + SSO_LF_INT_VEC_GRP; + + /* Clear err interrupt */ + otx2_write64(~0ull, base + SSO_LF_GGRP_INT_ENA_W1C); + otx2_unregister_irq(handle, sso_lf_irq, (void *)base, vec); +} + +static void +ssow_lf_unregister_irq(const struct rte_eventdev *event_dev, + uint16_t gws_msixoff, uintptr_t base) +{ + struct rte_pci_device *pci_dev = RTE_DEV_TO_PCI(event_dev->dev); + struct rte_intr_handle *handle = &pci_dev->intr_handle; + int vec; + + vec = gws_msixoff + SSOW_LF_INT_VEC_IOP; + + /* Clear err interrupt */ + otx2_write64(~0ull, base + SSOW_LF_GWS_INT_ENA_W1C); + otx2_unregister_irq(handle, ssow_lf_irq, (void *)base, vec); +} + +int +sso_register_irqs(const struct rte_eventdev *event_dev) +{ + struct otx2_sso_evdev *dev = sso_pmd_priv(event_dev); + int i, rc = -EINVAL; + uint8_t nb_ports; + + nb_ports = dev->nb_event_ports * (dev->dual_ws ? 2 : 1); + + for (i = 0; i < dev->nb_event_queues; i++) { + if (dev->sso_msixoff[i] == MSIX_VECTOR_INVALID) { + otx2_err("Invalid SSOLF MSIX offset[%d] vector: 0x%x", + i, dev->sso_msixoff[i]); + goto fail; + } + } + + for (i = 0; i < nb_ports; i++) { + if (dev->ssow_msixoff[i] == MSIX_VECTOR_INVALID) { + otx2_err("Invalid SSOWLF MSIX offset[%d] vector: 0x%x", + i, dev->ssow_msixoff[i]); + goto fail; + } + } + + for (i = 0; i < dev->nb_event_queues; i++) { + uintptr_t base = dev->bar2 + (RVU_BLOCK_ADDR_SSO << 20 | + i << 12); + rc = sso_lf_register_irq(event_dev, dev->sso_msixoff[i], base); + } + + for (i = 0; i < nb_ports; i++) { + uintptr_t base = dev->bar2 + (RVU_BLOCK_ADDR_SSOW << 20 | + i << 12); + rc = ssow_lf_register_irq(event_dev, dev->ssow_msixoff[i], + base); + } + +fail: + return rc; +} + +void +sso_unregister_irqs(const struct rte_eventdev *event_dev) +{ + struct otx2_sso_evdev *dev = sso_pmd_priv(event_dev); + uint8_t nb_ports; + int i; + + nb_ports = dev->nb_event_ports * (dev->dual_ws ? 
2 : 1); + + for (i = 0; i < dev->nb_event_queues; i++) { + uintptr_t base = dev->bar2 + (RVU_BLOCK_ADDR_SSO << 20 | + i << 12); + sso_lf_unregister_irq(event_dev, dev->sso_msixoff[i], base); + } + + for (i = 0; i < nb_ports; i++) { + uintptr_t base = dev->bar2 + (RVU_BLOCK_ADDR_SSOW << 20 | + i << 12); + ssow_lf_unregister_irq(event_dev, dev->ssow_msixoff[i], base); + } +} + +static void +tim_lf_irq(void *param) +{ + uintptr_t base = (uintptr_t)param; + uint64_t intr; + uint8_t ring; + + ring = (base >> 12) & 0xFF; + + intr = otx2_read64(base + TIM_LF_NRSPERR_INT); + otx2_err("TIM RING %d TIM_LF_NRSPERR_INT=0x%" PRIx64 "", ring, intr); + intr = otx2_read64(base + TIM_LF_RAS_INT); + otx2_err("TIM RING %d TIM_LF_RAS_INT=0x%" PRIx64 "", ring, intr); + + /* Clear interrupt */ + otx2_write64(intr, base + TIM_LF_NRSPERR_INT); + otx2_write64(intr, base + TIM_LF_RAS_INT); +} + +static int +tim_lf_register_irq(struct rte_pci_device *pci_dev, uint16_t tim_msixoff, + uintptr_t base) +{ + struct rte_intr_handle *handle = &pci_dev->intr_handle; + int rc, vec; + + vec = tim_msixoff + TIM_LF_INT_VEC_NRSPERR_INT; + + /* Clear err interrupt */ + otx2_write64(~0ull, base + TIM_LF_NRSPERR_INT); + /* Set used interrupt vectors */ + rc = otx2_register_irq(handle, tim_lf_irq, (void *)base, vec); + /* Enable hw interrupt */ + otx2_write64(~0ull, base + TIM_LF_NRSPERR_INT_ENA_W1S); + + vec = tim_msixoff + TIM_LF_INT_VEC_RAS_INT; + + /* Clear err interrupt */ + otx2_write64(~0ull, base + TIM_LF_RAS_INT); + /* Set used interrupt vectors */ + rc = otx2_register_irq(handle, tim_lf_irq, (void *)base, vec); + /* Enable hw interrupt */ + otx2_write64(~0ull, base + TIM_LF_RAS_INT_ENA_W1S); + + return rc; +} + +static void +tim_lf_unregister_irq(struct rte_pci_device *pci_dev, uint16_t tim_msixoff, + uintptr_t base) +{ + struct rte_intr_handle *handle = &pci_dev->intr_handle; + int vec; + + vec = tim_msixoff + TIM_LF_INT_VEC_NRSPERR_INT; + + /* Clear err interrupt */ + otx2_write64(~0ull, base + TIM_LF_NRSPERR_INT_ENA_W1C); + otx2_unregister_irq(handle, tim_lf_irq, (void *)base, vec); + + vec = tim_msixoff + TIM_LF_INT_VEC_RAS_INT; + + /* Clear err interrupt */ + otx2_write64(~0ull, base + TIM_LF_RAS_INT_ENA_W1C); + otx2_unregister_irq(handle, tim_lf_irq, (void *)base, vec); +} + +int +tim_register_irq(uint16_t ring_id) +{ + struct otx2_tim_evdev *dev = tim_priv_get(); + int rc = -EINVAL; + uintptr_t base; + + if (dev->tim_msixoff[ring_id] == MSIX_VECTOR_INVALID) { + otx2_err("Invalid TIMLF MSIX offset[%d] vector: 0x%x", + ring_id, dev->tim_msixoff[ring_id]); + goto fail; + } + + base = dev->bar2 + (RVU_BLOCK_ADDR_TIM << 20 | ring_id << 12); + rc = tim_lf_register_irq(dev->pci_dev, dev->tim_msixoff[ring_id], base); +fail: + return rc; +} + +void +tim_unregister_irq(uint16_t ring_id) +{ + struct otx2_tim_evdev *dev = tim_priv_get(); + uintptr_t base; + + base = dev->bar2 + (RVU_BLOCK_ADDR_TIM << 20 | ring_id << 12); + tim_lf_unregister_irq(dev->pci_dev, dev->tim_msixoff[ring_id], base); +} diff --git a/src/spdk/dpdk/drivers/event/octeontx2/otx2_evdev_selftest.c b/src/spdk/dpdk/drivers/event/octeontx2/otx2_evdev_selftest.c new file mode 100644 index 000000000..8440a50aa --- /dev/null +++ b/src/spdk/dpdk/drivers/event/octeontx2/otx2_evdev_selftest.c @@ -0,0 +1,1511 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2019 Marvell International Ltd. 
+ */ + +#include <rte_atomic.h> +#include <rte_common.h> +#include <rte_cycles.h> +#include <rte_debug.h> +#include <rte_eal.h> +#include <rte_ethdev.h> +#include <rte_eventdev.h> +#include <rte_hexdump.h> +#include <rte_launch.h> +#include <rte_lcore.h> +#include <rte_mbuf.h> +#include <rte_malloc.h> +#include <rte_memcpy.h> +#include <rte_per_lcore.h> +#include <rte_random.h> +#include <rte_test.h> + +#include "otx2_evdev.h" + +#define NUM_PACKETS (1024) +#define MAX_EVENTS (1024) + +#define OCTEONTX2_TEST_RUN(setup, teardown, test) \ + octeontx_test_run(setup, teardown, test, #test) + +static int total; +static int passed; +static int failed; +static int unsupported; + +static int evdev; +static struct rte_mempool *eventdev_test_mempool; + +struct event_attr { + uint32_t flow_id; + uint8_t event_type; + uint8_t sub_event_type; + uint8_t sched_type; + uint8_t queue; + uint8_t port; +}; + +static uint32_t seqn_list_index; +static int seqn_list[NUM_PACKETS]; + +static inline void +seqn_list_init(void) +{ + RTE_BUILD_BUG_ON(NUM_PACKETS < MAX_EVENTS); + memset(seqn_list, 0, sizeof(seqn_list)); + seqn_list_index = 0; +} + +static inline int +seqn_list_update(int val) +{ + if (seqn_list_index >= NUM_PACKETS) + return -1; + + seqn_list[seqn_list_index++] = val; + rte_smp_wmb(); + return 0; +} + +static inline int +seqn_list_check(int limit) +{ + int i; + + for (i = 0; i < limit; i++) { + if (seqn_list[i] != i) { + otx2_err("Seqn mismatch %d %d", seqn_list[i], i); + return -1; + } + } + return 0; +} + +struct test_core_param { + rte_atomic32_t *total_events; + uint64_t dequeue_tmo_ticks; + uint8_t port; + uint8_t sched_type; +}; + +static int +testsuite_setup(void) +{ + const char *eventdev_name = "event_octeontx2"; + + evdev = rte_event_dev_get_dev_id(eventdev_name); + if (evdev < 0) { + otx2_err("%d: Eventdev %s not found", __LINE__, eventdev_name); + return -1; + } + return 0; +} + +static void +testsuite_teardown(void) +{ + rte_event_dev_close(evdev); +} + +static inline void +devconf_set_default_sane_values(struct rte_event_dev_config *dev_conf, + struct rte_event_dev_info *info) +{ + memset(dev_conf, 0, sizeof(struct rte_event_dev_config)); + dev_conf->dequeue_timeout_ns = info->min_dequeue_timeout_ns; + dev_conf->nb_event_ports = info->max_event_ports; + dev_conf->nb_event_queues = info->max_event_queues; + dev_conf->nb_event_queue_flows = info->max_event_queue_flows; + dev_conf->nb_event_port_dequeue_depth = + info->max_event_port_dequeue_depth; + dev_conf->nb_event_port_enqueue_depth = + info->max_event_port_enqueue_depth; + dev_conf->nb_event_port_enqueue_depth = + info->max_event_port_enqueue_depth; + dev_conf->nb_events_limit = + info->max_num_events; +} + +enum { + TEST_EVENTDEV_SETUP_DEFAULT, + TEST_EVENTDEV_SETUP_PRIORITY, + TEST_EVENTDEV_SETUP_DEQUEUE_TIMEOUT, +}; + +static inline int +_eventdev_setup(int mode) +{ + const char *pool_name = "evdev_octeontx_test_pool"; + struct rte_event_dev_config dev_conf; + struct rte_event_dev_info info; + int i, ret; + + /* Create and destrory pool for each test case to make it standalone */ + eventdev_test_mempool = rte_pktmbuf_pool_create(pool_name, MAX_EVENTS, + 0, 0, 512, + rte_socket_id()); + if (!eventdev_test_mempool) { + otx2_err("ERROR creating mempool"); + return -1; + } + + ret = rte_event_dev_info_get(evdev, &info); + RTE_TEST_ASSERT_SUCCESS(ret, "Failed to get event dev info"); + + devconf_set_default_sane_values(&dev_conf, &info); + if (mode == TEST_EVENTDEV_SETUP_DEQUEUE_TIMEOUT) + dev_conf.event_dev_cfg |= 
RTE_EVENT_DEV_CFG_PER_DEQUEUE_TIMEOUT; + + ret = rte_event_dev_configure(evdev, &dev_conf); + RTE_TEST_ASSERT_SUCCESS(ret, "Failed to configure eventdev"); + + uint32_t queue_count; + RTE_TEST_ASSERT_SUCCESS(rte_event_dev_attr_get(evdev, + RTE_EVENT_DEV_ATTR_QUEUE_COUNT, &queue_count), + "Queue count get failed"); + + if (mode == TEST_EVENTDEV_SETUP_PRIORITY) { + if (queue_count > 8) + queue_count = 8; + + /* Configure event queues(0 to n) with + * RTE_EVENT_DEV_PRIORITY_HIGHEST to + * RTE_EVENT_DEV_PRIORITY_LOWEST + */ + uint8_t step = (RTE_EVENT_DEV_PRIORITY_LOWEST + 1) / + queue_count; + for (i = 0; i < (int)queue_count; i++) { + struct rte_event_queue_conf queue_conf; + + ret = rte_event_queue_default_conf_get(evdev, i, + &queue_conf); + RTE_TEST_ASSERT_SUCCESS(ret, "Failed to get def_conf%d", + i); + queue_conf.priority = i * step; + ret = rte_event_queue_setup(evdev, i, &queue_conf); + RTE_TEST_ASSERT_SUCCESS(ret, "Failed to setup queue=%d", + i); + } + + } else { + /* Configure event queues with default priority */ + for (i = 0; i < (int)queue_count; i++) { + ret = rte_event_queue_setup(evdev, i, NULL); + RTE_TEST_ASSERT_SUCCESS(ret, "Failed to setup queue=%d", + i); + } + } + /* Configure event ports */ + uint32_t port_count; + RTE_TEST_ASSERT_SUCCESS(rte_event_dev_attr_get(evdev, + RTE_EVENT_DEV_ATTR_PORT_COUNT, &port_count), + "Port count get failed"); + for (i = 0; i < (int)port_count; i++) { + ret = rte_event_port_setup(evdev, i, NULL); + RTE_TEST_ASSERT_SUCCESS(ret, "Failed to setup port=%d", i); + ret = rte_event_port_link(evdev, i, NULL, NULL, 0); + RTE_TEST_ASSERT(ret >= 0, "Failed to link all queues port=%d", + i); + } + + ret = rte_event_dev_start(evdev); + RTE_TEST_ASSERT_SUCCESS(ret, "Failed to start device"); + + return 0; +} + +static inline int +eventdev_setup(void) +{ + return _eventdev_setup(TEST_EVENTDEV_SETUP_DEFAULT); +} + +static inline int +eventdev_setup_priority(void) +{ + return _eventdev_setup(TEST_EVENTDEV_SETUP_PRIORITY); +} + +static inline int +eventdev_setup_dequeue_timeout(void) +{ + return _eventdev_setup(TEST_EVENTDEV_SETUP_DEQUEUE_TIMEOUT); +} + +static inline void +eventdev_teardown(void) +{ + rte_event_dev_stop(evdev); + rte_mempool_free(eventdev_test_mempool); +} + +static inline void +update_event_and_validation_attr(struct rte_mbuf *m, struct rte_event *ev, + uint32_t flow_id, uint8_t event_type, + uint8_t sub_event_type, uint8_t sched_type, + uint8_t queue, uint8_t port) +{ + struct event_attr *attr; + + /* Store the event attributes in mbuf for future reference */ + attr = rte_pktmbuf_mtod(m, struct event_attr *); + attr->flow_id = flow_id; + attr->event_type = event_type; + attr->sub_event_type = sub_event_type; + attr->sched_type = sched_type; + attr->queue = queue; + attr->port = port; + + ev->flow_id = flow_id; + ev->sub_event_type = sub_event_type; + ev->event_type = event_type; + /* Inject the new event */ + ev->op = RTE_EVENT_OP_NEW; + ev->sched_type = sched_type; + ev->queue_id = queue; + ev->mbuf = m; +} + +static inline int +inject_events(uint32_t flow_id, uint8_t event_type, uint8_t sub_event_type, + uint8_t sched_type, uint8_t queue, uint8_t port, + unsigned int events) +{ + struct rte_mbuf *m; + unsigned int i; + + for (i = 0; i < events; i++) { + struct rte_event ev = {.event = 0, .u64 = 0}; + + m = rte_pktmbuf_alloc(eventdev_test_mempool); + RTE_TEST_ASSERT_NOT_NULL(m, "mempool alloc failed"); + + m->seqn = i; + update_event_and_validation_attr(m, &ev, flow_id, event_type, + sub_event_type, sched_type, + queue, port); + 
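/* Submit the prepared RTE_EVENT_OP_NEW event through the given port */ + 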
rte_event_enqueue_burst(evdev, port, &ev, 1); + } + return 0; +} + +static inline int +check_excess_events(uint8_t port) +{ + uint16_t valid_event; + struct rte_event ev; + int i; + + /* Check for excess events, try for a few times and exit */ + for (i = 0; i < 32; i++) { + valid_event = rte_event_dequeue_burst(evdev, port, &ev, 1, 0); + + RTE_TEST_ASSERT_SUCCESS(valid_event, + "Unexpected valid event=%d", + ev.mbuf->seqn); + } + return 0; +} + +static inline int +generate_random_events(const unsigned int total_events) +{ + struct rte_event_dev_info info; + uint32_t queue_count; + unsigned int i; + int ret; + + RTE_TEST_ASSERT_SUCCESS(rte_event_dev_attr_get(evdev, + RTE_EVENT_DEV_ATTR_QUEUE_COUNT, &queue_count), + "Queue count get failed"); + + ret = rte_event_dev_info_get(evdev, &info); + RTE_TEST_ASSERT_SUCCESS(ret, "Failed to get event dev info"); + for (i = 0; i < total_events; i++) { + ret = inject_events( + rte_rand() % info.max_event_queue_flows /*flow_id */, + RTE_EVENT_TYPE_CPU /* event_type */, + rte_rand() % 256 /* sub_event_type */, + rte_rand() % (RTE_SCHED_TYPE_PARALLEL + 1), + rte_rand() % queue_count /* queue */, + 0 /* port */, + 1 /* events */); + if (ret) + return -1; + } + return ret; +} + + +static inline int +validate_event(struct rte_event *ev) +{ + struct event_attr *attr; + + attr = rte_pktmbuf_mtod(ev->mbuf, struct event_attr *); + RTE_TEST_ASSERT_EQUAL(attr->flow_id, ev->flow_id, + "flow_id mismatch enq=%d deq =%d", + attr->flow_id, ev->flow_id); + RTE_TEST_ASSERT_EQUAL(attr->event_type, ev->event_type, + "event_type mismatch enq=%d deq =%d", + attr->event_type, ev->event_type); + RTE_TEST_ASSERT_EQUAL(attr->sub_event_type, ev->sub_event_type, + "sub_event_type mismatch enq=%d deq =%d", + attr->sub_event_type, ev->sub_event_type); + RTE_TEST_ASSERT_EQUAL(attr->sched_type, ev->sched_type, + "sched_type mismatch enq=%d deq =%d", + attr->sched_type, ev->sched_type); + RTE_TEST_ASSERT_EQUAL(attr->queue, ev->queue_id, + "queue mismatch enq=%d deq =%d", + attr->queue, ev->queue_id); + return 0; +} + +typedef int (*validate_event_cb)(uint32_t index, uint8_t port, + struct rte_event *ev); + +static inline int +consume_events(uint8_t port, const uint32_t total_events, validate_event_cb fn) +{ + uint32_t events = 0, forward_progress_cnt = 0, index = 0; + uint16_t valid_event; + struct rte_event ev; + int ret; + + while (1) { + if (++forward_progress_cnt > UINT16_MAX) { + otx2_err("Detected deadlock"); + return -1; + } + + valid_event = rte_event_dequeue_burst(evdev, port, &ev, 1, 0); + if (!valid_event) + continue; + + forward_progress_cnt = 0; + ret = validate_event(&ev); + if (ret) + return -1; + + if (fn != NULL) { + ret = fn(index, port, &ev); + RTE_TEST_ASSERT_SUCCESS(ret, + "Failed to validate test specific event"); + } + + ++index; + + rte_pktmbuf_free(ev.mbuf); + if (++events >= total_events) + break; + } + + return check_excess_events(port); +} + +static int +validate_simple_enqdeq(uint32_t index, uint8_t port, struct rte_event *ev) +{ + RTE_SET_USED(port); + RTE_TEST_ASSERT_EQUAL(index, ev->mbuf->seqn, "index=%d != seqn=%d", + index, ev->mbuf->seqn); + return 0; +} + +static inline int +test_simple_enqdeq(uint8_t sched_type) +{ + int ret; + + ret = inject_events(0 /*flow_id */, + RTE_EVENT_TYPE_CPU /* event_type */, + 0 /* sub_event_type */, + sched_type, + 0 /* queue */, + 0 /* port */, + MAX_EVENTS); + if (ret) + return -1; + + return consume_events(0 /* port */, MAX_EVENTS, validate_simple_enqdeq); +} + +static int +test_simple_enqdeq_ordered(void) +{ + return 
test_simple_enqdeq(RTE_SCHED_TYPE_ORDERED); +} + +static int +test_simple_enqdeq_atomic(void) +{ + return test_simple_enqdeq(RTE_SCHED_TYPE_ATOMIC); +} + +static int +test_simple_enqdeq_parallel(void) +{ + return test_simple_enqdeq(RTE_SCHED_TYPE_PARALLEL); +} + +/* + * Generate a prescribed number of events and spread them across available + * queues. On dequeue, using single event port(port 0) verify the enqueued + * event attributes + */ +static int +test_multi_queue_enq_single_port_deq(void) +{ + int ret; + + ret = generate_random_events(MAX_EVENTS); + if (ret) + return -1; + + return consume_events(0 /* port */, MAX_EVENTS, NULL); +} + +/* + * Inject 0..MAX_EVENTS events over 0..queue_count with modulus + * operation + * + * For example, Inject 32 events over 0..7 queues + * enqueue events 0, 8, 16, 24 in queue 0 + * enqueue events 1, 9, 17, 25 in queue 1 + * .. + * .. + * enqueue events 7, 15, 23, 31 in queue 7 + * + * On dequeue, Validate the events comes in 0,8,16,24,1,9,17,25..,7,15,23,31 + * order from queue0(highest priority) to queue7(lowest_priority) + */ +static int +validate_queue_priority(uint32_t index, uint8_t port, struct rte_event *ev) +{ + uint32_t queue_count; + + RTE_TEST_ASSERT_SUCCESS(rte_event_dev_attr_get(evdev, + RTE_EVENT_DEV_ATTR_QUEUE_COUNT, &queue_count), + "Queue count get failed"); + if (queue_count > 8) + queue_count = 8; + uint32_t range = MAX_EVENTS / queue_count; + uint32_t expected_val = (index % range) * queue_count; + + expected_val += ev->queue_id; + RTE_SET_USED(port); + RTE_TEST_ASSERT_EQUAL(ev->mbuf->seqn, expected_val, + "seqn=%d index=%d expected=%d range=%d nb_queues=%d max_event=%d", + ev->mbuf->seqn, index, expected_val, range, + queue_count, MAX_EVENTS); + return 0; +} + +static int +test_multi_queue_priority(void) +{ + int i, max_evts_roundoff; + /* See validate_queue_priority() comments for priority validate logic */ + uint32_t queue_count; + struct rte_mbuf *m; + uint8_t queue; + + RTE_TEST_ASSERT_SUCCESS(rte_event_dev_attr_get(evdev, + RTE_EVENT_DEV_ATTR_QUEUE_COUNT, &queue_count), + "Queue count get failed"); + if (queue_count > 8) + queue_count = 8; + max_evts_roundoff = MAX_EVENTS / queue_count; + max_evts_roundoff *= queue_count; + + for (i = 0; i < max_evts_roundoff; i++) { + struct rte_event ev = {.event = 0, .u64 = 0}; + + m = rte_pktmbuf_alloc(eventdev_test_mempool); + RTE_TEST_ASSERT_NOT_NULL(m, "mempool alloc failed"); + + m->seqn = i; + queue = i % queue_count; + update_event_and_validation_attr(m, &ev, 0, RTE_EVENT_TYPE_CPU, + 0, RTE_SCHED_TYPE_PARALLEL, + queue, 0); + rte_event_enqueue_burst(evdev, 0, &ev, 1); + } + + return consume_events(0, max_evts_roundoff, validate_queue_priority); +} + +static int +worker_multi_port_fn(void *arg) +{ + struct test_core_param *param = arg; + rte_atomic32_t *total_events = param->total_events; + uint8_t port = param->port; + uint16_t valid_event; + struct rte_event ev; + int ret; + + while (rte_atomic32_read(total_events) > 0) { + valid_event = rte_event_dequeue_burst(evdev, port, &ev, 1, 0); + if (!valid_event) + continue; + + ret = validate_event(&ev); + RTE_TEST_ASSERT_SUCCESS(ret, "Failed to validate event"); + rte_pktmbuf_free(ev.mbuf); + rte_atomic32_sub(total_events, 1); + } + + return 0; +} + +static inline int +wait_workers_to_join(const rte_atomic32_t *count) +{ + uint64_t cycles, print_cycles; + + cycles = rte_get_timer_cycles(); + print_cycles = cycles; + while (rte_atomic32_read(count)) { + uint64_t new_cycles = rte_get_timer_cycles(); + + if (new_cycles - print_cycles > 
rte_get_timer_hz()) { + otx2_err("Events %d", rte_atomic32_read(count)); + print_cycles = new_cycles; + } + if (new_cycles - cycles > rte_get_timer_hz() * 10000000000) { + otx2_err("No schedules for seconds, deadlock (%d)", + rte_atomic32_read(count)); + rte_event_dev_dump(evdev, stdout); + cycles = new_cycles; + return -1; + } + } + rte_eal_mp_wait_lcore(); + + return 0; +} + +static inline int +launch_workers_and_wait(int (*master_worker)(void *), + int (*slave_workers)(void *), uint32_t total_events, + uint8_t nb_workers, uint8_t sched_type) +{ + rte_atomic32_t atomic_total_events; + struct test_core_param *param; + uint64_t dequeue_tmo_ticks; + uint8_t port = 0; + int w_lcore; + int ret; + + if (!nb_workers) + return 0; + + rte_atomic32_set(&atomic_total_events, total_events); + seqn_list_init(); + + param = malloc(sizeof(struct test_core_param) * nb_workers); + if (!param) + return -1; + + ret = rte_event_dequeue_timeout_ticks(evdev, + rte_rand() % 10000000/* 10ms */, + &dequeue_tmo_ticks); + if (ret) { + free(param); + return -1; + } + + param[0].total_events = &atomic_total_events; + param[0].sched_type = sched_type; + param[0].port = 0; + param[0].dequeue_tmo_ticks = dequeue_tmo_ticks; + rte_wmb(); + + w_lcore = rte_get_next_lcore( + /* start core */ -1, + /* skip master */ 1, + /* wrap */ 0); + rte_eal_remote_launch(master_worker, ¶m[0], w_lcore); + + for (port = 1; port < nb_workers; port++) { + param[port].total_events = &atomic_total_events; + param[port].sched_type = sched_type; + param[port].port = port; + param[port].dequeue_tmo_ticks = dequeue_tmo_ticks; + rte_smp_wmb(); + w_lcore = rte_get_next_lcore(w_lcore, 1, 0); + rte_eal_remote_launch(slave_workers, ¶m[port], w_lcore); + } + + rte_smp_wmb(); + ret = wait_workers_to_join(&atomic_total_events); + free(param); + + return ret; +} + +/* + * Generate a prescribed number of events and spread them across available + * queues. 
Dequeue the events through multiple ports and verify the enqueued + * event attributes + */ +static int +test_multi_queue_enq_multi_port_deq(void) +{ + const unsigned int total_events = MAX_EVENTS; + uint32_t nr_ports; + int ret; + + ret = generate_random_events(total_events); + if (ret) + return -1; + + RTE_TEST_ASSERT_SUCCESS(rte_event_dev_attr_get(evdev, + RTE_EVENT_DEV_ATTR_PORT_COUNT, &nr_ports), + "Port count get failed"); + nr_ports = RTE_MIN(nr_ports, rte_lcore_count() - 1); + + if (!nr_ports) { + otx2_err("Not enough ports=%d or workers=%d", nr_ports, + rte_lcore_count() - 1); + return 0; + } + + return launch_workers_and_wait(worker_multi_port_fn, + worker_multi_port_fn, total_events, + nr_ports, 0xff /* invalid */); +} + +static +void flush(uint8_t dev_id, struct rte_event event, void *arg) +{ + unsigned int *count = arg; + + RTE_SET_USED(dev_id); + if (event.event_type == RTE_EVENT_TYPE_CPU) + *count = *count + 1; +} + +static int +test_dev_stop_flush(void) +{ + unsigned int total_events = MAX_EVENTS, count = 0; + int ret; + + ret = generate_random_events(total_events); + if (ret) + return -1; + + ret = rte_event_dev_stop_flush_callback_register(evdev, flush, &count); + if (ret) + return -2; + rte_event_dev_stop(evdev); + ret = rte_event_dev_stop_flush_callback_register(evdev, NULL, NULL); + if (ret) + return -3; + RTE_TEST_ASSERT_EQUAL(total_events, count, + "count mismatch total_events=%d count=%d", + total_events, count); + + return 0; +} + +static int +validate_queue_to_port_single_link(uint32_t index, uint8_t port, + struct rte_event *ev) +{ + RTE_SET_USED(index); + RTE_TEST_ASSERT_EQUAL(port, ev->queue_id, + "queue mismatch enq=%d deq =%d", + port, ev->queue_id); + + return 0; +} + +/* + * Link queue x to port x and check correctness of link by checking + * queue_id == x on dequeue on the specific port x + */ +static int +test_queue_to_port_single_link(void) +{ + int i, nr_links, ret; + uint32_t queue_count; + uint32_t port_count; + + RTE_TEST_ASSERT_SUCCESS(rte_event_dev_attr_get(evdev, + RTE_EVENT_DEV_ATTR_PORT_COUNT, &port_count), + "Port count get failed"); + + /* Unlink all connections that created in eventdev_setup */ + for (i = 0; i < (int)port_count; i++) { + ret = rte_event_port_unlink(evdev, i, NULL, 0); + RTE_TEST_ASSERT(ret >= 0, + "Failed to unlink all queues port=%d", i); + } + + RTE_TEST_ASSERT_SUCCESS(rte_event_dev_attr_get(evdev, + RTE_EVENT_DEV_ATTR_QUEUE_COUNT, &queue_count), + "Queue count get failed"); + + nr_links = RTE_MIN(port_count, queue_count); + const unsigned int total_events = MAX_EVENTS / nr_links; + + /* Link queue x to port x and inject events to queue x through port x */ + for (i = 0; i < nr_links; i++) { + uint8_t queue = (uint8_t)i; + + ret = rte_event_port_link(evdev, i, &queue, NULL, 1); + RTE_TEST_ASSERT(ret == 1, "Failed to link queue to port %d", i); + + ret = inject_events(0x100 /*flow_id */, + RTE_EVENT_TYPE_CPU /* event_type */, + rte_rand() % 256 /* sub_event_type */, + rte_rand() % (RTE_SCHED_TYPE_PARALLEL + 1), + queue /* queue */, i /* port */, + total_events /* events */); + if (ret) + return -1; + } + + /* Verify the events generated from correct queue */ + for (i = 0; i < nr_links; i++) { + ret = consume_events(i /* port */, total_events, + validate_queue_to_port_single_link); + if (ret) + return -1; + } + + return 0; +} + +static int +validate_queue_to_port_multi_link(uint32_t index, uint8_t port, + struct rte_event *ev) +{ + RTE_SET_USED(index); + RTE_TEST_ASSERT_EQUAL(port, (ev->queue_id & 0x1), + "queue mismatch 
enq=%d deq =%d", + port, ev->queue_id); + + return 0; +} + +/* + * Link all even number of queues to port 0 and all odd number of queues to + * port 1 and verify the link connection on dequeue + */ +static int +test_queue_to_port_multi_link(void) +{ + int ret, port0_events = 0, port1_events = 0; + uint32_t nr_queues = 0; + uint32_t nr_ports = 0; + uint8_t queue, port; + + RTE_TEST_ASSERT_SUCCESS(rte_event_dev_attr_get(evdev, + RTE_EVENT_DEV_ATTR_QUEUE_COUNT, &nr_queues), + "Queue count get failed"); + RTE_TEST_ASSERT_SUCCESS(rte_event_dev_attr_get(evdev, + RTE_EVENT_DEV_ATTR_QUEUE_COUNT, &nr_queues), + "Queue count get failed"); + RTE_TEST_ASSERT_SUCCESS(rte_event_dev_attr_get(evdev, + RTE_EVENT_DEV_ATTR_PORT_COUNT, &nr_ports), + "Port count get failed"); + + if (nr_ports < 2) { + otx2_err("Not enough ports to test ports=%d", nr_ports); + return 0; + } + + /* Unlink all connections that created in eventdev_setup */ + for (port = 0; port < nr_ports; port++) { + ret = rte_event_port_unlink(evdev, port, NULL, 0); + RTE_TEST_ASSERT(ret >= 0, "Failed to unlink all queues port=%d", + port); + } + + const unsigned int total_events = MAX_EVENTS / nr_queues; + + /* Link all even number of queues to port0 and odd numbers to port 1*/ + for (queue = 0; queue < nr_queues; queue++) { + port = queue & 0x1; + ret = rte_event_port_link(evdev, port, &queue, NULL, 1); + RTE_TEST_ASSERT(ret == 1, "Failed to link queue=%d to port=%d", + queue, port); + + ret = inject_events(0x100 /*flow_id */, + RTE_EVENT_TYPE_CPU /* event_type */, + rte_rand() % 256 /* sub_event_type */, + rte_rand() % (RTE_SCHED_TYPE_PARALLEL + 1), + queue /* queue */, port /* port */, + total_events /* events */); + if (ret) + return -1; + + if (port == 0) + port0_events += total_events; + else + port1_events += total_events; + } + + ret = consume_events(0 /* port */, port0_events, + validate_queue_to_port_multi_link); + if (ret) + return -1; + ret = consume_events(1 /* port */, port1_events, + validate_queue_to_port_multi_link); + if (ret) + return -1; + + return 0; +} + +static int +worker_flow_based_pipeline(void *arg) +{ + struct test_core_param *param = arg; + uint64_t dequeue_tmo_ticks = param->dequeue_tmo_ticks; + rte_atomic32_t *total_events = param->total_events; + uint8_t new_sched_type = param->sched_type; + uint8_t port = param->port; + uint16_t valid_event; + struct rte_event ev; + + while (rte_atomic32_read(total_events) > 0) { + valid_event = rte_event_dequeue_burst(evdev, port, &ev, 1, + dequeue_tmo_ticks); + if (!valid_event) + continue; + + /* Events from stage 0 */ + if (ev.sub_event_type == 0) { + /* Move to atomic flow to maintain the ordering */ + ev.flow_id = 0x2; + ev.event_type = RTE_EVENT_TYPE_CPU; + ev.sub_event_type = 1; /* stage 1 */ + ev.sched_type = new_sched_type; + ev.op = RTE_EVENT_OP_FORWARD; + rte_event_enqueue_burst(evdev, port, &ev, 1); + } else if (ev.sub_event_type == 1) { /* Events from stage 1*/ + if (seqn_list_update(ev.mbuf->seqn) == 0) { + rte_pktmbuf_free(ev.mbuf); + rte_atomic32_sub(total_events, 1); + } else { + otx2_err("Failed to update seqn_list"); + return -1; + } + } else { + otx2_err("Invalid ev.sub_event_type = %d", + ev.sub_event_type); + return -1; + } + } + return 0; +} + +static int +test_multiport_flow_sched_type_test(uint8_t in_sched_type, + uint8_t out_sched_type) +{ + const unsigned int total_events = MAX_EVENTS; + uint32_t nr_ports; + int ret; + + RTE_TEST_ASSERT_SUCCESS(rte_event_dev_attr_get(evdev, + RTE_EVENT_DEV_ATTR_PORT_COUNT, &nr_ports), + "Port count get failed"); + 
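+	/* At most one event port per available worker lcore is used; the
+	 * main lcore only launches and supervises the workers.
+	 */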
nr_ports = RTE_MIN(nr_ports, rte_lcore_count() - 1); + + if (!nr_ports) { + otx2_err("Not enough ports=%d or workers=%d", nr_ports, + rte_lcore_count() - 1); + return 0; + } + + /* Injects events with m->seqn=0 to total_events */ + ret = inject_events(0x1 /*flow_id */, + RTE_EVENT_TYPE_CPU /* event_type */, + 0 /* sub_event_type (stage 0) */, + in_sched_type, + 0 /* queue */, + 0 /* port */, + total_events /* events */); + if (ret) + return -1; + + rte_mb(); + ret = launch_workers_and_wait(worker_flow_based_pipeline, + worker_flow_based_pipeline, total_events, + nr_ports, out_sched_type); + if (ret) + return -1; + + if (in_sched_type != RTE_SCHED_TYPE_PARALLEL && + out_sched_type == RTE_SCHED_TYPE_ATOMIC) { + /* Check the events order maintained or not */ + return seqn_list_check(total_events); + } + + return 0; +} + +/* Multi port ordered to atomic transaction */ +static int +test_multi_port_flow_ordered_to_atomic(void) +{ + /* Ingress event order test */ + return test_multiport_flow_sched_type_test(RTE_SCHED_TYPE_ORDERED, + RTE_SCHED_TYPE_ATOMIC); +} + +static int +test_multi_port_flow_ordered_to_ordered(void) +{ + return test_multiport_flow_sched_type_test(RTE_SCHED_TYPE_ORDERED, + RTE_SCHED_TYPE_ORDERED); +} + +static int +test_multi_port_flow_ordered_to_parallel(void) +{ + return test_multiport_flow_sched_type_test(RTE_SCHED_TYPE_ORDERED, + RTE_SCHED_TYPE_PARALLEL); +} + +static int +test_multi_port_flow_atomic_to_atomic(void) +{ + /* Ingress event order test */ + return test_multiport_flow_sched_type_test(RTE_SCHED_TYPE_ATOMIC, + RTE_SCHED_TYPE_ATOMIC); +} + +static int +test_multi_port_flow_atomic_to_ordered(void) +{ + return test_multiport_flow_sched_type_test(RTE_SCHED_TYPE_ATOMIC, + RTE_SCHED_TYPE_ORDERED); +} + +static int +test_multi_port_flow_atomic_to_parallel(void) +{ + return test_multiport_flow_sched_type_test(RTE_SCHED_TYPE_ATOMIC, + RTE_SCHED_TYPE_PARALLEL); +} + +static int +test_multi_port_flow_parallel_to_atomic(void) +{ + return test_multiport_flow_sched_type_test(RTE_SCHED_TYPE_PARALLEL, + RTE_SCHED_TYPE_ATOMIC); +} + +static int +test_multi_port_flow_parallel_to_ordered(void) +{ + return test_multiport_flow_sched_type_test(RTE_SCHED_TYPE_PARALLEL, + RTE_SCHED_TYPE_ORDERED); +} + +static int +test_multi_port_flow_parallel_to_parallel(void) +{ + return test_multiport_flow_sched_type_test(RTE_SCHED_TYPE_PARALLEL, + RTE_SCHED_TYPE_PARALLEL); +} + +static int +worker_group_based_pipeline(void *arg) +{ + struct test_core_param *param = arg; + uint64_t dequeue_tmo_ticks = param->dequeue_tmo_ticks; + rte_atomic32_t *total_events = param->total_events; + uint8_t new_sched_type = param->sched_type; + uint8_t port = param->port; + uint16_t valid_event; + struct rte_event ev; + + while (rte_atomic32_read(total_events) > 0) { + valid_event = rte_event_dequeue_burst(evdev, port, &ev, 1, + dequeue_tmo_ticks); + if (!valid_event) + continue; + + /* Events from stage 0(group 0) */ + if (ev.queue_id == 0) { + /* Move to atomic flow to maintain the ordering */ + ev.flow_id = 0x2; + ev.event_type = RTE_EVENT_TYPE_CPU; + ev.sched_type = new_sched_type; + ev.queue_id = 1; /* Stage 1*/ + ev.op = RTE_EVENT_OP_FORWARD; + rte_event_enqueue_burst(evdev, port, &ev, 1); + } else if (ev.queue_id == 1) { /* Events from stage 1(group 1)*/ + if (seqn_list_update(ev.mbuf->seqn) == 0) { + rte_pktmbuf_free(ev.mbuf); + rte_atomic32_sub(total_events, 1); + } else { + otx2_err("Failed to update seqn_list"); + return -1; + } + } else { + otx2_err("Invalid ev.queue_id = %d", ev.queue_id); + return -1; + } 
+ } + + return 0; +} + +static int +test_multiport_queue_sched_type_test(uint8_t in_sched_type, + uint8_t out_sched_type) +{ + const unsigned int total_events = MAX_EVENTS; + uint32_t queue_count; + uint32_t nr_ports; + int ret; + + RTE_TEST_ASSERT_SUCCESS(rte_event_dev_attr_get(evdev, + RTE_EVENT_DEV_ATTR_PORT_COUNT, &nr_ports), + "Port count get failed"); + + nr_ports = RTE_MIN(nr_ports, rte_lcore_count() - 1); + + RTE_TEST_ASSERT_SUCCESS(rte_event_dev_attr_get(evdev, + RTE_EVENT_DEV_ATTR_QUEUE_COUNT, &queue_count), + "Queue count get failed"); + if (queue_count < 2 || !nr_ports) { + otx2_err("Not enough queues=%d ports=%d or workers=%d", + queue_count, nr_ports, + rte_lcore_count() - 1); + return 0; + } + + /* Injects events with m->seqn=0 to total_events */ + ret = inject_events(0x1 /*flow_id */, + RTE_EVENT_TYPE_CPU /* event_type */, + 0 /* sub_event_type (stage 0) */, + in_sched_type, + 0 /* queue */, + 0 /* port */, + total_events /* events */); + if (ret) + return -1; + + ret = launch_workers_and_wait(worker_group_based_pipeline, + worker_group_based_pipeline, total_events, + nr_ports, out_sched_type); + if (ret) + return -1; + + if (in_sched_type != RTE_SCHED_TYPE_PARALLEL && + out_sched_type == RTE_SCHED_TYPE_ATOMIC) { + /* Check the events order maintained or not */ + return seqn_list_check(total_events); + } + + return 0; +} + +static int +test_multi_port_queue_ordered_to_atomic(void) +{ + /* Ingress event order test */ + return test_multiport_queue_sched_type_test(RTE_SCHED_TYPE_ORDERED, + RTE_SCHED_TYPE_ATOMIC); +} + +static int +test_multi_port_queue_ordered_to_ordered(void) +{ + return test_multiport_queue_sched_type_test(RTE_SCHED_TYPE_ORDERED, + RTE_SCHED_TYPE_ORDERED); +} + +static int +test_multi_port_queue_ordered_to_parallel(void) +{ + return test_multiport_queue_sched_type_test(RTE_SCHED_TYPE_ORDERED, + RTE_SCHED_TYPE_PARALLEL); +} + +static int +test_multi_port_queue_atomic_to_atomic(void) +{ + /* Ingress event order test */ + return test_multiport_queue_sched_type_test(RTE_SCHED_TYPE_ATOMIC, + RTE_SCHED_TYPE_ATOMIC); +} + +static int +test_multi_port_queue_atomic_to_ordered(void) +{ + return test_multiport_queue_sched_type_test(RTE_SCHED_TYPE_ATOMIC, + RTE_SCHED_TYPE_ORDERED); +} + +static int +test_multi_port_queue_atomic_to_parallel(void) +{ + return test_multiport_queue_sched_type_test(RTE_SCHED_TYPE_ATOMIC, + RTE_SCHED_TYPE_PARALLEL); +} + +static int +test_multi_port_queue_parallel_to_atomic(void) +{ + return test_multiport_queue_sched_type_test(RTE_SCHED_TYPE_PARALLEL, + RTE_SCHED_TYPE_ATOMIC); +} + +static int +test_multi_port_queue_parallel_to_ordered(void) +{ + return test_multiport_queue_sched_type_test(RTE_SCHED_TYPE_PARALLEL, + RTE_SCHED_TYPE_ORDERED); +} + +static int +test_multi_port_queue_parallel_to_parallel(void) +{ + return test_multiport_queue_sched_type_test(RTE_SCHED_TYPE_PARALLEL, + RTE_SCHED_TYPE_PARALLEL); +} + +static int +worker_flow_based_pipeline_max_stages_rand_sched_type(void *arg) +{ + struct test_core_param *param = arg; + rte_atomic32_t *total_events = param->total_events; + uint8_t port = param->port; + uint16_t valid_event; + struct rte_event ev; + + while (rte_atomic32_read(total_events) > 0) { + valid_event = rte_event_dequeue_burst(evdev, port, &ev, 1, 0); + if (!valid_event) + continue; + + if (ev.sub_event_type == 255) { /* last stage */ + rte_pktmbuf_free(ev.mbuf); + rte_atomic32_sub(total_events, 1); + } else { + ev.event_type = RTE_EVENT_TYPE_CPU; + ev.sub_event_type++; + ev.sched_type = + rte_rand() % 
(RTE_SCHED_TYPE_PARALLEL + 1); + ev.op = RTE_EVENT_OP_FORWARD; + rte_event_enqueue_burst(evdev, port, &ev, 1); + } + } + + return 0; +} + +static int +launch_multi_port_max_stages_random_sched_type(int (*fn)(void *)) +{ + uint32_t nr_ports; + int ret; + + RTE_TEST_ASSERT_SUCCESS(rte_event_dev_attr_get(evdev, + RTE_EVENT_DEV_ATTR_PORT_COUNT, &nr_ports), + "Port count get failed"); + nr_ports = RTE_MIN(nr_ports, rte_lcore_count() - 1); + + if (!nr_ports) { + otx2_err("Not enough ports=%d or workers=%d", + nr_ports, rte_lcore_count() - 1); + return 0; + } + + /* Injects events with m->seqn=0 to total_events */ + ret = inject_events(0x1 /*flow_id */, + RTE_EVENT_TYPE_CPU /* event_type */, + 0 /* sub_event_type (stage 0) */, + rte_rand() % + (RTE_SCHED_TYPE_PARALLEL + 1) /* sched_type */, + 0 /* queue */, + 0 /* port */, + MAX_EVENTS /* events */); + if (ret) + return -1; + + return launch_workers_and_wait(fn, fn, MAX_EVENTS, nr_ports, + 0xff /* invalid */); +} + +/* Flow based pipeline with maximum stages with random sched type */ +static int +test_multi_port_flow_max_stages_random_sched_type(void) +{ + return launch_multi_port_max_stages_random_sched_type( + worker_flow_based_pipeline_max_stages_rand_sched_type); +} + +static int +worker_queue_based_pipeline_max_stages_rand_sched_type(void *arg) +{ + struct test_core_param *param = arg; + uint8_t port = param->port; + uint32_t queue_count; + uint16_t valid_event; + struct rte_event ev; + + RTE_TEST_ASSERT_SUCCESS(rte_event_dev_attr_get(evdev, + RTE_EVENT_DEV_ATTR_QUEUE_COUNT, &queue_count), + "Queue count get failed"); + uint8_t nr_queues = queue_count; + rte_atomic32_t *total_events = param->total_events; + + while (rte_atomic32_read(total_events) > 0) { + valid_event = rte_event_dequeue_burst(evdev, port, &ev, 1, 0); + if (!valid_event) + continue; + + if (ev.queue_id == nr_queues - 1) { /* last stage */ + rte_pktmbuf_free(ev.mbuf); + rte_atomic32_sub(total_events, 1); + } else { + ev.event_type = RTE_EVENT_TYPE_CPU; + ev.queue_id++; + ev.sched_type = + rte_rand() % (RTE_SCHED_TYPE_PARALLEL + 1); + ev.op = RTE_EVENT_OP_FORWARD; + rte_event_enqueue_burst(evdev, port, &ev, 1); + } + } + + return 0; +} + +/* Queue based pipeline with maximum stages with random sched type */ +static int +test_multi_port_queue_max_stages_random_sched_type(void) +{ + return launch_multi_port_max_stages_random_sched_type( + worker_queue_based_pipeline_max_stages_rand_sched_type); +} + +static int +worker_mixed_pipeline_max_stages_rand_sched_type(void *arg) +{ + struct test_core_param *param = arg; + uint8_t port = param->port; + uint32_t queue_count; + uint16_t valid_event; + struct rte_event ev; + + RTE_TEST_ASSERT_SUCCESS(rte_event_dev_attr_get(evdev, + RTE_EVENT_DEV_ATTR_QUEUE_COUNT, &queue_count), + "Queue count get failed"); + uint8_t nr_queues = queue_count; + rte_atomic32_t *total_events = param->total_events; + + while (rte_atomic32_read(total_events) > 0) { + valid_event = rte_event_dequeue_burst(evdev, port, &ev, 1, 0); + if (!valid_event) + continue; + + if (ev.queue_id == nr_queues - 1) { /* Last stage */ + rte_pktmbuf_free(ev.mbuf); + rte_atomic32_sub(total_events, 1); + } else { + ev.event_type = RTE_EVENT_TYPE_CPU; + ev.queue_id++; + ev.sub_event_type = rte_rand() % 256; + ev.sched_type = + rte_rand() % (RTE_SCHED_TYPE_PARALLEL + 1); + ev.op = RTE_EVENT_OP_FORWARD; + rte_event_enqueue_burst(evdev, port, &ev, 1); + } + } + + return 0; +} + +/* Queue and flow based pipeline with maximum stages with random sched type */ +static int 
+test_multi_port_mixed_max_stages_random_sched_type(void) +{ + return launch_multi_port_max_stages_random_sched_type( + worker_mixed_pipeline_max_stages_rand_sched_type); +} + +static int +worker_ordered_flow_producer(void *arg) +{ + struct test_core_param *param = arg; + uint8_t port = param->port; + struct rte_mbuf *m; + int counter = 0; + + while (counter < NUM_PACKETS) { + m = rte_pktmbuf_alloc(eventdev_test_mempool); + if (m == NULL) + continue; + + m->seqn = counter++; + + struct rte_event ev = {.event = 0, .u64 = 0}; + + ev.flow_id = 0x1; /* Generate a fat flow */ + ev.sub_event_type = 0; + /* Inject the new event */ + ev.op = RTE_EVENT_OP_NEW; + ev.event_type = RTE_EVENT_TYPE_CPU; + ev.sched_type = RTE_SCHED_TYPE_ORDERED; + ev.queue_id = 0; + ev.mbuf = m; + rte_event_enqueue_burst(evdev, port, &ev, 1); + } + + return 0; +} + +static inline int +test_producer_consumer_ingress_order_test(int (*fn)(void *)) +{ + uint32_t nr_ports; + + RTE_TEST_ASSERT_SUCCESS(rte_event_dev_attr_get(evdev, + RTE_EVENT_DEV_ATTR_PORT_COUNT, &nr_ports), + "Port count get failed"); + nr_ports = RTE_MIN(nr_ports, rte_lcore_count() - 1); + + if (rte_lcore_count() < 3 || nr_ports < 2) { + otx2_err("### Not enough cores for test."); + return 0; + } + + launch_workers_and_wait(worker_ordered_flow_producer, fn, + NUM_PACKETS, nr_ports, RTE_SCHED_TYPE_ATOMIC); + /* Check the events order maintained or not */ + return seqn_list_check(NUM_PACKETS); +} + +/* Flow based producer consumer ingress order test */ +static int +test_flow_producer_consumer_ingress_order_test(void) +{ + return test_producer_consumer_ingress_order_test( + worker_flow_based_pipeline); +} + +/* Queue based producer consumer ingress order test */ +static int +test_queue_producer_consumer_ingress_order_test(void) +{ + return test_producer_consumer_ingress_order_test( + worker_group_based_pipeline); +} + +static void octeontx_test_run(int (*setup)(void), void (*tdown)(void), + int (*test)(void), const char *name) +{ + if (setup() < 0) { + printf("Error setting up test %s", name); + unsupported++; + } else { + if (test() < 0) { + failed++; + printf("+ TestCase [%2d] : %s failed\n", total, name); + } else { + passed++; + printf("+ TestCase [%2d] : %s succeeded\n", total, + name); + } + } + + total++; + tdown(); +} + +int +otx2_sso_selftest(void) +{ + testsuite_setup(); + + OCTEONTX2_TEST_RUN(eventdev_setup, eventdev_teardown, + test_simple_enqdeq_ordered); + OCTEONTX2_TEST_RUN(eventdev_setup, eventdev_teardown, + test_simple_enqdeq_atomic); + OCTEONTX2_TEST_RUN(eventdev_setup, eventdev_teardown, + test_simple_enqdeq_parallel); + OCTEONTX2_TEST_RUN(eventdev_setup, eventdev_teardown, + test_multi_queue_enq_single_port_deq); + OCTEONTX2_TEST_RUN(eventdev_setup, eventdev_teardown, + test_dev_stop_flush); + OCTEONTX2_TEST_RUN(eventdev_setup, eventdev_teardown, + test_multi_queue_enq_multi_port_deq); + OCTEONTX2_TEST_RUN(eventdev_setup, eventdev_teardown, + test_queue_to_port_single_link); + OCTEONTX2_TEST_RUN(eventdev_setup, eventdev_teardown, + test_queue_to_port_multi_link); + OCTEONTX2_TEST_RUN(eventdev_setup, eventdev_teardown, + test_multi_port_flow_ordered_to_atomic); + OCTEONTX2_TEST_RUN(eventdev_setup, eventdev_teardown, + test_multi_port_flow_ordered_to_ordered); + OCTEONTX2_TEST_RUN(eventdev_setup, eventdev_teardown, + test_multi_port_flow_ordered_to_parallel); + OCTEONTX2_TEST_RUN(eventdev_setup, eventdev_teardown, + test_multi_port_flow_atomic_to_atomic); + OCTEONTX2_TEST_RUN(eventdev_setup, eventdev_teardown, + 
test_multi_port_flow_atomic_to_ordered); + OCTEONTX2_TEST_RUN(eventdev_setup, eventdev_teardown, + test_multi_port_flow_atomic_to_parallel); + OCTEONTX2_TEST_RUN(eventdev_setup, eventdev_teardown, + test_multi_port_flow_parallel_to_atomic); + OCTEONTX2_TEST_RUN(eventdev_setup, eventdev_teardown, + test_multi_port_flow_parallel_to_ordered); + OCTEONTX2_TEST_RUN(eventdev_setup, eventdev_teardown, + test_multi_port_flow_parallel_to_parallel); + OCTEONTX2_TEST_RUN(eventdev_setup, eventdev_teardown, + test_multi_port_queue_ordered_to_atomic); + OCTEONTX2_TEST_RUN(eventdev_setup, eventdev_teardown, + test_multi_port_queue_ordered_to_ordered); + OCTEONTX2_TEST_RUN(eventdev_setup, eventdev_teardown, + test_multi_port_queue_ordered_to_parallel); + OCTEONTX2_TEST_RUN(eventdev_setup, eventdev_teardown, + test_multi_port_queue_atomic_to_atomic); + OCTEONTX2_TEST_RUN(eventdev_setup, eventdev_teardown, + test_multi_port_queue_atomic_to_ordered); + OCTEONTX2_TEST_RUN(eventdev_setup, eventdev_teardown, + test_multi_port_queue_atomic_to_parallel); + OCTEONTX2_TEST_RUN(eventdev_setup, eventdev_teardown, + test_multi_port_queue_parallel_to_atomic); + OCTEONTX2_TEST_RUN(eventdev_setup, eventdev_teardown, + test_multi_port_queue_parallel_to_ordered); + OCTEONTX2_TEST_RUN(eventdev_setup, eventdev_teardown, + test_multi_port_queue_parallel_to_parallel); + OCTEONTX2_TEST_RUN(eventdev_setup, eventdev_teardown, + test_multi_port_flow_max_stages_random_sched_type); + OCTEONTX2_TEST_RUN(eventdev_setup, eventdev_teardown, + test_multi_port_queue_max_stages_random_sched_type); + OCTEONTX2_TEST_RUN(eventdev_setup, eventdev_teardown, + test_multi_port_mixed_max_stages_random_sched_type); + OCTEONTX2_TEST_RUN(eventdev_setup, eventdev_teardown, + test_flow_producer_consumer_ingress_order_test); + OCTEONTX2_TEST_RUN(eventdev_setup, eventdev_teardown, + test_queue_producer_consumer_ingress_order_test); + OCTEONTX2_TEST_RUN(eventdev_setup_priority, eventdev_teardown, + test_multi_queue_priority); + OCTEONTX2_TEST_RUN(eventdev_setup_dequeue_timeout, eventdev_teardown, + test_multi_port_flow_ordered_to_atomic); + OCTEONTX2_TEST_RUN(eventdev_setup_dequeue_timeout, eventdev_teardown, + test_multi_port_queue_ordered_to_atomic); + printf("Total tests : %d\n", total); + printf("Passed : %d\n", passed); + printf("Failed : %d\n", failed); + printf("Not supported : %d\n", unsupported); + + testsuite_teardown(); + + if (failed) + return -1; + + return 0; +} diff --git a/src/spdk/dpdk/drivers/event/octeontx2/otx2_evdev_stats.h b/src/spdk/dpdk/drivers/event/octeontx2/otx2_evdev_stats.h new file mode 100644 index 000000000..74fcec8a0 --- /dev/null +++ b/src/spdk/dpdk/drivers/event/octeontx2/otx2_evdev_stats.h @@ -0,0 +1,286 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2019 Marvell International Ltd. 
+ */ + +#ifndef __OTX2_EVDEV_STATS_H__ +#define __OTX2_EVDEV_STATS_H__ + +#include "otx2_evdev.h" + +struct otx2_sso_xstats_name { + const char name[RTE_EVENT_DEV_XSTATS_NAME_SIZE]; + const size_t offset; + const uint64_t mask; + const uint8_t shift; + uint64_t reset_snap[OTX2_SSO_MAX_VHGRP]; +}; + +static struct otx2_sso_xstats_name sso_hws_xstats[] = { + {"last_grp_serviced", offsetof(struct sso_hws_stats, arbitration), + 0x3FF, 0, {0} }, + {"affinity_arbitration_credits", + offsetof(struct sso_hws_stats, arbitration), + 0xF, 16, {0} }, +}; + +static struct otx2_sso_xstats_name sso_grp_xstats[] = { + {"wrk_sched", offsetof(struct sso_grp_stats, ws_pc), ~0x0, 0, + {0} }, + {"xaq_dram", offsetof(struct sso_grp_stats, ext_pc), ~0x0, + 0, {0} }, + {"add_wrk", offsetof(struct sso_grp_stats, wa_pc), ~0x0, 0, + {0} }, + {"tag_switch_req", offsetof(struct sso_grp_stats, ts_pc), ~0x0, 0, + {0} }, + {"desched_req", offsetof(struct sso_grp_stats, ds_pc), ~0x0, 0, + {0} }, + {"desched_wrk", offsetof(struct sso_grp_stats, dq_pc), ~0x0, 0, + {0} }, + {"xaq_cached", offsetof(struct sso_grp_stats, aw_status), 0x3, + 0, {0} }, + {"work_inflight", offsetof(struct sso_grp_stats, aw_status), 0x3F, + 16, {0} }, + {"inuse_pages", offsetof(struct sso_grp_stats, page_cnt), + 0xFFFFFFFF, 0, {0} }, +}; + +#define OTX2_SSO_NUM_HWS_XSTATS RTE_DIM(sso_hws_xstats) +#define OTX2_SSO_NUM_GRP_XSTATS RTE_DIM(sso_grp_xstats) + +#define OTX2_SSO_NUM_XSTATS (OTX2_SSO_NUM_HWS_XSTATS + OTX2_SSO_NUM_GRP_XSTATS) + +static int +otx2_sso_xstats_get(const struct rte_eventdev *event_dev, + enum rte_event_dev_xstats_mode mode, uint8_t queue_port_id, + const unsigned int ids[], uint64_t values[], unsigned int n) +{ + struct otx2_sso_evdev *dev = sso_pmd_priv(event_dev); + struct otx2_sso_xstats_name *xstats; + struct otx2_sso_xstats_name *xstat; + struct otx2_mbox *mbox = dev->mbox; + uint32_t xstats_mode_count = 0; + uint32_t start_offset = 0; + unsigned int i; + uint64_t value; + void *req_rsp; + int rc; + + switch (mode) { + case RTE_EVENT_DEV_XSTATS_DEVICE: + return 0; + case RTE_EVENT_DEV_XSTATS_PORT: + if (queue_port_id >= (signed int)dev->nb_event_ports) + goto invalid_value; + + xstats_mode_count = OTX2_SSO_NUM_HWS_XSTATS; + xstats = sso_hws_xstats; + + req_rsp = otx2_mbox_alloc_msg_sso_hws_get_stats(mbox); + ((struct sso_info_req *)req_rsp)->hws = dev->dual_ws ? 
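+		/* In dual-workslot mode event port N is backed by HWS 2N and
+		 * 2N + 1: the even half is read here, the odd half just below,
+		 * and the two counter sets are summed into values[].
+		 */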
+ 2 * queue_port_id : queue_port_id; + rc = otx2_mbox_process_msg(mbox, (void **)&req_rsp); + if (rc < 0) + goto invalid_value; + + if (dev->dual_ws) { + for (i = 0; i < n && i < xstats_mode_count; i++) { + xstat = &xstats[ids[i] - start_offset]; + values[i] = *(uint64_t *) + ((char *)req_rsp + xstat->offset); + values[i] = (values[i] >> xstat->shift) & + xstat->mask; + } + + req_rsp = otx2_mbox_alloc_msg_sso_hws_get_stats(mbox); + ((struct sso_info_req *)req_rsp)->hws = + (2 * queue_port_id) + 1; + rc = otx2_mbox_process_msg(mbox, (void **)&req_rsp); + if (rc < 0) + goto invalid_value; + } + + break; + case RTE_EVENT_DEV_XSTATS_QUEUE: + if (queue_port_id >= (signed int)dev->nb_event_queues) + goto invalid_value; + + xstats_mode_count = OTX2_SSO_NUM_GRP_XSTATS; + start_offset = OTX2_SSO_NUM_HWS_XSTATS; + xstats = sso_grp_xstats; + + req_rsp = otx2_mbox_alloc_msg_sso_grp_get_stats(mbox); + ((struct sso_info_req *)req_rsp)->grp = queue_port_id; + rc = otx2_mbox_process_msg(mbox, (void **)&req_rsp); + if (rc < 0) + goto invalid_value; + + break; + default: + otx2_err("Invalid mode received"); + goto invalid_value; + }; + + for (i = 0; i < n && i < xstats_mode_count; i++) { + xstat = &xstats[ids[i] - start_offset]; + value = *(uint64_t *)((char *)req_rsp + xstat->offset); + value = (value >> xstat->shift) & xstat->mask; + + if ((mode == RTE_EVENT_DEV_XSTATS_PORT) && dev->dual_ws) + values[i] += value; + else + values[i] = value; + + values[i] -= xstat->reset_snap[queue_port_id]; + } + + return i; +invalid_value: + return -EINVAL; +} + +static int +otx2_sso_xstats_reset(struct rte_eventdev *event_dev, + enum rte_event_dev_xstats_mode mode, + int16_t queue_port_id, const uint32_t ids[], uint32_t n) +{ + struct otx2_sso_evdev *dev = sso_pmd_priv(event_dev); + struct otx2_sso_xstats_name *xstats; + struct otx2_sso_xstats_name *xstat; + struct otx2_mbox *mbox = dev->mbox; + uint32_t xstats_mode_count = 0; + uint32_t start_offset = 0; + unsigned int i; + uint64_t value; + void *req_rsp; + int rc; + + switch (mode) { + case RTE_EVENT_DEV_XSTATS_DEVICE: + return 0; + case RTE_EVENT_DEV_XSTATS_PORT: + if (queue_port_id >= (signed int)dev->nb_event_ports) + goto invalid_value; + + xstats_mode_count = OTX2_SSO_NUM_HWS_XSTATS; + xstats = sso_hws_xstats; + + req_rsp = otx2_mbox_alloc_msg_sso_hws_get_stats(mbox); + ((struct sso_info_req *)req_rsp)->hws = dev->dual_ws ? 
+ 2 * queue_port_id : queue_port_id; + rc = otx2_mbox_process_msg(mbox, (void **)&req_rsp); + if (rc < 0) + goto invalid_value; + + if (dev->dual_ws) { + for (i = 0; i < n && i < xstats_mode_count; i++) { + xstat = &xstats[ids[i] - start_offset]; + xstat->reset_snap[queue_port_id] = *(uint64_t *) + ((char *)req_rsp + xstat->offset); + xstat->reset_snap[queue_port_id] = + (xstat->reset_snap[queue_port_id] >> + xstat->shift) & xstat->mask; + } + + req_rsp = otx2_mbox_alloc_msg_sso_hws_get_stats(mbox); + ((struct sso_info_req *)req_rsp)->hws = + (2 * queue_port_id) + 1; + rc = otx2_mbox_process_msg(mbox, (void **)&req_rsp); + if (rc < 0) + goto invalid_value; + } + + break; + case RTE_EVENT_DEV_XSTATS_QUEUE: + if (queue_port_id >= (signed int)dev->nb_event_queues) + goto invalid_value; + + xstats_mode_count = OTX2_SSO_NUM_GRP_XSTATS; + start_offset = OTX2_SSO_NUM_HWS_XSTATS; + xstats = sso_grp_xstats; + + req_rsp = otx2_mbox_alloc_msg_sso_grp_get_stats(mbox); + ((struct sso_info_req *)req_rsp)->grp = queue_port_id; + rc = otx2_mbox_process_msg(mbox, (void *)&req_rsp); + if (rc < 0) + goto invalid_value; + + break; + default: + otx2_err("Invalid mode received"); + goto invalid_value; + }; + + for (i = 0; i < n && i < xstats_mode_count; i++) { + xstat = &xstats[ids[i] - start_offset]; + value = *(uint64_t *)((char *)req_rsp + xstat->offset); + value = (value >> xstat->shift) & xstat->mask; + + if ((mode == RTE_EVENT_DEV_XSTATS_PORT) && dev->dual_ws) + xstat->reset_snap[queue_port_id] += value; + else + xstat->reset_snap[queue_port_id] = value; + } + return i; +invalid_value: + return -EINVAL; +} + +static int +otx2_sso_xstats_get_names(const struct rte_eventdev *event_dev, + enum rte_event_dev_xstats_mode mode, + uint8_t queue_port_id, + struct rte_event_dev_xstats_name *xstats_names, + unsigned int *ids, unsigned int size) +{ + struct rte_event_dev_xstats_name xstats_names_copy[OTX2_SSO_NUM_XSTATS]; + struct otx2_sso_evdev *dev = sso_pmd_priv(event_dev); + uint32_t xstats_mode_count = 0; + uint32_t start_offset = 0; + unsigned int xidx = 0; + unsigned int i; + + for (i = 0; i < OTX2_SSO_NUM_HWS_XSTATS; i++) { + snprintf(xstats_names_copy[i].name, + sizeof(xstats_names_copy[i].name), "%s", + sso_hws_xstats[i].name); + } + + for (; i < OTX2_SSO_NUM_XSTATS; i++) { + snprintf(xstats_names_copy[i].name, + sizeof(xstats_names_copy[i].name), "%s", + sso_grp_xstats[i - OTX2_SSO_NUM_HWS_XSTATS].name); + } + + switch (mode) { + case RTE_EVENT_DEV_XSTATS_DEVICE: + break; + case RTE_EVENT_DEV_XSTATS_PORT: + if (queue_port_id >= (signed int)dev->nb_event_ports) + break; + xstats_mode_count = OTX2_SSO_NUM_HWS_XSTATS; + break; + case RTE_EVENT_DEV_XSTATS_QUEUE: + if (queue_port_id >= (signed int)dev->nb_event_queues) + break; + xstats_mode_count = OTX2_SSO_NUM_GRP_XSTATS; + start_offset = OTX2_SSO_NUM_HWS_XSTATS; + break; + default: + otx2_err("Invalid mode received"); + return -EINVAL; + }; + + if (xstats_mode_count > size || !ids || !xstats_names) + return xstats_mode_count; + + for (i = 0; i < xstats_mode_count; i++) { + xidx = i + start_offset; + strncpy(xstats_names[i].name, xstats_names_copy[xidx].name, + sizeof(xstats_names[i].name)); + ids[i] = xidx; + } + + return i; +} + +#endif diff --git a/src/spdk/dpdk/drivers/event/octeontx2/otx2_tim_evdev.c b/src/spdk/dpdk/drivers/event/octeontx2/otx2_tim_evdev.c new file mode 100644 index 000000000..4c24cc8a6 --- /dev/null +++ b/src/spdk/dpdk/drivers/event/octeontx2/otx2_tim_evdev.c @@ -0,0 +1,783 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 
2019 Marvell International Ltd. + */ + +#include <rte_kvargs.h> +#include <rte_malloc.h> +#include <rte_mbuf_pool_ops.h> + +#include "otx2_evdev.h" +#include "otx2_tim_evdev.h" + +static struct rte_event_timer_adapter_ops otx2_tim_ops; + +static inline int +tim_get_msix_offsets(void) +{ + struct otx2_tim_evdev *dev = tim_priv_get(); + struct otx2_mbox *mbox = dev->mbox; + struct msix_offset_rsp *msix_rsp; + int i, rc; + + /* Get TIM MSIX vector offsets */ + otx2_mbox_alloc_msg_msix_offset(mbox); + rc = otx2_mbox_process_msg(mbox, (void *)&msix_rsp); + + for (i = 0; i < dev->nb_rings; i++) + dev->tim_msixoff[i] = msix_rsp->timlf_msixoff[i]; + + return rc; +} + +static void +tim_set_fp_ops(struct otx2_tim_ring *tim_ring) +{ + uint8_t prod_flag = !tim_ring->prod_type_sp; + + /* [MOD/AND] [DFB/FB] [SP][MP]*/ + const rte_event_timer_arm_burst_t arm_burst[2][2][2][2] = { +#define FP(_name, _f4, _f3, _f2, _f1, flags) \ + [_f4][_f3][_f2][_f1] = otx2_tim_arm_burst_ ## _name, +TIM_ARM_FASTPATH_MODES +#undef FP + }; + + const rte_event_timer_arm_tmo_tick_burst_t arm_tmo_burst[2][2][2] = { +#define FP(_name, _f3, _f2, _f1, flags) \ + [_f3][_f2][_f1] = otx2_tim_arm_tmo_tick_burst_ ## _name, +TIM_ARM_TMO_FASTPATH_MODES +#undef FP + }; + + otx2_tim_ops.arm_burst = + arm_burst[tim_ring->enable_stats][tim_ring->optimized] + [tim_ring->ena_dfb][prod_flag]; + otx2_tim_ops.arm_tmo_tick_burst = + arm_tmo_burst[tim_ring->enable_stats][tim_ring->optimized] + [tim_ring->ena_dfb]; + otx2_tim_ops.cancel_burst = otx2_tim_timer_cancel_burst; +} + +static void +otx2_tim_ring_info_get(const struct rte_event_timer_adapter *adptr, + struct rte_event_timer_adapter_info *adptr_info) +{ + struct otx2_tim_ring *tim_ring = adptr->data->adapter_priv; + + adptr_info->max_tmo_ns = tim_ring->max_tout; + adptr_info->min_resolution_ns = tim_ring->tck_nsec; + rte_memcpy(&adptr_info->conf, &adptr->data->conf, + sizeof(struct rte_event_timer_adapter_conf)); +} + +static void +tim_optimze_bkt_param(struct otx2_tim_ring *tim_ring) +{ + uint64_t tck_nsec; + uint32_t hbkts; + uint32_t lbkts; + + hbkts = rte_align32pow2(tim_ring->nb_bkts); + tck_nsec = RTE_ALIGN_MUL_CEIL(tim_ring->max_tout / (hbkts - 1), 10); + + if ((tck_nsec < TICK2NSEC(OTX2_TIM_MIN_TMO_TKS, + tim_ring->tenns_clk_freq) || + hbkts > OTX2_TIM_MAX_BUCKETS)) + hbkts = 0; + + lbkts = rte_align32prevpow2(tim_ring->nb_bkts); + tck_nsec = RTE_ALIGN_MUL_CEIL((tim_ring->max_tout / (lbkts - 1)), 10); + + if ((tck_nsec < TICK2NSEC(OTX2_TIM_MIN_TMO_TKS, + tim_ring->tenns_clk_freq) || + lbkts > OTX2_TIM_MAX_BUCKETS)) + lbkts = 0; + + if (!hbkts && !lbkts) + return; + + if (!hbkts) { + tim_ring->nb_bkts = lbkts; + goto end; + } else if (!lbkts) { + tim_ring->nb_bkts = hbkts; + goto end; + } + + tim_ring->nb_bkts = (hbkts - tim_ring->nb_bkts) < + (tim_ring->nb_bkts - lbkts) ? hbkts : lbkts; +end: + tim_ring->optimized = true; + tim_ring->tck_nsec = RTE_ALIGN_MUL_CEIL((tim_ring->max_tout / + (tim_ring->nb_bkts - 1)), 10); + otx2_tim_dbg("Optimized configured values"); + otx2_tim_dbg("Nb_bkts : %" PRIu32 "", tim_ring->nb_bkts); + otx2_tim_dbg("Tck_nsec : %" PRIu64 "", tim_ring->tck_nsec); +} + +static int +tim_chnk_pool_create(struct otx2_tim_ring *tim_ring, + struct rte_event_timer_adapter_conf *rcfg) +{ + unsigned int cache_sz = (tim_ring->nb_chunks / 1.5); + unsigned int mp_flags = 0; + char pool_name[25]; + int rc; + + cache_sz /= rte_lcore_count(); + /* Create chunk pool. 
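+	 * With the NPA backend available, the pool is created empty and bound
+	 * to the platform mempool ops so chunks are served from an NPA aura;
+	 * when tim_disable_npa is set, a plain software mempool is used and
+	 * the don't-free-buffer (DFB) mode is enabled instead.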
*/ + if (rcfg->flags & RTE_EVENT_TIMER_ADAPTER_F_SP_PUT) { + mp_flags = MEMPOOL_F_SP_PUT | MEMPOOL_F_SC_GET; + otx2_tim_dbg("Using single producer mode"); + tim_ring->prod_type_sp = true; + } + + snprintf(pool_name, sizeof(pool_name), "otx2_tim_chunk_pool%d", + tim_ring->ring_id); + + if (cache_sz > RTE_MEMPOOL_CACHE_MAX_SIZE) + cache_sz = RTE_MEMPOOL_CACHE_MAX_SIZE; + + if (!tim_ring->disable_npa) { + tim_ring->chunk_pool = rte_mempool_create_empty(pool_name, + tim_ring->nb_chunks, tim_ring->chunk_sz, + cache_sz, 0, rte_socket_id(), mp_flags); + + if (tim_ring->chunk_pool == NULL) { + otx2_err("Unable to create chunkpool."); + return -ENOMEM; + } + + rc = rte_mempool_set_ops_byname(tim_ring->chunk_pool, + rte_mbuf_platform_mempool_ops(), + NULL); + if (rc < 0) { + otx2_err("Unable to set chunkpool ops"); + goto free; + } + + rc = rte_mempool_populate_default(tim_ring->chunk_pool); + if (rc < 0) { + otx2_err("Unable to set populate chunkpool."); + goto free; + } + tim_ring->aura = npa_lf_aura_handle_to_aura( + tim_ring->chunk_pool->pool_id); + tim_ring->ena_dfb = 0; + } else { + tim_ring->chunk_pool = rte_mempool_create(pool_name, + tim_ring->nb_chunks, tim_ring->chunk_sz, + cache_sz, 0, NULL, NULL, NULL, NULL, + rte_socket_id(), + mp_flags); + if (tim_ring->chunk_pool == NULL) { + otx2_err("Unable to create chunkpool."); + return -ENOMEM; + } + tim_ring->ena_dfb = 1; + } + + return 0; + +free: + rte_mempool_free(tim_ring->chunk_pool); + return rc; +} + +static void +tim_err_desc(int rc) +{ + switch (rc) { + case TIM_AF_NO_RINGS_LEFT: + otx2_err("Unable to allocat new TIM ring."); + break; + case TIM_AF_INVALID_NPA_PF_FUNC: + otx2_err("Invalid NPA pf func."); + break; + case TIM_AF_INVALID_SSO_PF_FUNC: + otx2_err("Invalid SSO pf func."); + break; + case TIM_AF_RING_STILL_RUNNING: + otx2_tim_dbg("Ring busy."); + break; + case TIM_AF_LF_INVALID: + otx2_err("Invalid Ring id."); + break; + case TIM_AF_CSIZE_NOT_ALIGNED: + otx2_err("Chunk size specified needs to be multiple of 16."); + break; + case TIM_AF_CSIZE_TOO_SMALL: + otx2_err("Chunk size too small."); + break; + case TIM_AF_CSIZE_TOO_BIG: + otx2_err("Chunk size too big."); + break; + case TIM_AF_INTERVAL_TOO_SMALL: + otx2_err("Bucket traversal interval too small."); + break; + case TIM_AF_INVALID_BIG_ENDIAN_VALUE: + otx2_err("Invalid Big endian value."); + break; + case TIM_AF_INVALID_CLOCK_SOURCE: + otx2_err("Invalid Clock source specified."); + break; + case TIM_AF_GPIO_CLK_SRC_NOT_ENABLED: + otx2_err("GPIO clock source not enabled."); + break; + case TIM_AF_INVALID_BSIZE: + otx2_err("Invalid bucket size."); + break; + case TIM_AF_INVALID_ENABLE_PERIODIC: + otx2_err("Invalid bucket size."); + break; + case TIM_AF_INVALID_ENABLE_DONTFREE: + otx2_err("Invalid Don't free value."); + break; + case TIM_AF_ENA_DONTFRE_NSET_PERIODIC: + otx2_err("Don't free bit not set when periodic is enabled."); + break; + case TIM_AF_RING_ALREADY_DISABLED: + otx2_err("Ring already stopped"); + break; + default: + otx2_err("Unknown Error."); + } +} + +static int +otx2_tim_ring_create(struct rte_event_timer_adapter *adptr) +{ + struct rte_event_timer_adapter_conf *rcfg = &adptr->data->conf; + struct otx2_tim_evdev *dev = tim_priv_get(); + struct otx2_tim_ring *tim_ring; + struct tim_config_req *cfg_req; + struct tim_ring_req *free_req; + struct tim_lf_alloc_req *req; + struct tim_lf_alloc_rsp *rsp; + int i, rc; + + if (dev == NULL) + return -ENODEV; + + if (adptr->data->id >= dev->nb_rings) + return -ENODEV; + + req = 
otx2_mbox_alloc_msg_tim_lf_alloc(dev->mbox); + req->npa_pf_func = otx2_npa_pf_func_get(); + req->sso_pf_func = otx2_sso_pf_func_get(); + req->ring = adptr->data->id; + + rc = otx2_mbox_process_msg(dev->mbox, (void **)&rsp); + if (rc < 0) { + tim_err_desc(rc); + return -ENODEV; + } + + if (NSEC2TICK(RTE_ALIGN_MUL_CEIL(rcfg->timer_tick_ns, 10), + rsp->tenns_clk) < OTX2_TIM_MIN_TMO_TKS) { + if (rcfg->flags & RTE_EVENT_TIMER_ADAPTER_F_ADJUST_RES) + rcfg->timer_tick_ns = TICK2NSEC(OTX2_TIM_MIN_TMO_TKS, + rsp->tenns_clk); + else { + rc = -ERANGE; + goto rng_mem_err; + } + } + + tim_ring = rte_zmalloc("otx2_tim_prv", sizeof(struct otx2_tim_ring), 0); + if (tim_ring == NULL) { + rc = -ENOMEM; + goto rng_mem_err; + } + + adptr->data->adapter_priv = tim_ring; + + tim_ring->tenns_clk_freq = rsp->tenns_clk; + tim_ring->clk_src = (int)rcfg->clk_src; + tim_ring->ring_id = adptr->data->id; + tim_ring->tck_nsec = RTE_ALIGN_MUL_CEIL(rcfg->timer_tick_ns, 10); + tim_ring->max_tout = rcfg->max_tmo_ns; + tim_ring->nb_bkts = (tim_ring->max_tout / tim_ring->tck_nsec); + tim_ring->chunk_sz = dev->chunk_sz; + tim_ring->nb_timers = rcfg->nb_timers; + tim_ring->disable_npa = dev->disable_npa; + tim_ring->enable_stats = dev->enable_stats; + + for (i = 0; i < dev->ring_ctl_cnt ; i++) { + struct otx2_tim_ctl *ring_ctl = &dev->ring_ctl_data[i]; + + if (ring_ctl->ring == tim_ring->ring_id) { + tim_ring->chunk_sz = ring_ctl->chunk_slots ? + ((uint32_t)(ring_ctl->chunk_slots + 1) * + OTX2_TIM_CHUNK_ALIGNMENT) : tim_ring->chunk_sz; + tim_ring->enable_stats = ring_ctl->enable_stats; + tim_ring->disable_npa = ring_ctl->disable_npa; + } + } + + tim_ring->nb_chunks = tim_ring->nb_timers / OTX2_TIM_NB_CHUNK_SLOTS( + tim_ring->chunk_sz); + tim_ring->nb_chunk_slots = OTX2_TIM_NB_CHUNK_SLOTS(tim_ring->chunk_sz); + + /* Try to optimize the bucket parameters. */ + if ((rcfg->flags & RTE_EVENT_TIMER_ADAPTER_F_ADJUST_RES)) { + if (rte_is_power_of_2(tim_ring->nb_bkts)) + tim_ring->optimized = true; + else + tim_optimze_bkt_param(tim_ring); + } + + if (tim_ring->disable_npa) + tim_ring->nb_chunks = tim_ring->nb_chunks * tim_ring->nb_bkts; + else + tim_ring->nb_chunks = tim_ring->nb_chunks + tim_ring->nb_bkts; + + /* Create buckets. */ + tim_ring->bkt = rte_zmalloc("otx2_tim_bucket", (tim_ring->nb_bkts) * + sizeof(struct otx2_tim_bkt), + RTE_CACHE_LINE_SIZE); + if (tim_ring->bkt == NULL) + goto bkt_mem_err; + + rc = tim_chnk_pool_create(tim_ring, rcfg); + if (rc < 0) + goto chnk_mem_err; + + cfg_req = otx2_mbox_alloc_msg_tim_config_ring(dev->mbox); + + cfg_req->ring = tim_ring->ring_id; + cfg_req->bigendian = false; + cfg_req->clocksource = tim_ring->clk_src; + cfg_req->enableperiodic = false; + cfg_req->enabledontfreebuffer = tim_ring->ena_dfb; + cfg_req->bucketsize = tim_ring->nb_bkts; + cfg_req->chunksize = tim_ring->chunk_sz; + cfg_req->interval = NSEC2TICK(tim_ring->tck_nsec, + tim_ring->tenns_clk_freq); + + rc = otx2_mbox_process(dev->mbox); + if (rc < 0) { + tim_err_desc(rc); + goto chnk_mem_err; + } + + tim_ring->base = dev->bar2 + + (RVU_BLOCK_ADDR_TIM << 20 | tim_ring->ring_id << 12); + + rc = tim_register_irq(tim_ring->ring_id); + if (rc < 0) + goto chnk_mem_err; + + otx2_write64((uint64_t)tim_ring->bkt, + tim_ring->base + TIM_LF_RING_BASE); + otx2_write64(tim_ring->aura, tim_ring->base + TIM_LF_RING_AURA); + + /* Set fastpath ops. */ + tim_set_fp_ops(tim_ring); + + /* Update SSO xae count. 
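+	 * Account for the timers this ring can arm when sizing the SSO's
+	 * in-flight event (XAE) pool; the event device is then reconfigured
+	 * so the larger pool takes effect before the adapter goes live.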
*/ + sso_updt_xae_cnt(sso_pmd_priv(dev->event_dev), (void *)tim_ring, + RTE_EVENT_TYPE_TIMER); + sso_xae_reconfigure(dev->event_dev); + + otx2_tim_dbg("Total memory used %"PRIu64"MB\n", + (uint64_t)(((tim_ring->nb_chunks * tim_ring->chunk_sz) + + (tim_ring->nb_bkts * sizeof(struct otx2_tim_bkt))) / + BIT_ULL(20))); + + return rc; + +chnk_mem_err: + rte_free(tim_ring->bkt); +bkt_mem_err: + rte_free(tim_ring); +rng_mem_err: + free_req = otx2_mbox_alloc_msg_tim_lf_free(dev->mbox); + free_req->ring = adptr->data->id; + otx2_mbox_process(dev->mbox); + return rc; +} + +static void +otx2_tim_calibrate_start_tsc(struct otx2_tim_ring *tim_ring) +{ +#define OTX2_TIM_CALIB_ITER 1E6 + uint32_t real_bkt, bucket; + int icount, ecount = 0; + uint64_t bkt_cyc; + + for (icount = 0; icount < OTX2_TIM_CALIB_ITER; icount++) { + real_bkt = otx2_read64(tim_ring->base + TIM_LF_RING_REL) >> 44; + bkt_cyc = rte_rdtsc(); + bucket = (bkt_cyc - tim_ring->ring_start_cyc) / + tim_ring->tck_int; + bucket = bucket % (tim_ring->nb_bkts); + tim_ring->ring_start_cyc = bkt_cyc - (real_bkt * + tim_ring->tck_int); + if (bucket != real_bkt) + ecount++; + } + tim_ring->last_updt_cyc = bkt_cyc; + otx2_tim_dbg("Bucket mispredict %3.2f distance %d\n", + 100 - (((double)(icount - ecount) / (double)icount) * 100), + bucket - real_bkt); +} + +static int +otx2_tim_ring_start(const struct rte_event_timer_adapter *adptr) +{ + struct otx2_tim_ring *tim_ring = adptr->data->adapter_priv; + struct otx2_tim_evdev *dev = tim_priv_get(); + struct tim_enable_rsp *rsp; + struct tim_ring_req *req; + int rc; + + if (dev == NULL) + return -ENODEV; + + req = otx2_mbox_alloc_msg_tim_enable_ring(dev->mbox); + req->ring = tim_ring->ring_id; + + rc = otx2_mbox_process_msg(dev->mbox, (void **)&rsp); + if (rc < 0) { + tim_err_desc(rc); + goto fail; + } +#ifdef RTE_ARM_EAL_RDTSC_USE_PMU + uint64_t tenns_stmp, tenns_diff; + uint64_t pmu_stmp; + + pmu_stmp = rte_rdtsc(); + asm volatile("mrs %0, cntvct_el0" : "=r" (tenns_stmp)); + + tenns_diff = tenns_stmp - rsp->timestarted; + pmu_stmp = pmu_stmp - (NSEC2TICK(tenns_diff * 10, rte_get_timer_hz())); + tim_ring->ring_start_cyc = pmu_stmp; +#else + tim_ring->ring_start_cyc = rsp->timestarted; +#endif + tim_ring->tck_int = NSEC2TICK(tim_ring->tck_nsec, rte_get_timer_hz()); + tim_ring->tot_int = tim_ring->tck_int * tim_ring->nb_bkts; + tim_ring->fast_div = rte_reciprocal_value_u64(tim_ring->tck_int); + + otx2_tim_calibrate_start_tsc(tim_ring); + +fail: + return rc; +} + +static int +otx2_tim_ring_stop(const struct rte_event_timer_adapter *adptr) +{ + struct otx2_tim_ring *tim_ring = adptr->data->adapter_priv; + struct otx2_tim_evdev *dev = tim_priv_get(); + struct tim_ring_req *req; + int rc; + + if (dev == NULL) + return -ENODEV; + + req = otx2_mbox_alloc_msg_tim_disable_ring(dev->mbox); + req->ring = tim_ring->ring_id; + + rc = otx2_mbox_process(dev->mbox); + if (rc < 0) { + tim_err_desc(rc); + rc = -EBUSY; + } + + return rc; +} + +static int +otx2_tim_ring_free(struct rte_event_timer_adapter *adptr) +{ + struct otx2_tim_ring *tim_ring = adptr->data->adapter_priv; + struct otx2_tim_evdev *dev = tim_priv_get(); + struct tim_ring_req *req; + int rc; + + if (dev == NULL) + return -ENODEV; + + tim_unregister_irq(tim_ring->ring_id); + + req = otx2_mbox_alloc_msg_tim_lf_free(dev->mbox); + req->ring = tim_ring->ring_id; + + rc = otx2_mbox_process(dev->mbox); + if (rc < 0) { + tim_err_desc(rc); + return -EBUSY; + } + + rte_free(tim_ring->bkt); + rte_mempool_free(tim_ring->chunk_pool); + 
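+	/* adapter_priv is the tim_ring allocated in otx2_tim_ring_create();
+	 * freeing it releases the ring context itself.
+	 */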
rte_free(adptr->data->adapter_priv); + + return 0; +} + +static int +otx2_tim_stats_get(const struct rte_event_timer_adapter *adapter, + struct rte_event_timer_adapter_stats *stats) +{ + struct otx2_tim_ring *tim_ring = adapter->data->adapter_priv; + uint64_t bkt_cyc = rte_rdtsc() - tim_ring->ring_start_cyc; + + + stats->evtim_exp_count = __atomic_load_n(&tim_ring->arm_cnt, + __ATOMIC_RELAXED); + stats->ev_enq_count = stats->evtim_exp_count; + stats->adapter_tick_count = rte_reciprocal_divide_u64(bkt_cyc, + &tim_ring->fast_div); + return 0; +} + +static int +otx2_tim_stats_reset(const struct rte_event_timer_adapter *adapter) +{ + struct otx2_tim_ring *tim_ring = adapter->data->adapter_priv; + + __atomic_store_n(&tim_ring->arm_cnt, 0, __ATOMIC_RELAXED); + return 0; +} + +int +otx2_tim_caps_get(const struct rte_eventdev *evdev, uint64_t flags, + uint32_t *caps, + const struct rte_event_timer_adapter_ops **ops) +{ + struct otx2_tim_evdev *dev = tim_priv_get(); + + RTE_SET_USED(flags); + + if (dev == NULL) + return -ENODEV; + + otx2_tim_ops.init = otx2_tim_ring_create; + otx2_tim_ops.uninit = otx2_tim_ring_free; + otx2_tim_ops.start = otx2_tim_ring_start; + otx2_tim_ops.stop = otx2_tim_ring_stop; + otx2_tim_ops.get_info = otx2_tim_ring_info_get; + + if (dev->enable_stats) { + otx2_tim_ops.stats_get = otx2_tim_stats_get; + otx2_tim_ops.stats_reset = otx2_tim_stats_reset; + } + + /* Store evdev pointer for later use. */ + dev->event_dev = (struct rte_eventdev *)(uintptr_t)evdev; + *caps = RTE_EVENT_TIMER_ADAPTER_CAP_INTERNAL_PORT; + *ops = &otx2_tim_ops; + + return 0; +} + +#define OTX2_TIM_DISABLE_NPA "tim_disable_npa" +#define OTX2_TIM_CHNK_SLOTS "tim_chnk_slots" +#define OTX2_TIM_STATS_ENA "tim_stats_ena" +#define OTX2_TIM_RINGS_LMT "tim_rings_lmt" +#define OTX2_TIM_RING_CTL "tim_ring_ctl" + +static void +tim_parse_ring_param(char *value, void *opaque) +{ + struct otx2_tim_evdev *dev = opaque; + struct otx2_tim_ctl ring_ctl = {0}; + char *tok = strtok(value, "-"); + struct otx2_tim_ctl *old_ptr; + uint16_t *val; + + val = (uint16_t *)&ring_ctl; + + if (!strlen(value)) + return; + + while (tok != NULL) { + *val = atoi(tok); + tok = strtok(NULL, "-"); + val++; + } + + if (val != (&ring_ctl.enable_stats + 1)) { + otx2_err( + "Invalid ring param expected [ring-chunk_sz-disable_npa-enable_stats]"); + return; + } + + dev->ring_ctl_cnt++; + old_ptr = dev->ring_ctl_data; + dev->ring_ctl_data = rte_realloc(dev->ring_ctl_data, + sizeof(struct otx2_tim_ctl) * + dev->ring_ctl_cnt, 0); + if (dev->ring_ctl_data == NULL) { + dev->ring_ctl_data = old_ptr; + dev->ring_ctl_cnt--; + return; + } + + dev->ring_ctl_data[dev->ring_ctl_cnt - 1] = ring_ctl; +} + +static void +tim_parse_ring_ctl_list(const char *value, void *opaque) +{ + char *s = strdup(value); + char *start = NULL; + char *end = NULL; + char *f = s; + + while (*s) { + if (*s == '[') + start = s; + else if (*s == ']') + end = s; + + if (start && start < end) { + *end = 0; + tim_parse_ring_param(start + 1, opaque); + start = end; + s = end; + } + s++; + } + + free(f); +} + +static int +tim_parse_kvargs_dict(const char *key, const char *value, void *opaque) +{ + RTE_SET_USED(key); + + /* Dict format [ring-chunk_sz-disable_npa-enable_stats] use '-' as ',' + * isn't allowed. 0 represents default. 
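+	 * For example, tim_ring_ctl=[2-1023-1-0] selects ring 2 with 1023
+	 * chunk slots, NPA disabled and stats left at their default; several
+	 * [..] groups may be passed back to back.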
+ */ + tim_parse_ring_ctl_list(value, opaque); + + return 0; +} + +static void +tim_parse_devargs(struct rte_devargs *devargs, struct otx2_tim_evdev *dev) +{ + struct rte_kvargs *kvlist; + + if (devargs == NULL) + return; + + kvlist = rte_kvargs_parse(devargs->args, NULL); + if (kvlist == NULL) + return; + + rte_kvargs_process(kvlist, OTX2_TIM_DISABLE_NPA, + &parse_kvargs_flag, &dev->disable_npa); + rte_kvargs_process(kvlist, OTX2_TIM_CHNK_SLOTS, + &parse_kvargs_value, &dev->chunk_slots); + rte_kvargs_process(kvlist, OTX2_TIM_STATS_ENA, &parse_kvargs_flag, + &dev->enable_stats); + rte_kvargs_process(kvlist, OTX2_TIM_RINGS_LMT, &parse_kvargs_value, + &dev->min_ring_cnt); + rte_kvargs_process(kvlist, OTX2_TIM_RING_CTL, + &tim_parse_kvargs_dict, &dev); + + rte_kvargs_free(kvlist); +} + +void +otx2_tim_init(struct rte_pci_device *pci_dev, struct otx2_dev *cmn_dev) +{ + struct rsrc_attach_req *atch_req; + struct rsrc_detach_req *dtch_req; + struct free_rsrcs_rsp *rsrc_cnt; + const struct rte_memzone *mz; + struct otx2_tim_evdev *dev; + int rc; + + if (rte_eal_process_type() != RTE_PROC_PRIMARY) + return; + + mz = rte_memzone_reserve(RTE_STR(OTX2_TIM_EVDEV_NAME), + sizeof(struct otx2_tim_evdev), + rte_socket_id(), 0); + if (mz == NULL) { + otx2_tim_dbg("Unable to allocate memory for TIM Event device"); + return; + } + + dev = mz->addr; + dev->pci_dev = pci_dev; + dev->mbox = cmn_dev->mbox; + dev->bar2 = cmn_dev->bar2; + + tim_parse_devargs(pci_dev->device.devargs, dev); + + otx2_mbox_alloc_msg_free_rsrc_cnt(dev->mbox); + rc = otx2_mbox_process_msg(dev->mbox, (void *)&rsrc_cnt); + if (rc < 0) { + otx2_err("Unable to get free rsrc count."); + goto mz_free; + } + + dev->nb_rings = dev->min_ring_cnt ? + RTE_MIN(dev->min_ring_cnt, rsrc_cnt->tim) : rsrc_cnt->tim; + + if (!dev->nb_rings) { + otx2_tim_dbg("No TIM Logical functions provisioned."); + goto mz_free; + } + + atch_req = otx2_mbox_alloc_msg_attach_resources(dev->mbox); + atch_req->modify = true; + atch_req->timlfs = dev->nb_rings; + + rc = otx2_mbox_process(dev->mbox); + if (rc < 0) { + otx2_err("Unable to attach TIM rings."); + goto mz_free; + } + + rc = tim_get_msix_offsets(); + if (rc < 0) { + otx2_err("Unable to get MSIX offsets for TIM."); + goto detach; + } + + if (dev->chunk_slots && + dev->chunk_slots <= OTX2_TIM_MAX_CHUNK_SLOTS && + dev->chunk_slots >= OTX2_TIM_MIN_CHUNK_SLOTS) { + dev->chunk_sz = (dev->chunk_slots + 1) * + OTX2_TIM_CHUNK_ALIGNMENT; + } else { + dev->chunk_sz = OTX2_TIM_RING_DEF_CHUNK_SZ; + } + + return; + +detach: + dtch_req = otx2_mbox_alloc_msg_detach_resources(dev->mbox); + dtch_req->partial = true; + dtch_req->timlfs = true; + + otx2_mbox_process(dev->mbox); +mz_free: + rte_memzone_free(mz); +} + +void +otx2_tim_fini(void) +{ + struct otx2_tim_evdev *dev = tim_priv_get(); + struct rsrc_detach_req *dtch_req; + + if (rte_eal_process_type() != RTE_PROC_PRIMARY) + return; + + dtch_req = otx2_mbox_alloc_msg_detach_resources(dev->mbox); + dtch_req->partial = true; + dtch_req->timlfs = true; + + otx2_mbox_process(dev->mbox); + rte_memzone_free(rte_memzone_lookup(RTE_STR(OTX2_TIM_EVDEV_NAME))); +} diff --git a/src/spdk/dpdk/drivers/event/octeontx2/otx2_tim_evdev.h b/src/spdk/dpdk/drivers/event/octeontx2/otx2_tim_evdev.h new file mode 100644 index 000000000..44e3c7b51 --- /dev/null +++ b/src/spdk/dpdk/drivers/event/octeontx2/otx2_tim_evdev.h @@ -0,0 +1,253 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2019 Marvell International Ltd. 
+ */ + +#ifndef __OTX2_TIM_EVDEV_H__ +#define __OTX2_TIM_EVDEV_H__ + +#include <rte_event_timer_adapter.h> +#include <rte_event_timer_adapter_pmd.h> +#include <rte_reciprocal.h> + +#include "otx2_dev.h" + +#define OTX2_TIM_EVDEV_NAME otx2_tim_eventdev + +#define otx2_tim_func_trace otx2_tim_dbg + +#define TIM_LF_RING_AURA (0x0) +#define TIM_LF_RING_BASE (0x130) +#define TIM_LF_NRSPERR_INT (0x200) +#define TIM_LF_NRSPERR_INT_W1S (0x208) +#define TIM_LF_NRSPERR_INT_ENA_W1S (0x210) +#define TIM_LF_NRSPERR_INT_ENA_W1C (0x218) +#define TIM_LF_RAS_INT (0x300) +#define TIM_LF_RAS_INT_W1S (0x308) +#define TIM_LF_RAS_INT_ENA_W1S (0x310) +#define TIM_LF_RAS_INT_ENA_W1C (0x318) +#define TIM_LF_RING_REL (0x400) + +#define TIM_BUCKET_W1_S_CHUNK_REMAINDER (48) +#define TIM_BUCKET_W1_M_CHUNK_REMAINDER ((1ULL << (64 - \ + TIM_BUCKET_W1_S_CHUNK_REMAINDER)) - 1) +#define TIM_BUCKET_W1_S_LOCK (40) +#define TIM_BUCKET_W1_M_LOCK ((1ULL << \ + (TIM_BUCKET_W1_S_CHUNK_REMAINDER - \ + TIM_BUCKET_W1_S_LOCK)) - 1) +#define TIM_BUCKET_W1_S_RSVD (35) +#define TIM_BUCKET_W1_S_BSK (34) +#define TIM_BUCKET_W1_M_BSK ((1ULL << \ + (TIM_BUCKET_W1_S_RSVD - \ + TIM_BUCKET_W1_S_BSK)) - 1) +#define TIM_BUCKET_W1_S_HBT (33) +#define TIM_BUCKET_W1_M_HBT ((1ULL << \ + (TIM_BUCKET_W1_S_BSK - \ + TIM_BUCKET_W1_S_HBT)) - 1) +#define TIM_BUCKET_W1_S_SBT (32) +#define TIM_BUCKET_W1_M_SBT ((1ULL << \ + (TIM_BUCKET_W1_S_HBT - \ + TIM_BUCKET_W1_S_SBT)) - 1) +#define TIM_BUCKET_W1_S_NUM_ENTRIES (0) +#define TIM_BUCKET_W1_M_NUM_ENTRIES ((1ULL << \ + (TIM_BUCKET_W1_S_SBT - \ + TIM_BUCKET_W1_S_NUM_ENTRIES)) - 1) + +#define TIM_BUCKET_SEMA (TIM_BUCKET_CHUNK_REMAIN) + +#define TIM_BUCKET_CHUNK_REMAIN \ + (TIM_BUCKET_W1_M_CHUNK_REMAINDER << TIM_BUCKET_W1_S_CHUNK_REMAINDER) + +#define TIM_BUCKET_LOCK \ + (TIM_BUCKET_W1_M_LOCK << TIM_BUCKET_W1_S_LOCK) + +#define TIM_BUCKET_SEMA_WLOCK \ + (TIM_BUCKET_CHUNK_REMAIN | (1ull << TIM_BUCKET_W1_S_LOCK)) + +#define OTX2_MAX_TIM_RINGS (256) +#define OTX2_TIM_MAX_BUCKETS (0xFFFFF) +#define OTX2_TIM_RING_DEF_CHUNK_SZ (4096) +#define OTX2_TIM_CHUNK_ALIGNMENT (16) +#define OTX2_TIM_MAX_BURST (RTE_CACHE_LINE_SIZE / \ + OTX2_TIM_CHUNK_ALIGNMENT) +#define OTX2_TIM_NB_CHUNK_SLOTS(sz) (((sz) / OTX2_TIM_CHUNK_ALIGNMENT) - 1) +#define OTX2_TIM_MIN_CHUNK_SLOTS (0x1) +#define OTX2_TIM_MAX_CHUNK_SLOTS (0x1FFE) +#define OTX2_TIM_MIN_TMO_TKS (256) + +#define OTX2_TIM_SP 0x1 +#define OTX2_TIM_MP 0x2 +#define OTX2_TIM_BKT_AND 0x4 +#define OTX2_TIM_BKT_MOD 0x8 +#define OTX2_TIM_ENA_FB 0x10 +#define OTX2_TIM_ENA_DFB 0x20 +#define OTX2_TIM_ENA_STATS 0x40 + +enum otx2_tim_clk_src { + OTX2_TIM_CLK_SRC_10NS = RTE_EVENT_TIMER_ADAPTER_CPU_CLK, + OTX2_TIM_CLK_SRC_GPIO = RTE_EVENT_TIMER_ADAPTER_EXT_CLK0, + OTX2_TIM_CLK_SRC_GTI = RTE_EVENT_TIMER_ADAPTER_EXT_CLK1, + OTX2_TIM_CLK_SRC_PTP = RTE_EVENT_TIMER_ADAPTER_EXT_CLK2, +}; + +struct otx2_tim_bkt { + uint64_t first_chunk; + union { + uint64_t w1; + struct { + uint32_t nb_entry; + uint8_t sbt:1; + uint8_t hbt:1; + uint8_t bsk:1; + uint8_t rsvd:5; + uint8_t lock; + int16_t chunk_remainder; + }; + }; + uint64_t current_chunk; + uint64_t pad; +} __rte_packed __rte_aligned(32); + +struct otx2_tim_ent { + uint64_t w0; + uint64_t wqe; +} __rte_packed; + +struct otx2_tim_ctl { + uint16_t ring; + uint16_t chunk_slots; + uint16_t disable_npa; + uint16_t enable_stats; +}; + +struct otx2_tim_evdev { + struct rte_pci_device *pci_dev; + struct rte_eventdev *event_dev; + struct otx2_mbox *mbox; + uint16_t nb_rings; + uint32_t chunk_sz; + uintptr_t bar2; + /* Dev args */ + uint8_t disable_npa; + 
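+	/* Chunk slots per chunk from the tim_chnk_slots devarg (0 keeps the
+	 * default chunk size).
+	 */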
uint16_t chunk_slots; + uint16_t min_ring_cnt; + uint8_t enable_stats; + uint16_t ring_ctl_cnt; + struct otx2_tim_ctl *ring_ctl_data; + /* HW const */ + /* MSIX offsets */ + uint16_t tim_msixoff[OTX2_MAX_TIM_RINGS]; +}; + +struct otx2_tim_ring { + uintptr_t base; + uint16_t nb_chunk_slots; + uint32_t nb_bkts; + uint64_t last_updt_cyc; + uint64_t ring_start_cyc; + uint64_t tck_int; + uint64_t tot_int; + struct otx2_tim_bkt *bkt; + struct rte_mempool *chunk_pool; + struct rte_reciprocal_u64 fast_div; + uint64_t arm_cnt; + uint8_t prod_type_sp; + uint8_t enable_stats; + uint8_t disable_npa; + uint8_t optimized; + uint8_t ena_dfb; + uint16_t ring_id; + uint32_t aura; + uint64_t nb_timers; + uint64_t tck_nsec; + uint64_t max_tout; + uint64_t nb_chunks; + uint64_t chunk_sz; + uint64_t tenns_clk_freq; + enum otx2_tim_clk_src clk_src; +} __rte_cache_aligned; + +static inline struct otx2_tim_evdev * +tim_priv_get(void) +{ + const struct rte_memzone *mz; + + mz = rte_memzone_lookup(RTE_STR(OTX2_TIM_EVDEV_NAME)); + if (mz == NULL) + return NULL; + + return mz->addr; +} + +#define TIM_ARM_FASTPATH_MODES \ +FP(mod_sp, 0, 0, 0, 0, OTX2_TIM_BKT_MOD | OTX2_TIM_ENA_DFB | OTX2_TIM_SP) \ +FP(mod_mp, 0, 0, 0, 1, OTX2_TIM_BKT_MOD | OTX2_TIM_ENA_DFB | OTX2_TIM_MP) \ +FP(mod_fb_sp, 0, 0, 1, 0, OTX2_TIM_BKT_MOD | OTX2_TIM_ENA_FB | OTX2_TIM_SP) \ +FP(mod_fb_mp, 0, 0, 1, 1, OTX2_TIM_BKT_MOD | OTX2_TIM_ENA_FB | OTX2_TIM_MP) \ +FP(and_sp, 0, 1, 0, 0, OTX2_TIM_BKT_AND | OTX2_TIM_ENA_DFB | OTX2_TIM_SP) \ +FP(and_mp, 0, 1, 0, 1, OTX2_TIM_BKT_AND | OTX2_TIM_ENA_DFB | OTX2_TIM_MP) \ +FP(and_fb_sp, 0, 1, 1, 0, OTX2_TIM_BKT_AND | OTX2_TIM_ENA_FB | OTX2_TIM_SP) \ +FP(and_fb_mp, 0, 1, 1, 1, OTX2_TIM_BKT_AND | OTX2_TIM_ENA_FB | OTX2_TIM_MP) \ +FP(stats_mod_sp, 1, 0, 0, 0, OTX2_TIM_ENA_STATS | OTX2_TIM_BKT_MOD | \ + OTX2_TIM_ENA_DFB | OTX2_TIM_SP) \ +FP(stats_mod_mp, 1, 0, 0, 1, OTX2_TIM_ENA_STATS | OTX2_TIM_BKT_MOD | \ + OTX2_TIM_ENA_DFB | OTX2_TIM_MP) \ +FP(stats_mod_fb_sp, 1, 0, 1, 0, OTX2_TIM_ENA_STATS | OTX2_TIM_BKT_MOD | \ + OTX2_TIM_ENA_FB | OTX2_TIM_SP) \ +FP(stats_mod_fb_mp, 1, 0, 1, 1, OTX2_TIM_ENA_STATS | OTX2_TIM_BKT_MOD | \ + OTX2_TIM_ENA_FB | OTX2_TIM_MP) \ +FP(stats_and_sp, 1, 1, 0, 0, OTX2_TIM_ENA_STATS | OTX2_TIM_BKT_AND | \ + OTX2_TIM_ENA_DFB | OTX2_TIM_SP) \ +FP(stats_and_mp, 1, 1, 0, 1, OTX2_TIM_ENA_STATS | OTX2_TIM_BKT_AND | \ + OTX2_TIM_ENA_DFB | OTX2_TIM_MP) \ +FP(stats_and_fb_sp, 1, 1, 1, 0, OTX2_TIM_ENA_STATS | OTX2_TIM_BKT_AND | \ + OTX2_TIM_ENA_FB | OTX2_TIM_SP) \ +FP(stats_and_fb_mp, 1, 1, 1, 1, OTX2_TIM_ENA_STATS | OTX2_TIM_BKT_AND | \ + OTX2_TIM_ENA_FB | OTX2_TIM_MP) + +#define TIM_ARM_TMO_FASTPATH_MODES \ +FP(mod, 0, 0, 0, OTX2_TIM_BKT_MOD | OTX2_TIM_ENA_DFB) \ +FP(mod_fb, 0, 0, 1, OTX2_TIM_BKT_MOD | OTX2_TIM_ENA_FB) \ +FP(and, 0, 1, 0, OTX2_TIM_BKT_AND | OTX2_TIM_ENA_DFB) \ +FP(and_fb, 0, 1, 1, OTX2_TIM_BKT_AND | OTX2_TIM_ENA_FB) \ +FP(stats_mod, 1, 0, 0, OTX2_TIM_ENA_STATS | OTX2_TIM_BKT_MOD | \ + OTX2_TIM_ENA_DFB) \ +FP(stats_mod_fb, 1, 0, 1, OTX2_TIM_ENA_STATS | OTX2_TIM_BKT_MOD | \ + OTX2_TIM_ENA_FB) \ +FP(stats_and, 1, 1, 0, OTX2_TIM_ENA_STATS | OTX2_TIM_BKT_AND | \ + OTX2_TIM_ENA_DFB) \ +FP(stats_and_fb, 1, 1, 1, OTX2_TIM_ENA_STATS | OTX2_TIM_BKT_AND | \ + OTX2_TIM_ENA_FB) + +#define FP(_name, _f4, _f3, _f2, _f1, flags) \ +uint16_t \ +otx2_tim_arm_burst_ ## _name(const struct rte_event_timer_adapter *adptr, \ + struct rte_event_timer **tim, \ + const uint16_t nb_timers); +TIM_ARM_FASTPATH_MODES +#undef FP + +#define FP(_name, _f3, _f2, _f1, flags) \ +uint16_t \ +otx2_tim_arm_tmo_tick_burst_ 
## _name( \ + const struct rte_event_timer_adapter *adptr, \ + struct rte_event_timer **tim, \ + const uint64_t timeout_tick, const uint16_t nb_timers); +TIM_ARM_TMO_FASTPATH_MODES +#undef FP + +uint16_t otx2_tim_timer_cancel_burst( + const struct rte_event_timer_adapter *adptr, + struct rte_event_timer **tim, const uint16_t nb_timers); + +int otx2_tim_caps_get(const struct rte_eventdev *dev, uint64_t flags, + uint32_t *caps, + const struct rte_event_timer_adapter_ops **ops); + +void otx2_tim_init(struct rte_pci_device *pci_dev, struct otx2_dev *cmn_dev); +void otx2_tim_fini(void); + +/* TIM IRQ */ +int tim_register_irq(uint16_t ring_id); +void tim_unregister_irq(uint16_t ring_id); + +#endif /* __OTX2_TIM_EVDEV_H__ */ diff --git a/src/spdk/dpdk/drivers/event/octeontx2/otx2_tim_worker.c b/src/spdk/dpdk/drivers/event/octeontx2/otx2_tim_worker.c new file mode 100644 index 000000000..4b5cfdc72 --- /dev/null +++ b/src/spdk/dpdk/drivers/event/octeontx2/otx2_tim_worker.c @@ -0,0 +1,191 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2019 Marvell International Ltd. + */ + +#include "otx2_tim_evdev.h" +#include "otx2_tim_worker.h" + +static inline int +tim_arm_checks(const struct otx2_tim_ring * const tim_ring, + struct rte_event_timer * const tim) +{ + if (unlikely(tim->state)) { + tim->state = RTE_EVENT_TIMER_ERROR; + rte_errno = EALREADY; + goto fail; + } + + if (unlikely(!tim->timeout_ticks || + tim->timeout_ticks >= tim_ring->nb_bkts)) { + tim->state = tim->timeout_ticks ? RTE_EVENT_TIMER_ERROR_TOOLATE + : RTE_EVENT_TIMER_ERROR_TOOEARLY; + rte_errno = EINVAL; + goto fail; + } + + return 0; + +fail: + return -EINVAL; +} + +static inline void +tim_format_event(const struct rte_event_timer * const tim, + struct otx2_tim_ent * const entry) +{ + entry->w0 = (tim->ev.event & 0xFFC000000000) >> 6 | + (tim->ev.event & 0xFFFFFFFFF); + entry->wqe = tim->ev.u64; +} + +static inline void +tim_sync_start_cyc(struct otx2_tim_ring *tim_ring) +{ + uint64_t cur_cyc = rte_rdtsc(); + uint32_t real_bkt; + + if (cur_cyc - tim_ring->last_updt_cyc > tim_ring->tot_int) { + real_bkt = otx2_read64(tim_ring->base + TIM_LF_RING_REL) >> 44; + cur_cyc = rte_rdtsc(); + + tim_ring->ring_start_cyc = cur_cyc - + (real_bkt * tim_ring->tck_int); + tim_ring->last_updt_cyc = cur_cyc; + } + +} + +static __rte_always_inline uint16_t +tim_timer_arm_burst(const struct rte_event_timer_adapter *adptr, + struct rte_event_timer **tim, + const uint16_t nb_timers, + const uint8_t flags) +{ + struct otx2_tim_ring *tim_ring = adptr->data->adapter_priv; + struct otx2_tim_ent entry; + uint16_t index; + int ret; + + tim_sync_start_cyc(tim_ring); + for (index = 0; index < nb_timers; index++) { + if (tim_arm_checks(tim_ring, tim[index])) + break; + + tim_format_event(tim[index], &entry); + if (flags & OTX2_TIM_SP) + ret = tim_add_entry_sp(tim_ring, + tim[index]->timeout_ticks, + tim[index], &entry, flags); + if (flags & OTX2_TIM_MP) + ret = tim_add_entry_mp(tim_ring, + tim[index]->timeout_ticks, + tim[index], &entry, flags); + + if (unlikely(ret)) { + rte_errno = -ret; + break; + } + } + + if (flags & OTX2_TIM_ENA_STATS) + __atomic_fetch_add(&tim_ring->arm_cnt, index, __ATOMIC_RELAXED); + + return index; +} + +static __rte_always_inline uint16_t +tim_timer_arm_tmo_brst(const struct rte_event_timer_adapter *adptr, + struct rte_event_timer **tim, + const uint64_t timeout_tick, + const uint16_t nb_timers, const uint8_t flags) +{ + struct otx2_tim_ent entry[OTX2_TIM_MAX_BURST] __rte_cache_aligned; + struct otx2_tim_ring *tim_ring = 
adptr->data->adapter_priv; + uint16_t set_timers = 0; + uint16_t arr_idx = 0; + uint16_t idx; + int ret; + + if (unlikely(!timeout_tick || timeout_tick >= tim_ring->nb_bkts)) { + const enum rte_event_timer_state state = timeout_tick ? + RTE_EVENT_TIMER_ERROR_TOOLATE : + RTE_EVENT_TIMER_ERROR_TOOEARLY; + for (idx = 0; idx < nb_timers; idx++) + tim[idx]->state = state; + + rte_errno = EINVAL; + return 0; + } + + tim_sync_start_cyc(tim_ring); + while (arr_idx < nb_timers) { + for (idx = 0; idx < OTX2_TIM_MAX_BURST && (arr_idx < nb_timers); + idx++, arr_idx++) { + tim_format_event(tim[arr_idx], &entry[idx]); + } + ret = tim_add_entry_brst(tim_ring, timeout_tick, + &tim[set_timers], entry, idx, flags); + set_timers += ret; + if (ret != idx) + break; + } + if (flags & OTX2_TIM_ENA_STATS) + __atomic_fetch_add(&tim_ring->arm_cnt, set_timers, + __ATOMIC_RELAXED); + + return set_timers; +} + +#define FP(_name, _f4, _f3, _f2, _f1, _flags) \ +uint16_t __rte_noinline \ +otx2_tim_arm_burst_ ## _name(const struct rte_event_timer_adapter *adptr, \ + struct rte_event_timer **tim, \ + const uint16_t nb_timers) \ +{ \ + return tim_timer_arm_burst(adptr, tim, nb_timers, _flags); \ +} +TIM_ARM_FASTPATH_MODES +#undef FP + +#define FP(_name, _f3, _f2, _f1, _flags) \ +uint16_t __rte_noinline \ +otx2_tim_arm_tmo_tick_burst_ ## _name( \ + const struct rte_event_timer_adapter *adptr, \ + struct rte_event_timer **tim, \ + const uint64_t timeout_tick, \ + const uint16_t nb_timers) \ +{ \ + return tim_timer_arm_tmo_brst(adptr, tim, timeout_tick, \ + nb_timers, _flags); \ +} +TIM_ARM_TMO_FASTPATH_MODES +#undef FP + +uint16_t +otx2_tim_timer_cancel_burst(const struct rte_event_timer_adapter *adptr, + struct rte_event_timer **tim, + const uint16_t nb_timers) +{ + uint16_t index; + int ret; + + RTE_SET_USED(adptr); + for (index = 0; index < nb_timers; index++) { + if (tim[index]->state == RTE_EVENT_TIMER_CANCELED) { + rte_errno = EALREADY; + break; + } + + if (tim[index]->state != RTE_EVENT_TIMER_ARMED) { + rte_errno = EINVAL; + break; + } + ret = tim_rm_entry(tim[index]); + if (ret) { + rte_errno = -ret; + break; + } + } + + return index; +} diff --git a/src/spdk/dpdk/drivers/event/octeontx2/otx2_tim_worker.h b/src/spdk/dpdk/drivers/event/octeontx2/otx2_tim_worker.h new file mode 100644 index 000000000..af2f864d7 --- /dev/null +++ b/src/spdk/dpdk/drivers/event/octeontx2/otx2_tim_worker.h @@ -0,0 +1,583 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2019 Marvell International Ltd. 
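+ *
+ * Inline fast-path helpers for the TIM (hardware timer) PMD: bucket word
+ * accessors (lock, chunk remainder, HBT/BSK bits, entry counts),
+ * target-bucket selection (modulo or power-of-two masking), chunk
+ * allocation/refill, and the single-producer, multi-producer and burst
+ * arm paths plus timer cancellation used by otx2_tim_worker.c.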
+ */ + +#ifndef __OTX2_TIM_WORKER_H__ +#define __OTX2_TIM_WORKER_H__ + +#include "otx2_tim_evdev.h" + +static inline uint8_t +tim_bkt_fetch_lock(uint64_t w1) +{ + return (w1 >> TIM_BUCKET_W1_S_LOCK) & + TIM_BUCKET_W1_M_LOCK; +} + +static inline int16_t +tim_bkt_fetch_rem(uint64_t w1) +{ + return (w1 >> TIM_BUCKET_W1_S_CHUNK_REMAINDER) & + TIM_BUCKET_W1_M_CHUNK_REMAINDER; +} + +static inline int16_t +tim_bkt_get_rem(struct otx2_tim_bkt *bktp) +{ + return __atomic_load_n(&bktp->chunk_remainder, __ATOMIC_ACQUIRE); +} + +static inline void +tim_bkt_set_rem(struct otx2_tim_bkt *bktp, uint16_t v) +{ + __atomic_store_n(&bktp->chunk_remainder, v, __ATOMIC_RELAXED); +} + +static inline void +tim_bkt_sub_rem(struct otx2_tim_bkt *bktp, uint16_t v) +{ + __atomic_fetch_sub(&bktp->chunk_remainder, v, __ATOMIC_RELAXED); +} + +static inline uint8_t +tim_bkt_get_hbt(uint64_t w1) +{ + return (w1 >> TIM_BUCKET_W1_S_HBT) & TIM_BUCKET_W1_M_HBT; +} + +static inline uint8_t +tim_bkt_get_bsk(uint64_t w1) +{ + return (w1 >> TIM_BUCKET_W1_S_BSK) & TIM_BUCKET_W1_M_BSK; +} + +static inline uint64_t +tim_bkt_clr_bsk(struct otx2_tim_bkt *bktp) +{ + /* Clear everything except lock. */ + const uint64_t v = TIM_BUCKET_W1_M_LOCK << TIM_BUCKET_W1_S_LOCK; + + return __atomic_fetch_and(&bktp->w1, v, __ATOMIC_ACQ_REL); +} + +static inline uint64_t +tim_bkt_fetch_sema_lock(struct otx2_tim_bkt *bktp) +{ + return __atomic_fetch_add(&bktp->w1, TIM_BUCKET_SEMA_WLOCK, + __ATOMIC_ACQUIRE); +} + +static inline uint64_t +tim_bkt_fetch_sema(struct otx2_tim_bkt *bktp) +{ + return __atomic_fetch_add(&bktp->w1, TIM_BUCKET_SEMA, __ATOMIC_RELAXED); +} + +static inline uint64_t +tim_bkt_inc_lock(struct otx2_tim_bkt *bktp) +{ + const uint64_t v = 1ull << TIM_BUCKET_W1_S_LOCK; + + return __atomic_fetch_add(&bktp->w1, v, __ATOMIC_ACQUIRE); +} + +static inline void +tim_bkt_dec_lock(struct otx2_tim_bkt *bktp) +{ + __atomic_add_fetch(&bktp->lock, 0xff, __ATOMIC_RELEASE); +} + +static inline uint32_t +tim_bkt_get_nent(uint64_t w1) +{ + return (w1 >> TIM_BUCKET_W1_S_NUM_ENTRIES) & + TIM_BUCKET_W1_M_NUM_ENTRIES; +} + +static inline void +tim_bkt_inc_nent(struct otx2_tim_bkt *bktp) +{ + __atomic_add_fetch(&bktp->nb_entry, 1, __ATOMIC_RELAXED); +} + +static inline void +tim_bkt_add_nent(struct otx2_tim_bkt *bktp, uint32_t v) +{ + __atomic_add_fetch(&bktp->nb_entry, v, __ATOMIC_RELAXED); +} + +static inline uint64_t +tim_bkt_clr_nent(struct otx2_tim_bkt *bktp) +{ + const uint64_t v = ~(TIM_BUCKET_W1_M_NUM_ENTRIES << + TIM_BUCKET_W1_S_NUM_ENTRIES); + + return __atomic_and_fetch(&bktp->w1, v, __ATOMIC_ACQ_REL); +} + +static __rte_always_inline void +tim_get_target_bucket(struct otx2_tim_ring * const tim_ring, + const uint32_t rel_bkt, struct otx2_tim_bkt **bkt, + struct otx2_tim_bkt **mirr_bkt, const uint8_t flag) +{ + const uint64_t bkt_cyc = rte_rdtsc() - tim_ring->ring_start_cyc; + uint32_t bucket = rte_reciprocal_divide_u64(bkt_cyc, + &tim_ring->fast_div) + rel_bkt; + uint32_t mirr_bucket = 0; + + if (flag & OTX2_TIM_BKT_MOD) { + bucket = bucket % tim_ring->nb_bkts; + mirr_bucket = (bucket + (tim_ring->nb_bkts >> 1)) % + tim_ring->nb_bkts; + } + if (flag & OTX2_TIM_BKT_AND) { + bucket = bucket & (tim_ring->nb_bkts - 1); + mirr_bucket = (bucket + (tim_ring->nb_bkts >> 1)) & + (tim_ring->nb_bkts - 1); + } + + *bkt = &tim_ring->bkt[bucket]; + *mirr_bkt = &tim_ring->bkt[mirr_bucket]; +} + +static struct otx2_tim_ent * +tim_clr_bkt(struct otx2_tim_ring * const tim_ring, + struct otx2_tim_bkt * const bkt) +{ +#define TIM_MAX_OUTSTANDING_OBJ 64 + void 
*pend_chunks[TIM_MAX_OUTSTANDING_OBJ]; + struct otx2_tim_ent *chunk; + struct otx2_tim_ent *pnext; + uint8_t objs = 0; + + + chunk = ((struct otx2_tim_ent *)(uintptr_t)bkt->first_chunk); + chunk = (struct otx2_tim_ent *)(uintptr_t)(chunk + + tim_ring->nb_chunk_slots)->w0; + while (chunk) { + pnext = (struct otx2_tim_ent *)(uintptr_t) + ((chunk + tim_ring->nb_chunk_slots)->w0); + if (objs == TIM_MAX_OUTSTANDING_OBJ) { + rte_mempool_put_bulk(tim_ring->chunk_pool, pend_chunks, + objs); + objs = 0; + } + pend_chunks[objs++] = chunk; + chunk = pnext; + } + + if (objs) + rte_mempool_put_bulk(tim_ring->chunk_pool, pend_chunks, + objs); + + return (struct otx2_tim_ent *)(uintptr_t)bkt->first_chunk; +} + +static struct otx2_tim_ent * +tim_refill_chunk(struct otx2_tim_bkt * const bkt, + struct otx2_tim_bkt * const mirr_bkt, + struct otx2_tim_ring * const tim_ring) +{ + struct otx2_tim_ent *chunk; + + if (bkt->nb_entry || !bkt->first_chunk) { + if (unlikely(rte_mempool_get(tim_ring->chunk_pool, + (void **)&chunk))) + return NULL; + if (bkt->nb_entry) { + *(uint64_t *)(((struct otx2_tim_ent *) + mirr_bkt->current_chunk) + + tim_ring->nb_chunk_slots) = + (uintptr_t)chunk; + } else { + bkt->first_chunk = (uintptr_t)chunk; + } + } else { + chunk = tim_clr_bkt(tim_ring, bkt); + bkt->first_chunk = (uintptr_t)chunk; + } + *(uint64_t *)(chunk + tim_ring->nb_chunk_slots) = 0; + + return chunk; +} + +static struct otx2_tim_ent * +tim_insert_chunk(struct otx2_tim_bkt * const bkt, + struct otx2_tim_bkt * const mirr_bkt, + struct otx2_tim_ring * const tim_ring) +{ + struct otx2_tim_ent *chunk; + + if (unlikely(rte_mempool_get(tim_ring->chunk_pool, (void **)&chunk))) + return NULL; + + *(uint64_t *)(chunk + tim_ring->nb_chunk_slots) = 0; + if (bkt->nb_entry) { + *(uint64_t *)(((struct otx2_tim_ent *)(uintptr_t) + mirr_bkt->current_chunk) + + tim_ring->nb_chunk_slots) = (uintptr_t)chunk; + } else { + bkt->first_chunk = (uintptr_t)chunk; + } + return chunk; +} + +static __rte_always_inline int +tim_add_entry_sp(struct otx2_tim_ring * const tim_ring, + const uint32_t rel_bkt, + struct rte_event_timer * const tim, + const struct otx2_tim_ent * const pent, + const uint8_t flags) +{ + struct otx2_tim_bkt *mirr_bkt; + struct otx2_tim_ent *chunk; + struct otx2_tim_bkt *bkt; + uint64_t lock_sema; + int16_t rem; + +__retry: + tim_get_target_bucket(tim_ring, rel_bkt, &bkt, &mirr_bkt, flags); + + /* Get Bucket sema*/ + lock_sema = tim_bkt_fetch_sema_lock(bkt); + + /* Bucket related checks. */ + if (unlikely(tim_bkt_get_hbt(lock_sema))) { + if (tim_bkt_get_nent(lock_sema) != 0) { + uint64_t hbt_state; +#ifdef RTE_ARCH_ARM64 + asm volatile( + " ldaxr %[hbt], [%[w1]] \n" + " tbz %[hbt], 33, dne%= \n" + " sevl \n" + "rty%=: wfe \n" + " ldaxr %[hbt], [%[w1]] \n" + " tbnz %[hbt], 33, rty%= \n" + "dne%=: \n" + : [hbt] "=&r" (hbt_state) + : [w1] "r" ((&bkt->w1)) + : "memory" + ); +#else + do { + hbt_state = __atomic_load_n(&bkt->w1, + __ATOMIC_ACQUIRE); + } while (hbt_state & BIT_ULL(33)); +#endif + + if (!(hbt_state & BIT_ULL(34))) { + tim_bkt_dec_lock(bkt); + goto __retry; + } + } + } + /* Insert the work. 
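+	 * If the bucket's current chunk has no free slots (remainder == 0),
+	 * attach a new chunk: with OTX2_TIM_ENA_FB the bucket's existing
+	 * chunks are reused when possible, with OTX2_TIM_ENA_DFB a fresh
+	 * chunk is always taken from the mempool. Otherwise the entry is
+	 * written into the next free slot of the current chunk.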
*/ + rem = tim_bkt_fetch_rem(lock_sema); + + if (!rem) { + if (flags & OTX2_TIM_ENA_FB) + chunk = tim_refill_chunk(bkt, mirr_bkt, tim_ring); + if (flags & OTX2_TIM_ENA_DFB) + chunk = tim_insert_chunk(bkt, mirr_bkt, tim_ring); + + if (unlikely(chunk == NULL)) { + bkt->chunk_remainder = 0; + tim_bkt_dec_lock(bkt); + tim->impl_opaque[0] = 0; + tim->impl_opaque[1] = 0; + tim->state = RTE_EVENT_TIMER_ERROR; + return -ENOMEM; + } + mirr_bkt->current_chunk = (uintptr_t)chunk; + bkt->chunk_remainder = tim_ring->nb_chunk_slots - 1; + } else { + chunk = (struct otx2_tim_ent *)mirr_bkt->current_chunk; + chunk += tim_ring->nb_chunk_slots - rem; + } + + /* Copy work entry. */ + *chunk = *pent; + + tim_bkt_inc_nent(bkt); + tim_bkt_dec_lock(bkt); + + tim->impl_opaque[0] = (uintptr_t)chunk; + tim->impl_opaque[1] = (uintptr_t)bkt; + tim->state = RTE_EVENT_TIMER_ARMED; + + return 0; +} + +static __rte_always_inline int +tim_add_entry_mp(struct otx2_tim_ring * const tim_ring, + const uint32_t rel_bkt, + struct rte_event_timer * const tim, + const struct otx2_tim_ent * const pent, + const uint8_t flags) +{ + struct otx2_tim_bkt *mirr_bkt; + struct otx2_tim_ent *chunk; + struct otx2_tim_bkt *bkt; + uint64_t lock_sema; + int16_t rem; + +__retry: + tim_get_target_bucket(tim_ring, rel_bkt, &bkt, &mirr_bkt, flags); + /* Get Bucket sema*/ + lock_sema = tim_bkt_fetch_sema_lock(bkt); + + /* Bucket related checks. */ + if (unlikely(tim_bkt_get_hbt(lock_sema))) { + if (tim_bkt_get_nent(lock_sema) != 0) { + uint64_t hbt_state; +#ifdef RTE_ARCH_ARM64 + asm volatile( + " ldaxr %[hbt], [%[w1]] \n" + " tbz %[hbt], 33, dne%= \n" + " sevl \n" + "rty%=: wfe \n" + " ldaxr %[hbt], [%[w1]] \n" + " tbnz %[hbt], 33, rty%= \n" + "dne%=: \n" + : [hbt] "=&r" (hbt_state) + : [w1] "r" ((&bkt->w1)) + : "memory" + ); +#else + do { + hbt_state = __atomic_load_n(&bkt->w1, + __ATOMIC_ACQUIRE); + } while (hbt_state & BIT_ULL(33)); +#endif + + if (!(hbt_state & BIT_ULL(34))) { + tim_bkt_dec_lock(bkt); + goto __retry; + } + } + } + + rem = tim_bkt_fetch_rem(lock_sema); + if (rem < 0) { +#ifdef RTE_ARCH_ARM64 + asm volatile( + " ldaxrh %w[rem], [%[crem]] \n" + " tbz %w[rem], 15, dne%= \n" + " sevl \n" + "rty%=: wfe \n" + " ldaxrh %w[rem], [%[crem]] \n" + " tbnz %w[rem], 15, rty%= \n" + "dne%=: \n" + : [rem] "=&r" (rem) + : [crem] "r" (&bkt->chunk_remainder) + : "memory" + ); +#else + while (__atomic_load_n(&bkt->chunk_remainder, + __ATOMIC_ACQUIRE) < 0) + ; +#endif + /* Goto diff bucket. */ + tim_bkt_dec_lock(bkt); + goto __retry; + } else if (!rem) { + /* Only one thread can be here*/ + if (flags & OTX2_TIM_ENA_FB) + chunk = tim_refill_chunk(bkt, mirr_bkt, tim_ring); + if (flags & OTX2_TIM_ENA_DFB) + chunk = tim_insert_chunk(bkt, mirr_bkt, tim_ring); + + if (unlikely(chunk == NULL)) { + tim_bkt_set_rem(bkt, 0); + tim_bkt_dec_lock(bkt); + tim->impl_opaque[0] = 0; + tim->impl_opaque[1] = 0; + tim->state = RTE_EVENT_TIMER_ERROR; + return -ENOMEM; + } + *chunk = *pent; + while (tim_bkt_fetch_lock(lock_sema) != + (-tim_bkt_fetch_rem(lock_sema))) + lock_sema = __atomic_load_n(&bkt->w1, __ATOMIC_ACQUIRE); + + mirr_bkt->current_chunk = (uintptr_t)chunk; + __atomic_store_n(&bkt->chunk_remainder, + tim_ring->nb_chunk_slots - 1, __ATOMIC_RELEASE); + } else { + chunk = (struct otx2_tim_ent *)mirr_bkt->current_chunk; + chunk += tim_ring->nb_chunk_slots - rem; + *chunk = *pent; + } + + /* Copy work entry. 
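+	 * Note: in this MP path the entry itself was already copied in the
+	 * branch above; only the bucket entry count, the lock release and
+	 * the timer state update remain.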
*/ + tim_bkt_inc_nent(bkt); + tim_bkt_dec_lock(bkt); + tim->impl_opaque[0] = (uintptr_t)chunk; + tim->impl_opaque[1] = (uintptr_t)bkt; + tim->state = RTE_EVENT_TIMER_ARMED; + + return 0; +} + +static inline uint16_t +tim_cpy_wrk(uint16_t index, uint16_t cpy_lmt, + struct otx2_tim_ent *chunk, + struct rte_event_timer ** const tim, + const struct otx2_tim_ent * const ents, + const struct otx2_tim_bkt * const bkt) +{ + for (; index < cpy_lmt; index++) { + *chunk = *(ents + index); + tim[index]->impl_opaque[0] = (uintptr_t)chunk++; + tim[index]->impl_opaque[1] = (uintptr_t)bkt; + tim[index]->state = RTE_EVENT_TIMER_ARMED; + } + + return index; +} + +/* Burst mode functions */ +static inline int +tim_add_entry_brst(struct otx2_tim_ring * const tim_ring, + const uint16_t rel_bkt, + struct rte_event_timer ** const tim, + const struct otx2_tim_ent *ents, + const uint16_t nb_timers, const uint8_t flags) +{ + struct otx2_tim_ent *chunk = NULL; + struct otx2_tim_bkt *mirr_bkt; + struct otx2_tim_bkt *bkt; + uint16_t chunk_remainder; + uint16_t index = 0; + uint64_t lock_sema; + int16_t rem, crem; + uint8_t lock_cnt; + +__retry: + tim_get_target_bucket(tim_ring, rel_bkt, &bkt, &mirr_bkt, flags); + + /* Only one thread beyond this. */ + lock_sema = tim_bkt_inc_lock(bkt); + lock_cnt = (uint8_t) + ((lock_sema >> TIM_BUCKET_W1_S_LOCK) & TIM_BUCKET_W1_M_LOCK); + + if (lock_cnt) { + tim_bkt_dec_lock(bkt); + goto __retry; + } + + /* Bucket related checks. */ + if (unlikely(tim_bkt_get_hbt(lock_sema))) { + if (tim_bkt_get_nent(lock_sema) != 0) { + uint64_t hbt_state; +#ifdef RTE_ARCH_ARM64 + asm volatile( + " ldaxr %[hbt], [%[w1]] \n" + " tbz %[hbt], 33, dne%= \n" + " sevl \n" + "rty%=: wfe \n" + " ldaxr %[hbt], [%[w1]] \n" + " tbnz %[hbt], 33, rty%= \n" + "dne%=: \n" + : [hbt] "=&r" (hbt_state) + : [w1] "r" ((&bkt->w1)) + : "memory" + ); +#else + do { + hbt_state = __atomic_load_n(&bkt->w1, + __ATOMIC_ACQUIRE); + } while (hbt_state & BIT_ULL(33)); +#endif + + if (!(hbt_state & BIT_ULL(34))) { + tim_bkt_dec_lock(bkt); + goto __retry; + } + } + } + + chunk_remainder = tim_bkt_fetch_rem(lock_sema); + rem = chunk_remainder - nb_timers; + if (rem < 0) { + crem = tim_ring->nb_chunk_slots - chunk_remainder; + if (chunk_remainder && crem) { + chunk = ((struct otx2_tim_ent *) + mirr_bkt->current_chunk) + crem; + + index = tim_cpy_wrk(index, chunk_remainder, chunk, tim, + ents, bkt); + tim_bkt_sub_rem(bkt, chunk_remainder); + tim_bkt_add_nent(bkt, chunk_remainder); + } + + if (flags & OTX2_TIM_ENA_FB) + chunk = tim_refill_chunk(bkt, mirr_bkt, tim_ring); + if (flags & OTX2_TIM_ENA_DFB) + chunk = tim_insert_chunk(bkt, mirr_bkt, tim_ring); + + if (unlikely(chunk == NULL)) { + tim_bkt_dec_lock(bkt); + rte_errno = ENOMEM; + tim[index]->state = RTE_EVENT_TIMER_ERROR; + return crem; + } + *(uint64_t *)(chunk + tim_ring->nb_chunk_slots) = 0; + mirr_bkt->current_chunk = (uintptr_t)chunk; + tim_cpy_wrk(index, nb_timers, chunk, tim, ents, bkt); + + rem = nb_timers - chunk_remainder; + tim_bkt_set_rem(bkt, tim_ring->nb_chunk_slots - rem); + tim_bkt_add_nent(bkt, rem); + } else { + chunk = (struct otx2_tim_ent *)mirr_bkt->current_chunk; + chunk += (tim_ring->nb_chunk_slots - chunk_remainder); + + tim_cpy_wrk(index, nb_timers, chunk, tim, ents, bkt); + tim_bkt_sub_rem(bkt, nb_timers); + tim_bkt_add_nent(bkt, nb_timers); + } + + tim_bkt_dec_lock(bkt); + + return nb_timers; +} + +static int +tim_rm_entry(struct rte_event_timer *tim) +{ + struct otx2_tim_ent *entry; + struct otx2_tim_bkt *bkt; + uint64_t lock_sema; + + if 
(tim->impl_opaque[1] == 0 || tim->impl_opaque[0] == 0) + return -ENOENT; + + entry = (struct otx2_tim_ent *)(uintptr_t)tim->impl_opaque[0]; + if (entry->wqe != tim->ev.u64) { + tim->impl_opaque[0] = 0; + tim->impl_opaque[1] = 0; + return -ENOENT; + } + + bkt = (struct otx2_tim_bkt *)(uintptr_t)tim->impl_opaque[1]; + lock_sema = tim_bkt_inc_lock(bkt); + if (tim_bkt_get_hbt(lock_sema) || !tim_bkt_get_nent(lock_sema)) { + tim_bkt_dec_lock(bkt); + tim->impl_opaque[0] = 0; + tim->impl_opaque[1] = 0; + return -ENOENT; + } + + entry->w0 = 0; + entry->wqe = 0; + tim_bkt_dec_lock(bkt); + + tim->state = RTE_EVENT_TIMER_CANCELED; + tim->impl_opaque[0] = 0; + tim->impl_opaque[1] = 0; + + return 0; +} + +#endif /* __OTX2_TIM_WORKER_H__ */ diff --git a/src/spdk/dpdk/drivers/event/octeontx2/otx2_worker.c b/src/spdk/dpdk/drivers/event/octeontx2/otx2_worker.c new file mode 100644 index 000000000..88bac391c --- /dev/null +++ b/src/spdk/dpdk/drivers/event/octeontx2/otx2_worker.c @@ -0,0 +1,371 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2019 Marvell International Ltd. + */ + +#include "otx2_worker.h" + +static __rte_noinline uint8_t +otx2_ssogws_new_event(struct otx2_ssogws *ws, const struct rte_event *ev) +{ + const uint32_t tag = (uint32_t)ev->event; + const uint8_t new_tt = ev->sched_type; + const uint64_t event_ptr = ev->u64; + const uint16_t grp = ev->queue_id; + + if (ws->xaq_lmt <= *ws->fc_mem) + return 0; + + otx2_ssogws_add_work(ws, event_ptr, tag, new_tt, grp); + + return 1; +} + +static __rte_always_inline void +otx2_ssogws_fwd_swtag(struct otx2_ssogws *ws, const struct rte_event *ev) +{ + const uint32_t tag = (uint32_t)ev->event; + const uint8_t new_tt = ev->sched_type; + const uint8_t cur_tt = ws->cur_tt; + + /* 96XX model + * cur_tt/new_tt SSO_SYNC_ORDERED SSO_SYNC_ATOMIC SSO_SYNC_UNTAGGED + * + * SSO_SYNC_ORDERED norm norm untag + * SSO_SYNC_ATOMIC norm norm untag + * SSO_SYNC_UNTAGGED norm norm NOOP + */ + + if (new_tt == SSO_SYNC_UNTAGGED) { + if (cur_tt != SSO_SYNC_UNTAGGED) + otx2_ssogws_swtag_untag(ws); + } else { + otx2_ssogws_swtag_norm(ws, tag, new_tt); + } + + ws->swtag_req = 1; +} + +static __rte_always_inline void +otx2_ssogws_fwd_group(struct otx2_ssogws *ws, const struct rte_event *ev, + const uint16_t grp) +{ + const uint32_t tag = (uint32_t)ev->event; + const uint8_t new_tt = ev->sched_type; + + otx2_write64(ev->u64, OTX2_SSOW_GET_BASE_ADDR(ws->getwrk_op) + + SSOW_LF_GWS_OP_UPD_WQP_GRP1); + rte_smp_wmb(); + otx2_ssogws_swtag_desched(ws, tag, new_tt, grp); +} + +static __rte_always_inline void +otx2_ssogws_forward_event(struct otx2_ssogws *ws, const struct rte_event *ev) +{ + const uint8_t grp = ev->queue_id; + + /* Group hasn't changed, Use SWTAG to forward the event */ + if (ws->cur_grp == grp) + otx2_ssogws_fwd_swtag(ws, ev); + else + /* + * Group has been changed for group based work pipelining, + * Use deschedule/add_work operation to transfer the event to + * new group/core + */ + otx2_ssogws_fwd_group(ws, ev, grp); +} + +static __rte_always_inline void +otx2_ssogws_release_event(struct otx2_ssogws *ws) +{ + otx2_ssogws_swtag_flush(ws); +} + +#define R(name, f6, f5, f4, f3, f2, f1, f0, flags) \ +uint16_t __rte_hot \ +otx2_ssogws_deq_ ##name(void *port, struct rte_event *ev, \ + uint64_t timeout_ticks) \ +{ \ + struct otx2_ssogws *ws = port; \ + \ + RTE_SET_USED(timeout_ticks); \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + otx2_ssogws_swtag_wait(ws); \ + return 1; \ + } \ + \ + return otx2_ssogws_get_work(ws, ev, flags, ws->lookup_mem); \ +} \ + \ 
+uint16_t __rte_hot \ +otx2_ssogws_deq_burst_ ##name(void *port, struct rte_event ev[], \ + uint16_t nb_events, \ + uint64_t timeout_ticks) \ +{ \ + RTE_SET_USED(nb_events); \ + \ + return otx2_ssogws_deq_ ##name(port, ev, timeout_ticks); \ +} \ + \ +uint16_t __rte_hot \ +otx2_ssogws_deq_timeout_ ##name(void *port, struct rte_event *ev, \ + uint64_t timeout_ticks) \ +{ \ + struct otx2_ssogws *ws = port; \ + uint16_t ret = 1; \ + uint64_t iter; \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + otx2_ssogws_swtag_wait(ws); \ + return ret; \ + } \ + \ + ret = otx2_ssogws_get_work(ws, ev, flags, ws->lookup_mem); \ + for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) \ + ret = otx2_ssogws_get_work(ws, ev, flags, \ + ws->lookup_mem); \ + \ + return ret; \ +} \ + \ +uint16_t __rte_hot \ +otx2_ssogws_deq_timeout_burst_ ##name(void *port, struct rte_event ev[],\ + uint16_t nb_events, \ + uint64_t timeout_ticks) \ +{ \ + RTE_SET_USED(nb_events); \ + \ + return otx2_ssogws_deq_timeout_ ##name(port, ev, timeout_ticks);\ +} \ + \ +uint16_t __rte_hot \ +otx2_ssogws_deq_seg_ ##name(void *port, struct rte_event *ev, \ + uint64_t timeout_ticks) \ +{ \ + struct otx2_ssogws *ws = port; \ + \ + RTE_SET_USED(timeout_ticks); \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + otx2_ssogws_swtag_wait(ws); \ + return 1; \ + } \ + \ + return otx2_ssogws_get_work(ws, ev, flags | NIX_RX_MULTI_SEG_F, \ + ws->lookup_mem); \ +} \ + \ +uint16_t __rte_hot \ +otx2_ssogws_deq_seg_burst_ ##name(void *port, struct rte_event ev[], \ + uint16_t nb_events, \ + uint64_t timeout_ticks) \ +{ \ + RTE_SET_USED(nb_events); \ + \ + return otx2_ssogws_deq_seg_ ##name(port, ev, timeout_ticks); \ +} \ + \ +uint16_t __rte_hot \ +otx2_ssogws_deq_seg_timeout_ ##name(void *port, struct rte_event *ev, \ + uint64_t timeout_ticks) \ +{ \ + struct otx2_ssogws *ws = port; \ + uint16_t ret = 1; \ + uint64_t iter; \ + \ + if (ws->swtag_req) { \ + ws->swtag_req = 0; \ + otx2_ssogws_swtag_wait(ws); \ + return ret; \ + } \ + \ + ret = otx2_ssogws_get_work(ws, ev, flags | NIX_RX_MULTI_SEG_F, \ + ws->lookup_mem); \ + for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) \ + ret = otx2_ssogws_get_work(ws, ev, \ + flags | NIX_RX_MULTI_SEG_F, \ + ws->lookup_mem); \ + \ + return ret; \ +} \ + \ +uint16_t __rte_hot \ +otx2_ssogws_deq_seg_timeout_burst_ ##name(void *port, \ + struct rte_event ev[], \ + uint16_t nb_events, \ + uint64_t timeout_ticks) \ +{ \ + RTE_SET_USED(nb_events); \ + \ + return otx2_ssogws_deq_seg_timeout_ ##name(port, ev, \ + timeout_ticks); \ +} + +SSO_RX_ADPTR_ENQ_FASTPATH_FUNC +#undef R + +uint16_t __rte_hot +otx2_ssogws_enq(void *port, const struct rte_event *ev) +{ + struct otx2_ssogws *ws = port; + + switch (ev->op) { + case RTE_EVENT_OP_NEW: + rte_smp_mb(); + return otx2_ssogws_new_event(ws, ev); + case RTE_EVENT_OP_FORWARD: + otx2_ssogws_forward_event(ws, ev); + break; + case RTE_EVENT_OP_RELEASE: + otx2_ssogws_release_event(ws); + break; + default: + return 0; + } + + return 1; +} + +uint16_t __rte_hot +otx2_ssogws_enq_burst(void *port, const struct rte_event ev[], + uint16_t nb_events) +{ + RTE_SET_USED(nb_events); + return otx2_ssogws_enq(port, ev); +} + +uint16_t __rte_hot +otx2_ssogws_enq_new_burst(void *port, const struct rte_event ev[], + uint16_t nb_events) +{ + struct otx2_ssogws *ws = port; + uint16_t i, rc = 1; + + rte_smp_mb(); + if (ws->xaq_lmt <= *ws->fc_mem) + return 0; + + for (i = 0; i < nb_events && rc; i++) + rc = otx2_ssogws_new_event(ws, &ev[i]); + + return nb_events; +} + +uint16_t 
__rte_hot +otx2_ssogws_enq_fwd_burst(void *port, const struct rte_event ev[], + uint16_t nb_events) +{ + struct otx2_ssogws *ws = port; + + RTE_SET_USED(nb_events); + otx2_ssogws_forward_event(ws, ev); + + return 1; +} + +#define T(name, f6, f5, f4, f3, f2, f1, f0, sz, flags) \ +uint16_t __rte_hot \ +otx2_ssogws_tx_adptr_enq_ ## name(void *port, struct rte_event ev[], \ + uint16_t nb_events) \ +{ \ + struct otx2_ssogws *ws = port; \ + uint64_t cmd[sz]; \ + \ + RTE_SET_USED(nb_events); \ + return otx2_ssogws_event_tx(ws, ev, cmd, flags); \ +} +SSO_TX_ADPTR_ENQ_FASTPATH_FUNC +#undef T + +#define T(name, f6, f5, f4, f3, f2, f1, f0, sz, flags) \ +uint16_t __rte_hot \ +otx2_ssogws_tx_adptr_enq_seg_ ## name(void *port, struct rte_event ev[],\ + uint16_t nb_events) \ +{ \ + struct otx2_ssogws *ws = port; \ + uint64_t cmd[(sz) + NIX_TX_MSEG_SG_DWORDS - 2]; \ + \ + RTE_SET_USED(nb_events); \ + return otx2_ssogws_event_tx(ws, ev, cmd, (flags) | \ + NIX_TX_MULTI_SEG_F); \ +} +SSO_TX_ADPTR_ENQ_FASTPATH_FUNC +#undef T + +void +ssogws_flush_events(struct otx2_ssogws *ws, uint8_t queue_id, uintptr_t base, + otx2_handle_event_t fn, void *arg) +{ + uint64_t cq_ds_cnt = 1; + uint64_t aq_cnt = 1; + uint64_t ds_cnt = 1; + struct rte_event ev; + uint64_t enable; + uint64_t val; + + enable = otx2_read64(base + SSO_LF_GGRP_QCTL); + if (!enable) + return; + + val = queue_id; /* GGRP ID */ + val |= BIT_ULL(18); /* Grouped */ + val |= BIT_ULL(16); /* WAIT */ + + aq_cnt = otx2_read64(base + SSO_LF_GGRP_AQ_CNT); + ds_cnt = otx2_read64(base + SSO_LF_GGRP_MISC_CNT); + cq_ds_cnt = otx2_read64(base + SSO_LF_GGRP_INT_CNT); + cq_ds_cnt &= 0x3FFF3FFF0000; + + while (aq_cnt || cq_ds_cnt || ds_cnt) { + otx2_write64(val, ws->getwrk_op); + otx2_ssogws_get_work_empty(ws, &ev, 0); + if (fn != NULL && ev.u64 != 0) + fn(arg, ev); + if (ev.sched_type != SSO_TT_EMPTY) + otx2_ssogws_swtag_flush(ws); + rte_mb(); + aq_cnt = otx2_read64(base + SSO_LF_GGRP_AQ_CNT); + ds_cnt = otx2_read64(base + SSO_LF_GGRP_MISC_CNT); + cq_ds_cnt = otx2_read64(base + SSO_LF_GGRP_INT_CNT); + /* Extract cq and ds count */ + cq_ds_cnt &= 0x3FFF3FFF0000; + } + + otx2_write64(0, OTX2_SSOW_GET_BASE_ADDR(ws->getwrk_op) + + SSOW_LF_GWS_OP_GWC_INVAL); + rte_mb(); +} + +void +ssogws_reset(struct otx2_ssogws *ws) +{ + uintptr_t base = OTX2_SSOW_GET_BASE_ADDR(ws->getwrk_op); + uint64_t pend_state; + uint8_t pend_tt; + uint64_t tag; + + /* Wait till getwork/swtp/waitw/desched completes. */ + do { + pend_state = otx2_read64(base + SSOW_LF_GWS_PENDSTATE); + rte_mb(); + } while (pend_state & (BIT_ULL(63) | BIT_ULL(62) | BIT_ULL(58))); + + tag = otx2_read64(base + SSOW_LF_GWS_TAG); + pend_tt = (tag >> 32) & 0x3; + if (pend_tt != SSO_TT_EMPTY) { /* Work was pending */ + if (pend_tt == SSO_SYNC_ATOMIC || pend_tt == SSO_SYNC_ORDERED) + otx2_ssogws_swtag_untag(ws); + otx2_ssogws_desched(ws); + } + rte_mb(); + + /* Wait for desched to complete. */ + do { + pend_state = otx2_read64(base + SSOW_LF_GWS_PENDSTATE); + rte_mb(); + } while (pend_state & BIT_ULL(58)); +} diff --git a/src/spdk/dpdk/drivers/event/octeontx2/otx2_worker.h b/src/spdk/dpdk/drivers/event/octeontx2/otx2_worker.h new file mode 100644 index 000000000..5f5aa8746 --- /dev/null +++ b/src/spdk/dpdk/drivers/event/octeontx2/otx2_worker.h @@ -0,0 +1,310 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2019 Marvell International Ltd. 
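+ *
+ * Single-workslot SSO fast-path helpers: GETWORK polling, SWTAG/desched
+ * primitives and the event Tx path used by the worker entry points in
+ * otx2_worker.c.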
+ */ + +#ifndef __OTX2_WORKER_H__ +#define __OTX2_WORKER_H__ + +#include <rte_common.h> +#include <rte_branch_prediction.h> + +#include <otx2_common.h> +#include "otx2_evdev.h" +#include "otx2_ethdev_sec_tx.h" + +/* SSO Operations */ + +static __rte_always_inline uint16_t +otx2_ssogws_get_work(struct otx2_ssogws *ws, struct rte_event *ev, + const uint32_t flags, const void * const lookup_mem) +{ + union otx2_sso_event event; + uint64_t tstamp_ptr; + uint64_t get_work1; + uint64_t mbuf; + + otx2_write64(BIT_ULL(16) | /* wait for work. */ + 1, /* Use Mask set 0. */ + ws->getwrk_op); + + if (flags & NIX_RX_OFFLOAD_PTYPE_F) + rte_prefetch_non_temporal(lookup_mem); +#ifdef RTE_ARCH_ARM64 + asm volatile( + " ldr %[tag], [%[tag_loc]] \n" + " ldr %[wqp], [%[wqp_loc]] \n" + " tbz %[tag], 63, done%= \n" + " sevl \n" + "rty%=: wfe \n" + " ldr %[tag], [%[tag_loc]] \n" + " ldr %[wqp], [%[wqp_loc]] \n" + " tbnz %[tag], 63, rty%= \n" + "done%=: dmb ld \n" + " prfm pldl1keep, [%[wqp], #8] \n" + " sub %[mbuf], %[wqp], #0x80 \n" + " prfm pldl1keep, [%[mbuf]] \n" + : [tag] "=&r" (event.get_work0), + [wqp] "=&r" (get_work1), + [mbuf] "=&r" (mbuf) + : [tag_loc] "r" (ws->tag_op), + [wqp_loc] "r" (ws->wqp_op) + ); +#else + event.get_work0 = otx2_read64(ws->tag_op); + while ((BIT_ULL(63)) & event.get_work0) + event.get_work0 = otx2_read64(ws->tag_op); + + get_work1 = otx2_read64(ws->wqp_op); + rte_prefetch0((const void *)get_work1); + mbuf = (uint64_t)((char *)get_work1 - sizeof(struct rte_mbuf)); + rte_prefetch0((const void *)mbuf); +#endif + + event.get_work0 = (event.get_work0 & (0x3ull << 32)) << 6 | + (event.get_work0 & (0x3FFull << 36)) << 4 | + (event.get_work0 & 0xffffffff); + ws->cur_tt = event.sched_type; + ws->cur_grp = event.queue_id; + + if (event.sched_type != SSO_TT_EMPTY && + event.event_type == RTE_EVENT_TYPE_ETHDEV) { + otx2_wqe_to_mbuf(get_work1, mbuf, event.sub_event_type, + (uint32_t) event.get_work0, flags, lookup_mem); + /* Extracting tstamp, if PTP enabled*/ + tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *)get_work1) + + OTX2_SSO_WQE_SG_PTR); + otx2_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf, ws->tstamp, + flags, (uint64_t *)tstamp_ptr); + get_work1 = mbuf; + } + + ev->event = event.get_work0; + ev->u64 = get_work1; + + return !!get_work1; +} + +/* Used in cleaning up workslot. 
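+ * Unlike otx2_ssogws_get_work() above, this variant does not program a new
+ * GETWORK request on the workslot; it only polls the TAG/WQP registers for
+ * work that is already pending, which is what ssogws_flush_events() relies
+ * on when draining a group.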
*/ +static __rte_always_inline uint16_t +otx2_ssogws_get_work_empty(struct otx2_ssogws *ws, struct rte_event *ev, + const uint32_t flags) +{ + union otx2_sso_event event; + uint64_t tstamp_ptr; + uint64_t get_work1; + uint64_t mbuf; + +#ifdef RTE_ARCH_ARM64 + asm volatile( + " ldr %[tag], [%[tag_loc]] \n" + " ldr %[wqp], [%[wqp_loc]] \n" + " tbz %[tag], 63, done%= \n" + " sevl \n" + "rty%=: wfe \n" + " ldr %[tag], [%[tag_loc]] \n" + " ldr %[wqp], [%[wqp_loc]] \n" + " tbnz %[tag], 63, rty%= \n" + "done%=: dmb ld \n" + " prfm pldl1keep, [%[wqp], #8] \n" + " sub %[mbuf], %[wqp], #0x80 \n" + " prfm pldl1keep, [%[mbuf]] \n" + : [tag] "=&r" (event.get_work0), + [wqp] "=&r" (get_work1), + [mbuf] "=&r" (mbuf) + : [tag_loc] "r" (ws->tag_op), + [wqp_loc] "r" (ws->wqp_op) + ); +#else + event.get_work0 = otx2_read64(ws->tag_op); + while ((BIT_ULL(63)) & event.get_work0) + event.get_work0 = otx2_read64(ws->tag_op); + + get_work1 = otx2_read64(ws->wqp_op); + rte_prefetch_non_temporal((const void *)get_work1); + mbuf = (uint64_t)((char *)get_work1 - sizeof(struct rte_mbuf)); + rte_prefetch_non_temporal((const void *)mbuf); +#endif + + event.get_work0 = (event.get_work0 & (0x3ull << 32)) << 6 | + (event.get_work0 & (0x3FFull << 36)) << 4 | + (event.get_work0 & 0xffffffff); + ws->cur_tt = event.sched_type; + ws->cur_grp = event.queue_id; + + if (event.sched_type != SSO_TT_EMPTY && + event.event_type == RTE_EVENT_TYPE_ETHDEV) { + otx2_wqe_to_mbuf(get_work1, mbuf, event.sub_event_type, + (uint32_t) event.get_work0, flags, NULL); + /* Extracting tstamp, if PTP enabled*/ + tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *)get_work1) + + OTX2_SSO_WQE_SG_PTR); + otx2_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf, ws->tstamp, + flags, (uint64_t *)tstamp_ptr); + get_work1 = mbuf; + } + + ev->event = event.get_work0; + ev->u64 = get_work1; + + return !!get_work1; +} + +static __rte_always_inline void +otx2_ssogws_add_work(struct otx2_ssogws *ws, const uint64_t event_ptr, + const uint32_t tag, const uint8_t new_tt, + const uint16_t grp) +{ + uint64_t add_work0; + + add_work0 = tag | ((uint64_t)(new_tt) << 32); + otx2_store_pair(add_work0, event_ptr, ws->grps_base[grp]); +} + +static __rte_always_inline void +otx2_ssogws_swtag_desched(struct otx2_ssogws *ws, uint32_t tag, uint8_t new_tt, + uint16_t grp) +{ + uint64_t val; + + val = tag | ((uint64_t)(new_tt & 0x3) << 32) | ((uint64_t)grp << 34); + otx2_write64(val, ws->swtag_desched_op); +} + +static __rte_always_inline void +otx2_ssogws_swtag_norm(struct otx2_ssogws *ws, uint32_t tag, uint8_t new_tt) +{ + uint64_t val; + + val = tag | ((uint64_t)(new_tt & 0x3) << 32); + otx2_write64(val, ws->swtag_norm_op); +} + +static __rte_always_inline void +otx2_ssogws_swtag_untag(struct otx2_ssogws *ws) +{ + otx2_write64(0, OTX2_SSOW_GET_BASE_ADDR(ws->getwrk_op) + + SSOW_LF_GWS_OP_SWTAG_UNTAG); + ws->cur_tt = SSO_SYNC_UNTAGGED; +} + +static __rte_always_inline void +otx2_ssogws_swtag_flush(struct otx2_ssogws *ws) +{ + otx2_write64(0, OTX2_SSOW_GET_BASE_ADDR(ws->getwrk_op) + + SSOW_LF_GWS_OP_SWTAG_FLUSH); + ws->cur_tt = SSO_SYNC_EMPTY; +} + +static __rte_always_inline void +otx2_ssogws_desched(struct otx2_ssogws *ws) +{ + otx2_write64(0, OTX2_SSOW_GET_BASE_ADDR(ws->getwrk_op) + + SSOW_LF_GWS_OP_DESCHED); +} + +static __rte_always_inline void +otx2_ssogws_swtag_wait(struct otx2_ssogws *ws) +{ +#ifdef RTE_ARCH_ARM64 + uint64_t swtp; + + asm volatile ( + " ldr %[swtb], [%[swtp_loc]] \n" + " cbz %[swtb], done%= \n" + " sevl \n" + "rty%=: wfe \n" + " ldr %[swtb], [%[swtp_loc]] \n" + " 
cbnz %[swtb], rty%= \n" + "done%=: \n" + : [swtb] "=&r" (swtp) + : [swtp_loc] "r" (ws->swtp_op) + ); +#else + /* Wait for the SWTAG/SWTAG_FULL operation */ + while (otx2_read64(ws->swtp_op)) + ; +#endif +} + +static __rte_always_inline void +otx2_ssogws_head_wait(struct otx2_ssogws *ws) +{ +#ifdef RTE_ARCH_ARM64 + uint64_t tag; + + asm volatile ( + " ldr %[tag], [%[tag_op]] \n" + " tbnz %[tag], 35, done%= \n" + " sevl \n" + "rty%=: wfe \n" + " ldr %[tag], [%[tag_op]] \n" + " tbz %[tag], 35, rty%= \n" + "done%=: \n" + : [tag] "=&r" (tag) + : [tag_op] "r" (ws->tag_op) + ); +#else + /* Wait for the HEAD to be set */ + while (!(otx2_read64(ws->tag_op) & BIT_ULL(35))) + ; +#endif +} + +static __rte_always_inline void +otx2_ssogws_order(struct otx2_ssogws *ws, const uint8_t wait_flag) +{ + if (wait_flag) + otx2_ssogws_head_wait(ws); + + rte_cio_wmb(); +} + +static __rte_always_inline const struct otx2_eth_txq * +otx2_ssogws_xtract_meta(struct rte_mbuf *m) +{ + return rte_eth_devices[m->port].data->tx_queues[ + rte_event_eth_tx_adapter_txq_get(m)]; +} + +static __rte_always_inline void +otx2_ssogws_prepare_pkt(const struct otx2_eth_txq *txq, struct rte_mbuf *m, + uint64_t *cmd, const uint32_t flags) +{ + otx2_lmt_mov(cmd, txq->cmd, otx2_nix_tx_ext_subs(flags)); + otx2_nix_xmit_prepare(m, cmd, flags); +} + +static __rte_always_inline uint16_t +otx2_ssogws_event_tx(struct otx2_ssogws *ws, struct rte_event ev[], + uint64_t *cmd, const uint32_t flags) +{ + struct rte_mbuf *m = ev[0].mbuf; + const struct otx2_eth_txq *txq = otx2_ssogws_xtract_meta(m); + + rte_prefetch_non_temporal(txq); + + if ((flags & NIX_TX_OFFLOAD_SECURITY_F) && + (m->ol_flags & PKT_TX_SEC_OFFLOAD)) + return otx2_sec_event_tx(ws, ev, m, txq, flags); + + /* Perform header writes before barrier for TSO */ + otx2_nix_xmit_prepare_tso(m, flags); + otx2_ssogws_order(ws, !ev->sched_type); + otx2_ssogws_prepare_pkt(txq, m, cmd, flags); + + if (flags & NIX_TX_MULTI_SEG_F) { + const uint16_t segdw = otx2_nix_prepare_mseg(m, cmd, flags); + otx2_nix_xmit_prepare_tstamp(cmd, &txq->cmd[0], + m->ol_flags, segdw, flags); + otx2_nix_xmit_mseg_one(cmd, txq->lmt_addr, txq->io_addr, segdw); + } else { + /* Passing no of segdw as 4: HDR + EXT + SG + SMEM */ + otx2_nix_xmit_prepare_tstamp(cmd, &txq->cmd[0], + m->ol_flags, 4, flags); + otx2_nix_xmit_one(cmd, txq->lmt_addr, txq->io_addr, flags); + } + + return 1; +} + +#endif diff --git a/src/spdk/dpdk/drivers/event/octeontx2/otx2_worker_dual.c b/src/spdk/dpdk/drivers/event/octeontx2/otx2_worker_dual.c new file mode 100644 index 000000000..3d55d921b --- /dev/null +++ b/src/spdk/dpdk/drivers/event/octeontx2/otx2_worker_dual.c @@ -0,0 +1,343 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2019 Marvell International Ltd. 
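+ *
+ * Dual-workslot SSO worker: each event port owns two GWS states and
+ * alternates between them, issuing the next GETWORK on the paired workslot
+ * while the event fetched from the current one is being processed (see
+ * otx2_ssogws_dual_get_work()).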
+ */ + +#include "otx2_worker_dual.h" +#include "otx2_worker.h" + +static __rte_noinline uint8_t +otx2_ssogws_dual_new_event(struct otx2_ssogws_dual *ws, + const struct rte_event *ev) +{ + const uint32_t tag = (uint32_t)ev->event; + const uint8_t new_tt = ev->sched_type; + const uint64_t event_ptr = ev->u64; + const uint16_t grp = ev->queue_id; + + if (ws->xaq_lmt <= *ws->fc_mem) + return 0; + + otx2_ssogws_dual_add_work(ws, event_ptr, tag, new_tt, grp); + + return 1; +} + +static __rte_always_inline void +otx2_ssogws_dual_fwd_swtag(struct otx2_ssogws_state *ws, + const struct rte_event *ev) +{ + const uint32_t tag = (uint32_t)ev->event; + const uint8_t new_tt = ev->sched_type; + const uint8_t cur_tt = ws->cur_tt; + + /* 96XX model + * cur_tt/new_tt SSO_SYNC_ORDERED SSO_SYNC_ATOMIC SSO_SYNC_UNTAGGED + * + * SSO_SYNC_ORDERED norm norm untag + * SSO_SYNC_ATOMIC norm norm untag + * SSO_SYNC_UNTAGGED norm norm NOOP + */ + if (new_tt == SSO_SYNC_UNTAGGED) { + if (cur_tt != SSO_SYNC_UNTAGGED) + otx2_ssogws_swtag_untag((struct otx2_ssogws *)ws); + } else { + otx2_ssogws_swtag_norm((struct otx2_ssogws *)ws, tag, new_tt); + } +} + +static __rte_always_inline void +otx2_ssogws_dual_fwd_group(struct otx2_ssogws_state *ws, + const struct rte_event *ev, const uint16_t grp) +{ + const uint32_t tag = (uint32_t)ev->event; + const uint8_t new_tt = ev->sched_type; + + otx2_write64(ev->u64, OTX2_SSOW_GET_BASE_ADDR(ws->getwrk_op) + + SSOW_LF_GWS_OP_UPD_WQP_GRP1); + rte_smp_wmb(); + otx2_ssogws_swtag_desched((struct otx2_ssogws *)ws, tag, new_tt, grp); +} + +static __rte_always_inline void +otx2_ssogws_dual_forward_event(struct otx2_ssogws_dual *ws, + struct otx2_ssogws_state *vws, + const struct rte_event *ev) +{ + const uint8_t grp = ev->queue_id; + + /* Group hasn't changed, Use SWTAG to forward the event */ + if (vws->cur_grp == grp) { + otx2_ssogws_dual_fwd_swtag(vws, ev); + ws->swtag_req = 1; + } else { + /* + * Group has been changed for group based work pipelining, + * Use deschedule/add_work operation to transfer the event to + * new group/core + */ + otx2_ssogws_dual_fwd_group(vws, ev, grp); + } +} + +uint16_t __rte_hot +otx2_ssogws_dual_enq(void *port, const struct rte_event *ev) +{ + struct otx2_ssogws_dual *ws = port; + struct otx2_ssogws_state *vws = &ws->ws_state[!ws->vws]; + + switch (ev->op) { + case RTE_EVENT_OP_NEW: + rte_smp_mb(); + return otx2_ssogws_dual_new_event(ws, ev); + case RTE_EVENT_OP_FORWARD: + otx2_ssogws_dual_forward_event(ws, vws, ev); + break; + case RTE_EVENT_OP_RELEASE: + otx2_ssogws_swtag_flush((struct otx2_ssogws *)vws); + break; + default: + return 0; + } + + return 1; +} + +uint16_t __rte_hot +otx2_ssogws_dual_enq_burst(void *port, const struct rte_event ev[], + uint16_t nb_events) +{ + RTE_SET_USED(nb_events); + return otx2_ssogws_dual_enq(port, ev); +} + +uint16_t __rte_hot +otx2_ssogws_dual_enq_new_burst(void *port, const struct rte_event ev[], + uint16_t nb_events) +{ + struct otx2_ssogws_dual *ws = port; + uint16_t i, rc = 1; + + rte_smp_mb(); + if (ws->xaq_lmt <= *ws->fc_mem) + return 0; + + for (i = 0; i < nb_events && rc; i++) + rc = otx2_ssogws_dual_new_event(ws, &ev[i]); + + return nb_events; +} + +uint16_t __rte_hot +otx2_ssogws_dual_enq_fwd_burst(void *port, const struct rte_event ev[], + uint16_t nb_events) +{ + struct otx2_ssogws_dual *ws = port; + struct otx2_ssogws_state *vws = &ws->ws_state[!ws->vws]; + + RTE_SET_USED(nb_events); + otx2_ssogws_dual_forward_event(ws, vws, ev); + + return 1; +} + +#define R(name, f6, f5, f4, f3, f2, f1, f0, flags) \ 
+uint16_t __rte_hot \ +otx2_ssogws_dual_deq_ ##name(void *port, struct rte_event *ev, \ + uint64_t timeout_ticks) \ +{ \ + struct otx2_ssogws_dual *ws = port; \ + uint8_t gw; \ + \ + rte_prefetch_non_temporal(ws); \ + RTE_SET_USED(timeout_ticks); \ + if (ws->swtag_req) { \ + otx2_ssogws_swtag_wait((struct otx2_ssogws *) \ + &ws->ws_state[!ws->vws]); \ + ws->swtag_req = 0; \ + return 1; \ + } \ + \ + gw = otx2_ssogws_dual_get_work(&ws->ws_state[ws->vws], \ + &ws->ws_state[!ws->vws], ev, \ + flags, ws->lookup_mem, \ + ws->tstamp); \ + ws->vws = !ws->vws; \ + \ + return gw; \ +} \ + \ +uint16_t __rte_hot \ +otx2_ssogws_dual_deq_burst_ ##name(void *port, struct rte_event ev[], \ + uint16_t nb_events, \ + uint64_t timeout_ticks) \ +{ \ + RTE_SET_USED(nb_events); \ + \ + return otx2_ssogws_dual_deq_ ##name(port, ev, timeout_ticks); \ +} \ + \ +uint16_t __rte_hot \ +otx2_ssogws_dual_deq_timeout_ ##name(void *port, struct rte_event *ev, \ + uint64_t timeout_ticks) \ +{ \ + struct otx2_ssogws_dual *ws = port; \ + uint64_t iter; \ + uint8_t gw; \ + \ + if (ws->swtag_req) { \ + otx2_ssogws_swtag_wait((struct otx2_ssogws *) \ + &ws->ws_state[!ws->vws]); \ + ws->swtag_req = 0; \ + return 1; \ + } \ + \ + gw = otx2_ssogws_dual_get_work(&ws->ws_state[ws->vws], \ + &ws->ws_state[!ws->vws], ev, \ + flags, ws->lookup_mem, \ + ws->tstamp); \ + ws->vws = !ws->vws; \ + for (iter = 1; iter < timeout_ticks && (gw == 0); iter++) { \ + gw = otx2_ssogws_dual_get_work(&ws->ws_state[ws->vws], \ + &ws->ws_state[!ws->vws], \ + ev, flags, \ + ws->lookup_mem, \ + ws->tstamp); \ + ws->vws = !ws->vws; \ + } \ + \ + return gw; \ +} \ + \ +uint16_t __rte_hot \ +otx2_ssogws_dual_deq_timeout_burst_ ##name(void *port, \ + struct rte_event ev[], \ + uint16_t nb_events, \ + uint64_t timeout_ticks) \ +{ \ + RTE_SET_USED(nb_events); \ + \ + return otx2_ssogws_dual_deq_timeout_ ##name(port, ev, \ + timeout_ticks); \ +} \ + \ +uint16_t __rte_hot \ +otx2_ssogws_dual_deq_seg_ ##name(void *port, struct rte_event *ev, \ + uint64_t timeout_ticks) \ +{ \ + struct otx2_ssogws_dual *ws = port; \ + uint8_t gw; \ + \ + RTE_SET_USED(timeout_ticks); \ + if (ws->swtag_req) { \ + otx2_ssogws_swtag_wait((struct otx2_ssogws *) \ + &ws->ws_state[!ws->vws]); \ + ws->swtag_req = 0; \ + return 1; \ + } \ + \ + gw = otx2_ssogws_dual_get_work(&ws->ws_state[ws->vws], \ + &ws->ws_state[!ws->vws], ev, \ + flags | NIX_RX_MULTI_SEG_F, \ + ws->lookup_mem, \ + ws->tstamp); \ + ws->vws = !ws->vws; \ + \ + return gw; \ +} \ + \ +uint16_t __rte_hot \ +otx2_ssogws_dual_deq_seg_burst_ ##name(void *port, \ + struct rte_event ev[], \ + uint16_t nb_events, \ + uint64_t timeout_ticks) \ +{ \ + RTE_SET_USED(nb_events); \ + \ + return otx2_ssogws_dual_deq_seg_ ##name(port, ev, \ + timeout_ticks); \ +} \ + \ +uint16_t __rte_hot \ +otx2_ssogws_dual_deq_seg_timeout_ ##name(void *port, \ + struct rte_event *ev, \ + uint64_t timeout_ticks) \ +{ \ + struct otx2_ssogws_dual *ws = port; \ + uint64_t iter; \ + uint8_t gw; \ + \ + if (ws->swtag_req) { \ + otx2_ssogws_swtag_wait((struct otx2_ssogws *) \ + &ws->ws_state[!ws->vws]); \ + ws->swtag_req = 0; \ + return 1; \ + } \ + \ + gw = otx2_ssogws_dual_get_work(&ws->ws_state[ws->vws], \ + &ws->ws_state[!ws->vws], ev, \ + flags | NIX_RX_MULTI_SEG_F, \ + ws->lookup_mem, \ + ws->tstamp); \ + ws->vws = !ws->vws; \ + for (iter = 1; iter < timeout_ticks && (gw == 0); iter++) { \ + gw = otx2_ssogws_dual_get_work(&ws->ws_state[ws->vws], \ + &ws->ws_state[!ws->vws], \ + ev, flags | \ + NIX_RX_MULTI_SEG_F, \ + ws->lookup_mem, \ + ws->tstamp); 
\ + ws->vws = !ws->vws; \ + } \ + \ + return gw; \ +} \ + \ +uint16_t __rte_hot \ +otx2_ssogws_dual_deq_seg_timeout_burst_ ##name(void *port, \ + struct rte_event ev[], \ + uint16_t nb_events, \ + uint64_t timeout_ticks) \ +{ \ + RTE_SET_USED(nb_events); \ + \ + return otx2_ssogws_dual_deq_seg_timeout_ ##name(port, ev, \ + timeout_ticks); \ +} + +SSO_RX_ADPTR_ENQ_FASTPATH_FUNC +#undef R + +#define T(name, f6, f5, f4, f3, f2, f1, f0, sz, flags) \ +uint16_t __rte_hot \ +otx2_ssogws_dual_tx_adptr_enq_ ## name(void *port, \ + struct rte_event ev[], \ + uint16_t nb_events) \ +{ \ + struct otx2_ssogws_dual *ws = port; \ + struct otx2_ssogws *vws = \ + (struct otx2_ssogws *)&ws->ws_state[!ws->vws]; \ + uint64_t cmd[sz]; \ + \ + RTE_SET_USED(nb_events); \ + return otx2_ssogws_event_tx(vws, ev, cmd, flags); \ +} +SSO_TX_ADPTR_ENQ_FASTPATH_FUNC +#undef T + +#define T(name, f6, f5, f4, f3, f2, f1, f0, sz, flags) \ +uint16_t __rte_hot \ +otx2_ssogws_dual_tx_adptr_enq_seg_ ## name(void *port, \ + struct rte_event ev[], \ + uint16_t nb_events) \ +{ \ + struct otx2_ssogws_dual *ws = port; \ + struct otx2_ssogws *vws = \ + (struct otx2_ssogws *)&ws->ws_state[!ws->vws]; \ + uint64_t cmd[(sz) + NIX_TX_MSEG_SG_DWORDS - 2]; \ + \ + RTE_SET_USED(nb_events); \ + return otx2_ssogws_event_tx(vws, ev, cmd, (flags) | \ + NIX_TX_MULTI_SEG_F); \ +} +SSO_TX_ADPTR_ENQ_FASTPATH_FUNC +#undef T diff --git a/src/spdk/dpdk/drivers/event/octeontx2/otx2_worker_dual.h b/src/spdk/dpdk/drivers/event/octeontx2/otx2_worker_dual.h new file mode 100644 index 000000000..c88420eb4 --- /dev/null +++ b/src/spdk/dpdk/drivers/event/octeontx2/otx2_worker_dual.h @@ -0,0 +1,101 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2019 Marvell International Ltd. + */ + +#ifndef __OTX2_WORKER_DUAL_H__ +#define __OTX2_WORKER_DUAL_H__ + +#include <rte_branch_prediction.h> +#include <rte_common.h> + +#include <otx2_common.h> +#include "otx2_evdev.h" + +/* SSO Operations */ +static __rte_always_inline uint16_t +otx2_ssogws_dual_get_work(struct otx2_ssogws_state *ws, + struct otx2_ssogws_state *ws_pair, + struct rte_event *ev, const uint32_t flags, + const void * const lookup_mem, + struct otx2_timesync_info * const tstamp) +{ + const uint64_t set_gw = BIT_ULL(16) | 1; + union otx2_sso_event event; + uint64_t tstamp_ptr; + uint64_t get_work1; + uint64_t mbuf; + + if (flags & NIX_RX_OFFLOAD_PTYPE_F) + rte_prefetch_non_temporal(lookup_mem); +#ifdef RTE_ARCH_ARM64 + asm volatile( + "rty%=: \n" + " ldr %[tag], [%[tag_loc]] \n" + " ldr %[wqp], [%[wqp_loc]] \n" + " tbnz %[tag], 63, rty%= \n" + "done%=: str %[gw], [%[pong]] \n" + " dmb ld \n" + " prfm pldl1keep, [%[wqp], #8]\n" + " sub %[mbuf], %[wqp], #0x80 \n" + " prfm pldl1keep, [%[mbuf]] \n" + : [tag] "=&r" (event.get_work0), + [wqp] "=&r" (get_work1), + [mbuf] "=&r" (mbuf) + : [tag_loc] "r" (ws->tag_op), + [wqp_loc] "r" (ws->wqp_op), + [gw] "r" (set_gw), + [pong] "r" (ws_pair->getwrk_op) + ); +#else + event.get_work0 = otx2_read64(ws->tag_op); + while ((BIT_ULL(63)) & event.get_work0) + event.get_work0 = otx2_read64(ws->tag_op); + get_work1 = otx2_read64(ws->wqp_op); + otx2_write64(set_gw, ws_pair->getwrk_op); + + rte_prefetch0((const void *)get_work1); + mbuf = (uint64_t)((char *)get_work1 - sizeof(struct rte_mbuf)); + rte_prefetch0((const void *)mbuf); +#endif + event.get_work0 = (event.get_work0 & (0x3ull << 32)) << 6 | + (event.get_work0 & (0x3FFull << 36)) << 4 | + (event.get_work0 & 0xffffffff); + ws->cur_tt = event.sched_type; + ws->cur_grp = event.queue_id; + + if (event.sched_type 
!= SSO_TT_EMPTY && + event.event_type == RTE_EVENT_TYPE_ETHDEV) { + otx2_wqe_to_mbuf(get_work1, mbuf, event.sub_event_type, + (uint32_t) event.get_work0, flags, lookup_mem); + /* Extracting tstamp, if PTP enabled. CGX will prepend the + * timestamp at starting of packet data and it can be derieved + * from WQE 9 dword which corresponds to SG iova. + * rte_pktmbuf_mtod_offset can be used for this purpose but it + * brings down the performance as it reads mbuf->buf_addr which + * is not part of cache in general fast path. + */ + tstamp_ptr = *(uint64_t *)(((struct nix_wqe_hdr_s *)get_work1) + + OTX2_SSO_WQE_SG_PTR); + otx2_nix_mbuf_to_tstamp((struct rte_mbuf *)mbuf, tstamp, flags, + (uint64_t *)tstamp_ptr); + get_work1 = mbuf; + } + + ev->event = event.get_work0; + ev->u64 = get_work1; + + return !!get_work1; +} + +static __rte_always_inline void +otx2_ssogws_dual_add_work(struct otx2_ssogws_dual *ws, const uint64_t event_ptr, + const uint32_t tag, const uint8_t new_tt, + const uint16_t grp) +{ + uint64_t add_work0; + + add_work0 = tag | ((uint64_t)(new_tt) << 32); + otx2_store_pair(add_work0, event_ptr, ws->grps_base[grp]); +} + +#endif diff --git a/src/spdk/dpdk/drivers/event/octeontx2/rte_pmd_octeontx2_event_version.map b/src/spdk/dpdk/drivers/event/octeontx2/rte_pmd_octeontx2_event_version.map new file mode 100644 index 000000000..f9f17e4f6 --- /dev/null +++ b/src/spdk/dpdk/drivers/event/octeontx2/rte_pmd_octeontx2_event_version.map @@ -0,0 +1,3 @@ +DPDK_20.0 { + local: *; +}; diff --git a/src/spdk/dpdk/drivers/event/opdl/Makefile b/src/spdk/dpdk/drivers/event/opdl/Makefile new file mode 100644 index 000000000..71713b00a --- /dev/null +++ b/src/spdk/dpdk/drivers/event/opdl/Makefile @@ -0,0 +1,29 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2010-2017 Intel Corporation + +include $(RTE_SDK)/mk/rte.vars.mk + +# library name +LIB = librte_pmd_opdl_event.a + +# build flags +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) + +LDLIBS += -lrte_eal -lrte_eventdev -lrte_kvargs +LDLIBS += -lrte_bus_vdev -lrte_mbuf -lrte_mempool + +# versioning export map +EXPORT_MAP := rte_pmd_opdl_event_version.map + +# library source files +SRCS-$(CONFIG_RTE_LIBRTE_PMD_OPDL_EVENTDEV) += opdl_ring.c +SRCS-$(CONFIG_RTE_LIBRTE_PMD_OPDL_EVENTDEV) += opdl_evdev.c +SRCS-$(CONFIG_RTE_LIBRTE_PMD_OPDL_EVENTDEV) += opdl_evdev_init.c +SRCS-$(CONFIG_RTE_LIBRTE_PMD_OPDL_EVENTDEV) += opdl_evdev_xstats.c +SRCS-$(CONFIG_RTE_LIBRTE_PMD_OPDL_EVENTDEV) += opdl_test.c + +# export include files +SYMLINK-y-include += + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/src/spdk/dpdk/drivers/event/opdl/meson.build b/src/spdk/dpdk/drivers/event/opdl/meson.build new file mode 100644 index 000000000..cc6029c6f --- /dev/null +++ b/src/spdk/dpdk/drivers/event/opdl/meson.build @@ -0,0 +1,11 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2018 Luca Boccassi <bluca@debian.org> + +sources = files( + 'opdl_evdev.c', + 'opdl_evdev_init.c', + 'opdl_evdev_xstats.c', + 'opdl_ring.c', + 'opdl_test.c', +) +deps += ['bus_vdev'] diff --git a/src/spdk/dpdk/drivers/event/opdl/opdl_evdev.c b/src/spdk/dpdk/drivers/event/opdl/opdl_evdev.c new file mode 100644 index 000000000..3beca8957 --- /dev/null +++ b/src/spdk/dpdk/drivers/event/opdl/opdl_evdev.c @@ -0,0 +1,767 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017 Intel Corporation + */ + +#include <inttypes.h> +#include <string.h> + +#include <rte_bus_vdev.h> +#include <rte_lcore.h> +#include <rte_memzone.h> +#include <rte_kvargs.h> +#include <rte_errno.h> +#include 
<rte_cycles.h> + +#include "opdl_evdev.h" +#include "opdl_ring.h" +#include "opdl_log.h" + +#define EVENTDEV_NAME_OPDL_PMD event_opdl +#define NUMA_NODE_ARG "numa_node" +#define DO_VALIDATION_ARG "do_validation" +#define DO_TEST_ARG "self_test" + + +static void +opdl_info_get(struct rte_eventdev *dev, struct rte_event_dev_info *info); + +uint16_t +opdl_event_enqueue_burst(void *port, + const struct rte_event ev[], + uint16_t num) +{ + struct opdl_port *p = port; + + if (unlikely(!p->opdl->data->dev_started)) + return 0; + + + /* either rx_enqueue or disclaim*/ + return p->enq(p, ev, num); +} + +uint16_t +opdl_event_enqueue(void *port, const struct rte_event *ev) +{ + struct opdl_port *p = port; + + if (unlikely(!p->opdl->data->dev_started)) + return 0; + + + return p->enq(p, ev, 1); +} + +uint16_t +opdl_event_dequeue_burst(void *port, + struct rte_event *ev, + uint16_t num, + uint64_t wait) +{ + struct opdl_port *p = (void *)port; + + RTE_SET_USED(wait); + + if (unlikely(!p->opdl->data->dev_started)) + return 0; + + /* This function pointer can point to tx_dequeue or claim*/ + return p->deq(p, ev, num); +} + +uint16_t +opdl_event_dequeue(void *port, + struct rte_event *ev, + uint64_t wait) +{ + struct opdl_port *p = (void *)port; + + if (unlikely(!p->opdl->data->dev_started)) + return 0; + + RTE_SET_USED(wait); + + return p->deq(p, ev, 1); +} + +static int +opdl_port_link(struct rte_eventdev *dev, + void *port, + const uint8_t queues[], + const uint8_t priorities[], + uint16_t num) +{ + struct opdl_port *p = port; + + RTE_SET_USED(priorities); + RTE_SET_USED(dev); + + if (unlikely(dev->data->dev_started)) { + PMD_DRV_LOG(ERR, "DEV_ID:[%02d] : " + "Attempt to link queue (%u) to port %d while device started\n", + dev->data->dev_id, + queues[0], + p->id); + rte_errno = EINVAL; + return 0; + } + + /* Max of 1 queue per port */ + if (num > 1) { + PMD_DRV_LOG(ERR, "DEV_ID:[%02d] : " + "Attempt to link more than one queue (%u) to port %d requested\n", + dev->data->dev_id, + num, + p->id); + rte_errno = EDQUOT; + return 0; + } + + if (!p->configured) { + PMD_DRV_LOG(ERR, "DEV_ID:[%02d] : " + "port %d not configured, cannot link to %u\n", + dev->data->dev_id, + p->id, + queues[0]); + rte_errno = EINVAL; + return 0; + } + + if (p->external_qid != OPDL_INVALID_QID) { + PMD_DRV_LOG(ERR, "DEV_ID:[%02d] : " + "port %d already linked to queue %u, cannot link to %u\n", + dev->data->dev_id, + p->id, + p->external_qid, + queues[0]); + rte_errno = EINVAL; + return 0; + } + + p->external_qid = queues[0]; + + return 1; +} + +static int +opdl_port_unlink(struct rte_eventdev *dev, + void *port, + uint8_t queues[], + uint16_t nb_unlinks) +{ + struct opdl_port *p = port; + + RTE_SET_USED(queues); + RTE_SET_USED(nb_unlinks); + + if (unlikely(dev->data->dev_started)) { + PMD_DRV_LOG(ERR, "DEV_ID:[%02d] : " + "Attempt to unlink queue (%u) to port %d while device started\n", + dev->data->dev_id, + queues[0], + p->id); + rte_errno = EINVAL; + return 0; + } + RTE_SET_USED(nb_unlinks); + + /* Port Stuff */ + p->queue_id = OPDL_INVALID_QID; + p->p_type = OPDL_INVALID_PORT; + p->external_qid = OPDL_INVALID_QID; + + /* always unlink 0 queue due to statice pipeline */ + return 0; +} + +static int +opdl_port_setup(struct rte_eventdev *dev, + uint8_t port_id, + const struct rte_event_port_conf *conf) +{ + struct opdl_evdev *device = opdl_pmd_priv(dev); + struct opdl_port *p = &device->ports[port_id]; + + RTE_SET_USED(conf); + + /* Check if port already configured */ + if (p->configured) { + PMD_DRV_LOG(ERR, "DEV_ID:[%02d] : " 
+ "Attempt to setup port %d which is already setup\n", + dev->data->dev_id, + p->id); + return -EDQUOT; + } + + *p = (struct opdl_port){0}; /* zero entire structure */ + p->id = port_id; + p->opdl = device; + p->queue_id = OPDL_INVALID_QID; + p->external_qid = OPDL_INVALID_QID; + dev->data->ports[port_id] = p; + rte_smp_wmb(); + p->configured = 1; + device->nb_ports++; + return 0; +} + +static void +opdl_port_release(void *port) +{ + struct opdl_port *p = (void *)port; + + if (p == NULL || + p->opdl->data->dev_started) { + return; + } + + p->configured = 0; + p->initialized = 0; +} + +static void +opdl_port_def_conf(struct rte_eventdev *dev, uint8_t port_id, + struct rte_event_port_conf *port_conf) +{ + RTE_SET_USED(dev); + RTE_SET_USED(port_id); + + port_conf->new_event_threshold = MAX_OPDL_CONS_Q_DEPTH; + port_conf->dequeue_depth = MAX_OPDL_CONS_Q_DEPTH; + port_conf->enqueue_depth = MAX_OPDL_CONS_Q_DEPTH; +} + +static int +opdl_queue_setup(struct rte_eventdev *dev, + uint8_t queue_id, + const struct rte_event_queue_conf *conf) +{ + enum queue_type type; + + struct opdl_evdev *device = opdl_pmd_priv(dev); + + /* Extra sanity check, probably not needed */ + if (queue_id == OPDL_INVALID_QID) { + PMD_DRV_LOG(ERR, "DEV_ID:[%02d] : " + "Invalid queue id %u requested\n", + dev->data->dev_id, + queue_id); + return -EINVAL; + } + + if (device->nb_q_md > device->max_queue_nb) { + PMD_DRV_LOG(ERR, "DEV_ID:[%02d] : " + "Max number of queues %u exceeded by request %u\n", + dev->data->dev_id, + device->max_queue_nb, + device->nb_q_md); + return -EINVAL; + } + + if (RTE_EVENT_QUEUE_CFG_ALL_TYPES + & conf->event_queue_cfg) { + PMD_DRV_LOG(ERR, "DEV_ID:[%02d] : " + "QUEUE_CFG_ALL_TYPES not supported\n", + dev->data->dev_id); + return -ENOTSUP; + } else if (RTE_EVENT_QUEUE_CFG_SINGLE_LINK + & conf->event_queue_cfg) { + type = OPDL_Q_TYPE_SINGLE_LINK; + } else { + switch (conf->schedule_type) { + case RTE_SCHED_TYPE_ORDERED: + type = OPDL_Q_TYPE_ORDERED; + break; + case RTE_SCHED_TYPE_ATOMIC: + type = OPDL_Q_TYPE_ATOMIC; + break; + case RTE_SCHED_TYPE_PARALLEL: + type = OPDL_Q_TYPE_ORDERED; + break; + default: + PMD_DRV_LOG(ERR, "DEV_ID:[%02d] : " + "Unknown queue type %d requested\n", + dev->data->dev_id, + conf->event_queue_cfg); + return -EINVAL; + } + } + /* Check if queue id has been setup already */ + uint32_t i; + for (i = 0; i < device->nb_q_md; i++) { + if (device->q_md[i].ext_id == queue_id) { + PMD_DRV_LOG(ERR, "DEV_ID:[%02d] : " + "queue id %u already setup\n", + dev->data->dev_id, + queue_id); + return -EINVAL; + } + } + + device->q_md[device->nb_q_md].ext_id = queue_id; + device->q_md[device->nb_q_md].type = type; + device->q_md[device->nb_q_md].setup = 1; + device->nb_q_md++; + + return 1; +} + +static void +opdl_queue_release(struct rte_eventdev *dev, uint8_t queue_id) +{ + struct opdl_evdev *device = opdl_pmd_priv(dev); + + RTE_SET_USED(queue_id); + + if (device->data->dev_started) + return; + +} + +static void +opdl_queue_def_conf(struct rte_eventdev *dev, + uint8_t queue_id, + struct rte_event_queue_conf *conf) +{ + RTE_SET_USED(dev); + RTE_SET_USED(queue_id); + + static const struct rte_event_queue_conf default_conf = { + .nb_atomic_flows = 1024, + .nb_atomic_order_sequences = 1, + .event_queue_cfg = 0, + .schedule_type = RTE_SCHED_TYPE_ORDERED, + .priority = RTE_EVENT_DEV_PRIORITY_NORMAL, + }; + + *conf = default_conf; +} + + +static int +opdl_dev_configure(const struct rte_eventdev *dev) +{ + struct opdl_evdev *opdl = opdl_pmd_priv(dev); + const struct rte_eventdev_data *data = 
dev->data; + const struct rte_event_dev_config *conf = &data->dev_conf; + + opdl->max_queue_nb = conf->nb_event_queues; + opdl->max_port_nb = conf->nb_event_ports; + opdl->nb_events_limit = conf->nb_events_limit; + + if (conf->event_dev_cfg & RTE_EVENT_DEV_CFG_PER_DEQUEUE_TIMEOUT) { + PMD_DRV_LOG(ERR, "DEV_ID:[%02d] : " + "DEQUEUE_TIMEOUT not supported\n", + dev->data->dev_id); + return -ENOTSUP; + } + + return 0; +} + +static void +opdl_info_get(struct rte_eventdev *dev, struct rte_event_dev_info *info) +{ + RTE_SET_USED(dev); + + static const struct rte_event_dev_info evdev_opdl_info = { + .driver_name = OPDL_PMD_NAME, + .max_event_queues = RTE_EVENT_MAX_QUEUES_PER_DEV, + .max_event_queue_flows = OPDL_QID_NUM_FIDS, + .max_event_queue_priority_levels = OPDL_Q_PRIORITY_MAX, + .max_event_priority_levels = OPDL_IQS_MAX, + .max_event_ports = OPDL_PORTS_MAX, + .max_event_port_dequeue_depth = MAX_OPDL_CONS_Q_DEPTH, + .max_event_port_enqueue_depth = MAX_OPDL_CONS_Q_DEPTH, + .max_num_events = OPDL_INFLIGHT_EVENTS_TOTAL, + .event_dev_cap = RTE_EVENT_DEV_CAP_BURST_MODE, + }; + + *info = evdev_opdl_info; +} + +static void +opdl_dump(struct rte_eventdev *dev, FILE *f) +{ + struct opdl_evdev *device = opdl_pmd_priv(dev); + + if (!device->do_validation) + return; + + fprintf(f, + "\n\n -- RING STATISTICS --\n"); + uint32_t i; + for (i = 0; i < device->nb_opdls; i++) + opdl_ring_dump(device->opdl[i], f); + + fprintf(f, + "\n\n -- PORT STATISTICS --\n" + "Type Port Index Port Id Queue Id Av. Req Size " + "Av. Grant Size Av. Cycles PP" + " Empty DEQs Non Empty DEQs Pkts Processed\n"); + + for (i = 0; i < device->max_port_nb; i++) { + char queue_id[64]; + char total_cyc[64]; + const char *p_type; + + uint64_t cne, cpg; + struct opdl_port *port = &device->ports[i]; + + if (port->initialized) { + cne = port->port_stat[claim_non_empty]; + cpg = port->port_stat[claim_pkts_granted]; + if (port->p_type == OPDL_REGULAR_PORT) + p_type = "REG"; + else if (port->p_type == OPDL_PURE_RX_PORT) + p_type = " RX"; + else if (port->p_type == OPDL_PURE_TX_PORT) + p_type = " TX"; + else if (port->p_type == OPDL_ASYNC_PORT) + p_type = "SYNC"; + else + p_type = "????"; + + snprintf(queue_id, sizeof(queue_id), "%02u", + port->external_qid); + if (port->p_type == OPDL_REGULAR_PORT || + port->p_type == OPDL_ASYNC_PORT) + snprintf(total_cyc, sizeof(total_cyc), + " %'16"PRIu64"", + (cpg != 0 ? + port->port_stat[total_cycles] / cpg + : 0)); + else + snprintf(total_cyc, sizeof(total_cyc), + " ----"); + fprintf(f, + "%4s %10u %8u %9s %'16"PRIu64" %'16"PRIu64" %s " + "%'16"PRIu64" %'16"PRIu64" %'16"PRIu64"\n", + p_type, + i, + port->id, + (port->external_qid == OPDL_INVALID_QID ? "---" + : queue_id), + (cne != 0 ? + port->port_stat[claim_pkts_requested] / cne + : 0), + (cne != 0 ? 
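
The dump callback being defined here only emits the ring and per-port tables when the device was created with do_validation=1; otherwise it returns silently. A minimal sketch of triggering it from an application (stdout as the target stream is just an example):

#include <stdio.h>
#include <rte_eventdev.h>

/* Emit OPDL ring and port statistics for debugging. */
static void
dump_opdl_stats(uint8_t dev_id)
{
	rte_event_dev_dump(dev_id, stdout);
}
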
+ port->port_stat[claim_pkts_granted] / cne + : 0), + total_cyc, + port->port_stat[claim_empty], + port->port_stat[claim_non_empty], + port->port_stat[claim_pkts_granted]); + } + } + fprintf(f, "\n"); +} + + +static void +opdl_stop(struct rte_eventdev *dev) +{ + struct opdl_evdev *device = opdl_pmd_priv(dev); + + opdl_xstats_uninit(dev); + + destroy_queues_and_rings(dev); + + + device->started = 0; + + rte_smp_wmb(); +} + +static int +opdl_start(struct rte_eventdev *dev) +{ + int err = 0; + + if (!err) + err = create_queues_and_rings(dev); + + + if (!err) + err = assign_internal_queue_ids(dev); + + + if (!err) + err = initialise_queue_zero_ports(dev); + + + if (!err) + err = initialise_all_other_ports(dev); + + + if (!err) + err = check_queues_linked(dev); + + + if (!err) + err = opdl_add_event_handlers(dev); + + + if (!err) + err = build_all_dependencies(dev); + + if (!err) { + opdl_xstats_init(dev); + + struct opdl_evdev *device = opdl_pmd_priv(dev); + + PMD_DRV_LOG(INFO, "DEV_ID:[%02d] : " + "SUCCESS : Created %u total queues (%u ex, %u in)," + " %u opdls, %u event_dev ports, %u input ports", + opdl_pmd_dev_id(device), + device->nb_queues, + (device->nb_queues - device->nb_opdls), + device->nb_opdls, + device->nb_opdls, + device->nb_ports, + device->queue[0].nb_ports); + } else + opdl_stop(dev); + + return err; +} + +static int +opdl_close(struct rte_eventdev *dev) +{ + struct opdl_evdev *device = opdl_pmd_priv(dev); + uint32_t i; + + for (i = 0; i < device->max_port_nb; i++) { + memset(&device->ports[i], + 0, + sizeof(struct opdl_port)); + } + + memset(&device->s_md, + 0x0, + sizeof(struct opdl_stage_meta_data)*OPDL_PORTS_MAX); + + memset(&device->q_md, + 0xFF, + sizeof(struct opdl_queue_meta_data)*OPDL_MAX_QUEUES); + + + memset(device->q_map_ex_to_in, + 0, + sizeof(uint8_t)*OPDL_INVALID_QID); + + opdl_xstats_uninit(dev); + + device->max_port_nb = 0; + + device->max_queue_nb = 0; + + device->nb_opdls = 0; + + device->nb_queues = 0; + + device->nb_ports = 0; + + device->nb_q_md = 0; + + dev->data->nb_queues = 0; + + dev->data->nb_ports = 0; + + + return 0; +} + +static int +assign_numa_node(const char *key __rte_unused, const char *value, void *opaque) +{ + int *socket_id = opaque; + *socket_id = atoi(value); + if (*socket_id >= RTE_MAX_NUMA_NODES) + return -1; + return 0; +} + +static int +set_do_validation(const char *key __rte_unused, const char *value, void *opaque) +{ + int *do_val = opaque; + *do_val = atoi(value); + if (*do_val != 0) + *do_val = 1; + + return 0; +} +static int +set_do_test(const char *key __rte_unused, const char *value, void *opaque) +{ + int *do_test = opaque; + + *do_test = atoi(value); + + if (*do_test != 0) + *do_test = 1; + return 0; +} + +static int +opdl_probe(struct rte_vdev_device *vdev) +{ + static struct rte_eventdev_ops evdev_opdl_ops = { + .dev_configure = opdl_dev_configure, + .dev_infos_get = opdl_info_get, + .dev_close = opdl_close, + .dev_start = opdl_start, + .dev_stop = opdl_stop, + .dump = opdl_dump, + + .queue_def_conf = opdl_queue_def_conf, + .queue_setup = opdl_queue_setup, + .queue_release = opdl_queue_release, + .port_def_conf = opdl_port_def_conf, + .port_setup = opdl_port_setup, + .port_release = opdl_port_release, + .port_link = opdl_port_link, + .port_unlink = opdl_port_unlink, + + + .xstats_get = opdl_xstats_get, + .xstats_get_names = opdl_xstats_get_names, + .xstats_get_by_name = opdl_xstats_get_by_name, + .xstats_reset = opdl_xstats_reset, + }; + + static const char *const args[] = { + NUMA_NODE_ARG, + DO_VALIDATION_ARG, + 
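
The probe routine here parses three vdev arguments (numa_node, do_validation, self_test), which are registered with RTE_PMD_REGISTER_PARAM_STRING at the end of this file. A hedged sketch of creating the device programmatically; the instance name "event_opdl0" and the argument values are purely illustrative:

#include <rte_bus_vdev.h>

/* Equivalent to passing
 *   --vdev="event_opdl0,numa_node=0,do_validation=1,self_test=0"
 * on the EAL command line.
 */
static int
create_opdl_vdev(void)
{
	return rte_vdev_init("event_opdl0",
			     "numa_node=0,do_validation=1,self_test=0");
}
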
DO_TEST_ARG, + NULL + }; + const char *name; + const char *params; + struct rte_eventdev *dev; + struct opdl_evdev *opdl; + int socket_id = rte_socket_id(); + int do_validation = 0; + int do_test = 0; + int str_len; + int test_result = 0; + + name = rte_vdev_device_name(vdev); + params = rte_vdev_device_args(vdev); + if (params != NULL && params[0] != '\0') { + struct rte_kvargs *kvlist = rte_kvargs_parse(params, args); + + if (!kvlist) { + PMD_DRV_LOG(INFO, + "Ignoring unsupported parameters when creating device '%s'\n", + name); + } else { + int ret = rte_kvargs_process(kvlist, NUMA_NODE_ARG, + assign_numa_node, &socket_id); + if (ret != 0) { + PMD_DRV_LOG(ERR, + "%s: Error parsing numa node parameter", + name); + + rte_kvargs_free(kvlist); + return ret; + } + + ret = rte_kvargs_process(kvlist, DO_VALIDATION_ARG, + set_do_validation, &do_validation); + if (ret != 0) { + PMD_DRV_LOG(ERR, + "%s: Error parsing do validation parameter", + name); + rte_kvargs_free(kvlist); + return ret; + } + + ret = rte_kvargs_process(kvlist, DO_TEST_ARG, + set_do_test, &do_test); + if (ret != 0) { + PMD_DRV_LOG(ERR, + "%s: Error parsing do test parameter", + name); + rte_kvargs_free(kvlist); + return ret; + } + + rte_kvargs_free(kvlist); + } + } + dev = rte_event_pmd_vdev_init(name, + sizeof(struct opdl_evdev), socket_id); + + if (dev == NULL) { + PMD_DRV_LOG(ERR, "eventdev vdev init() failed"); + return -EFAULT; + } + + PMD_DRV_LOG(INFO, "DEV_ID:[%02d] : " + "Success - creating eventdev device %s, numa_node:[%d], do_valdation:[%s]" + " , self_test:[%s]\n", + dev->data->dev_id, + name, + socket_id, + (do_validation ? "true" : "false"), + (do_test ? "true" : "false")); + + dev->dev_ops = &evdev_opdl_ops; + + dev->enqueue = opdl_event_enqueue; + dev->enqueue_burst = opdl_event_enqueue_burst; + dev->enqueue_new_burst = opdl_event_enqueue_burst; + dev->enqueue_forward_burst = opdl_event_enqueue_burst; + dev->dequeue = opdl_event_dequeue; + dev->dequeue_burst = opdl_event_dequeue_burst; + + if (rte_eal_process_type() != RTE_PROC_PRIMARY) + return 0; + + opdl = dev->data->dev_private; + opdl->data = dev->data; + opdl->socket = socket_id; + opdl->do_validation = do_validation; + opdl->do_test = do_test; + str_len = strlen(name); + memcpy(opdl->service_name, name, str_len); + + if (do_test == 1) + test_result = opdl_selftest(); + + return test_result; +} + +static int +opdl_remove(struct rte_vdev_device *vdev) +{ + const char *name; + + name = rte_vdev_device_name(vdev); + if (name == NULL) + return -EINVAL; + + PMD_DRV_LOG(INFO, "Closing eventdev opdl device %s\n", name); + + return rte_event_pmd_vdev_uninit(name); +} + +static struct rte_vdev_driver evdev_opdl_pmd_drv = { + .probe = opdl_probe, + .remove = opdl_remove +}; + +RTE_INIT(opdl_init_log) +{ + opdl_logtype_driver = rte_log_register("pmd.event.opdl.driver"); + if (opdl_logtype_driver >= 0) + rte_log_set_level(opdl_logtype_driver, RTE_LOG_INFO); +} + + +RTE_PMD_REGISTER_VDEV(EVENTDEV_NAME_OPDL_PMD, evdev_opdl_pmd_drv); +RTE_PMD_REGISTER_PARAM_STRING(event_opdl, NUMA_NODE_ARG "=<int>" + DO_VALIDATION_ARG "=<int>" DO_TEST_ARG "=<int>"); diff --git a/src/spdk/dpdk/drivers/event/opdl/opdl_evdev.h b/src/spdk/dpdk/drivers/event/opdl/opdl_evdev.h new file mode 100644 index 000000000..610b58b35 --- /dev/null +++ b/src/spdk/dpdk/drivers/event/opdl/opdl_evdev.h @@ -0,0 +1,314 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017 Intel Corporation + */ + +#ifndef _OPDL_EVDEV_H_ +#define _OPDL_EVDEV_H_ + +#include <rte_eventdev.h> +#include 
<rte_eventdev_pmd_vdev.h> +#include <rte_atomic.h> +#include "opdl_ring.h" + +#define OPDL_QID_NUM_FIDS 1024 +#define OPDL_IQS_MAX 1 +#define OPDL_Q_PRIORITY_MAX 1 +#define OPDL_PORTS_MAX 64 +#define MAX_OPDL_CONS_Q_DEPTH 128 +/* OPDL size */ +#define OPDL_INFLIGHT_EVENTS_TOTAL 4096 +/* allow for lots of over-provisioning */ +#define OPDL_FRAGMENTS_MAX 1 + +/* report dequeue burst sizes in buckets */ +#define OPDL_DEQ_STAT_BUCKET_SHIFT 2 +/* how many packets pulled from port by sched */ +#define SCHED_DEQUEUE_BURST_SIZE 32 + +/* size of our history list */ +#define OPDL_PORT_HIST_LIST (MAX_OPDL_PROD_Q_DEPTH) + +/* how many data points use for average stats */ +#define NUM_SAMPLES 64 + +#define EVENTDEV_NAME_OPDL_PMD event_opdl +#define OPDL_PMD_NAME RTE_STR(event_opdl) +#define OPDL_PMD_NAME_MAX 64 + +#define OPDL_INVALID_QID 255 + +#define OPDL_SCHED_TYPE_DIRECT (RTE_SCHED_TYPE_PARALLEL + 1) + +#define OPDL_NUM_POLL_BUCKETS \ + (MAX_OPDL_CONS_Q_DEPTH >> OPDL_DEQ_STAT_BUCKET_SHIFT) + +enum { + QE_FLAG_VALID_SHIFT = 0, + QE_FLAG_COMPLETE_SHIFT, + QE_FLAG_NOT_EOP_SHIFT, + _QE_FLAG_COUNT +}; + +enum port_type { + OPDL_INVALID_PORT = 0, + OPDL_REGULAR_PORT = 1, + OPDL_PURE_RX_PORT, + OPDL_PURE_TX_PORT, + OPDL_ASYNC_PORT +}; + +enum queue_type { + OPDL_Q_TYPE_INVALID = 0, + OPDL_Q_TYPE_SINGLE_LINK = 1, + OPDL_Q_TYPE_ATOMIC, + OPDL_Q_TYPE_ORDERED +}; + +enum queue_pos { + OPDL_Q_POS_START = 0, + OPDL_Q_POS_MIDDLE, + OPDL_Q_POS_END +}; + +#define QE_FLAG_VALID (1 << QE_FLAG_VALID_SHIFT) /* for NEW FWD, FRAG */ +#define QE_FLAG_COMPLETE (1 << QE_FLAG_COMPLETE_SHIFT) /* set for FWD, DROP */ +#define QE_FLAG_NOT_EOP (1 << QE_FLAG_NOT_EOP_SHIFT) /* set for FRAG only */ + +static const uint8_t opdl_qe_flag_map[] = { + QE_FLAG_VALID /* NEW Event */, + QE_FLAG_VALID | QE_FLAG_COMPLETE /* FWD Event */, + QE_FLAG_COMPLETE /* RELEASE Event */, + + /* Values which can be used for future support for partial + * events, i.e. 
where one event comes back to the scheduler + * as multiple which need to be tracked together + */ + QE_FLAG_VALID | QE_FLAG_COMPLETE | QE_FLAG_NOT_EOP, +}; + + +enum port_xstat_name { + claim_pkts_requested = 0, + claim_pkts_granted, + claim_non_empty, + claim_empty, + total_cycles, + max_num_port_xstat +}; + +#define OPDL_MAX_PORT_XSTAT_NUM (OPDL_PORTS_MAX * max_num_port_xstat) + +struct opdl_port; + +typedef uint16_t (*opdl_enq_operation)(struct opdl_port *port, + const struct rte_event ev[], + uint16_t num); + +typedef uint16_t (*opdl_deq_operation)(struct opdl_port *port, + struct rte_event ev[], + uint16_t num); + +struct opdl_evdev; + +struct opdl_stage_meta_data { + uint32_t num_claimed; /* number of entries claimed by this stage */ + uint32_t burst_sz; /* Port claim burst size */ +}; + +struct opdl_port { + + /* back pointer */ + struct opdl_evdev *opdl; + + /* enq handler & stage instance */ + opdl_enq_operation enq; + struct opdl_stage *enq_stage_inst; + + /* deq handler & stage instance */ + opdl_deq_operation deq; + struct opdl_stage *deq_stage_inst; + + /* port id has correctly been set */ + uint8_t configured; + + /* set when the port is initialized */ + uint8_t initialized; + + /* A numeric ID for the port */ + uint8_t id; + + /* Space for claimed entries */ + struct rte_event *entries[MAX_OPDL_CONS_Q_DEPTH]; + + /* RX/REGULAR/TX/ASYNC - determined on position in queue */ + enum port_type p_type; + + /* if the claim is static atomic type */ + bool atomic_claim; + + /* Queue linked to this port - internal queue id*/ + uint8_t queue_id; + + /* Queue linked to this port - external queue id*/ + uint8_t external_qid; + + /* Next queue linked to this port - external queue id*/ + uint8_t next_external_qid; + + /* number of instances of this stage */ + uint32_t num_instance; + + /* instance ID of this stage*/ + uint32_t instance_id; + + /* track packets in and out of this port */ + uint64_t port_stat[max_num_port_xstat]; + uint64_t start_cycles; +}; + +struct opdl_queue_meta_data { + uint8_t ext_id; + enum queue_type type; + int8_t setup; +}; + +struct opdl_xstats_entry { + struct rte_event_dev_xstats_name stat; + unsigned int id; + uint64_t *value; +}; + +struct opdl_queue { + + /* Opdl ring this queue is associated with */ + uint32_t opdl_id; + + /* type and position have correctly been set */ + uint8_t configured; + + /* port number and associated ports have been associated */ + uint8_t initialized; + + /* type of this queue (Atomic, Ordered, Parallel, Direct)*/ + enum queue_type q_type; + + /* position of queue (START, MIDDLE, END) */ + enum queue_pos q_pos; + + /* external queue id. 
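
Each opdl_port defined above carries enq/deq handler pointers that the generic rte_event_*_burst() fast-path calls resolve to (claim/disclaim for regular ports, rx enqueue and tx dequeue at the ends of the pipeline). A hedged sketch of a worker loop driving such a port; dev_id, port_id and the burst size are placeholders:

#include <rte_eventdev.h>

#define BURST 32	/* illustrative; must not exceed MAX_OPDL_CONS_Q_DEPTH */

static void
worker_loop(uint8_t dev_id, uint8_t port_id)
{
	struct rte_event ev[BURST];

	for (;;) {
		/* resolves to opdl_claim() for a regular (middle) port */
		uint16_t n = rte_event_dequeue_burst(dev_id, port_id,
						     ev, BURST, 0);
		if (n == 0)
			continue;

		/* ... process the claimed events in place ... */

		/* resolves to opdl_disclaim() for a regular port */
		rte_event_enqueue_burst(dev_id, port_id, ev, n);
	}
}
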
It is mapped to the queue position */ + uint8_t external_qid; + + struct opdl_port *ports[OPDL_PORTS_MAX]; + uint32_t nb_ports; + + /* priority, reserved for future */ + uint8_t priority; +}; + + +#define OPDL_TUR_PER_DEV 12 + +/* PMD needs an extra queue per Opdl */ +#define OPDL_MAX_QUEUES (RTE_EVENT_MAX_QUEUES_PER_DEV - OPDL_TUR_PER_DEV) + + +struct opdl_evdev { + struct rte_eventdev_data *data; + + uint8_t started; + + /* Max number of ports and queues*/ + uint32_t max_port_nb; + uint32_t max_queue_nb; + + /* slots in the opdl ring */ + uint32_t nb_events_limit; + + /* + * Array holding all opdl for this device + */ + struct opdl_ring *opdl[OPDL_TUR_PER_DEV]; + uint32_t nb_opdls; + + struct opdl_queue_meta_data q_md[OPDL_MAX_QUEUES]; + uint32_t nb_q_md; + + /* Internal queues - one per logical queue */ + struct opdl_queue + queue[RTE_EVENT_MAX_QUEUES_PER_DEV] __rte_cache_aligned; + + uint32_t nb_queues; + + struct opdl_stage_meta_data s_md[OPDL_PORTS_MAX]; + + /* Contains all ports - load balanced and directed */ + struct opdl_port ports[OPDL_PORTS_MAX] __rte_cache_aligned; + uint32_t nb_ports; + + uint8_t q_map_ex_to_in[OPDL_INVALID_QID]; + + /* Stats */ + struct opdl_xstats_entry port_xstat[OPDL_MAX_PORT_XSTAT_NUM]; + + char service_name[OPDL_PMD_NAME_MAX]; + int socket; + int do_validation; + int do_test; +}; + + +static inline struct opdl_evdev * +opdl_pmd_priv(const struct rte_eventdev *eventdev) +{ + return eventdev->data->dev_private; +} + +static inline uint8_t +opdl_pmd_dev_id(const struct opdl_evdev *opdl) +{ + return opdl->data->dev_id; +} + +static inline const struct opdl_evdev * +opdl_pmd_priv_const(const struct rte_eventdev *eventdev) +{ + return eventdev->data->dev_private; +} + +uint16_t opdl_event_enqueue(void *port, const struct rte_event *ev); +uint16_t opdl_event_enqueue_burst(void *port, const struct rte_event ev[], + uint16_t num); + +uint16_t opdl_event_dequeue(void *port, struct rte_event *ev, uint64_t wait); +uint16_t opdl_event_dequeue_burst(void *port, struct rte_event *ev, + uint16_t num, uint64_t wait); +void opdl_event_schedule(struct rte_eventdev *dev); + +void opdl_xstats_init(struct rte_eventdev *dev); +int opdl_xstats_uninit(struct rte_eventdev *dev); +int opdl_xstats_get_names(const struct rte_eventdev *dev, + enum rte_event_dev_xstats_mode mode, uint8_t queue_port_id, + struct rte_event_dev_xstats_name *xstats_names, + unsigned int *ids, unsigned int size); +int opdl_xstats_get(const struct rte_eventdev *dev, + enum rte_event_dev_xstats_mode mode, uint8_t queue_port_id, + const unsigned int ids[], uint64_t values[], unsigned int n); +uint64_t opdl_xstats_get_by_name(const struct rte_eventdev *dev, + const char *name, unsigned int *id); +int opdl_xstats_reset(struct rte_eventdev *dev, + enum rte_event_dev_xstats_mode mode, + int16_t queue_port_id, + const uint32_t ids[], + uint32_t nb_ids); + +int opdl_add_event_handlers(struct rte_eventdev *dev); +int build_all_dependencies(struct rte_eventdev *dev); +int check_queues_linked(struct rte_eventdev *dev); +int create_queues_and_rings(struct rte_eventdev *dev); +int initialise_all_other_ports(struct rte_eventdev *dev); +int initialise_queue_zero_ports(struct rte_eventdev *dev); +int assign_internal_queue_ids(struct rte_eventdev *dev); +void destroy_queues_and_rings(struct rte_eventdev *dev); +int opdl_selftest(void); + +#endif /* _OPDL_EVDEV_H_ */ diff --git a/src/spdk/dpdk/drivers/event/opdl/opdl_evdev_init.c b/src/spdk/dpdk/drivers/event/opdl/opdl_evdev_init.c new file mode 100644 index 
000000000..15aae4752 --- /dev/null +++ b/src/spdk/dpdk/drivers/event/opdl/opdl_evdev_init.c @@ -0,0 +1,943 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017 Intel Corporation + */ + +#include <inttypes.h> +#include <string.h> + +#include <rte_bus_vdev.h> +#include <rte_errno.h> +#include <rte_cycles.h> +#include <rte_memzone.h> + +#include "opdl_evdev.h" +#include "opdl_ring.h" +#include "opdl_log.h" + + +static __rte_always_inline uint32_t +enqueue_check(struct opdl_port *p, + const struct rte_event ev[], + uint16_t num, + uint16_t num_events) +{ + uint16_t i; + + if (p->opdl->do_validation) { + + for (i = 0; i < num; i++) { + if (ev[i].queue_id != p->next_external_qid) { + PMD_DRV_LOG(ERR, "DEV_ID:[%02d] : " + "ERROR - port:[%u] - event wants" + " to enq to q_id[%u]," + " but should be [%u]", + opdl_pmd_dev_id(p->opdl), + p->id, + ev[i].queue_id, + p->next_external_qid); + rte_errno = EINVAL; + return 0; + } + } + + /* Stats */ + if (p->p_type == OPDL_PURE_RX_PORT || + p->p_type == OPDL_ASYNC_PORT) { + /* Stats */ + if (num_events) { + p->port_stat[claim_pkts_requested] += num; + p->port_stat[claim_pkts_granted] += num_events; + p->port_stat[claim_non_empty]++; + p->start_cycles = rte_rdtsc(); + } else { + p->port_stat[claim_empty]++; + p->start_cycles = 0; + } + } else { + if (p->start_cycles) { + uint64_t end_cycles = rte_rdtsc(); + p->port_stat[total_cycles] += + end_cycles - p->start_cycles; + } + } + } else { + if (num > 0 && + ev[0].queue_id != p->next_external_qid) { + rte_errno = EINVAL; + return 0; + } + } + + return num; +} + +static __rte_always_inline void +update_on_dequeue(struct opdl_port *p, + struct rte_event ev[], + uint16_t num, + uint16_t num_events) +{ + if (p->opdl->do_validation) { + int16_t i; + for (i = 0; i < num; i++) + ev[i].queue_id = + p->opdl->queue[p->queue_id].external_qid; + + /* Stats */ + if (num_events) { + p->port_stat[claim_pkts_requested] += num; + p->port_stat[claim_pkts_granted] += num_events; + p->port_stat[claim_non_empty]++; + p->start_cycles = rte_rdtsc(); + } else { + p->port_stat[claim_empty]++; + p->start_cycles = 0; + } + } else { + if (num > 0) + ev[0].queue_id = + p->opdl->queue[p->queue_id].external_qid; + } +} + + +/* + * Error RX enqueue: + * + * + */ + +static uint16_t +opdl_rx_error_enqueue(struct opdl_port *p, + const struct rte_event ev[], + uint16_t num) +{ + RTE_SET_USED(p); + RTE_SET_USED(ev); + RTE_SET_USED(num); + + rte_errno = ENOSPC; + + return 0; +} + +/* + * RX enqueue: + * + * This function handles enqueue for a single input stage_inst with + * threadsafe disabled or enabled. eg 1 thread using a stage_inst or + * multiple threads sharing a stage_inst + */ + +static uint16_t +opdl_rx_enqueue(struct opdl_port *p, + const struct rte_event ev[], + uint16_t num) +{ + uint16_t enqueued = 0; + + enqueued = opdl_ring_input(opdl_stage_get_opdl_ring(p->enq_stage_inst), + ev, + num, + false); + if (!enqueue_check(p, ev, num, enqueued)) + return 0; + + + if (enqueued < num) + rte_errno = ENOSPC; + + return enqueued; +} + +/* + * Error TX handler + * + */ + +static uint16_t +opdl_tx_error_dequeue(struct opdl_port *p, + struct rte_event ev[], + uint16_t num) +{ + RTE_SET_USED(p); + RTE_SET_USED(ev); + RTE_SET_USED(num); + + rte_errno = ENOSPC; + + return 0; +} + +/* + * TX single threaded claim + * + * This function handles dequeue for a single worker stage_inst with + * threadsafe disabled. 
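
With do_validation enabled, enqueue_check() above rejects any event whose queue_id differs from the port's next_external_qid, i.e. a worker may only forward to the next stage of the static pipeline. A small sketch of keeping that invariant when forwarding; next_qid stands for whatever downstream queue id the application configured:

#include <rte_eventdev.h>

/* Re-target an event at the next pipeline stage before enqueuing it. */
static inline void
forward_to_next_stage(struct rte_event *ev, uint8_t next_qid)
{
	ev->queue_id = next_qid;	/* must match port->next_external_qid */
	ev->op = RTE_EVENT_OP_FORWARD;
}
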
eg 1 thread using an stage_inst + */ + +static uint16_t +opdl_tx_dequeue_single_thread(struct opdl_port *p, + struct rte_event ev[], + uint16_t num) +{ + uint16_t returned; + + struct opdl_ring *ring; + + ring = opdl_stage_get_opdl_ring(p->deq_stage_inst); + + returned = opdl_ring_copy_to_burst(ring, + p->deq_stage_inst, + ev, + num, + false); + + update_on_dequeue(p, ev, num, returned); + + return returned; +} + +/* + * TX multi threaded claim + * + * This function handles dequeue for multiple worker stage_inst with + * threadsafe disabled. eg multiple stage_inst each with its own instance + */ + +static uint16_t +opdl_tx_dequeue_multi_inst(struct opdl_port *p, + struct rte_event ev[], + uint16_t num) +{ + uint32_t num_events = 0; + + num_events = opdl_stage_claim(p->deq_stage_inst, + (void *)ev, + num, + NULL, + false, + false); + + update_on_dequeue(p, ev, num, num_events); + + return opdl_stage_disclaim(p->deq_stage_inst, num_events, false); +} + + +/* + * Worker thread claim + * + */ + +static uint16_t +opdl_claim(struct opdl_port *p, struct rte_event ev[], uint16_t num) +{ + uint32_t num_events = 0; + + if (unlikely(num > MAX_OPDL_CONS_Q_DEPTH)) { + PMD_DRV_LOG(ERR, "DEV_ID:[%02d] : " + "Attempt to dequeue num of events larger than port (%d) max", + opdl_pmd_dev_id(p->opdl), + p->id); + rte_errno = EINVAL; + return 0; + } + + + num_events = opdl_stage_claim(p->deq_stage_inst, + (void *)ev, + num, + NULL, + false, + p->atomic_claim); + + + update_on_dequeue(p, ev, num, num_events); + + return num_events; +} + +/* + * Worker thread disclaim + */ + +static uint16_t +opdl_disclaim(struct opdl_port *p, const struct rte_event ev[], uint16_t num) +{ + uint16_t enqueued = 0; + + uint32_t i = 0; + + for (i = 0; i < num; i++) + opdl_ring_cas_slot(p->enq_stage_inst, &ev[i], + i, p->atomic_claim); + + enqueued = opdl_stage_disclaim(p->enq_stage_inst, + num, + false); + + return enqueue_check(p, ev, num, enqueued); +} + +static __rte_always_inline struct opdl_stage * +stage_for_port(struct opdl_queue *q, unsigned int i) +{ + if (q->q_pos == OPDL_Q_POS_START || q->q_pos == OPDL_Q_POS_MIDDLE) + return q->ports[i]->enq_stage_inst; + else + return q->ports[i]->deq_stage_inst; +} + +static int opdl_add_deps(struct opdl_evdev *device, + int q_id, + int deps_q_id) +{ + unsigned int i, j; + int status; + struct opdl_ring *ring; + struct opdl_queue *queue = &device->queue[q_id]; + struct opdl_queue *queue_deps = &device->queue[deps_q_id]; + struct opdl_stage *dep_stages[OPDL_PORTS_MAX]; + + /* sanity check that all stages are for same opdl ring */ + for (i = 0; i < queue->nb_ports; i++) { + struct opdl_ring *r = + opdl_stage_get_opdl_ring(stage_for_port(queue, i)); + for (j = 0; j < queue_deps->nb_ports; j++) { + struct opdl_ring *rj = + opdl_stage_get_opdl_ring( + stage_for_port(queue_deps, j)); + if (r != rj) { + PMD_DRV_LOG(ERR, "DEV_ID:[%02d] : " + "Stages and dependents" + " are not for same opdl ring", + opdl_pmd_dev_id(device)); + uint32_t k; + for (k = 0; k < device->nb_opdls; k++) { + opdl_ring_dump(device->opdl[k], + stdout); + } + return -EINVAL; + } + } + } + + /* Gather all stages instance in deps */ + for (i = 0; i < queue_deps->nb_ports; i++) + dep_stages[i] = stage_for_port(queue_deps, i); + + + /* Add all deps for each port->stage_inst in this queue */ + for (i = 0; i < queue->nb_ports; i++) { + + ring = opdl_stage_get_opdl_ring(stage_for_port(queue, i)); + + status = opdl_stage_deps_add(ring, + stage_for_port(queue, i), + queue->ports[i]->num_instance, + queue->ports[i]->instance_id, + 
dep_stages, + queue_deps->nb_ports); + if (status < 0) + return -EINVAL; + } + + return 0; +} + +int +opdl_add_event_handlers(struct rte_eventdev *dev) +{ + int err = 0; + + struct opdl_evdev *device = opdl_pmd_priv(dev); + unsigned int i; + + for (i = 0; i < device->max_port_nb; i++) { + + struct opdl_port *port = &device->ports[i]; + + if (port->configured) { + if (port->p_type == OPDL_PURE_RX_PORT) { + port->enq = opdl_rx_enqueue; + port->deq = opdl_tx_error_dequeue; + + } else if (port->p_type == OPDL_PURE_TX_PORT) { + + port->enq = opdl_rx_error_enqueue; + + if (port->num_instance == 1) + port->deq = + opdl_tx_dequeue_single_thread; + else + port->deq = opdl_tx_dequeue_multi_inst; + + } else if (port->p_type == OPDL_REGULAR_PORT) { + + port->enq = opdl_disclaim; + port->deq = opdl_claim; + + } else if (port->p_type == OPDL_ASYNC_PORT) { + + port->enq = opdl_rx_enqueue; + + /* Always single instance */ + port->deq = opdl_tx_dequeue_single_thread; + } else { + PMD_DRV_LOG(ERR, "DEV_ID:[%02d] : " + "port:[%u] has invalid port type - ", + opdl_pmd_dev_id(port->opdl), + port->id); + err = -EINVAL; + break; + } + port->initialized = 1; + } + } + + if (!err) + fprintf(stdout, "Success - enqueue/dequeue handler(s) added\n"); + return err; +} + +int +build_all_dependencies(struct rte_eventdev *dev) +{ + + int err = 0; + unsigned int i; + struct opdl_evdev *device = opdl_pmd_priv(dev); + + uint8_t start_qid = 0; + + for (i = 0; i < RTE_EVENT_MAX_QUEUES_PER_DEV; i++) { + struct opdl_queue *queue = &device->queue[i]; + if (!queue->initialized) + break; + + if (queue->q_pos == OPDL_Q_POS_START) { + start_qid = i; + continue; + } + + if (queue->q_pos == OPDL_Q_POS_MIDDLE) { + err = opdl_add_deps(device, i, i-1); + if (err < 0) { + PMD_DRV_LOG(ERR, "DEV_ID:[%02d] : " + "dependency addition for queue:[%u] - FAILED", + dev->data->dev_id, + queue->external_qid); + break; + } + } + + if (queue->q_pos == OPDL_Q_POS_END) { + /* Add this dependency */ + err = opdl_add_deps(device, i, i-1); + if (err < 0) { + PMD_DRV_LOG(ERR, "DEV_ID:[%02d] : " + "dependency addition for queue:[%u] - FAILED", + dev->data->dev_id, + queue->external_qid); + break; + } + /* Add dependency for rx on tx */ + err = opdl_add_deps(device, start_qid, i); + if (err < 0) { + PMD_DRV_LOG(ERR, "DEV_ID:[%02d] : " + "dependency addition for queue:[%u] - FAILED", + dev->data->dev_id, + queue->external_qid); + break; + } + } + } + + if (!err) + fprintf(stdout, "Success - dependencies built\n"); + + return err; +} +int +check_queues_linked(struct rte_eventdev *dev) +{ + + int err = 0; + unsigned int i; + struct opdl_evdev *device = opdl_pmd_priv(dev); + uint32_t nb_iq = 0; + + for (i = 0; i < RTE_EVENT_MAX_QUEUES_PER_DEV; i++) { + struct opdl_queue *queue = &device->queue[i]; + + if (!queue->initialized) + break; + + if (queue->external_qid == OPDL_INVALID_QID) + nb_iq++; + + if (queue->nb_ports == 0) { + PMD_DRV_LOG(ERR, "DEV_ID:[%02d] : " + "queue:[%u] has no associated ports", + dev->data->dev_id, + i); + err = -EINVAL; + break; + } + } + if (!err) { + if ((i - nb_iq) != device->max_queue_nb) { + PMD_DRV_LOG(ERR, "DEV_ID:[%02d] : " + "%u queues counted but should be %u", + dev->data->dev_id, + i - nb_iq, + device->max_queue_nb); + err = -1; + } + + } + return err; +} + +void +destroy_queues_and_rings(struct rte_eventdev *dev) +{ + struct opdl_evdev *device = opdl_pmd_priv(dev); + uint32_t i; + + for (i = 0; i < device->nb_opdls; i++) { + if (device->opdl[i]) + opdl_ring_free(device->opdl[i]); + } + + memset(&device->queue, + 0, + 
sizeof(struct opdl_queue) + * RTE_EVENT_MAX_QUEUES_PER_DEV); +} + +#define OPDL_ID(d)(d->nb_opdls - 1) + +static __rte_always_inline void +initialise_queue(struct opdl_evdev *device, + enum queue_pos pos, + int32_t i) +{ + struct opdl_queue *queue = &device->queue[device->nb_queues]; + + if (i == -1) { + queue->q_type = OPDL_Q_TYPE_ORDERED; + queue->external_qid = OPDL_INVALID_QID; + } else { + queue->q_type = device->q_md[i].type; + queue->external_qid = device->q_md[i].ext_id; + /* Add ex->in for queues setup */ + device->q_map_ex_to_in[queue->external_qid] = device->nb_queues; + } + queue->opdl_id = OPDL_ID(device); + queue->q_pos = pos; + queue->nb_ports = 0; + queue->configured = 1; + + device->nb_queues++; +} + + +static __rte_always_inline int +create_opdl(struct opdl_evdev *device) +{ + int err = 0; + + char name[RTE_MEMZONE_NAMESIZE]; + + snprintf(name, RTE_MEMZONE_NAMESIZE, + "%s_%u", device->service_name, device->nb_opdls); + + device->opdl[device->nb_opdls] = + opdl_ring_create(name, + device->nb_events_limit, + sizeof(struct rte_event), + device->max_port_nb * 2, + device->socket); + + if (!device->opdl[device->nb_opdls]) { + PMD_DRV_LOG(ERR, "DEV_ID:[%02d] : " + "opdl ring %u creation - FAILED", + opdl_pmd_dev_id(device), + device->nb_opdls); + err = -EINVAL; + } else { + device->nb_opdls++; + } + return err; +} + +static __rte_always_inline int +create_link_opdl(struct opdl_evdev *device, uint32_t index) +{ + + int err = 0; + + if (device->q_md[index + 1].type != + OPDL_Q_TYPE_SINGLE_LINK) { + + /* async queue with regular + * queue following it + */ + + /* create a new opdl ring */ + err = create_opdl(device); + if (!err) { + /* create an initial + * dummy queue for new opdl + */ + initialise_queue(device, + OPDL_Q_POS_START, + -1); + } else { + err = -EINVAL; + } + } else { + PMD_DRV_LOG(ERR, "DEV_ID:[%02d] : " + "queue %u, two consecutive" + " SINGLE_LINK queues, not allowed", + opdl_pmd_dev_id(device), + index); + err = -EINVAL; + } + + return err; +} + +int +create_queues_and_rings(struct rte_eventdev *dev) +{ + int err = 0; + + struct opdl_evdev *device = opdl_pmd_priv(dev); + + device->nb_queues = 0; + + if (device->nb_ports != device->max_port_nb) { + PMD_DRV_LOG(ERR, "Number ports setup:%u NOT EQUAL to max port" + " number:%u for this device", + device->nb_ports, + device->max_port_nb); + err = -1; + } + + if (!err) { + /* We will have at least one opdl so create it now */ + err = create_opdl(device); + } + + if (!err) { + + /* Create 1st "dummy" queue */ + initialise_queue(device, + OPDL_Q_POS_START, + -1); + + uint32_t i; + for (i = 0; i < device->nb_q_md; i++) { + + /* Check */ + if (!device->q_md[i].setup) { + + PMD_DRV_LOG(ERR, "DEV_ID:[%02d] : " + "queue meta data slot %u" + " not setup - FAILING", + dev->data->dev_id, + i); + err = -EINVAL; + break; + } else if (device->q_md[i].type != + OPDL_Q_TYPE_SINGLE_LINK) { + + if (!device->q_md[i + 1].setup) { + /* Create a simple ORDERED/ATOMIC + * queue at the end + */ + initialise_queue(device, + OPDL_Q_POS_END, + i); + + } else { + /* Create a simple ORDERED/ATOMIC + * queue in the middle + */ + initialise_queue(device, + OPDL_Q_POS_MIDDLE, + i); + } + } else if (device->q_md[i].type == + OPDL_Q_TYPE_SINGLE_LINK) { + + /* create last queue for this opdl */ + initialise_queue(device, + OPDL_Q_POS_END, + i); + + err = create_link_opdl(device, i); + + if (err) + break; + + + } + } + } + if (err) + destroy_queues_and_rings(dev); + + return err; +} + + +int +initialise_all_other_ports(struct rte_eventdev *dev) +{ + int 
err = 0; + struct opdl_stage *stage_inst = NULL; + + struct opdl_evdev *device = opdl_pmd_priv(dev); + + uint32_t i; + for (i = 0; i < device->nb_ports; i++) { + struct opdl_port *port = &device->ports[i]; + struct opdl_queue *queue = &device->queue[port->queue_id]; + + if (port->queue_id == 0) { + continue; + } else if (queue->q_type != OPDL_Q_TYPE_SINGLE_LINK) { + + if (queue->q_pos == OPDL_Q_POS_MIDDLE) { + + /* Regular port with claim/disclaim */ + stage_inst = opdl_stage_add( + device->opdl[queue->opdl_id], + false, + false); + port->deq_stage_inst = stage_inst; + port->enq_stage_inst = stage_inst; + + if (queue->q_type == OPDL_Q_TYPE_ATOMIC) + port->atomic_claim = true; + else + port->atomic_claim = false; + + port->p_type = OPDL_REGULAR_PORT; + + /* Add the port to the queue array of ports */ + queue->ports[queue->nb_ports] = port; + port->instance_id = queue->nb_ports; + queue->nb_ports++; + opdl_stage_set_queue_id(stage_inst, + port->queue_id); + + } else if (queue->q_pos == OPDL_Q_POS_END) { + + /* tx port */ + stage_inst = opdl_stage_add( + device->opdl[queue->opdl_id], + false, + false); + port->deq_stage_inst = stage_inst; + port->enq_stage_inst = NULL; + port->p_type = OPDL_PURE_TX_PORT; + + /* Add the port to the queue array of ports */ + queue->ports[queue->nb_ports] = port; + port->instance_id = queue->nb_ports; + queue->nb_ports++; + } else { + + PMD_DRV_LOG(ERR, "DEV_ID:[%02d] : " + "port %u:, linked incorrectly" + " to a q_pos START/INVALID %u", + opdl_pmd_dev_id(port->opdl), + port->id, + queue->q_pos); + err = -EINVAL; + break; + } + + } else if (queue->q_type == OPDL_Q_TYPE_SINGLE_LINK) { + + port->p_type = OPDL_ASYNC_PORT; + + /* -- tx -- */ + stage_inst = opdl_stage_add( + device->opdl[queue->opdl_id], + false, + false); /* First stage */ + port->deq_stage_inst = stage_inst; + + /* Add the port to the queue array of ports */ + queue->ports[queue->nb_ports] = port; + port->instance_id = queue->nb_ports; + queue->nb_ports++; + + if (queue->nb_ports > 1) { + PMD_DRV_LOG(ERR, "DEV_ID:[%02d] : " + "queue %u:, setup as SINGLE_LINK" + " but has more than one port linked", + opdl_pmd_dev_id(port->opdl), + queue->external_qid); + err = -EINVAL; + break; + } + + /* -- single instance rx for next opdl -- */ + uint8_t next_qid = + device->q_map_ex_to_in[queue->external_qid] + 1; + if (next_qid < RTE_EVENT_MAX_QUEUES_PER_DEV && + device->queue[next_qid].configured) { + + /* Remap the queue */ + queue = &device->queue[next_qid]; + + stage_inst = opdl_stage_add( + device->opdl[queue->opdl_id], + false, + true); + port->enq_stage_inst = stage_inst; + + /* Add the port to the queue array of ports */ + queue->ports[queue->nb_ports] = port; + port->instance_id = queue->nb_ports; + queue->nb_ports++; + if (queue->nb_ports > 1) { + PMD_DRV_LOG(ERR, "DEV_ID:[%02d] : " + "dummy queue %u: for " + "port %u, " + "SINGLE_LINK but has more " + "than one port linked", + opdl_pmd_dev_id(port->opdl), + next_qid, + port->id); + err = -EINVAL; + break; + } + /* Set this queue to initialized as it is never + * referenced by any ports + */ + queue->initialized = 1; + } + } + } + + /* Now that all ports are initialised we need to + * setup the last bit of stage md + */ + if (!err) { + for (i = 0; i < device->nb_ports; i++) { + struct opdl_port *port = &device->ports[i]; + struct opdl_queue *queue = + &device->queue[port->queue_id]; + + if (port->configured && + (port->queue_id != OPDL_INVALID_QID)) { + if (queue->nb_ports == 0) { + PMD_DRV_LOG(ERR, "DEV_ID:[%02d] : " + "queue:[%u] has no ports" + " 
linked to it", + opdl_pmd_dev_id(port->opdl), + port->id); + err = -EINVAL; + break; + } + + port->num_instance = queue->nb_ports; + port->initialized = 1; + queue->initialized = 1; + } else { + PMD_DRV_LOG(ERR, "DEV_ID:[%02d] : " + "Port:[%u] not configured invalid" + " queue configuration", + opdl_pmd_dev_id(port->opdl), + port->id); + err = -EINVAL; + break; + } + } + } + return err; +} + +int +initialise_queue_zero_ports(struct rte_eventdev *dev) +{ + int err = 0; + uint8_t mt_rx = 0; + struct opdl_stage *stage_inst = NULL; + struct opdl_queue *queue = NULL; + + struct opdl_evdev *device = opdl_pmd_priv(dev); + + /* Assign queue zero and figure out how many Q0 ports we have */ + uint32_t i; + for (i = 0; i < device->nb_ports; i++) { + struct opdl_port *port = &device->ports[i]; + if (port->queue_id == OPDL_INVALID_QID) { + port->queue_id = 0; + port->external_qid = OPDL_INVALID_QID; + port->p_type = OPDL_PURE_RX_PORT; + mt_rx++; + } + } + + /* Create the stage */ + stage_inst = opdl_stage_add(device->opdl[0], + (mt_rx > 1 ? true : false), + true); + if (stage_inst) { + + /* Assign the new created input stage to all relevant ports */ + for (i = 0; i < device->nb_ports; i++) { + struct opdl_port *port = &device->ports[i]; + if (port->queue_id == 0) { + queue = &device->queue[port->queue_id]; + port->enq_stage_inst = stage_inst; + port->deq_stage_inst = NULL; + port->configured = 1; + port->initialized = 1; + + queue->ports[queue->nb_ports] = port; + port->instance_id = queue->nb_ports; + queue->nb_ports++; + } + } + } else { + err = -1; + } + return err; +} + +int +assign_internal_queue_ids(struct rte_eventdev *dev) +{ + int err = 0; + struct opdl_evdev *device = opdl_pmd_priv(dev); + uint32_t i; + + for (i = 0; i < device->nb_ports; i++) { + struct opdl_port *port = &device->ports[i]; + if (port->external_qid != OPDL_INVALID_QID) { + port->queue_id = + device->q_map_ex_to_in[port->external_qid]; + + /* Now do the external_qid of the next queue */ + struct opdl_queue *queue = + &device->queue[port->queue_id]; + if (queue->q_pos == OPDL_Q_POS_END) + port->next_external_qid = + device->queue[port->queue_id + 2].external_qid; + else + port->next_external_qid = + device->queue[port->queue_id + 1].external_qid; + } + } + return err; +} diff --git a/src/spdk/dpdk/drivers/event/opdl/opdl_evdev_xstats.c b/src/spdk/dpdk/drivers/event/opdl/opdl_evdev_xstats.c new file mode 100644 index 000000000..27b3d8802 --- /dev/null +++ b/src/spdk/dpdk/drivers/event/opdl/opdl_evdev_xstats.c @@ -0,0 +1,179 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017 Intel Corporation + */ + +#include "opdl_evdev.h" +#include "opdl_log.h" + +static const char * const port_xstat_str[] = { + + "claim_pkts_requested", + "claim_pkts_granted", + "claim_non_empty", + "claim_empty", + "total_cycles", +}; + + +void +opdl_xstats_init(struct rte_eventdev *dev) +{ + uint32_t i, j; + + struct opdl_evdev *device = opdl_pmd_priv(dev); + + if (!device->do_validation) + return; + + for (i = 0; i < device->max_port_nb; i++) { + struct opdl_port *port = &device->ports[i]; + + for (j = 0; j < max_num_port_xstat; j++) { + uint32_t index = (i * max_num_port_xstat) + j; + + /* Name */ + snprintf(device->port_xstat[index].stat.name, + sizeof(device->port_xstat[index].stat.name), + "port_%02u_%s", i, port_xstat_str[j]); + + /* ID */ + device->port_xstat[index].id = index; + + /* Stats ptr */ + device->port_xstat[index].value = &port->port_stat[j]; + } + } +} + +int +opdl_xstats_uninit(struct rte_eventdev *dev) +{ + struct 
opdl_evdev *device = opdl_pmd_priv(dev); + + if (!device->do_validation) + return 0; + + memset(device->port_xstat, + 0, + sizeof(device->port_xstat)); + + return 0; +} + +int +opdl_xstats_get_names(const struct rte_eventdev *dev, + enum rte_event_dev_xstats_mode mode, + uint8_t queue_port_id, + struct rte_event_dev_xstats_name *xstats_names, + unsigned int *ids, unsigned int size) +{ + struct opdl_evdev *device = opdl_pmd_priv(dev); + + if (!device->do_validation) + return -ENOTSUP; + + if (mode == RTE_EVENT_DEV_XSTATS_DEVICE || + mode == RTE_EVENT_DEV_XSTATS_QUEUE) + return -EINVAL; + + if (queue_port_id >= device->max_port_nb) + return -EINVAL; + + if (size < max_num_port_xstat) + return max_num_port_xstat; + + uint32_t port_idx = queue_port_id * max_num_port_xstat; + + uint32_t j; + for (j = 0; j < max_num_port_xstat; j++) { + + strcpy(xstats_names[j].name, + device->port_xstat[j + port_idx].stat.name); + ids[j] = device->port_xstat[j + port_idx].id; + } + + return max_num_port_xstat; +} + +int +opdl_xstats_get(const struct rte_eventdev *dev, + enum rte_event_dev_xstats_mode mode, + uint8_t queue_port_id, + const unsigned int ids[], + uint64_t values[], unsigned int n) +{ + struct opdl_evdev *device = opdl_pmd_priv(dev); + + if (!device->do_validation) + return -ENOTSUP; + + if (mode == RTE_EVENT_DEV_XSTATS_DEVICE || + mode == RTE_EVENT_DEV_XSTATS_QUEUE) + return -EINVAL; + + if (queue_port_id >= device->max_port_nb) + return -EINVAL; + + if (n > max_num_port_xstat) + return -EINVAL; + + uint32_t p_start = queue_port_id * max_num_port_xstat; + uint32_t p_finish = p_start + max_num_port_xstat; + + uint32_t i; + for (i = 0; i < n; i++) { + if (ids[i] < p_start || ids[i] >= p_finish) + return -EINVAL; + + values[i] = *(device->port_xstat[ids[i]].value); + } + + return n; +} + +uint64_t +opdl_xstats_get_by_name(const struct rte_eventdev *dev, + const char *name, unsigned int *id) +{ + struct opdl_evdev *device = opdl_pmd_priv(dev); + + if (!device->do_validation) + return -ENOTSUP; + + uint32_t max_index = device->max_port_nb * max_num_port_xstat; + + uint32_t i; + for (i = 0; i < max_index; i++) { + + if (strncmp(name, + device->port_xstat[i].stat.name, + RTE_EVENT_DEV_XSTATS_NAME_SIZE) == 0) { + if (id != NULL) + *id = i; + if (device->port_xstat[i].value) + return *(device->port_xstat[i].value); + break; + } + } + return -EINVAL; +} + +int +opdl_xstats_reset(struct rte_eventdev *dev, + enum rte_event_dev_xstats_mode mode, + int16_t queue_port_id, const uint32_t ids[], + uint32_t nb_ids) +{ + struct opdl_evdev *device = opdl_pmd_priv(dev); + + if (!device->do_validation) + return -ENOTSUP; + + RTE_SET_USED(dev); + RTE_SET_USED(mode); + RTE_SET_USED(queue_port_id); + RTE_SET_USED(ids); + RTE_SET_USED(nb_ids); + + return -ENOTSUP; +} diff --git a/src/spdk/dpdk/drivers/event/opdl/opdl_log.h b/src/spdk/dpdk/drivers/event/opdl/opdl_log.h new file mode 100644 index 000000000..ae5221c1e --- /dev/null +++ b/src/spdk/dpdk/drivers/event/opdl/opdl_log.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017 Intel Corporation + */ + +#ifndef _OPDL_LOGS_H_ +#define _OPDL_LOGS_H_ + +#include <rte_log.h> + +extern int opdl_logtype_driver; + +#define PMD_DRV_LOG_RAW(level, fmt, args...) \ + rte_log(RTE_LOG_ ## level, opdl_logtype_driver, "%s(): " fmt, \ + __func__, ## args) + +#define PMD_DRV_LOG(level, fmt, args...) 
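
The xstats implemented above are only exposed when the device was created with do_validation=1, and their names follow the port_%02u_<stat> pattern built in opdl_xstats_init(). A sketch of reading one counter by name; the device id and port number are placeholders:

#include <inttypes.h>
#include <stdio.h>
#include <rte_eventdev.h>

static void
print_port0_granted(uint8_t dev_id)
{
	unsigned int id;
	uint64_t v = rte_event_dev_xstats_by_name_get(dev_id,
			"port_00_claim_pkts_granted", &id);

	printf("claim_pkts_granted=%" PRIu64 " (xstat id %u)\n", v, id);
}
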
\ + PMD_DRV_LOG_RAW(level, fmt "\n", ## args) + + + +#endif /* _OPDL_LOGS_H_ */ diff --git a/src/spdk/dpdk/drivers/event/opdl/opdl_ring.c b/src/spdk/dpdk/drivers/event/opdl/opdl_ring.c new file mode 100644 index 000000000..c8d19fef5 --- /dev/null +++ b/src/spdk/dpdk/drivers/event/opdl/opdl_ring.c @@ -0,0 +1,1271 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017 Intel Corporation + */ + +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> + +#include <rte_string_fns.h> +#include <rte_branch_prediction.h> +#include <rte_debug.h> +#include <rte_lcore.h> +#include <rte_log.h> +#include <rte_malloc.h> +#include <rte_memcpy.h> +#include <rte_memory.h> +#include <rte_memzone.h> +#include <rte_atomic.h> + +#include "opdl_ring.h" +#include "opdl_log.h" + +#define LIB_NAME "opdl_ring" + +#define OPDL_NAME_SIZE 64 + + +#define OPDL_EVENT_MASK (0x00000000000FFFFFULL) +#define OPDL_FLOWID_MASK (0xFFFFF) +#define OPDL_OPA_MASK (0xFF) +#define OPDL_OPA_OFFSET (0x38) + +int opdl_logtype_driver; + +/* Types of dependency between stages */ +enum dep_type { + DEP_NONE = 0, /* no dependency */ + DEP_DIRECT, /* stage has direct dependency */ + DEP_INDIRECT, /* in-direct dependency through other stage(s) */ + DEP_SELF, /* stage dependency on itself, used to detect loops */ +}; + +/* Shared section of stage state. + * Care is needed when accessing and the layout is important, especially to + * limit the adjacent cache-line HW prefetcher from impacting performance. + */ +struct shared_state { + /* Last known minimum sequence number of dependencies, used for multi + * thread operation + */ + uint32_t available_seq; + char _pad1[RTE_CACHE_LINE_SIZE * 3]; + uint32_t head; /* Head sequence number (for multi thread operation) */ + char _pad2[RTE_CACHE_LINE_SIZE * 3]; + struct opdl_stage *stage; /* back pointer */ + uint32_t tail; /* Tail sequence number */ + char _pad3[RTE_CACHE_LINE_SIZE * 2]; +} __rte_cache_aligned; + +/* A structure to keep track of "unfinished" claims. This is only used for + * stages that are threadsafe. Each lcore accesses its own instance of this + * structure to record the entries it has claimed. This allows one lcore to make + * multiple claims without being blocked by another. When disclaiming it moves + * forward the shared tail when the shared tail matches the tail value recorded + * here. + */ +struct claim_manager { + uint32_t num_to_disclaim; + uint32_t num_claimed; + uint32_t mgr_head; + uint32_t mgr_tail; + struct { + uint32_t head; + uint32_t tail; + } claims[OPDL_DISCLAIMS_PER_LCORE]; +} __rte_cache_aligned; + +/* Context for each stage of opdl_ring. + * Calculations on sequence numbers need to be done with other uint32_t values + * so that results are modulus 2^32, and not undefined. 
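
The OPDL_* masks above are applied to the raw 64-bit rte_event word in the atomic claim path later in this file: the low 20 bits carry the flow id, and the 8 bits at offset 0x38 (the rte_event impl_opaque field) carry the value the claim code compares against the stage's queue id. A standalone restatement of that extraction, duplicating the #defines only so the snippet compiles on its own:

#include <stdint.h>

#define OPDL_FLOWID_MASK (0xFFFFF)
#define OPDL_OPA_MASK    (0xFF)
#define OPDL_OPA_OFFSET  (0x38)

/* Decode the fields opdl_stage_claim_singlethread() looks at. */
static inline void
decode_event(uint64_t event, uint32_t *flow_id, uint32_t *opa_id)
{
	*flow_id = OPDL_FLOWID_MASK & event;
	*opa_id = OPDL_OPA_MASK & (event >> OPDL_OPA_OFFSET);
}
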
+ */ +struct opdl_stage { + struct opdl_ring *t; /* back pointer, set at init */ + uint32_t num_slots; /* Number of slots for entries, set at init */ + uint32_t index; /* ID for this stage, set at init */ + bool threadsafe; /* Set to 1 if this stage supports threadsafe use */ + /* Last known min seq number of dependencies for used for single thread + * operation + */ + uint32_t available_seq; + uint32_t head; /* Current head for single-thread operation */ + uint32_t nb_instance; /* Number of instances */ + uint32_t instance_id; /* ID of this stage instance */ + uint16_t num_claimed; /* Number of slots claimed */ + uint16_t num_event; /* Number of events */ + uint32_t seq; /* sequence number */ + uint32_t num_deps; /* Number of direct dependencies */ + /* Keep track of all dependencies, used during init only */ + enum dep_type *dep_tracking; + /* Direct dependencies of this stage */ + struct shared_state **deps; + /* Other stages read this! */ + struct shared_state shared __rte_cache_aligned; + /* For managing disclaims in multi-threaded processing stages */ + struct claim_manager pending_disclaims[RTE_MAX_LCORE] + __rte_cache_aligned; + uint32_t shadow_head; /* Shadow head for single-thread operation */ + uint32_t queue_id; /* ID of Queue which is assigned to this stage */ + uint32_t pos; /* Atomic scan position */ +} __rte_cache_aligned; + +/* Context for opdl_ring */ +struct opdl_ring { + char name[OPDL_NAME_SIZE]; /* OPDL queue instance name */ + int socket; /* NUMA socket that memory is allocated on */ + uint32_t num_slots; /* Number of slots for entries */ + uint32_t mask; /* Mask for sequence numbers (num_slots - 1) */ + uint32_t slot_size; /* Size of each slot in bytes */ + uint32_t num_stages; /* Number of stages that have been added */ + uint32_t max_num_stages; /* Max number of stages */ + /* Stages indexed by ID */ + struct opdl_stage *stages; + /* Memory for storing slot data */ + uint8_t slots[0] __rte_cache_aligned; +}; + + +/* Return input stage of a opdl_ring */ +static __rte_always_inline struct opdl_stage * +input_stage(const struct opdl_ring *t) +{ + return &t->stages[0]; +} + +/* Check if a stage is the input stage */ +static __rte_always_inline bool +is_input_stage(const struct opdl_stage *s) +{ + return s->index == 0; +} + +/* Get slot pointer from sequence number */ +static __rte_always_inline void * +get_slot(const struct opdl_ring *t, uint32_t n) +{ + return (void *)(uintptr_t)&t->slots[(n & t->mask) * t->slot_size]; +} + +/* Find how many entries are available for processing */ +static __rte_always_inline uint32_t +available(const struct opdl_stage *s) +{ + if (s->threadsafe == true) { + uint32_t n = __atomic_load_n(&s->shared.available_seq, + __ATOMIC_ACQUIRE) - + __atomic_load_n(&s->shared.head, + __ATOMIC_ACQUIRE); + + /* Return 0 if available_seq needs to be updated */ + return (n <= s->num_slots) ? n : 0; + } + + /* Single threaded */ + return s->available_seq - s->head; +} + +/* Read sequence number of dependencies and find minimum */ +static __rte_always_inline void +update_available_seq(struct opdl_stage *s) +{ + uint32_t i; + uint32_t this_tail = s->shared.tail; + uint32_t min_seq = __atomic_load_n(&s->deps[0]->tail, __ATOMIC_ACQUIRE); + /* Input stage sequence numbers are greater than the sequence numbers of + * its dependencies so an offset of t->num_slots is needed when + * calculating available slots and also the condition which is used to + * determine the dependencies minimum sequence number must be reverted. 
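
get_slot() and available() above rely on num_slots being a power of two (so that `& mask` implements the modulo) and on the modulo-2^32 sequence arithmetic described in the opdl_stage comment. A small standalone check of both properties, with an arbitrary slot count:

#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint32_t num_slots = 256;		/* must be a power of two */
	uint32_t mask = num_slots - 1;

	/* slot index wraps without a divide */
	assert(((UINT32_MAX - 1) & mask) == 254);

	/* sequence arithmetic survives the 2^32 wrap */
	uint32_t head = UINT32_MAX - 2;
	uint32_t available_seq = 5;		/* 8 slots ahead of head */
	assert(available_seq - head == 8);

	return 0;
}
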
+ */ + uint32_t wrap; + + if (is_input_stage(s)) { + wrap = s->num_slots; + for (i = 1; i < s->num_deps; i++) { + uint32_t seq = __atomic_load_n(&s->deps[i]->tail, + __ATOMIC_ACQUIRE); + if ((this_tail - seq) > (this_tail - min_seq)) + min_seq = seq; + } + } else { + wrap = 0; + for (i = 1; i < s->num_deps; i++) { + uint32_t seq = __atomic_load_n(&s->deps[i]->tail, + __ATOMIC_ACQUIRE); + if ((seq - this_tail) < (min_seq - this_tail)) + min_seq = seq; + } + } + + if (s->threadsafe == false) + s->available_seq = min_seq + wrap; + else + __atomic_store_n(&s->shared.available_seq, min_seq + wrap, + __ATOMIC_RELEASE); +} + +/* Wait until the number of available slots reaches number requested */ +static __rte_always_inline void +wait_for_available(struct opdl_stage *s, uint32_t n) +{ + while (available(s) < n) { + rte_pause(); + update_available_seq(s); + } +} + +/* Return number of slots to process based on number requested and mode */ +static __rte_always_inline uint32_t +num_to_process(struct opdl_stage *s, uint32_t n, bool block) +{ + /* Don't read tail sequences of dependencies if not needed */ + if (available(s) >= n) + return n; + + update_available_seq(s); + + if (block == false) { + uint32_t avail = available(s); + + if (avail == 0) { + rte_pause(); + return 0; + } + return (avail <= n) ? avail : n; + } + + if (unlikely(n > s->num_slots)) { + PMD_DRV_LOG(ERR, "%u entries is more than max (%u)", + n, s->num_slots); + return 0; /* Avoid infinite loop */ + } + /* blocking */ + wait_for_available(s, n); + return n; +} + +/* Copy entries in to slots with wrap-around */ +static __rte_always_inline void +copy_entries_in(struct opdl_ring *t, uint32_t start, const void *entries, + uint32_t num_entries) +{ + uint32_t slot_size = t->slot_size; + uint32_t slot_index = start & t->mask; + + if (slot_index + num_entries <= t->num_slots) { + rte_memcpy(get_slot(t, start), entries, + num_entries * slot_size); + } else { + uint32_t split = t->num_slots - slot_index; + + rte_memcpy(get_slot(t, start), entries, split * slot_size); + rte_memcpy(get_slot(t, 0), + RTE_PTR_ADD(entries, split * slot_size), + (num_entries - split) * slot_size); + } +} + +/* Copy entries out from slots with wrap-around */ +static __rte_always_inline void +copy_entries_out(struct opdl_ring *t, uint32_t start, void *entries, + uint32_t num_entries) +{ + uint32_t slot_size = t->slot_size; + uint32_t slot_index = start & t->mask; + + if (slot_index + num_entries <= t->num_slots) { + rte_memcpy(entries, get_slot(t, start), + num_entries * slot_size); + } else { + uint32_t split = t->num_slots - slot_index; + + rte_memcpy(entries, get_slot(t, start), split * slot_size); + rte_memcpy(RTE_PTR_ADD(entries, split * slot_size), + get_slot(t, 0), + (num_entries - split) * slot_size); + } +} + +/* Input function optimised for single thread */ +static __rte_always_inline uint32_t +opdl_ring_input_singlethread(struct opdl_ring *t, const void *entries, + uint32_t num_entries, bool block) +{ + struct opdl_stage *s = input_stage(t); + uint32_t head = s->head; + + num_entries = num_to_process(s, num_entries, block); + if (num_entries == 0) + return 0; + + copy_entries_in(t, head, entries, num_entries); + + s->head += num_entries; + __atomic_store_n(&s->shared.tail, s->head, __ATOMIC_RELEASE); + + return num_entries; +} + +/* Convert head and tail of claim_manager into valid index */ +static __rte_always_inline uint32_t +claim_mgr_index(uint32_t n) +{ + return n & (OPDL_DISCLAIMS_PER_LCORE - 1); +} + +/* Check if there are available slots in 
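
copy_entries_in() and copy_entries_out() above split the memcpy in two whenever a burst crosses the end of the slot array. A minimal standalone model of the same wrap-around copy; the slot layout parameters are placeholders:

#include <stdint.h>
#include <string.h>

static void
ring_copy_in(uint8_t *slots, uint32_t num_slots, uint32_t slot_size,
	     uint32_t start, const uint8_t *entries, uint32_t n)
{
	uint32_t idx = start & (num_slots - 1);	/* num_slots is a power of two */

	if (idx + n <= num_slots) {
		memcpy(&slots[idx * slot_size], entries, n * slot_size);
	} else {
		uint32_t split = num_slots - idx;

		memcpy(&slots[idx * slot_size], entries, split * slot_size);
		memcpy(&slots[0], entries + split * slot_size,
		       (n - split) * slot_size);
	}
}
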
claim_manager */ +static __rte_always_inline bool +claim_mgr_available(struct claim_manager *mgr) +{ + return (mgr->mgr_head < (mgr->mgr_tail + OPDL_DISCLAIMS_PER_LCORE)) ? + true : false; +} + +/* Record a new claim. Only use after first checking an entry is available */ +static __rte_always_inline void +claim_mgr_add(struct claim_manager *mgr, uint32_t tail, uint32_t head) +{ + if ((mgr->mgr_head != mgr->mgr_tail) && + (mgr->claims[claim_mgr_index(mgr->mgr_head - 1)].head == + tail)) { + /* Combine with previous claim */ + mgr->claims[claim_mgr_index(mgr->mgr_head - 1)].head = head; + } else { + mgr->claims[claim_mgr_index(mgr->mgr_head)].head = head; + mgr->claims[claim_mgr_index(mgr->mgr_head)].tail = tail; + mgr->mgr_head++; + } + + mgr->num_claimed += (head - tail); +} + +/* Read the oldest recorded claim */ +static __rte_always_inline bool +claim_mgr_read(struct claim_manager *mgr, uint32_t *tail, uint32_t *head) +{ + if (mgr->mgr_head == mgr->mgr_tail) + return false; + + *head = mgr->claims[claim_mgr_index(mgr->mgr_tail)].head; + *tail = mgr->claims[claim_mgr_index(mgr->mgr_tail)].tail; + return true; +} + +/* Remove the oldest recorded claim. Only use after first reading the entry */ +static __rte_always_inline void +claim_mgr_remove(struct claim_manager *mgr) +{ + mgr->num_claimed -= (mgr->claims[claim_mgr_index(mgr->mgr_tail)].head - + mgr->claims[claim_mgr_index(mgr->mgr_tail)].tail); + mgr->mgr_tail++; +} + +/* Update tail in the oldest claim. Only use after first reading the entry */ +static __rte_always_inline void +claim_mgr_move_tail(struct claim_manager *mgr, uint32_t num_entries) +{ + mgr->num_claimed -= num_entries; + mgr->claims[claim_mgr_index(mgr->mgr_tail)].tail += num_entries; +} + +static __rte_always_inline void +opdl_stage_disclaim_multithread_n(struct opdl_stage *s, + uint32_t num_entries, bool block) +{ + struct claim_manager *disclaims = &s->pending_disclaims[rte_lcore_id()]; + uint32_t head; + uint32_t tail; + + while (num_entries) { + bool ret = claim_mgr_read(disclaims, &tail, &head); + + if (ret == false) + break; /* nothing is claimed */ + /* There should be no race condition here. If shared.tail + * matches, no other core can update it until this one does. + */ + if (__atomic_load_n(&s->shared.tail, __ATOMIC_ACQUIRE) == + tail) { + if (num_entries >= (head - tail)) { + claim_mgr_remove(disclaims); + __atomic_store_n(&s->shared.tail, head, + __ATOMIC_RELEASE); + num_entries -= (head - tail); + } else { + claim_mgr_move_tail(disclaims, num_entries); + __atomic_store_n(&s->shared.tail, + num_entries + tail, + __ATOMIC_RELEASE); + num_entries = 0; + } + } else if (block == false) + break; /* blocked by other thread */ + /* Keep going until num_entries are disclaimed. */ + rte_pause(); + } + + disclaims->num_to_disclaim = num_entries; +} + +/* Move head atomically, returning number of entries available to process and + * the original value of head. For non-input stages, the claim is recorded + * so that the tail can be updated later by opdl_stage_disclaim(). 
+ */ +static __rte_always_inline void +move_head_atomically(struct opdl_stage *s, uint32_t *num_entries, + uint32_t *old_head, bool block, bool claim_func) +{ + uint32_t orig_num_entries = *num_entries; + uint32_t ret; + struct claim_manager *disclaims = &s->pending_disclaims[rte_lcore_id()]; + + /* Attempt to disclaim any outstanding claims */ + opdl_stage_disclaim_multithread_n(s, disclaims->num_to_disclaim, + false); + + *old_head = __atomic_load_n(&s->shared.head, __ATOMIC_ACQUIRE); + while (true) { + bool success; + /* If called by opdl_ring_input(), claim does not need to be + * recorded, as there will be no disclaim. + */ + if (claim_func) { + /* Check that the claim can be recorded */ + ret = claim_mgr_available(disclaims); + if (ret == false) { + /* exit out if claim can't be recorded */ + *num_entries = 0; + return; + } + } + + *num_entries = num_to_process(s, orig_num_entries, block); + if (*num_entries == 0) + return; + + success = __atomic_compare_exchange_n(&s->shared.head, old_head, + *old_head + *num_entries, + true, /* may fail spuriously */ + __ATOMIC_RELEASE, /* memory order on success */ + __ATOMIC_ACQUIRE); /* memory order on fail */ + if (likely(success)) + break; + rte_pause(); + } + + if (claim_func) + /* Store the claim record */ + claim_mgr_add(disclaims, *old_head, *old_head + *num_entries); +} + +/* Input function that supports multiple threads */ +static __rte_always_inline uint32_t +opdl_ring_input_multithread(struct opdl_ring *t, const void *entries, + uint32_t num_entries, bool block) +{ + struct opdl_stage *s = input_stage(t); + uint32_t old_head; + + move_head_atomically(s, &num_entries, &old_head, block, false); + if (num_entries == 0) + return 0; + + copy_entries_in(t, old_head, entries, num_entries); + + /* If another thread started inputting before this one, but hasn't + * finished, we need to wait for it to complete to update the tail. + */ + rte_wait_until_equal_32(&s->shared.tail, old_head, __ATOMIC_ACQUIRE); + + __atomic_store_n(&s->shared.tail, old_head + num_entries, + __ATOMIC_RELEASE); + + return num_entries; +} + +static __rte_always_inline uint32_t +opdl_first_entry_id(uint32_t start_seq, uint8_t nb_p_lcores, + uint8_t this_lcore) +{ + return ((nb_p_lcores <= 1) ? 
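
opdl_first_entry_id(), defined at this point, gives each instance of a multi-instance stage a starting offset such that instance i always ends up with the sequence numbers congruent to i modulo the instance count, so a burst is shared round-robin with no slot claimed twice. A worked restatement with placeholder values:

#include <stdint.h>
#include <stdio.h>

static uint32_t
first_entry_id(uint32_t start_seq, uint8_t nb_inst, uint8_t this_inst)
{
	return (nb_inst <= 1) ? 0 :
	       (nb_inst - (start_seq % nb_inst) + this_inst) % nb_inst;
}

int main(void)
{
	/* Three instances starting at sequence 7 get offsets 2, 0 and 1,
	 * i.e. they handle sequences 9, 7 and 8 respectively -- instance i
	 * always takes the sequences with seq % 3 == i.
	 */
	for (uint8_t i = 0; i < 3; i++)
		printf("instance %u starts at offset %u\n",
		       i, first_entry_id(7, 3, i));
	return 0;
}
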
0 : + (nb_p_lcores - (start_seq % nb_p_lcores) + this_lcore) % + nb_p_lcores); +} + +/* Claim slots to process, optimised for single-thread operation */ +static __rte_always_inline uint32_t +opdl_stage_claim_singlethread(struct opdl_stage *s, void *entries, + uint32_t num_entries, uint32_t *seq, bool block, bool atomic) +{ + uint32_t i = 0, j = 0, offset; + uint32_t opa_id = 0; + uint32_t flow_id = 0; + uint64_t event = 0; + void *get_slots; + struct rte_event *ev; + RTE_SET_USED(seq); + struct opdl_ring *t = s->t; + uint8_t *entries_offset = (uint8_t *)entries; + + if (!atomic) { + + offset = opdl_first_entry_id(s->seq, s->nb_instance, + s->instance_id); + + num_entries = s->nb_instance * num_entries; + + num_entries = num_to_process(s, num_entries, block); + + for (; offset < num_entries; offset += s->nb_instance) { + get_slots = get_slot(t, s->head + offset); + memcpy(entries_offset, get_slots, t->slot_size); + entries_offset += t->slot_size; + i++; + } + } else { + num_entries = num_to_process(s, num_entries, block); + + for (j = 0; j < num_entries; j++) { + ev = (struct rte_event *)get_slot(t, s->head+j); + + event = __atomic_load_n(&(ev->event), + __ATOMIC_ACQUIRE); + + opa_id = OPDL_OPA_MASK & (event >> OPDL_OPA_OFFSET); + flow_id = OPDL_FLOWID_MASK & event; + + if (opa_id >= s->queue_id) + continue; + + if ((flow_id % s->nb_instance) == s->instance_id) { + memcpy(entries_offset, ev, t->slot_size); + entries_offset += t->slot_size; + i++; + } + } + } + s->shadow_head = s->head; + s->head += num_entries; + s->num_claimed = num_entries; + s->num_event = i; + s->pos = 0; + + /* automatically disclaim entries if number of rte_events is zero */ + if (unlikely(i == 0)) + opdl_stage_disclaim(s, 0, false); + + return i; +} + +/* Thread-safe version of function to claim slots for processing */ +static __rte_always_inline uint32_t +opdl_stage_claim_multithread(struct opdl_stage *s, void *entries, + uint32_t num_entries, uint32_t *seq, bool block) +{ + uint32_t old_head; + struct opdl_ring *t = s->t; + uint32_t i = 0, offset; + uint8_t *entries_offset = (uint8_t *)entries; + + if (seq == NULL) { + PMD_DRV_LOG(ERR, "Invalid seq PTR"); + return 0; + } + offset = opdl_first_entry_id(*seq, s->nb_instance, s->instance_id); + num_entries = offset + (s->nb_instance * num_entries); + + move_head_atomically(s, &num_entries, &old_head, block, true); + + for (; offset < num_entries; offset += s->nb_instance) { + memcpy(entries_offset, get_slot(t, s->head + offset), + t->slot_size); + entries_offset += t->slot_size; + i++; + } + + *seq = old_head; + + return i; +} + +/* Claim and copy slot pointers, optimised for single-thread operation */ +static __rte_always_inline uint32_t +opdl_stage_claim_copy_singlethread(struct opdl_stage *s, void *entries, + uint32_t num_entries, uint32_t *seq, bool block) +{ + num_entries = num_to_process(s, num_entries, block); + if (num_entries == 0) + return 0; + copy_entries_out(s->t, s->head, entries, num_entries); + if (seq != NULL) + *seq = s->head; + s->head += num_entries; + return num_entries; +} + +/* Thread-safe version of function to claim and copy pointers to slots */ +static __rte_always_inline uint32_t +opdl_stage_claim_copy_multithread(struct opdl_stage *s, void *entries, + uint32_t num_entries, uint32_t *seq, bool block) +{ + uint32_t old_head; + + move_head_atomically(s, &num_entries, &old_head, block, true); + if (num_entries == 0) + return 0; + copy_entries_out(s->t, old_head, entries, num_entries); + if (seq != NULL) + *seq = old_head; + return num_entries; 
+} + +static __rte_always_inline void +opdl_stage_disclaim_singlethread_n(struct opdl_stage *s, + uint32_t num_entries) +{ + uint32_t old_tail = s->shared.tail; + + if (unlikely(num_entries > (s->head - old_tail))) { + PMD_DRV_LOG(WARNING, "Attempt to disclaim (%u) more than claimed (%u)", + num_entries, s->head - old_tail); + num_entries = s->head - old_tail; + } + __atomic_store_n(&s->shared.tail, num_entries + old_tail, + __ATOMIC_RELEASE); +} + +uint32_t +opdl_ring_input(struct opdl_ring *t, const void *entries, uint32_t num_entries, + bool block) +{ + if (input_stage(t)->threadsafe == false) + return opdl_ring_input_singlethread(t, entries, num_entries, + block); + else + return opdl_ring_input_multithread(t, entries, num_entries, + block); +} + +uint32_t +opdl_ring_copy_from_burst(struct opdl_ring *t, struct opdl_stage *s, + const void *entries, uint32_t num_entries, bool block) +{ + uint32_t head = s->head; + + num_entries = num_to_process(s, num_entries, block); + + if (num_entries == 0) + return 0; + + copy_entries_in(t, head, entries, num_entries); + + s->head += num_entries; + __atomic_store_n(&s->shared.tail, s->head, __ATOMIC_RELEASE); + + return num_entries; + +} + +uint32_t +opdl_ring_copy_to_burst(struct opdl_ring *t, struct opdl_stage *s, + void *entries, uint32_t num_entries, bool block) +{ + uint32_t head = s->head; + + num_entries = num_to_process(s, num_entries, block); + if (num_entries == 0) + return 0; + + copy_entries_out(t, head, entries, num_entries); + + s->head += num_entries; + __atomic_store_n(&s->shared.tail, s->head, __ATOMIC_RELEASE); + + return num_entries; +} + +uint32_t +opdl_stage_find_num_available(struct opdl_stage *s, uint32_t num_entries) +{ + /* return (num_to_process(s, num_entries, false)); */ + + if (available(s) >= num_entries) + return num_entries; + + update_available_seq(s); + + uint32_t avail = available(s); + + if (avail == 0) { + rte_pause(); + return 0; + } + return (avail <= num_entries) ? 
avail : num_entries; +} + +uint32_t +opdl_stage_claim(struct opdl_stage *s, void *entries, + uint32_t num_entries, uint32_t *seq, bool block, bool atomic) +{ + if (s->threadsafe == false) + return opdl_stage_claim_singlethread(s, entries, num_entries, + seq, block, atomic); + else + return opdl_stage_claim_multithread(s, entries, num_entries, + seq, block); +} + +uint32_t +opdl_stage_claim_copy(struct opdl_stage *s, void *entries, + uint32_t num_entries, uint32_t *seq, bool block) +{ + if (s->threadsafe == false) + return opdl_stage_claim_copy_singlethread(s, entries, + num_entries, seq, block); + else + return opdl_stage_claim_copy_multithread(s, entries, + num_entries, seq, block); +} + +void +opdl_stage_disclaim_n(struct opdl_stage *s, uint32_t num_entries, + bool block) +{ + + if (s->threadsafe == false) { + opdl_stage_disclaim_singlethread_n(s, s->num_claimed); + } else { + struct claim_manager *disclaims = + &s->pending_disclaims[rte_lcore_id()]; + + if (unlikely(num_entries > s->num_slots)) { + PMD_DRV_LOG(WARNING, "Attempt to disclaim (%u) more than claimed (%u)", + num_entries, disclaims->num_claimed); + num_entries = disclaims->num_claimed; + } + + num_entries = RTE_MIN(num_entries + disclaims->num_to_disclaim, + disclaims->num_claimed); + opdl_stage_disclaim_multithread_n(s, num_entries, block); + } +} + +int +opdl_stage_disclaim(struct opdl_stage *s, uint32_t num_entries, bool block) +{ + if (num_entries != s->num_event) { + rte_errno = EINVAL; + return 0; + } + if (s->threadsafe == false) { + __atomic_store_n(&s->shared.tail, s->head, __ATOMIC_RELEASE); + s->seq += s->num_claimed; + s->shadow_head = s->head; + s->num_claimed = 0; + } else { + struct claim_manager *disclaims = + &s->pending_disclaims[rte_lcore_id()]; + opdl_stage_disclaim_multithread_n(s, disclaims->num_claimed, + block); + } + return num_entries; +} + +uint32_t +opdl_ring_available(struct opdl_ring *t) +{ + return opdl_stage_available(&t->stages[0]); +} + +uint32_t +opdl_stage_available(struct opdl_stage *s) +{ + update_available_seq(s); + return available(s); +} + +void +opdl_ring_flush(struct opdl_ring *t) +{ + struct opdl_stage *s = input_stage(t); + + wait_for_available(s, s->num_slots); +} + +/******************** Non performance sensitive functions ********************/ + +/* Initial setup of a new stage's context */ +static int +init_stage(struct opdl_ring *t, struct opdl_stage *s, bool threadsafe, + bool is_input) +{ + uint32_t available = (is_input) ? 
t->num_slots : 0; + + s->t = t; + s->num_slots = t->num_slots; + s->index = t->num_stages; + s->threadsafe = threadsafe; + s->shared.stage = s; + + /* Alloc memory for deps */ + s->dep_tracking = rte_zmalloc_socket(LIB_NAME, + t->max_num_stages * sizeof(enum dep_type), + 0, t->socket); + if (s->dep_tracking == NULL) + return -ENOMEM; + + s->deps = rte_zmalloc_socket(LIB_NAME, + t->max_num_stages * sizeof(struct shared_state *), + 0, t->socket); + if (s->deps == NULL) { + rte_free(s->dep_tracking); + return -ENOMEM; + } + + s->dep_tracking[s->index] = DEP_SELF; + + if (threadsafe == true) + s->shared.available_seq = available; + else + s->available_seq = available; + + return 0; +} + +/* Add direct or indirect dependencies between stages */ +static int +add_dep(struct opdl_stage *dependent, const struct opdl_stage *dependency, + enum dep_type type) +{ + struct opdl_ring *t = dependent->t; + uint32_t i; + + /* Add new direct dependency */ + if ((type == DEP_DIRECT) && + (dependent->dep_tracking[dependency->index] == + DEP_NONE)) { + PMD_DRV_LOG(DEBUG, "%s:%u direct dependency on %u", + t->name, dependent->index, dependency->index); + dependent->dep_tracking[dependency->index] = DEP_DIRECT; + } + + /* Add new indirect dependency or change direct to indirect */ + if ((type == DEP_INDIRECT) && + ((dependent->dep_tracking[dependency->index] == + DEP_NONE) || + (dependent->dep_tracking[dependency->index] == + DEP_DIRECT))) { + PMD_DRV_LOG(DEBUG, "%s:%u indirect dependency on %u", + t->name, dependent->index, dependency->index); + dependent->dep_tracking[dependency->index] = DEP_INDIRECT; + } + + /* Shouldn't happen... */ + if ((dependent->dep_tracking[dependency->index] == DEP_SELF) && + (dependent != input_stage(t))) { + PMD_DRV_LOG(ERR, "Loop in dependency graph %s:%u", + t->name, dependent->index); + return -EINVAL; + } + + /* Keep going to dependencies of the dependency, until input stage */ + if (dependency != input_stage(t)) + for (i = 0; i < dependency->num_deps; i++) { + int ret = add_dep(dependent, dependency->deps[i]->stage, + DEP_INDIRECT); + + if (ret < 0) + return ret; + } + + /* Make list of sequence numbers for direct dependencies only */ + if (type == DEP_DIRECT) + for (i = 0, dependent->num_deps = 0; i < t->num_stages; i++) + if (dependent->dep_tracking[i] == DEP_DIRECT) { + if ((i == 0) && (dependent->num_deps > 1)) + rte_panic("%s:%u depends on > input", + t->name, + dependent->index); + dependent->deps[dependent->num_deps++] = + &t->stages[i].shared; + } + + return 0; +} + +struct opdl_ring * +opdl_ring_create(const char *name, uint32_t num_slots, uint32_t slot_size, + uint32_t max_num_stages, int socket) +{ + struct opdl_ring *t; + char mz_name[RTE_MEMZONE_NAMESIZE]; + int mz_flags = 0; + struct opdl_stage *st = NULL; + const struct rte_memzone *mz = NULL; + size_t alloc_size = RTE_CACHE_LINE_ROUNDUP(sizeof(*t) + + (num_slots * slot_size)); + + /* Compile time checking */ + RTE_BUILD_BUG_ON((sizeof(struct shared_state) & RTE_CACHE_LINE_MASK) != + 0); + RTE_BUILD_BUG_ON((offsetof(struct opdl_stage, shared) & + RTE_CACHE_LINE_MASK) != 0); + RTE_BUILD_BUG_ON((offsetof(struct opdl_ring, slots) & + RTE_CACHE_LINE_MASK) != 0); + RTE_BUILD_BUG_ON(!rte_is_power_of_2(OPDL_DISCLAIMS_PER_LCORE)); + + /* Parameter checking */ + if (name == NULL) { + PMD_DRV_LOG(ERR, "name param is NULL"); + return NULL; + } + if (!rte_is_power_of_2(num_slots)) { + PMD_DRV_LOG(ERR, "num_slots (%u) for %s is not power of 2", + num_slots, name); + return NULL; + } + + /* Alloc memory for stages */ + st = 
rte_zmalloc_socket(LIB_NAME, + max_num_stages * sizeof(struct opdl_stage), + RTE_CACHE_LINE_SIZE, socket); + if (st == NULL) + goto exit_fail; + + snprintf(mz_name, sizeof(mz_name), "%s%s", LIB_NAME, name); + + /* Alloc memory for memzone */ + mz = rte_memzone_reserve(mz_name, alloc_size, socket, mz_flags); + if (mz == NULL) + goto exit_fail; + + t = mz->addr; + + /* Initialise opdl_ring queue */ + memset(t, 0, sizeof(*t)); + strlcpy(t->name, name, sizeof(t->name)); + t->socket = socket; + t->num_slots = num_slots; + t->mask = num_slots - 1; + t->slot_size = slot_size; + t->max_num_stages = max_num_stages; + t->stages = st; + + PMD_DRV_LOG(DEBUG, "Created %s at %p (num_slots=%u,socket=%i,slot_size=%u)", + t->name, t, num_slots, socket, slot_size); + + return t; + +exit_fail: + PMD_DRV_LOG(ERR, "Cannot reserve memory"); + rte_free(st); + rte_memzone_free(mz); + + return NULL; +} + +void * +opdl_ring_get_slot(const struct opdl_ring *t, uint32_t index) +{ + return get_slot(t, index); +} + +bool +opdl_ring_cas_slot(struct opdl_stage *s, const struct rte_event *ev, + uint32_t index, bool atomic) +{ + uint32_t i = 0, offset; + struct opdl_ring *t = s->t; + struct rte_event *ev_orig = NULL; + bool ev_updated = false; + uint64_t ev_temp = 0; + uint64_t ev_update = 0; + + uint32_t opa_id = 0; + uint32_t flow_id = 0; + uint64_t event = 0; + + if (index > s->num_event) { + PMD_DRV_LOG(ERR, "index is overflow"); + return ev_updated; + } + + ev_temp = ev->event & OPDL_EVENT_MASK; + + if (!atomic) { + offset = opdl_first_entry_id(s->seq, s->nb_instance, + s->instance_id); + offset += index*s->nb_instance; + ev_orig = get_slot(t, s->shadow_head+offset); + if ((ev_orig->event&OPDL_EVENT_MASK) != ev_temp) { + ev_orig->event = ev->event; + ev_updated = true; + } + if (ev_orig->u64 != ev->u64) { + ev_orig->u64 = ev->u64; + ev_updated = true; + } + + } else { + for (i = s->pos; i < s->num_claimed; i++) { + ev_orig = (struct rte_event *) + get_slot(t, s->shadow_head+i); + + event = __atomic_load_n(&(ev_orig->event), + __ATOMIC_ACQUIRE); + + opa_id = OPDL_OPA_MASK & (event >> OPDL_OPA_OFFSET); + flow_id = OPDL_FLOWID_MASK & event; + + if (opa_id >= s->queue_id) + continue; + + if ((flow_id % s->nb_instance) == s->instance_id) { + ev_update = s->queue_id; + ev_update = (ev_update << OPDL_OPA_OFFSET) + | ev->event; + + s->pos = i + 1; + + if ((event & OPDL_EVENT_MASK) != + ev_temp) { + __atomic_store_n(&(ev_orig->event), + ev_update, + __ATOMIC_RELEASE); + ev_updated = true; + } + if (ev_orig->u64 != ev->u64) { + ev_orig->u64 = ev->u64; + ev_updated = true; + } + + break; + } + } + + } + + return ev_updated; +} + +int +opdl_ring_get_socket(const struct opdl_ring *t) +{ + return t->socket; +} + +uint32_t +opdl_ring_get_num_slots(const struct opdl_ring *t) +{ + return t->num_slots; +} + +const char * +opdl_ring_get_name(const struct opdl_ring *t) +{ + return t->name; +} + +/* Check dependency list is valid for a given opdl_ring */ +static int +check_deps(struct opdl_ring *t, struct opdl_stage *deps[], + uint32_t num_deps) +{ + unsigned int i; + + for (i = 0; i < num_deps; ++i) { + if (!deps[i]) { + PMD_DRV_LOG(ERR, "deps[%u] is NULL", i); + return -EINVAL; + } + if (t != deps[i]->t) { + PMD_DRV_LOG(ERR, "deps[%u] is in opdl_ring %s, not %s", + i, deps[i]->t->name, t->name); + return -EINVAL; + } + } + + return 0; +} + +struct opdl_stage * +opdl_stage_add(struct opdl_ring *t, bool threadsafe, bool is_input) +{ + struct opdl_stage *s; + + /* Parameter checking */ + if (!t) { + PMD_DRV_LOG(ERR, "opdl_ring is NULL"); + 
return NULL; + } + if (t->num_stages == t->max_num_stages) { + PMD_DRV_LOG(ERR, "%s has max number of stages (%u)", + t->name, t->max_num_stages); + return NULL; + } + + s = &t->stages[t->num_stages]; + + if (((uintptr_t)&s->shared & RTE_CACHE_LINE_MASK) != 0) + PMD_DRV_LOG(WARNING, "Tail seq num (%p) of %s stage not cache aligned", + &s->shared, t->name); + + if (init_stage(t, s, threadsafe, is_input) < 0) { + PMD_DRV_LOG(ERR, "Cannot reserve memory"); + return NULL; + } + t->num_stages++; + + return s; +} + +uint32_t +opdl_stage_deps_add(struct opdl_ring *t, struct opdl_stage *s, + uint32_t nb_instance, uint32_t instance_id, + struct opdl_stage *deps[], + uint32_t num_deps) +{ + uint32_t i; + int ret = 0; + + if ((num_deps > 0) && (!deps)) { + PMD_DRV_LOG(ERR, "%s stage has NULL dependencies", t->name); + return -1; + } + ret = check_deps(t, deps, num_deps); + if (ret < 0) + return ret; + + for (i = 0; i < num_deps; i++) { + ret = add_dep(s, deps[i], DEP_DIRECT); + if (ret < 0) + return ret; + } + + s->nb_instance = nb_instance; + s->instance_id = instance_id; + + return ret; +} + +struct opdl_stage * +opdl_ring_get_input_stage(const struct opdl_ring *t) +{ + return input_stage(t); +} + +int +opdl_stage_set_deps(struct opdl_stage *s, struct opdl_stage *deps[], + uint32_t num_deps) +{ + unsigned int i; + int ret; + + if ((num_deps == 0) || (!deps)) { + PMD_DRV_LOG(ERR, "cannot set NULL dependencies"); + return -EINVAL; + } + + ret = check_deps(s->t, deps, num_deps); + if (ret < 0) + return ret; + + /* Update deps */ + for (i = 0; i < num_deps; i++) + s->deps[i] = &deps[i]->shared; + s->num_deps = num_deps; + + return 0; +} + +struct opdl_ring * +opdl_stage_get_opdl_ring(const struct opdl_stage *s) +{ + return s->t; +} + +void +opdl_stage_set_queue_id(struct opdl_stage *s, + uint32_t queue_id) +{ + s->queue_id = queue_id; +} + +void +opdl_ring_dump(const struct opdl_ring *t, FILE *f) +{ + uint32_t i; + + if (t == NULL) { + fprintf(f, "NULL OPDL!\n"); + return; + } + fprintf(f, "OPDL \"%s\": num_slots=%u; mask=%#x; slot_size=%u; num_stages=%u; socket=%i\n", + t->name, t->num_slots, t->mask, t->slot_size, + t->num_stages, t->socket); + for (i = 0; i < t->num_stages; i++) { + uint32_t j; + const struct opdl_stage *s = &t->stages[i]; + + fprintf(f, " %s[%u]: threadsafe=%s; head=%u; available_seq=%u; tail=%u; deps=%u", + t->name, i, (s->threadsafe) ? "true" : "false", + (s->threadsafe) ? s->shared.head : s->head, + (s->threadsafe) ? s->shared.available_seq : + s->available_seq, + s->shared.tail, (s->num_deps > 0) ? 
+ s->deps[0]->stage->index : 0); + for (j = 1; j < s->num_deps; j++) + fprintf(f, ",%u", s->deps[j]->stage->index); + fprintf(f, "\n"); + } + fflush(f); +} + +void +opdl_ring_free(struct opdl_ring *t) +{ + uint32_t i; + const struct rte_memzone *mz; + char mz_name[RTE_MEMZONE_NAMESIZE]; + + if (t == NULL) { + PMD_DRV_LOG(DEBUG, "Freeing NULL OPDL Ring!"); + return; + } + + PMD_DRV_LOG(DEBUG, "Freeing %s opdl_ring at %p", t->name, t); + + for (i = 0; i < t->num_stages; ++i) { + rte_free(t->stages[i].deps); + rte_free(t->stages[i].dep_tracking); + } + + rte_free(t->stages); + + snprintf(mz_name, sizeof(mz_name), "%s%s", LIB_NAME, t->name); + mz = rte_memzone_lookup(mz_name); + if (rte_memzone_free(mz) != 0) + PMD_DRV_LOG(ERR, "Cannot free memzone for %s", t->name); +} + +/* search a opdl_ring from its name */ +struct opdl_ring * +opdl_ring_lookup(const char *name) +{ + const struct rte_memzone *mz; + char mz_name[RTE_MEMZONE_NAMESIZE]; + + snprintf(mz_name, sizeof(mz_name), "%s%s", LIB_NAME, name); + + mz = rte_memzone_lookup(mz_name); + if (mz == NULL) + return NULL; + + return mz->addr; +} + +void +opdl_ring_set_stage_threadsafe(struct opdl_stage *s, bool threadsafe) +{ + s->threadsafe = threadsafe; +} diff --git a/src/spdk/dpdk/drivers/event/opdl/opdl_ring.h b/src/spdk/dpdk/drivers/event/opdl/opdl_ring.h new file mode 100644 index 000000000..14ababe0b --- /dev/null +++ b/src/spdk/dpdk/drivers/event/opdl/opdl_ring.h @@ -0,0 +1,614 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017 Intel Corporation + */ + +#ifndef _OPDL_H_ +#define _OPDL_H_ + +/** + * @file + * The "opdl_ring" is a data structure that contains a fixed number of slots, + * with each slot having the same, but configurable, size. Entries are input + * into the opdl_ring by copying into available slots. Once in the opdl_ring, + * an entry is processed by a number of stages, with the ordering of stage + * processing controlled by making stages dependent on one or more other stages. + * An entry is not available for a stage to process until it has been processed + * by that stages dependencies. Entries are always made available for + * processing in the same order that they were input in to the opdl_ring. + * Inputting is considered as a stage that depends on all other stages, + * and is also a dependency of all stages. + * + * Inputting and processing in a stage can support multi-threading. Note that + * multi-thread processing can also be done by making stages co-operate e.g. two + * stages where one processes the even packets and the other processes odd + * packets. + * + * A opdl_ring can be used as the basis for pipeline based applications. Instead + * of each stage in a pipeline dequeuing from a ring, processing and enqueuing + * to another ring, it can process entries in-place on the ring. If stages do + * not depend on each other, they can run in parallel. + * + * The opdl_ring works with entries of configurable size, these could be + * pointers to mbufs, pointers to mbufs with application specific meta-data, + * tasks etc. + */ + +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> + +#include <rte_eventdev.h> +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef OPDL_DISCLAIMS_PER_LCORE +/** Multi-threaded processing allows one thread to process multiple batches in a + * stage, while another thread is processing a single large batch. This number + * controls how many non-contiguous batches one stage can process before being + * blocked by the other stage. 
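+ * For example, with the default value of 8 defined below, each lcore can
+ * hold at most 8 such non-contiguous claim records per stage; once that
+ * limit is reached, further claims return zero entries until one of the
+ * earlier batches has been disclaimed.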
+ */ +#define OPDL_DISCLAIMS_PER_LCORE 8 +#endif + +/** Opaque handle to a opdl_ring instance */ +struct opdl_ring; + +/** Opaque handle to a single stage in a opdl_ring */ +struct opdl_stage; + +/** + * Create a new instance of a opdl_ring. + * + * @param name + * String containing the name to give the new opdl_ring instance. + * @param num_slots + * How many slots the opdl_ring contains. Must be a power a 2! + * @param slot_size + * How many bytes in each slot. + * @param max_num_stages + * Maximum number of stages. + * @param socket + * The NUMA socket (or SOCKET_ID_ANY) to allocate the memory used for this + * opdl_ring instance. + * @param threadsafe + * Whether to support multiple threads inputting to the opdl_ring or not. + * Enabling this may have a negative impact on performance if only one thread + * will be inputting. + * + * @return + * A pointer to a new opdl_ring instance, or NULL on error. + */ +struct opdl_ring * +opdl_ring_create(const char *name, uint32_t num_slots, uint32_t slot_size, + uint32_t max_num_stages, int socket); + +/** + * Get pointer to individual slot in a opdl_ring. + * + * @param t + * The opdl_ring. + * @param index + * Index of slot. If greater than the number of slots it will be masked to be + * within correct range. + * + * @return + * A pointer to that slot. + */ +void * +opdl_ring_get_slot(const struct opdl_ring *t, uint32_t index); + +/** + * Get NUMA socket used by a opdl_ring. + * + * @param t + * The opdl_ring. + * + * @return + * NUMA socket. + */ +int +opdl_ring_get_socket(const struct opdl_ring *t); + +/** + * Get number of slots in a opdl_ring. + * + * @param t + * The opdl_ring. + * + * @return + * Number of slots. + */ +uint32_t +opdl_ring_get_num_slots(const struct opdl_ring *t); + +/** + * Get name of a opdl_ring. + * + * @param t + * The opdl_ring. + * + * @return + * Name string. + */ +const char * +opdl_ring_get_name(const struct opdl_ring *t); + +/** + * Adds a new processing stage to a specified opdl_ring instance. Adding a stage + * while there are entries in the opdl_ring being processed will cause undefined + * behaviour. + * + * @param t + * The opdl_ring to add the stage to. + * @param deps + * An array of pointers to other stages that this stage depends on. The other + * stages must be part of the same opdl_ring! Note that input is an implied + * dependency. This can be NULL if num_deps is 0. + * @param num_deps + * The size of the deps array. + * @param threadsafe + * Whether to support multiple threads processing this stage or not. + * Enabling this may have a negative impact on performance if only one thread + * will be processing this stage. + * @param is_input + * Indication to initialise the stage with all slots available or none + * + * @return + * A pointer to the new stage, or NULL on error. + */ +struct opdl_stage * +opdl_stage_add(struct opdl_ring *t, bool threadsafe, bool is_input); + +/** + * Returns the input stage of a opdl_ring to be used by other API functions. + * + * @param t + * The opdl_ring. + * + * @return + * A pointer to the input stage. + */ +struct opdl_stage * +opdl_ring_get_input_stage(const struct opdl_ring *t); + +/** + * Sets the dependencies for a stage (clears all the previous deps!). Changing + * dependencies while there are entries in the opdl_ring being processed will + * cause undefined behaviour. + * + * @param s + * The stage to set the dependencies for. + * @param deps + * An array of pointers to other stages that this stage will depends on. 
The + * other stages must be part of the same opdl_ring! + * @param num_deps + * The size of the deps array. This must be > 0. + * + * @return + * 0 on success, a negative value on error. + */ +int +opdl_stage_set_deps(struct opdl_stage *s, struct opdl_stage *deps[], + uint32_t num_deps); + +/** + * Returns the opdl_ring that a stage belongs to. + * + * @param s + * The stage + * + * @return + * A pointer to the opdl_ring that the stage belongs to. + */ +struct opdl_ring * +opdl_stage_get_opdl_ring(const struct opdl_stage *s); + +/** + * Inputs a new batch of entries into the opdl_ring. This function is only + * threadsafe (with the same opdl_ring parameter) if the threadsafe parameter of + * opdl_ring_create() was true. For performance reasons, this function does not + * check input parameters. + * + * @param t + * The opdl_ring to input entries in to. + * @param entries + * An array of entries that will be copied in to the opdl_ring. + * @param num_entries + * The size of the entries array. + * @param block + * If this is true, the function blocks until enough slots are available to + * input all the requested entries. If false, then the function inputs as + * many entries as currently possible. + * + * @return + * The number of entries successfully input. + */ +uint32_t +opdl_ring_input(struct opdl_ring *t, const void *entries, uint32_t num_entries, + bool block); + +/** + * Inputs a new batch of entries into a opdl stage. This function is only + * threadsafe (with the same opdl parameter) if the threadsafe parameter of + * opdl_create() was true. For performance reasons, this function does not + * check input parameters. + * + * @param t + * The opdl ring to input entries in to. + * @param s + * The stage to copy entries to. + * @param entries + * An array of entries that will be copied in to the opdl ring. + * @param num_entries + * The size of the entries array. + * @param block + * If this is true, the function blocks until enough slots are available to + * input all the requested entries. If false, then the function inputs as + * many entries as currently possible. + * + * @return + * The number of entries successfully input. + */ +uint32_t +opdl_ring_copy_from_burst(struct opdl_ring *t, struct opdl_stage *s, + const void *entries, uint32_t num_entries, bool block); + +/** + * Copy a batch of entries from the opdl ring. This function is only + * threadsafe (with the same opdl parameter) if the threadsafe parameter of + * opdl_create() was true. For performance reasons, this function does not + * check input parameters. + * + * @param t + * The opdl ring to copy entries from. + * @param s + * The stage to copy entries from. + * @param entries + * An array of entries that will be copied from the opdl ring. + * @param num_entries + * The size of the entries array. + * @param block + * If this is true, the function blocks until enough slots are available to + * input all the requested entries. If false, then the function inputs as + * many entries as currently possible. + * + * @return + * The number of entries successfully input. + */ +uint32_t +opdl_ring_copy_to_burst(struct opdl_ring *t, struct opdl_stage *s, + void *entries, uint32_t num_entries, bool block); + +/** + * Before processing a batch of entries, a stage must first claim them to get + * access. This function is threadsafe using same opdl_stage parameter if + * the stage was created with threadsafe set to true, otherwise it is only + * threadsafe with a different opdl_stage per thread. 
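+ * As an illustrative sketch, a single-threaded (non-threadsafe) stage might
+ * claim and later release a burst as follows; the burst size and variable
+ * names are examples only:
+ *
+ * @code
+ * struct rte_event evs[32];
+ * uint32_t seq;
+ * uint32_t n = opdl_stage_claim(s, evs, 32, &seq, false, false);
+ * // process evs[0..n-1], then release them:
+ * opdl_stage_disclaim(s, n, false);
+ * @endcode
+ *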
For performance + * reasons, this function does not check input parameters. + * + * @param s + * The opdl_ring stage to read entries in. + * @param entries + * An array of pointers to entries that will be filled in by this function. + * @param num_entries + * The number of entries to attempt to claim for processing (and the size of + * the entries array). + * @param seq + * If not NULL, this is set to the value of the internal stage sequence number + * associated with the first entry returned. + * @param block + * If this is true, the function blocks until num_entries slots are available + * to process. If false, then the function claims as many entries as + * currently possible. + * + * @param atomic + * if this is true, the function will return event according to event flow id + * @return + * The number of pointers to entries filled in to the entries array. + */ +uint32_t +opdl_stage_claim(struct opdl_stage *s, void *entries, + uint32_t num_entries, uint32_t *seq, bool block, bool atomic); + +uint32_t +opdl_stage_deps_add(struct opdl_ring *t, struct opdl_stage *s, + uint32_t nb_instance, uint32_t instance_id, + struct opdl_stage *deps[], uint32_t num_deps); + +/** + * A function to check how many entries are ready to be claimed. + * + * @param entries + * An array of pointers to entries. + * @param num_entries + * Number of entries in an array. + * @param arg + * An opaque pointer to data passed to the claim function. + * @param block + * When set to true, the function should wait until num_entries are ready to + * be processed. Otherwise it should return immediately. + * + * @return + * Number of entries ready to be claimed. + */ +typedef uint32_t (opdl_ring_check_entries_t)(void *entries[], + uint32_t num_entries, void *arg, bool block); + +/** + * Before processing a batch of entries, a stage must first claim them to get + * access. Each entry is checked by the passed check() function and depending + * on block value, it waits until num_entries are ready or returns immediately. + * This function is only threadsafe with a different opdl_stage per thread. + * + * @param s + * The opdl_ring stage to read entries in. + * @param entries + * An array of pointers to entries that will be filled in by this function. + * @param num_entries + * The number of entries to attempt to claim for processing (and the size of + * the entries array). + * @param seq + * If not NULL, this is set to the value of the internal stage sequence number + * associated with the first entry returned. + * @param block + * If this is true, the function blocks until num_entries ready slots are + * available to process. If false, then the function claims as many ready + * entries as currently possible. + * @param check + * Pointer to a function called to check entries. + * @param arg + * Opaque data passed to check() function. + * + * @return + * The number of pointers to ready entries filled in to the entries array. + */ +uint32_t +opdl_stage_claim_check(struct opdl_stage *s, void **entries, + uint32_t num_entries, uint32_t *seq, bool block, + opdl_ring_check_entries_t *check, void *arg); + +/** + * Before processing a batch of entries, a stage must first claim them to get + * access. This function is threadsafe using same opdl_stage parameter if + * the stage was created with threadsafe set to true, otherwise it is only + * threadsafe with a different opdl_stage per thread. + * + * The difference between this function and opdl_stage_claim() is that this + * function copies the entries from the opdl_ring. 
Note that any changes made to + * the copied entries will not be reflected back in to the entries in the + * opdl_ring, so this function probably only makes sense if the entries are + * pointers to other data. For performance reasons, this function does not check + * input parameters. + * + * @param s + * The opdl_ring stage to read entries in. + * @param entries + * An array of entries that will be filled in by this function. + * @param num_entries + * The number of entries to attempt to claim for processing (and the size of + * the entries array). + * @param seq + * If not NULL, this is set to the value of the internal stage sequence number + * associated with the first entry returned. + * @param block + * If this is true, the function blocks until num_entries slots are available + * to process. If false, then the function claims as many entries as + * currently possible. + * + * @return + * The number of entries copied in to the entries array. + */ +uint32_t +opdl_stage_claim_copy(struct opdl_stage *s, void *entries, + uint32_t num_entries, uint32_t *seq, bool block); + +/** + * This function must be called when a stage has finished its processing of + * entries, to make them available to any dependent stages. All entries that are + * claimed by the calling thread in the stage will be disclaimed. It is possible + * to claim multiple batches before disclaiming. For performance reasons, this + * function does not check input parameters. + * + * @param s + * The opdl_ring stage in which to disclaim all claimed entries. + * + * @param block + * Entries are always made available to a stage in the same order that they + * were input in the stage. If a stage is multithread safe, this may mean that + * full disclaiming of a batch of entries can not be considered complete until + * all earlier threads in the stage have disclaimed. If this parameter is true + * then the function blocks until all entries are fully disclaimed, otherwise + * it disclaims as many as currently possible, with non fully disclaimed + * batches stored until the next call to a claim or disclaim function for this + * stage on this thread. + * + * If a thread is not going to process any more entries in this stage, it + * *must* first call this function with this parameter set to true to ensure + * it does not block the entire opdl_ring. + * + * In a single threaded stage, this parameter has no effect. + */ +int +opdl_stage_disclaim(struct opdl_stage *s, uint32_t num_entries, + bool block); + +/** + * This function can be called when a stage has finished its processing of + * entries, to make them available to any dependent stages. The difference + * between this function and opdl_stage_disclaim() is that here only a + * portion of entries are disclaimed, not all of them. For performance reasons, + * this function does not check input parameters. + * + * @param s + * The opdl_ring stage in which to disclaim entries. + * + * @param num_entries + * The number of entries to disclaim. + * + * @param block + * Entries are always made available to a stage in the same order that they + * were input in the stage. If a stage is multithread safe, this may mean that + * full disclaiming of a batch of entries can not be considered complete until + * all earlier threads in the stage have disclaimed. If this parameter is true + * then the function blocks until the specified number of entries has been + * disclaimed (or there are no more entries to disclaim). 
Otherwise it + * disclaims as many claims as currently possible and an attempt to disclaim + * them is made the next time a claim or disclaim function for this stage on + * this thread is called. + * + * In a single threaded stage, this parameter has no effect. + */ +void +opdl_stage_disclaim_n(struct opdl_stage *s, uint32_t num_entries, + bool block); + +/** + * Check how many entries can be input. + * + * @param t + * The opdl_ring instance to check. + * + * @return + * The number of new entries currently allowed to be input. + */ +uint32_t +opdl_ring_available(struct opdl_ring *t); + +/** + * Check how many entries can be processed in a stage. + * + * @param s + * The stage to check. + * + * @return + * The number of entries currently available to be processed in this stage. + */ +uint32_t +opdl_stage_available(struct opdl_stage *s); + +/** + * Check how many entries are available to be processed. + * + * NOTE : DOES NOT CHANGE ANY STATE WITHIN THE STAGE + * + * @param s + * The stage to check. + * + * @param num_entries + * The number of entries to check for availability. + * + * @return + * The number of entries currently available to be processed in this stage. + */ +uint32_t +opdl_stage_find_num_available(struct opdl_stage *s, uint32_t num_entries); + +/** + * Create empty stage instance and return the pointer. + * + * @param t + * The pointer of opdl_ring. + * + * @param threadsafe + * enable multiple thread or not. + * @return + * The pointer of one empty stage instance. + */ +struct opdl_stage * +opdl_stage_create(struct opdl_ring *t, bool threadsafe); + + +/** + * Set the internal queue id for each stage instance. + * + * @param s + * The pointer of stage instance. + * + * @param queue_id + * The value of internal queue id. + */ +void +opdl_stage_set_queue_id(struct opdl_stage *s, + uint32_t queue_id); + +/** + * Prints information on opdl_ring instance and all its stages + * + * @param t + * The stage to print info on. + * @param f + * Where to print the info. + */ +void +opdl_ring_dump(const struct opdl_ring *t, FILE *f); + +/** + * Blocks until all entries in a opdl_ring have been processed by all stages. + * + * @param t + * The opdl_ring instance to flush. + */ +void +opdl_ring_flush(struct opdl_ring *t); + +/** + * Deallocates all resources used by a opdl_ring instance + * + * @param t + * The opdl_ring instance to free. + */ +void +opdl_ring_free(struct opdl_ring *t); + +/** + * Search for a opdl_ring by its name + * + * @param name + * The name of the opdl_ring. + * @return + * The pointer to the opdl_ring matching the name, or NULL if not found. + * + */ +struct opdl_ring * +opdl_ring_lookup(const char *name); + +/** + * Set a opdl_stage to threadsafe variable. + * + * @param s + * The opdl_stage. + * @param threadsafe + * Threadsafe value. + */ +void +opdl_ring_set_stage_threadsafe(struct opdl_stage *s, bool threadsafe); + + +/** + * Compare the event descriptor with original version in the ring. + * if key field event descriptor is changed by application, then + * update the slot in the ring otherwise do nothing with it. + * the key field is flow_id, priority, mbuf, impl_opaque + * + * @param s + * The opdl_stage. + * @param ev + * pointer of the event descriptor. + * @param index + * index of the event descriptor. + * @param atomic + * queue type associate with the stage. + * @return + * if the event key field is changed compare with previous record. 
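+ * (i.e. true when the slot in the ring was updated with the new event
+ * contents, false when it was left unchanged)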
+ */ + +bool +opdl_ring_cas_slot(struct opdl_stage *s, const struct rte_event *ev, + uint32_t index, bool atomic); + +#ifdef __cplusplus +} +#endif + +#endif /* _OPDL_H_ */ diff --git a/src/spdk/dpdk/drivers/event/opdl/opdl_test.c b/src/spdk/dpdk/drivers/event/opdl/opdl_test.c new file mode 100644 index 000000000..e7a32fbd3 --- /dev/null +++ b/src/spdk/dpdk/drivers/event/opdl/opdl_test.c @@ -0,0 +1,1054 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017 Intel Corporation + */ + +#include <stdio.h> +#include <string.h> +#include <stdint.h> +#include <errno.h> +#include <unistd.h> +#include <sys/queue.h> + +#include <rte_memory.h> +#include <rte_memzone.h> +#include <rte_launch.h> +#include <rte_eal.h> +#include <rte_per_lcore.h> +#include <rte_lcore.h> +#include <rte_debug.h> +#include <rte_ethdev.h> +#include <rte_cycles.h> +#include <rte_eventdev.h> +#include <rte_bus_vdev.h> +#include <rte_pause.h> + +#include "opdl_evdev.h" +#include "opdl_log.h" + + +#define MAX_PORTS 16 +#define MAX_QIDS 16 +#define NUM_PACKETS (1<<18) +#define NUM_EVENTS 256 +#define BURST_SIZE 32 + + + +static int evdev; + +struct test { + struct rte_mempool *mbuf_pool; + uint8_t port[MAX_PORTS]; + uint8_t qid[MAX_QIDS]; + int nb_qids; +}; + +static struct rte_mempool *eventdev_func_mempool; + +static __rte_always_inline struct rte_mbuf * +rte_gen_arp(int portid, struct rte_mempool *mp) +{ + /* + * len = 14 + 46 + * ARP, Request who-has 10.0.0.1 tell 10.0.0.2, length 46 + */ + static const uint8_t arp_request[] = { + /*0x0000:*/ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xec, 0xa8, + 0x6b, 0xfd, 0x02, 0x29, 0x08, 0x06, 0x00, 0x01, + /*0x0010:*/ 0x08, 0x00, 0x06, 0x04, 0x00, 0x01, 0xec, 0xa8, + 0x6b, 0xfd, 0x02, 0x29, 0x0a, 0x00, 0x00, 0x01, + /*0x0020:*/ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x00, + 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + /*0x0030:*/ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00 + }; + struct rte_mbuf *m; + int pkt_len = sizeof(arp_request) - 1; + + m = rte_pktmbuf_alloc(mp); + if (!m) + return 0; + + memcpy((void *)((uintptr_t)m->buf_addr + m->data_off), + arp_request, pkt_len); + rte_pktmbuf_pkt_len(m) = pkt_len; + rte_pktmbuf_data_len(m) = pkt_len; + + RTE_SET_USED(portid); + + return m; +} + +/* initialization and config */ +static __rte_always_inline int +init(struct test *t, int nb_queues, int nb_ports) +{ + struct rte_event_dev_config config = { + .nb_event_queues = nb_queues, + .nb_event_ports = nb_ports, + .nb_event_queue_flows = 1024, + .nb_events_limit = 4096, + .nb_event_port_dequeue_depth = 128, + .nb_event_port_enqueue_depth = 128, + }; + int ret; + + void *temp = t->mbuf_pool; /* save and restore mbuf pool */ + + memset(t, 0, sizeof(*t)); + t->mbuf_pool = temp; + + ret = rte_event_dev_configure(evdev, &config); + if (ret < 0) + PMD_DRV_LOG(ERR, "%d: Error configuring device\n", __LINE__); + return ret; +}; + +static __rte_always_inline int +create_ports(struct test *t, int num_ports) +{ + int i; + static const struct rte_event_port_conf conf = { + .new_event_threshold = 1024, + .dequeue_depth = 32, + .enqueue_depth = 32, + }; + if (num_ports > MAX_PORTS) + return -1; + + for (i = 0; i < num_ports; i++) { + if (rte_event_port_setup(evdev, i, &conf) < 0) { + PMD_DRV_LOG(ERR, "Error setting up port %d\n", i); + return -1; + } + t->port[i] = i; + } + + return 0; +}; + +static __rte_always_inline int +create_queues_type(struct test *t, int num_qids, enum queue_type flags) +{ + int i; + uint8_t type; + + switch (flags) { + case 
OPDL_Q_TYPE_ORDERED: + type = RTE_SCHED_TYPE_ORDERED; + break; + case OPDL_Q_TYPE_ATOMIC: + type = RTE_SCHED_TYPE_ATOMIC; + break; + default: + type = 0; + } + + /* Q creation */ + const struct rte_event_queue_conf conf = { + .event_queue_cfg = + (flags == OPDL_Q_TYPE_SINGLE_LINK ? + RTE_EVENT_QUEUE_CFG_SINGLE_LINK : 0), + .schedule_type = type, + .priority = RTE_EVENT_DEV_PRIORITY_NORMAL, + .nb_atomic_flows = 1024, + .nb_atomic_order_sequences = 1024, + }; + + for (i = t->nb_qids ; i < t->nb_qids + num_qids; i++) { + if (rte_event_queue_setup(evdev, i, &conf) < 0) { + PMD_DRV_LOG(ERR, "%d: error creating qid %d\n ", + __LINE__, i); + return -1; + } + t->qid[i] = i; + } + + t->nb_qids += num_qids; + + if (t->nb_qids > MAX_QIDS) + return -1; + + return 0; +} + + +/* destruction */ +static __rte_always_inline int +cleanup(struct test *t __rte_unused) +{ + rte_event_dev_stop(evdev); + rte_event_dev_close(evdev); + PMD_DRV_LOG(ERR, "clean up for test done\n"); + return 0; +}; + +static int +ordered_basic(struct test *t) +{ + const uint8_t rx_port = 0; + const uint8_t w1_port = 1; + const uint8_t w3_port = 3; + const uint8_t tx_port = 4; + int err; + uint32_t i; + uint32_t deq_pkts; + struct rte_mbuf *mbufs[3]; + + const uint32_t MAGIC_SEQN = 1234; + + /* Create instance with 5 ports */ + if (init(t, 2, tx_port+1) < 0 || + create_ports(t, tx_port+1) < 0 || + create_queues_type(t, 2, OPDL_Q_TYPE_ORDERED)) { + PMD_DRV_LOG(ERR, "%d: Error initializing device\n", __LINE__); + return -1; + } + + /* + * CQ mapping to QID + * We need three ports, all mapped to the same ordered qid0. Then we'll + * take a packet out to each port, re-enqueue in reverse order, + * then make sure the reordering has taken place properly when we + * dequeue from the tx_port. + * + * Simplified test setup diagram: + * + * rx_port w1_port + * \ / \ + * qid0 - w2_port - qid1 + * \ / \ + * w3_port tx_port + */ + /* CQ mapping to QID for LB ports (directed mapped on create) */ + for (i = w1_port; i <= w3_port; i++) { + err = rte_event_port_link(evdev, t->port[i], &t->qid[0], NULL, + 1); + if (err != 1) { + PMD_DRV_LOG(ERR, "%d: error mapping lb qid\n", + __LINE__); + cleanup(t); + return -1; + } + } + + err = rte_event_port_link(evdev, t->port[tx_port], &t->qid[1], NULL, + 1); + if (err != 1) { + PMD_DRV_LOG(ERR, "%d: error mapping TX qid\n", __LINE__); + cleanup(t); + return -1; + } + + if (rte_event_dev_start(evdev) < 0) { + PMD_DRV_LOG(ERR, "%d: Error with start call\n", __LINE__); + return -1; + } + /* Enqueue 3 packets to the rx port */ + for (i = 0; i < 3; i++) { + struct rte_event ev; + mbufs[i] = rte_gen_arp(0, t->mbuf_pool); + if (!mbufs[i]) { + PMD_DRV_LOG(ERR, "%d: gen of pkt failed\n", __LINE__); + return -1; + } + + ev.queue_id = t->qid[0]; + ev.op = RTE_EVENT_OP_NEW; + ev.mbuf = mbufs[i]; + mbufs[i]->seqn = MAGIC_SEQN + i; + + /* generate pkt and enqueue */ + err = rte_event_enqueue_burst(evdev, t->port[rx_port], &ev, 1); + if (err != 1) { + PMD_DRV_LOG(ERR, "%d: Failed to enqueue pkt %u, retval = %u\n", + __LINE__, i, err); + return -1; + } + } + + /* use extra slot to make logic in loops easier */ + struct rte_event deq_ev[w3_port + 1]; + + uint32_t seq = 0; + + /* Dequeue the 3 packets, one from each worker port */ + for (i = w1_port; i <= w3_port; i++) { + deq_pkts = rte_event_dequeue_burst(evdev, t->port[i], + &deq_ev[i], 1, 0); + if (deq_pkts != 1) { + PMD_DRV_LOG(ERR, "%d: Failed to deq\n", __LINE__); + rte_event_dev_dump(evdev, stdout); + return -1; + } + seq = deq_ev[i].mbuf->seqn - MAGIC_SEQN; + + if (seq 
!= (i-1)) { + PMD_DRV_LOG(ERR, " seq test failed ! eq is %d , " + "port number is %u\n", seq, i); + return -1; + } + } + + /* Enqueue each packet in reverse order, flushing after each one */ + for (i = w3_port; i >= w1_port; i--) { + + deq_ev[i].op = RTE_EVENT_OP_FORWARD; + deq_ev[i].queue_id = t->qid[1]; + err = rte_event_enqueue_burst(evdev, t->port[i], &deq_ev[i], 1); + if (err != 1) { + PMD_DRV_LOG(ERR, "%d: Failed to enqueue\n", __LINE__); + return -1; + } + } + + /* dequeue from the tx ports, we should get 3 packets */ + deq_pkts = rte_event_dequeue_burst(evdev, t->port[tx_port], deq_ev, + 3, 0); + + /* Check to see if we've got all 3 packets */ + if (deq_pkts != 3) { + PMD_DRV_LOG(ERR, "%d: expected 3 pkts at tx port got %d from port %d\n", + __LINE__, deq_pkts, tx_port); + rte_event_dev_dump(evdev, stdout); + return 1; + } + + /* Destroy the instance */ + cleanup(t); + + return 0; +} + + +static int +atomic_basic(struct test *t) +{ + const uint8_t rx_port = 0; + const uint8_t w1_port = 1; + const uint8_t w3_port = 3; + const uint8_t tx_port = 4; + int err; + int i; + uint32_t deq_pkts; + struct rte_mbuf *mbufs[3]; + const uint32_t MAGIC_SEQN = 1234; + + /* Create instance with 5 ports */ + if (init(t, 2, tx_port+1) < 0 || + create_ports(t, tx_port+1) < 0 || + create_queues_type(t, 2, OPDL_Q_TYPE_ATOMIC)) { + PMD_DRV_LOG(ERR, "%d: Error initializing device\n", __LINE__); + return -1; + } + + + /* + * CQ mapping to QID + * We need three ports, all mapped to the same ordered qid0. Then we'll + * take a packet out to each port, re-enqueue in reverse order, + * then make sure the reordering has taken place properly when we + * dequeue from the tx_port. + * + * Simplified test setup diagram: + * + * rx_port w1_port + * \ / \ + * qid0 - w2_port - qid1 + * \ / \ + * w3_port tx_port + */ + /* CQ mapping to QID for Atomic ports (directed mapped on create) */ + for (i = w1_port; i <= w3_port; i++) { + err = rte_event_port_link(evdev, t->port[i], &t->qid[0], NULL, + 1); + if (err != 1) { + PMD_DRV_LOG(ERR, "%d: error mapping lb qid\n", + __LINE__); + cleanup(t); + return -1; + } + } + + err = rte_event_port_link(evdev, t->port[tx_port], &t->qid[1], NULL, + 1); + if (err != 1) { + PMD_DRV_LOG(ERR, "%d: error mapping TX qid\n", __LINE__); + cleanup(t); + return -1; + } + + if (rte_event_dev_start(evdev) < 0) { + PMD_DRV_LOG(ERR, "%d: Error with start call\n", __LINE__); + return -1; + } + + /* Enqueue 3 packets to the rx port */ + for (i = 0; i < 3; i++) { + struct rte_event ev; + mbufs[i] = rte_gen_arp(0, t->mbuf_pool); + if (!mbufs[i]) { + PMD_DRV_LOG(ERR, "%d: gen of pkt failed\n", __LINE__); + return -1; + } + + ev.queue_id = t->qid[0]; + ev.op = RTE_EVENT_OP_NEW; + ev.flow_id = 1; + ev.mbuf = mbufs[i]; + mbufs[i]->seqn = MAGIC_SEQN + i; + + /* generate pkt and enqueue */ + err = rte_event_enqueue_burst(evdev, t->port[rx_port], &ev, 1); + if (err != 1) { + PMD_DRV_LOG(ERR, "%d: Failed to enqueue pkt %u, retval = %u\n", + __LINE__, i, err); + return -1; + } + } + + /* use extra slot to make logic in loops easier */ + struct rte_event deq_ev[w3_port + 1]; + + /* Dequeue the 3 packets, one from each worker port */ + for (i = w1_port; i <= w3_port; i++) { + + deq_pkts = rte_event_dequeue_burst(evdev, t->port[i], + deq_ev, 3, 0); + + if (t->port[i] != 2) { + if (deq_pkts != 0) { + PMD_DRV_LOG(ERR, "%d: deq none zero !\n", + __LINE__); + rte_event_dev_dump(evdev, stdout); + return -1; + } + } else { + + if (deq_pkts != 3) { + PMD_DRV_LOG(ERR, "%d: deq not eqal to 3 %u !\n", + __LINE__, deq_pkts); 
+ rte_event_dev_dump(evdev, stdout); + return -1; + } + + int j; + for (j = 0; j < 3; j++) { + deq_ev[j].op = RTE_EVENT_OP_FORWARD; + deq_ev[j].queue_id = t->qid[1]; + } + + err = rte_event_enqueue_burst(evdev, t->port[i], + deq_ev, 3); + + if (err != 3) { + PMD_DRV_LOG(ERR, "port %d: Failed to enqueue pkt %u, " + "retval = %u\n", + t->port[i], 3, err); + return -1; + } + + } + + } + + + /* dequeue from the tx ports, we should get 3 packets */ + deq_pkts = rte_event_dequeue_burst(evdev, t->port[tx_port], deq_ev, + 3, 0); + + /* Check to see if we've got all 3 packets */ + if (deq_pkts != 3) { + PMD_DRV_LOG(ERR, "%d: expected 3 pkts at tx port got %d from port %d\n", + __LINE__, deq_pkts, tx_port); + rte_event_dev_dump(evdev, stdout); + return 1; + } + + cleanup(t); + + return 0; +} +static __rte_always_inline int +check_qid_stats(uint32_t id[], int index) +{ + + if (index == 0) { + if (id[0] != 3 || id[1] != 3 + || id[2] != 3) + return -1; + } else if (index == 1) { + if (id[0] != 5 || id[1] != 5 + || id[2] != 2) + return -1; + } else if (index == 2) { + if (id[0] != 3 || id[1] != 1 + || id[2] != 1) + return -1; + } + + return 0; +} + + +static int +check_statistics(void) +{ + int num_ports = 3; /* Hard-coded for this app */ + int i; + + for (i = 0; i < num_ports; i++) { + int num_stats, num_stats_returned; + + num_stats = rte_event_dev_xstats_names_get(0, + RTE_EVENT_DEV_XSTATS_PORT, + i, + NULL, + NULL, + 0); + if (num_stats > 0) { + + uint32_t id[num_stats]; + struct rte_event_dev_xstats_name names[num_stats]; + uint64_t values[num_stats]; + + num_stats_returned = rte_event_dev_xstats_names_get(0, + RTE_EVENT_DEV_XSTATS_PORT, + i, + names, + id, + num_stats); + + if (num_stats == num_stats_returned) { + num_stats_returned = rte_event_dev_xstats_get(0, + RTE_EVENT_DEV_XSTATS_PORT, + i, + id, + values, + num_stats); + + if (num_stats == num_stats_returned) { + int err; + + err = check_qid_stats(id, i); + + if (err) + return err; + + } else { + return -1; + } + } else { + return -1; + } + } else { + return -1; + } + } + return 0; +} + +#define OLD_NUM_PACKETS 3 +#define NEW_NUM_PACKETS 2 +static int +single_link_w_stats(struct test *t) +{ + const uint8_t rx_port = 0; + const uint8_t w1_port = 1; + const uint8_t tx_port = 2; + int err; + int i; + uint32_t deq_pkts; + struct rte_mbuf *mbufs[3]; + RTE_SET_USED(mbufs); + + /* Create instance with 3 ports */ + if (init(t, 2, tx_port + 1) < 0 || + create_ports(t, 3) < 0 || /* 0,1,2 */ + create_queues_type(t, 1, OPDL_Q_TYPE_SINGLE_LINK) < 0 || + create_queues_type(t, 1, OPDL_Q_TYPE_ORDERED) < 0) { + PMD_DRV_LOG(ERR, "%d: Error initializing device\n", __LINE__); + return -1; + } + + + /* + * + * Simplified test setup diagram: + * + * rx_port(0) + * \ + * qid0 - w1_port(1) - qid1 + * \ + * tx_port(2) + */ + + err = rte_event_port_link(evdev, t->port[1], &t->qid[0], NULL, + 1); + if (err != 1) { + PMD_DRV_LOG(ERR, "%d: error linking port:[%u] to queue:[%u]\n", + __LINE__, + t->port[1], + t->qid[0]); + cleanup(t); + return -1; + } + + err = rte_event_port_link(evdev, t->port[2], &t->qid[1], NULL, + 1); + if (err != 1) { + PMD_DRV_LOG(ERR, "%d: error linking port:[%u] to queue:[%u]\n", + __LINE__, + t->port[2], + t->qid[1]); + cleanup(t); + return -1; + } + + if (rte_event_dev_start(evdev) != 0) { + PMD_DRV_LOG(ERR, "%d: failed to start device\n", __LINE__); + cleanup(t); + return -1; + } + + /* + * Enqueue 3 packets to the rx port + */ + for (i = 0; i < 3; i++) { + struct rte_event ev; + mbufs[i] = rte_gen_arp(0, t->mbuf_pool); + if (!mbufs[i]) { + 
PMD_DRV_LOG(ERR, "%d: gen of pkt failed\n", __LINE__); + return -1; + } + + ev.queue_id = t->qid[0]; + ev.op = RTE_EVENT_OP_NEW; + ev.mbuf = mbufs[i]; + mbufs[i]->seqn = 1234 + i; + + /* generate pkt and enqueue */ + err = rte_event_enqueue_burst(evdev, t->port[rx_port], &ev, 1); + if (err != 1) { + PMD_DRV_LOG(ERR, "%d: Failed to enqueue pkt %u, retval = %u\n", + __LINE__, + t->port[rx_port], + err); + return -1; + } + } + + /* Dequeue the 3 packets, from SINGLE_LINK worker port */ + struct rte_event deq_ev[3]; + + deq_pkts = rte_event_dequeue_burst(evdev, + t->port[w1_port], + deq_ev, 3, 0); + + if (deq_pkts != 3) { + PMD_DRV_LOG(ERR, "%d: deq not 3 !\n", __LINE__); + cleanup(t); + return -1; + } + + /* Just enqueue 2 onto new ring */ + for (i = 0; i < NEW_NUM_PACKETS; i++) + deq_ev[i].queue_id = t->qid[1]; + + deq_pkts = rte_event_enqueue_burst(evdev, + t->port[w1_port], + deq_ev, + NEW_NUM_PACKETS); + + if (deq_pkts != 2) { + PMD_DRV_LOG(ERR, "%d: enq not 2 but %u!\n", __LINE__, deq_pkts); + cleanup(t); + return -1; + } + + /* dequeue from the tx ports, we should get 2 packets */ + deq_pkts = rte_event_dequeue_burst(evdev, + t->port[tx_port], + deq_ev, + 3, + 0); + + /* Check to see if we've got all 2 packets */ + if (deq_pkts != 2) { + PMD_DRV_LOG(ERR, "%d: expected 2 pkts at tx port got %d from port %d\n", + __LINE__, deq_pkts, tx_port); + cleanup(t); + return -1; + } + + if (!check_statistics()) { + PMD_DRV_LOG(ERR, "xstats check failed"); + cleanup(t); + return -1; + } + + cleanup(t); + + return 0; +} + +static int +single_link(struct test *t) +{ + const uint8_t tx_port = 2; + int err; + struct rte_mbuf *mbufs[3]; + RTE_SET_USED(mbufs); + + /* Create instance with 5 ports */ + if (init(t, 2, tx_port+1) < 0 || + create_ports(t, 3) < 0 || /* 0,1,2 */ + create_queues_type(t, 1, OPDL_Q_TYPE_SINGLE_LINK) < 0 || + create_queues_type(t, 1, OPDL_Q_TYPE_ORDERED) < 0) { + PMD_DRV_LOG(ERR, "%d: Error initializing device\n", __LINE__); + return -1; + } + + + /* + * + * Simplified test setup diagram: + * + * rx_port(0) + * \ + * qid0 - w1_port(1) - qid1 + * \ + * tx_port(2) + */ + + err = rte_event_port_link(evdev, t->port[1], &t->qid[0], NULL, + 1); + if (err != 1) { + PMD_DRV_LOG(ERR, "%d: error mapping lb qid\n", __LINE__); + cleanup(t); + return -1; + } + + err = rte_event_port_link(evdev, t->port[2], &t->qid[0], NULL, + 1); + if (err != 1) { + PMD_DRV_LOG(ERR, "%d: error mapping lb qid\n", __LINE__); + cleanup(t); + return -1; + } + + if (rte_event_dev_start(evdev) == 0) { + PMD_DRV_LOG(ERR, "%d: start DIDN'T FAIL with more than 1 " + "SINGLE_LINK PORT\n", __LINE__); + cleanup(t); + return -1; + } + + cleanup(t); + + return 0; +} + + +static __rte_always_inline void +populate_event_burst(struct rte_event ev[], + uint8_t qid, + uint16_t num_events) +{ + uint16_t i; + for (i = 0; i < num_events; i++) { + ev[i].flow_id = 1; + ev[i].op = RTE_EVENT_OP_NEW; + ev[i].sched_type = RTE_SCHED_TYPE_ORDERED; + ev[i].queue_id = qid; + ev[i].event_type = RTE_EVENT_TYPE_ETHDEV; + ev[i].sub_event_type = 0; + ev[i].priority = RTE_EVENT_DEV_PRIORITY_NORMAL; + ev[i].mbuf = (struct rte_mbuf *)0xdead0000; + } +} + +#define NUM_QUEUES 3 +#define BATCH_SIZE 32 + +static int +qid_basic(struct test *t) +{ + int err = 0; + + uint8_t q_id = 0; + uint8_t p_id = 0; + + uint32_t num_events; + uint32_t i; + + struct rte_event ev[BATCH_SIZE]; + + /* Create instance with 4 ports */ + if (init(t, NUM_QUEUES, NUM_QUEUES+1) < 0 || + create_ports(t, NUM_QUEUES+1) < 0 || + create_queues_type(t, NUM_QUEUES, OPDL_Q_TYPE_ORDERED)) 
{ + PMD_DRV_LOG(ERR, "%d: Error initializing device\n", __LINE__); + return -1; + } + + for (i = 0; i < NUM_QUEUES; i++) { + int nb_linked; + q_id = i; + + nb_linked = rte_event_port_link(evdev, + i+1, /* port = q_id + 1*/ + &q_id, + NULL, + 1); + + if (nb_linked != 1) { + + PMD_DRV_LOG(ERR, "%s:%d: error mapping port:%u to queue:%u\n", + __FILE__, + __LINE__, + i + 1, + q_id); + + err = -1; + break; + } + + } + + + /* Try and link to the same port again */ + if (!err) { + uint8_t t_qid = 0; + if (rte_event_port_link(evdev, + 1, + &t_qid, + NULL, + 1) > 0) { + PMD_DRV_LOG(ERR, "%s:%d: Second call to port link on same port DID NOT fail\n", + __FILE__, + __LINE__); + err = -1; + } + + uint32_t test_num_events; + + if (!err) { + test_num_events = rte_event_dequeue_burst(evdev, + p_id, + ev, + BATCH_SIZE, + 0); + if (test_num_events != 0) { + PMD_DRV_LOG(ERR, "%s:%d: Error dequeuing 0 packets from port %u on stopped device\n", + __FILE__, + __LINE__, + p_id); + err = -1; + } + } + + if (!err) { + test_num_events = rte_event_enqueue_burst(evdev, + p_id, + ev, + BATCH_SIZE); + if (test_num_events != 0) { + PMD_DRV_LOG(ERR, "%s:%d: Error enqueuing 0 packets to port %u on stopped device\n", + __FILE__, + __LINE__, + p_id); + err = -1; + } + } + } + + + /* Start the devicea */ + if (!err) { + if (rte_event_dev_start(evdev) < 0) { + PMD_DRV_LOG(ERR, "%s:%d: Error with start call\n", + __FILE__, + __LINE__); + err = -1; + } + } + + + /* Check we can't do any more links now that device is started.*/ + if (!err) { + uint8_t t_qid = 0; + if (rte_event_port_link(evdev, + 1, + &t_qid, + NULL, + 1) > 0) { + PMD_DRV_LOG(ERR, "%s:%d: Call to port link on started device DID NOT fail\n", + __FILE__, + __LINE__); + err = -1; + } + } + + if (!err) { + + q_id = 0; + + populate_event_burst(ev, + q_id, + BATCH_SIZE); + + num_events = rte_event_enqueue_burst(evdev, + p_id, + ev, + BATCH_SIZE); + if (num_events != BATCH_SIZE) { + PMD_DRV_LOG(ERR, "%s:%d: Error enqueuing rx packets\n", + __FILE__, + __LINE__); + err = -1; + } + } + + if (!err) { + while (++p_id < NUM_QUEUES) { + + num_events = rte_event_dequeue_burst(evdev, + p_id, + ev, + BATCH_SIZE, + 0); + + if (num_events != BATCH_SIZE) { + PMD_DRV_LOG(ERR, "%s:%d: Error dequeuing packets from port %u\n", + __FILE__, + __LINE__, + p_id); + err = -1; + break; + } + + if (ev[0].queue_id != q_id) { + PMD_DRV_LOG(ERR, "%s:%d: Error event portid[%u] q_id:[%u] does not match expected:[%u]\n", + __FILE__, + __LINE__, + p_id, + ev[0].queue_id, + q_id); + err = -1; + break; + } + + populate_event_burst(ev, + ++q_id, + BATCH_SIZE); + + num_events = rte_event_enqueue_burst(evdev, + p_id, + ev, + BATCH_SIZE); + if (num_events != BATCH_SIZE) { + PMD_DRV_LOG(ERR, "%s:%d: Error enqueuing packets from port:%u to queue:%u\n", + __FILE__, + __LINE__, + p_id, + q_id); + err = -1; + break; + } + } + } + + if (!err) { + num_events = rte_event_dequeue_burst(evdev, + p_id, + ev, + BATCH_SIZE, + 0); + if (num_events != BATCH_SIZE) { + PMD_DRV_LOG(ERR, "%s:%d: Error dequeuing packets from tx port %u\n", + __FILE__, + __LINE__, + p_id); + err = -1; + } + } + + cleanup(t); + + return err; +} + + + +int +opdl_selftest(void) +{ + struct test *t = malloc(sizeof(struct test)); + int ret; + + const char *eventdev_name = "event_opdl0"; + + evdev = rte_event_dev_get_dev_id(eventdev_name); + + if (evdev < 0) { + PMD_DRV_LOG(ERR, "%d: Eventdev %s not found - creating.\n", + __LINE__, eventdev_name); + /* turn on stats by default */ + if (rte_vdev_init(eventdev_name, "do_validation=1") < 0) { + 
PMD_DRV_LOG(ERR, "Error creating eventdev\n"); + free(t); + return -1; + } + evdev = rte_event_dev_get_dev_id(eventdev_name); + if (evdev < 0) { + PMD_DRV_LOG(ERR, "Error finding newly created eventdev\n"); + free(t); + return -1; + } + } + + /* Only create mbuf pool once, reuse for each test run */ + if (!eventdev_func_mempool) { + eventdev_func_mempool = rte_pktmbuf_pool_create( + "EVENTDEV_SW_SA_MBUF_POOL", + (1<<12), /* 4k buffers */ + 32 /*MBUF_CACHE_SIZE*/, + 0, + 512, /* use very small mbufs */ + rte_socket_id()); + if (!eventdev_func_mempool) { + PMD_DRV_LOG(ERR, "ERROR creating mempool\n"); + free(t); + return -1; + } + } + t->mbuf_pool = eventdev_func_mempool; + + PMD_DRV_LOG(ERR, "*** Running Ordered Basic test...\n"); + ret = ordered_basic(t); + + PMD_DRV_LOG(ERR, "*** Running Atomic Basic test...\n"); + ret = atomic_basic(t); + + + PMD_DRV_LOG(ERR, "*** Running QID Basic test...\n"); + ret = qid_basic(t); + + PMD_DRV_LOG(ERR, "*** Running SINGLE LINK failure test...\n"); + ret = single_link(t); + + PMD_DRV_LOG(ERR, "*** Running SINGLE LINK w stats test...\n"); + ret = single_link_w_stats(t); + + /* + * Free test instance, free mempool + */ + rte_mempool_free(t->mbuf_pool); + free(t); + + if (ret != 0) + return ret; + return 0; + +} diff --git a/src/spdk/dpdk/drivers/event/opdl/rte_pmd_opdl_event_version.map b/src/spdk/dpdk/drivers/event/opdl/rte_pmd_opdl_event_version.map new file mode 100644 index 000000000..f9f17e4f6 --- /dev/null +++ b/src/spdk/dpdk/drivers/event/opdl/rte_pmd_opdl_event_version.map @@ -0,0 +1,3 @@ +DPDK_20.0 { + local: *; +}; diff --git a/src/spdk/dpdk/drivers/event/skeleton/Makefile b/src/spdk/dpdk/drivers/event/skeleton/Makefile new file mode 100644 index 000000000..dc85ad3c4 --- /dev/null +++ b/src/spdk/dpdk/drivers/event/skeleton/Makefile @@ -0,0 +1,24 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2016 Cavium, Inc +# + +include $(RTE_SDK)/mk/rte.vars.mk + +# +# library name +# +LIB = librte_pmd_skeleton_event.a + +CFLAGS += $(WERROR_FLAGS) +LDLIBS += -lrte_eal -lrte_eventdev +LDLIBS += -lrte_pci -lrte_bus_pci +LDLIBS += -lrte_bus_vdev + +EXPORT_MAP := rte_pmd_skeleton_event_version.map + +# +# all source are stored in SRCS-y +# +SRCS-$(CONFIG_RTE_LIBRTE_PMD_SKELETON_EVENTDEV) += skeleton_eventdev.c + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/src/spdk/dpdk/drivers/event/skeleton/meson.build b/src/spdk/dpdk/drivers/event/skeleton/meson.build new file mode 100644 index 000000000..acfe15653 --- /dev/null +++ b/src/spdk/dpdk/drivers/event/skeleton/meson.build @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2017 Intel Corporation + +sources = files('skeleton_eventdev.c') +deps += ['bus_pci', 'bus_vdev'] diff --git a/src/spdk/dpdk/drivers/event/skeleton/rte_pmd_skeleton_event_version.map b/src/spdk/dpdk/drivers/event/skeleton/rte_pmd_skeleton_event_version.map new file mode 100644 index 000000000..f9f17e4f6 --- /dev/null +++ b/src/spdk/dpdk/drivers/event/skeleton/rte_pmd_skeleton_event_version.map @@ -0,0 +1,3 @@ +DPDK_20.0 { + local: *; +}; diff --git a/src/spdk/dpdk/drivers/event/skeleton/skeleton_eventdev.c b/src/spdk/dpdk/drivers/event/skeleton/skeleton_eventdev.c new file mode 100644 index 000000000..c889220e0 --- /dev/null +++ b/src/spdk/dpdk/drivers/event/skeleton/skeleton_eventdev.c @@ -0,0 +1,477 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2016 Cavium, Inc + */ + +#include <assert.h> +#include <stdio.h> +#include <stdbool.h> +#include <errno.h> +#include <stdint.h> +#include <string.h> + 
+#include <rte_byteorder.h> +#include <rte_common.h> +#include <rte_debug.h> +#include <rte_dev.h> +#include <rte_eal.h> +#include <rte_log.h> +#include <rte_malloc.h> +#include <rte_memory.h> +#include <rte_lcore.h> +#include <rte_bus_vdev.h> + +#include "skeleton_eventdev.h" + +#define EVENTDEV_NAME_SKELETON_PMD event_skeleton +/**< Skeleton event device PMD name */ + +static uint16_t +skeleton_eventdev_enqueue(void *port, const struct rte_event *ev) +{ + struct skeleton_port *sp = port; + + RTE_SET_USED(sp); + RTE_SET_USED(ev); + RTE_SET_USED(port); + + return 0; +} + +static uint16_t +skeleton_eventdev_enqueue_burst(void *port, const struct rte_event ev[], + uint16_t nb_events) +{ + struct skeleton_port *sp = port; + + RTE_SET_USED(sp); + RTE_SET_USED(ev); + RTE_SET_USED(port); + RTE_SET_USED(nb_events); + + return 0; +} + +static uint16_t +skeleton_eventdev_dequeue(void *port, struct rte_event *ev, + uint64_t timeout_ticks) +{ + struct skeleton_port *sp = port; + + RTE_SET_USED(sp); + RTE_SET_USED(ev); + RTE_SET_USED(timeout_ticks); + + return 0; +} + +static uint16_t +skeleton_eventdev_dequeue_burst(void *port, struct rte_event ev[], + uint16_t nb_events, uint64_t timeout_ticks) +{ + struct skeleton_port *sp = port; + + RTE_SET_USED(sp); + RTE_SET_USED(ev); + RTE_SET_USED(nb_events); + RTE_SET_USED(timeout_ticks); + + return 0; +} + +static void +skeleton_eventdev_info_get(struct rte_eventdev *dev, + struct rte_event_dev_info *dev_info) +{ + struct skeleton_eventdev *skel = skeleton_pmd_priv(dev); + + PMD_DRV_FUNC_TRACE(); + + RTE_SET_USED(skel); + + dev_info->min_dequeue_timeout_ns = 1; + dev_info->max_dequeue_timeout_ns = 10000; + dev_info->dequeue_timeout_ns = 25; + dev_info->max_event_queues = 64; + dev_info->max_event_queue_flows = (1ULL << 20); + dev_info->max_event_queue_priority_levels = 8; + dev_info->max_event_priority_levels = 8; + dev_info->max_event_ports = 32; + dev_info->max_event_port_dequeue_depth = 16; + dev_info->max_event_port_enqueue_depth = 16; + dev_info->max_num_events = (1ULL << 20); + dev_info->event_dev_cap = RTE_EVENT_DEV_CAP_QUEUE_QOS | + RTE_EVENT_DEV_CAP_BURST_MODE | + RTE_EVENT_DEV_CAP_EVENT_QOS; +} + +static int +skeleton_eventdev_configure(const struct rte_eventdev *dev) +{ + struct rte_eventdev_data *data = dev->data; + struct rte_event_dev_config *conf = &data->dev_conf; + struct skeleton_eventdev *skel = skeleton_pmd_priv(dev); + + PMD_DRV_FUNC_TRACE(); + + RTE_SET_USED(conf); + RTE_SET_USED(skel); + + PMD_DRV_LOG(DEBUG, "Configured eventdev devid=%d", dev->data->dev_id); + return 0; +} + +static int +skeleton_eventdev_start(struct rte_eventdev *dev) +{ + struct skeleton_eventdev *skel = skeleton_pmd_priv(dev); + + PMD_DRV_FUNC_TRACE(); + + RTE_SET_USED(skel); + + return 0; +} + +static void +skeleton_eventdev_stop(struct rte_eventdev *dev) +{ + struct skeleton_eventdev *skel = skeleton_pmd_priv(dev); + + PMD_DRV_FUNC_TRACE(); + + RTE_SET_USED(skel); +} + +static int +skeleton_eventdev_close(struct rte_eventdev *dev) +{ + struct skeleton_eventdev *skel = skeleton_pmd_priv(dev); + + PMD_DRV_FUNC_TRACE(); + + RTE_SET_USED(skel); + + return 0; +} + +static void +skeleton_eventdev_queue_def_conf(struct rte_eventdev *dev, uint8_t queue_id, + struct rte_event_queue_conf *queue_conf) +{ + struct skeleton_eventdev *skel = skeleton_pmd_priv(dev); + + PMD_DRV_FUNC_TRACE(); + + RTE_SET_USED(skel); + RTE_SET_USED(queue_id); + + queue_conf->nb_atomic_flows = (1ULL << 20); + queue_conf->nb_atomic_order_sequences = (1ULL << 20); + queue_conf->event_queue_cfg = 
RTE_EVENT_QUEUE_CFG_ALL_TYPES; + queue_conf->priority = RTE_EVENT_DEV_PRIORITY_NORMAL; +} + +static void +skeleton_eventdev_queue_release(struct rte_eventdev *dev, uint8_t queue_id) +{ + PMD_DRV_FUNC_TRACE(); + + RTE_SET_USED(dev); + RTE_SET_USED(queue_id); +} + +static int +skeleton_eventdev_queue_setup(struct rte_eventdev *dev, uint8_t queue_id, + const struct rte_event_queue_conf *queue_conf) +{ + struct skeleton_eventdev *skel = skeleton_pmd_priv(dev); + + PMD_DRV_FUNC_TRACE(); + + RTE_SET_USED(skel); + RTE_SET_USED(queue_conf); + RTE_SET_USED(queue_id); + + return 0; +} + +static void +skeleton_eventdev_port_def_conf(struct rte_eventdev *dev, uint8_t port_id, + struct rte_event_port_conf *port_conf) +{ + struct skeleton_eventdev *skel = skeleton_pmd_priv(dev); + + PMD_DRV_FUNC_TRACE(); + + RTE_SET_USED(skel); + RTE_SET_USED(port_id); + + port_conf->new_event_threshold = 32 * 1024; + port_conf->dequeue_depth = 16; + port_conf->enqueue_depth = 16; + port_conf->disable_implicit_release = 0; +} + +static void +skeleton_eventdev_port_release(void *port) +{ + struct skeleton_port *sp = port; + PMD_DRV_FUNC_TRACE(); + + rte_free(sp); +} + +static int +skeleton_eventdev_port_setup(struct rte_eventdev *dev, uint8_t port_id, + const struct rte_event_port_conf *port_conf) +{ + struct skeleton_port *sp; + struct skeleton_eventdev *skel = skeleton_pmd_priv(dev); + + PMD_DRV_FUNC_TRACE(); + + RTE_SET_USED(skel); + RTE_SET_USED(port_conf); + + /* Free memory prior to re-allocation if needed */ + if (dev->data->ports[port_id] != NULL) { + PMD_DRV_LOG(DEBUG, "Freeing memory prior to re-allocation %d", + port_id); + skeleton_eventdev_port_release(dev->data->ports[port_id]); + dev->data->ports[port_id] = NULL; + } + + /* Allocate event port memory */ + sp = rte_zmalloc_socket("eventdev port", + sizeof(struct skeleton_port), RTE_CACHE_LINE_SIZE, + dev->data->socket_id); + if (sp == NULL) { + PMD_DRV_ERR("Failed to allocate sp port_id=%d", port_id); + return -ENOMEM; + } + + sp->port_id = port_id; + + PMD_DRV_LOG(DEBUG, "[%d] sp=%p", port_id, sp); + + dev->data->ports[port_id] = sp; + return 0; +} + +static int +skeleton_eventdev_port_link(struct rte_eventdev *dev, void *port, + const uint8_t queues[], const uint8_t priorities[], + uint16_t nb_links) +{ + struct skeleton_port *sp = port; + PMD_DRV_FUNC_TRACE(); + + RTE_SET_USED(dev); + RTE_SET_USED(sp); + RTE_SET_USED(queues); + RTE_SET_USED(priorities); + + /* Linked all the queues */ + return (int)nb_links; +} + +static int +skeleton_eventdev_port_unlink(struct rte_eventdev *dev, void *port, + uint8_t queues[], uint16_t nb_unlinks) +{ + struct skeleton_port *sp = port; + PMD_DRV_FUNC_TRACE(); + + RTE_SET_USED(dev); + RTE_SET_USED(sp); + RTE_SET_USED(queues); + + /* Unlinked all the queues */ + return (int)nb_unlinks; + +} + +static int +skeleton_eventdev_timeout_ticks(struct rte_eventdev *dev, uint64_t ns, + uint64_t *timeout_ticks) +{ + struct skeleton_eventdev *skel = skeleton_pmd_priv(dev); + uint32_t scale = 1; + + PMD_DRV_FUNC_TRACE(); + + RTE_SET_USED(skel); + *timeout_ticks = ns * scale; + + return 0; +} + +static void +skeleton_eventdev_dump(struct rte_eventdev *dev, FILE *f) +{ + struct skeleton_eventdev *skel = skeleton_pmd_priv(dev); + + PMD_DRV_FUNC_TRACE(); + + RTE_SET_USED(skel); + RTE_SET_USED(f); +} + + +/* Initialize and register event driver with DPDK Application */ +static struct rte_eventdev_ops skeleton_eventdev_ops = { + .dev_infos_get = skeleton_eventdev_info_get, + .dev_configure = skeleton_eventdev_configure, + .dev_start = 
skeleton_eventdev_start, + .dev_stop = skeleton_eventdev_stop, + .dev_close = skeleton_eventdev_close, + .queue_def_conf = skeleton_eventdev_queue_def_conf, + .queue_setup = skeleton_eventdev_queue_setup, + .queue_release = skeleton_eventdev_queue_release, + .port_def_conf = skeleton_eventdev_port_def_conf, + .port_setup = skeleton_eventdev_port_setup, + .port_release = skeleton_eventdev_port_release, + .port_link = skeleton_eventdev_port_link, + .port_unlink = skeleton_eventdev_port_unlink, + .timeout_ticks = skeleton_eventdev_timeout_ticks, + .dump = skeleton_eventdev_dump +}; + +static int +skeleton_eventdev_init(struct rte_eventdev *eventdev) +{ + struct rte_pci_device *pci_dev; + struct skeleton_eventdev *skel = skeleton_pmd_priv(eventdev); + int ret = 0; + + PMD_DRV_FUNC_TRACE(); + + eventdev->dev_ops = &skeleton_eventdev_ops; + eventdev->enqueue = skeleton_eventdev_enqueue; + eventdev->enqueue_burst = skeleton_eventdev_enqueue_burst; + eventdev->dequeue = skeleton_eventdev_dequeue; + eventdev->dequeue_burst = skeleton_eventdev_dequeue_burst; + + /* For secondary processes, the primary has done all the work */ + if (rte_eal_process_type() != RTE_PROC_PRIMARY) + return 0; + + pci_dev = RTE_DEV_TO_PCI(eventdev->dev); + + skel->reg_base = (uintptr_t)pci_dev->mem_resource[0].addr; + if (!skel->reg_base) { + PMD_DRV_ERR("Failed to map BAR0"); + ret = -ENODEV; + goto fail; + } + + skel->device_id = pci_dev->id.device_id; + skel->vendor_id = pci_dev->id.vendor_id; + skel->subsystem_device_id = pci_dev->id.subsystem_device_id; + skel->subsystem_vendor_id = pci_dev->id.subsystem_vendor_id; + + PMD_DRV_LOG(DEBUG, "pci device (%x:%x) %u:%u:%u:%u", + pci_dev->id.vendor_id, pci_dev->id.device_id, + pci_dev->addr.domain, pci_dev->addr.bus, + pci_dev->addr.devid, pci_dev->addr.function); + + PMD_DRV_LOG(INFO, "dev_id=%d socket_id=%d (%x:%x)", + eventdev->data->dev_id, eventdev->data->socket_id, + skel->vendor_id, skel->device_id); + +fail: + return ret; +} + +/* PCI based event device */ + +#define EVENTDEV_SKEL_VENDOR_ID 0x177d +#define EVENTDEV_SKEL_PRODUCT_ID 0x0001 + +static const struct rte_pci_id pci_id_skeleton_map[] = { + { + RTE_PCI_DEVICE(EVENTDEV_SKEL_VENDOR_ID, + EVENTDEV_SKEL_PRODUCT_ID) + }, + { + .vendor_id = 0, + }, +}; + +static int +event_skeleton_pci_probe(struct rte_pci_driver *pci_drv, + struct rte_pci_device *pci_dev) +{ + return rte_event_pmd_pci_probe(pci_drv, pci_dev, + sizeof(struct skeleton_eventdev), skeleton_eventdev_init); +} + +static int +event_skeleton_pci_remove(struct rte_pci_device *pci_dev) +{ + return rte_event_pmd_pci_remove(pci_dev, NULL); +} + +static struct rte_pci_driver pci_eventdev_skeleton_pmd = { + .id_table = pci_id_skeleton_map, + .drv_flags = RTE_PCI_DRV_NEED_MAPPING, + .probe = event_skeleton_pci_probe, + .remove = event_skeleton_pci_remove, +}; + +RTE_PMD_REGISTER_PCI(event_skeleton_pci, pci_eventdev_skeleton_pmd); +RTE_PMD_REGISTER_PCI_TABLE(event_skeleton_pci, pci_id_skeleton_map); + +/* VDEV based event device */ + +static int +skeleton_eventdev_create(const char *name, int socket_id) +{ + struct rte_eventdev *eventdev; + + eventdev = rte_event_pmd_vdev_init(name, + sizeof(struct skeleton_eventdev), socket_id); + if (eventdev == NULL) { + PMD_DRV_ERR("Failed to create eventdev vdev %s", name); + goto fail; + } + + eventdev->dev_ops = &skeleton_eventdev_ops; + eventdev->enqueue = skeleton_eventdev_enqueue; + eventdev->enqueue_burst = skeleton_eventdev_enqueue_burst; + eventdev->dequeue = skeleton_eventdev_dequeue; + eventdev->dequeue_burst = 
skeleton_eventdev_dequeue_burst; + + return 0; +fail: + return -EFAULT; +} + +static int +skeleton_eventdev_probe(struct rte_vdev_device *vdev) +{ + const char *name; + + name = rte_vdev_device_name(vdev); + RTE_LOG(INFO, PMD, "Initializing %s on NUMA node %d\n", name, + rte_socket_id()); + return skeleton_eventdev_create(name, rte_socket_id()); +} + +static int +skeleton_eventdev_remove(struct rte_vdev_device *vdev) +{ + const char *name; + + name = rte_vdev_device_name(vdev); + PMD_DRV_LOG(INFO, "Closing %s on NUMA node %d", name, rte_socket_id()); + + return rte_event_pmd_vdev_uninit(name); +} + +static struct rte_vdev_driver vdev_eventdev_skeleton_pmd = { + .probe = skeleton_eventdev_probe, + .remove = skeleton_eventdev_remove +}; + +RTE_PMD_REGISTER_VDEV(EVENTDEV_NAME_SKELETON_PMD, vdev_eventdev_skeleton_pmd); diff --git a/src/spdk/dpdk/drivers/event/skeleton/skeleton_eventdev.h b/src/spdk/dpdk/drivers/event/skeleton/skeleton_eventdev.h new file mode 100644 index 000000000..ba64b8aea --- /dev/null +++ b/src/spdk/dpdk/drivers/event/skeleton/skeleton_eventdev.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2016 Cavium, Inc + */ + +#ifndef __SKELETON_EVENTDEV_H__ +#define __SKELETON_EVENTDEV_H__ + +#include <rte_eventdev_pmd_pci.h> +#include <rte_eventdev_pmd_vdev.h> + +#ifdef RTE_LIBRTE_PMD_SKELETON_EVENTDEV_DEBUG +#define PMD_DRV_LOG(level, fmt, args...) \ + RTE_LOG(level, PMD, "%s(): " fmt "\n", __func__, ## args) +#define PMD_DRV_FUNC_TRACE() PMD_DRV_LOG(DEBUG, ">>") +#else +#define PMD_DRV_LOG(level, fmt, args...) do { } while (0) +#define PMD_DRV_FUNC_TRACE() do { } while (0) +#endif + +#define PMD_DRV_ERR(fmt, args...) \ + RTE_LOG(ERR, PMD, "%s(): " fmt "\n", __func__, ## args) + +struct skeleton_eventdev { + uintptr_t reg_base; + uint16_t device_id; + uint16_t vendor_id; + uint16_t subsystem_device_id; + uint16_t subsystem_vendor_id; +} __rte_cache_aligned; + +struct skeleton_port { + uint8_t port_id; +} __rte_cache_aligned; + +static inline struct skeleton_eventdev * +skeleton_pmd_priv(const struct rte_eventdev *eventdev) +{ + return eventdev->data->dev_private; +} + +#endif /* __SKELETON_EVENTDEV_H__ */ diff --git a/src/spdk/dpdk/drivers/event/sw/Makefile b/src/spdk/dpdk/drivers/event/sw/Makefile new file mode 100644 index 000000000..8ea5cceb8 --- /dev/null +++ b/src/spdk/dpdk/drivers/event/sw/Makefile @@ -0,0 +1,29 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2016-2017 Intel Corporation + +include $(RTE_SDK)/mk/rte.vars.mk + +# library name +LIB = librte_pmd_sw_event.a + +# build flags +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) +LDLIBS += -lrte_eal -lrte_eventdev -lrte_kvargs -lrte_ring +LDLIBS += -lrte_mempool -lrte_mbuf +LDLIBS += -lrte_bus_vdev + +# versioning export map +EXPORT_MAP := rte_pmd_sw_event_version.map + +# library source files +SRCS-$(CONFIG_RTE_LIBRTE_PMD_SW_EVENTDEV) += sw_evdev.c +SRCS-$(CONFIG_RTE_LIBRTE_PMD_SW_EVENTDEV) += sw_evdev_worker.c +SRCS-$(CONFIG_RTE_LIBRTE_PMD_SW_EVENTDEV) += sw_evdev_scheduler.c +SRCS-$(CONFIG_RTE_LIBRTE_PMD_SW_EVENTDEV) += sw_evdev_xstats.c +SRCS-$(CONFIG_RTE_LIBRTE_PMD_SW_EVENTDEV) += sw_evdev_selftest.c + +# export include files +SYMLINK-y-include += + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/src/spdk/dpdk/drivers/event/sw/event_ring.h b/src/spdk/dpdk/drivers/event/sw/event_ring.h new file mode 100644 index 000000000..02308728b --- /dev/null +++ b/src/spdk/dpdk/drivers/event/sw/event_ring.h @@ -0,0 +1,153 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2016-2017 
Intel Corporation + */ + +/* + * Generic ring structure for passing events from one core to another. + * + * Used by the software scheduler for the producer and consumer rings for + * each port, i.e. for passing events from worker cores to scheduler and + * vice-versa. Designed for single-producer, single-consumer use with two + * cores working on each ring. + */ + +#ifndef _EVENT_RING_ +#define _EVENT_RING_ + +#include <stdint.h> + +#include <rte_common.h> +#include <rte_memory.h> +#include <rte_malloc.h> + +#define QE_RING_NAMESIZE 32 + +struct qe_ring { + char name[QE_RING_NAMESIZE] __rte_cache_aligned; + uint32_t ring_size; /* size of memory block allocated to the ring */ + uint32_t mask; /* mask for read/write values == ring_size -1 */ + uint32_t size; /* actual usable space in the ring */ + volatile uint32_t write_idx __rte_cache_aligned; + volatile uint32_t read_idx __rte_cache_aligned; + + struct rte_event ring[0] __rte_cache_aligned; +}; + +static inline struct qe_ring * +qe_ring_create(const char *name, unsigned int size, unsigned int socket_id) +{ + struct qe_ring *retval; + const uint32_t ring_size = rte_align32pow2(size + 1); + size_t memsize = sizeof(*retval) + + (ring_size * sizeof(retval->ring[0])); + + retval = rte_zmalloc_socket(NULL, memsize, 0, socket_id); + if (retval == NULL) + goto end; + + snprintf(retval->name, sizeof(retval->name), "EVDEV_RG_%s", name); + retval->ring_size = ring_size; + retval->mask = ring_size - 1; + retval->size = size; +end: + return retval; +} + +static inline void +qe_ring_destroy(struct qe_ring *r) +{ + rte_free(r); +} + +static __rte_always_inline unsigned int +qe_ring_count(const struct qe_ring *r) +{ + return r->write_idx - r->read_idx; +} + +static __rte_always_inline unsigned int +qe_ring_free_count(const struct qe_ring *r) +{ + return r->size - qe_ring_count(r); +} + +static __rte_always_inline unsigned int +qe_ring_enqueue_burst(struct qe_ring *r, const struct rte_event *qes, + unsigned int nb_qes, uint16_t *free_count) +{ + const uint32_t size = r->size; + const uint32_t mask = r->mask; + const uint32_t read = r->read_idx; + uint32_t write = r->write_idx; + const uint32_t space = read + size - write; + uint32_t i; + + if (space < nb_qes) + nb_qes = space; + + for (i = 0; i < nb_qes; i++, write++) + r->ring[write & mask] = qes[i]; + + rte_smp_wmb(); + + if (nb_qes != 0) + r->write_idx = write; + + *free_count = space - nb_qes; + + return nb_qes; +} + +static __rte_always_inline unsigned int +qe_ring_enqueue_burst_with_ops(struct qe_ring *r, const struct rte_event *qes, + unsigned int nb_qes, uint8_t *ops) +{ + const uint32_t size = r->size; + const uint32_t mask = r->mask; + const uint32_t read = r->read_idx; + uint32_t write = r->write_idx; + const uint32_t space = read + size - write; + uint32_t i; + + if (space < nb_qes) + nb_qes = space; + + for (i = 0; i < nb_qes; i++, write++) { + r->ring[write & mask] = qes[i]; + r->ring[write & mask].op = ops[i]; + } + + rte_smp_wmb(); + + if (nb_qes != 0) + r->write_idx = write; + + return nb_qes; +} + +static __rte_always_inline unsigned int +qe_ring_dequeue_burst(struct qe_ring *r, struct rte_event *qes, + unsigned int nb_qes) +{ + const uint32_t mask = r->mask; + uint32_t read = r->read_idx; + const uint32_t write = r->write_idx; + const uint32_t items = write - read; + uint32_t i; + + if (items < nb_qes) + nb_qes = items; + + + for (i = 0; i < nb_qes; i++, read++) + qes[i] = r->ring[read & mask]; + + rte_smp_rmb(); + + if (nb_qes != 0) + r->read_idx += nb_qes; + + return nb_qes; +} + 
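+/*
+ * Illustrative usage sketch (comment only, not part of the driver): the
+ * producer core owns write_idx and the consumer core owns read_idx, so a
+ * single-producer/single-consumer pairing looks roughly as below; "my_ring"
+ * and the burst size are placeholders.
+ *
+ *	struct qe_ring *r = qe_ring_create("my_ring", 128, rte_socket_id());
+ *	struct rte_event ev[8];
+ *	uint16_t space;
+ *
+ *	// producer core
+ *	unsigned int enq = qe_ring_enqueue_burst(r, ev, 8, &space);
+ *
+ *	// consumer core
+ *	unsigned int deq = qe_ring_dequeue_burst(r, ev, 8);
+ *
+ *	qe_ring_destroy(r);
+ */
+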
+#endif diff --git a/src/spdk/dpdk/drivers/event/sw/iq_chunk.h b/src/spdk/dpdk/drivers/event/sw/iq_chunk.h new file mode 100644 index 000000000..31d013eab --- /dev/null +++ b/src/spdk/dpdk/drivers/event/sw/iq_chunk.h @@ -0,0 +1,196 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017 Intel Corporation + */ + +#ifndef _IQ_CHUNK_H_ +#define _IQ_CHUNK_H_ + +#include <stdint.h> +#include <stdbool.h> +#include <rte_eventdev.h> + +#define IQ_ROB_NAMESIZE 12 + +struct sw_queue_chunk { + struct rte_event events[SW_EVS_PER_Q_CHUNK]; + struct sw_queue_chunk *next; +} __rte_cache_aligned; + +static __rte_always_inline bool +iq_empty(struct sw_iq *iq) +{ + return (iq->count == 0); +} + +static __rte_always_inline uint16_t +iq_count(const struct sw_iq *iq) +{ + return iq->count; +} + +static __rte_always_inline struct sw_queue_chunk * +iq_alloc_chunk(struct sw_evdev *sw) +{ + struct sw_queue_chunk *chunk = sw->chunk_list_head; + sw->chunk_list_head = chunk->next; + chunk->next = NULL; + return chunk; +} + +static __rte_always_inline void +iq_free_chunk(struct sw_evdev *sw, struct sw_queue_chunk *chunk) +{ + chunk->next = sw->chunk_list_head; + sw->chunk_list_head = chunk; +} + +static __rte_always_inline void +iq_free_chunk_list(struct sw_evdev *sw, struct sw_queue_chunk *head) +{ + while (head) { + struct sw_queue_chunk *next; + next = head->next; + iq_free_chunk(sw, head); + head = next; + } +} + +static __rte_always_inline void +iq_init(struct sw_evdev *sw, struct sw_iq *iq) +{ + iq->head = iq_alloc_chunk(sw); + iq->tail = iq->head; + iq->head_idx = 0; + iq->tail_idx = 0; + iq->count = 0; +} + +static __rte_always_inline void +iq_enqueue(struct sw_evdev *sw, struct sw_iq *iq, const struct rte_event *ev) +{ + iq->tail->events[iq->tail_idx++] = *ev; + iq->count++; + + if (unlikely(iq->tail_idx == SW_EVS_PER_Q_CHUNK)) { + /* The number of chunks is defined in relation to the total + * number of inflight events and number of IQS such that + * allocation will always succeed. 
+ */ + struct sw_queue_chunk *chunk = iq_alloc_chunk(sw); + iq->tail->next = chunk; + iq->tail = chunk; + iq->tail_idx = 0; + } +} + +static __rte_always_inline void +iq_pop(struct sw_evdev *sw, struct sw_iq *iq) +{ + iq->head_idx++; + iq->count--; + + if (unlikely(iq->head_idx == SW_EVS_PER_Q_CHUNK)) { + struct sw_queue_chunk *next = iq->head->next; + iq_free_chunk(sw, iq->head); + iq->head = next; + iq->head_idx = 0; + } +} + +static __rte_always_inline const struct rte_event * +iq_peek(struct sw_iq *iq) +{ + return &iq->head->events[iq->head_idx]; +} + +/* Note: the caller must ensure that count <= iq_count() */ +static __rte_always_inline uint16_t +iq_dequeue_burst(struct sw_evdev *sw, + struct sw_iq *iq, + struct rte_event *ev, + uint16_t count) +{ + struct sw_queue_chunk *current; + uint16_t total, index; + + count = RTE_MIN(count, iq_count(iq)); + + current = iq->head; + index = iq->head_idx; + total = 0; + + /* Loop over the chunks */ + while (1) { + struct sw_queue_chunk *next; + for (; index < SW_EVS_PER_Q_CHUNK;) { + ev[total++] = current->events[index++]; + + if (unlikely(total == count)) + goto done; + } + + /* Move to the next chunk */ + next = current->next; + iq_free_chunk(sw, current); + current = next; + index = 0; + } + +done: + if (unlikely(index == SW_EVS_PER_Q_CHUNK)) { + struct sw_queue_chunk *next = current->next; + iq_free_chunk(sw, current); + iq->head = next; + iq->head_idx = 0; + } else { + iq->head = current; + iq->head_idx = index; + } + + iq->count -= total; + + return total; +} + +static __rte_always_inline void +iq_put_back(struct sw_evdev *sw, + struct sw_iq *iq, + struct rte_event *ev, + unsigned int count) +{ + /* Put back events that fit in the current head chunk. If necessary, + * put back events in a new head chunk. The caller must ensure that + * count <= SW_EVS_PER_Q_CHUNK, to ensure that at most one new head is + * needed. 
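+	 * Two cases are handled below: either the events fit into the unused
+	 * slots below head_idx, or a fresh chunk is linked in front of the
+	 * current head and the burst is split across the two chunks.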
+ */ + uint16_t avail_space = iq->head_idx; + + if (avail_space >= count) { + const uint16_t idx = avail_space - count; + uint16_t i; + + for (i = 0; i < count; i++) + iq->head->events[idx + i] = ev[i]; + + iq->head_idx = idx; + } else if (avail_space < count) { + const uint16_t remaining = count - avail_space; + struct sw_queue_chunk *new_head; + uint16_t i; + + for (i = 0; i < avail_space; i++) + iq->head->events[i] = ev[remaining + i]; + + new_head = iq_alloc_chunk(sw); + new_head->next = iq->head; + iq->head = new_head; + iq->head_idx = SW_EVS_PER_Q_CHUNK - remaining; + + for (i = 0; i < remaining; i++) + iq->head->events[iq->head_idx + i] = ev[i]; + } + + iq->count += count; +} + +#endif /* _IQ_CHUNK_H_ */ diff --git a/src/spdk/dpdk/drivers/event/sw/meson.build b/src/spdk/dpdk/drivers/event/sw/meson.build new file mode 100644 index 000000000..985012219 --- /dev/null +++ b/src/spdk/dpdk/drivers/event/sw/meson.build @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2017 Intel Corporation + +sources = files('sw_evdev_scheduler.c', + 'sw_evdev_selftest.c', + 'sw_evdev_worker.c', + 'sw_evdev_xstats.c', + 'sw_evdev.c' +) +deps += ['hash', 'bus_vdev'] diff --git a/src/spdk/dpdk/drivers/event/sw/rte_pmd_sw_event_version.map b/src/spdk/dpdk/drivers/event/sw/rte_pmd_sw_event_version.map new file mode 100644 index 000000000..f9f17e4f6 --- /dev/null +++ b/src/spdk/dpdk/drivers/event/sw/rte_pmd_sw_event_version.map @@ -0,0 +1,3 @@ +DPDK_20.0 { + local: *; +}; diff --git a/src/spdk/dpdk/drivers/event/sw/sw_evdev.c b/src/spdk/dpdk/drivers/event/sw/sw_evdev.c new file mode 100644 index 000000000..fb8e8bebb --- /dev/null +++ b/src/spdk/dpdk/drivers/event/sw/sw_evdev.c @@ -0,0 +1,1095 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2016-2017 Intel Corporation + */ + +#include <inttypes.h> +#include <string.h> + +#include <rte_bus_vdev.h> +#include <rte_kvargs.h> +#include <rte_ring.h> +#include <rte_errno.h> +#include <rte_event_ring.h> +#include <rte_service_component.h> + +#include "sw_evdev.h" +#include "iq_chunk.h" + +#define EVENTDEV_NAME_SW_PMD event_sw +#define NUMA_NODE_ARG "numa_node" +#define SCHED_QUANTA_ARG "sched_quanta" +#define CREDIT_QUANTA_ARG "credit_quanta" + +static void +sw_info_get(struct rte_eventdev *dev, struct rte_event_dev_info *info); + +static int +sw_port_link(struct rte_eventdev *dev, void *port, const uint8_t queues[], + const uint8_t priorities[], uint16_t num) +{ + struct sw_port *p = port; + struct sw_evdev *sw = sw_pmd_priv(dev); + int i; + + RTE_SET_USED(priorities); + for (i = 0; i < num; i++) { + struct sw_qid *q = &sw->qids[queues[i]]; + unsigned int j; + + /* check for qid map overflow */ + if (q->cq_num_mapped_cqs >= RTE_DIM(q->cq_map)) { + rte_errno = EDQUOT; + break; + } + + if (p->is_directed && p->num_qids_mapped > 0) { + rte_errno = EDQUOT; + break; + } + + for (j = 0; j < q->cq_num_mapped_cqs; j++) { + if (q->cq_map[j] == p->id) + break; + } + + /* check if port is already linked */ + if (j < q->cq_num_mapped_cqs) + continue; + + if (q->type == SW_SCHED_TYPE_DIRECT) { + /* check directed qids only map to one port */ + if (p->num_qids_mapped > 0) { + rte_errno = EDQUOT; + break; + } + /* check port only takes a directed flow */ + if (num > 1) { + rte_errno = EDQUOT; + break; + } + + p->is_directed = 1; + p->num_qids_mapped = 1; + } else if (q->type == RTE_SCHED_TYPE_ORDERED) { + p->num_ordered_qids++; + p->num_qids_mapped++; + } else if (q->type == RTE_SCHED_TYPE_ATOMIC || + q->type == RTE_SCHED_TYPE_PARALLEL) { + 
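+			/* Atomic and parallel queues need no extra per-port
+			 * state; only the mapping count is updated.
+			 */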
p->num_qids_mapped++; + } + + q->cq_map[q->cq_num_mapped_cqs] = p->id; + rte_smp_wmb(); + q->cq_num_mapped_cqs++; + } + return i; +} + +static int +sw_port_unlink(struct rte_eventdev *dev, void *port, uint8_t queues[], + uint16_t nb_unlinks) +{ + struct sw_port *p = port; + struct sw_evdev *sw = sw_pmd_priv(dev); + unsigned int i, j; + + int unlinked = 0; + for (i = 0; i < nb_unlinks; i++) { + struct sw_qid *q = &sw->qids[queues[i]]; + for (j = 0; j < q->cq_num_mapped_cqs; j++) { + if (q->cq_map[j] == p->id) { + q->cq_map[j] = + q->cq_map[q->cq_num_mapped_cqs - 1]; + rte_smp_wmb(); + q->cq_num_mapped_cqs--; + unlinked++; + + p->num_qids_mapped--; + + if (q->type == RTE_SCHED_TYPE_ORDERED) + p->num_ordered_qids--; + + continue; + } + } + } + + p->unlinks_in_progress += unlinked; + rte_smp_mb(); + + return unlinked; +} + +static int +sw_port_unlinks_in_progress(struct rte_eventdev *dev, void *port) +{ + RTE_SET_USED(dev); + struct sw_port *p = port; + return p->unlinks_in_progress; +} + +static int +sw_port_setup(struct rte_eventdev *dev, uint8_t port_id, + const struct rte_event_port_conf *conf) +{ + struct sw_evdev *sw = sw_pmd_priv(dev); + struct sw_port *p = &sw->ports[port_id]; + char buf[RTE_RING_NAMESIZE]; + unsigned int i; + + struct rte_event_dev_info info; + sw_info_get(dev, &info); + + /* detect re-configuring and return credits to instance if needed */ + if (p->initialized) { + /* taking credits from pool is done one quanta at a time, and + * credits may be spend (counted in p->inflights) or still + * available in the port (p->inflight_credits). We must return + * the sum to no leak credits + */ + int possible_inflights = p->inflight_credits + p->inflights; + rte_atomic32_sub(&sw->inflights, possible_inflights); + } + + *p = (struct sw_port){0}; /* zero entire structure */ + p->id = port_id; + p->sw = sw; + + /* check to see if rings exists - port_setup() can be called multiple + * times legally (assuming device is stopped). 
If ring exists, free it + * to so it gets re-created with the correct size + */ + snprintf(buf, sizeof(buf), "sw%d_p%u_%s", dev->data->dev_id, + port_id, "rx_worker_ring"); + struct rte_event_ring *existing_ring = rte_event_ring_lookup(buf); + if (existing_ring) + rte_event_ring_free(existing_ring); + + p->rx_worker_ring = rte_event_ring_create(buf, MAX_SW_PROD_Q_DEPTH, + dev->data->socket_id, + RING_F_SP_ENQ | RING_F_SC_DEQ | RING_F_EXACT_SZ); + if (p->rx_worker_ring == NULL) { + SW_LOG_ERR("Error creating RX worker ring for port %d\n", + port_id); + return -1; + } + + p->inflight_max = conf->new_event_threshold; + p->implicit_release = !conf->disable_implicit_release; + + /* check if ring exists, same as rx_worker above */ + snprintf(buf, sizeof(buf), "sw%d_p%u, %s", dev->data->dev_id, + port_id, "cq_worker_ring"); + existing_ring = rte_event_ring_lookup(buf); + if (existing_ring) + rte_event_ring_free(existing_ring); + + p->cq_worker_ring = rte_event_ring_create(buf, conf->dequeue_depth, + dev->data->socket_id, + RING_F_SP_ENQ | RING_F_SC_DEQ | RING_F_EXACT_SZ); + if (p->cq_worker_ring == NULL) { + rte_event_ring_free(p->rx_worker_ring); + SW_LOG_ERR("Error creating CQ worker ring for port %d\n", + port_id); + return -1; + } + sw->cq_ring_space[port_id] = conf->dequeue_depth; + + /* set hist list contents to empty */ + for (i = 0; i < SW_PORT_HIST_LIST; i++) { + p->hist_list[i].fid = -1; + p->hist_list[i].qid = -1; + } + dev->data->ports[port_id] = p; + + rte_smp_wmb(); + p->initialized = 1; + return 0; +} + +static void +sw_port_release(void *port) +{ + struct sw_port *p = (void *)port; + if (p == NULL) + return; + + rte_event_ring_free(p->rx_worker_ring); + rte_event_ring_free(p->cq_worker_ring); + memset(p, 0, sizeof(*p)); +} + +static int32_t +qid_init(struct sw_evdev *sw, unsigned int idx, int type, + const struct rte_event_queue_conf *queue_conf) +{ + unsigned int i; + int dev_id = sw->data->dev_id; + int socket_id = sw->data->socket_id; + char buf[IQ_ROB_NAMESIZE]; + struct sw_qid *qid = &sw->qids[idx]; + + /* Initialize the FID structures to no pinning (-1), and zero packets */ + const struct sw_fid_t fid = {.cq = -1, .pcount = 0}; + for (i = 0; i < RTE_DIM(qid->fids); i++) + qid->fids[i] = fid; + + qid->id = idx; + qid->type = type; + qid->priority = queue_conf->priority; + + if (qid->type == RTE_SCHED_TYPE_ORDERED) { + char ring_name[RTE_RING_NAMESIZE]; + uint32_t window_size; + + /* rte_ring and window_size_mask require require window_size to + * be a power-of-2. 
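+		 * rte_align32pow2() rounds nb_atomic_order_sequences up to the
+		 * next power of two; window_size - 1 is then kept as the mask
+		 * used to wrap reorder_buffer_index.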
+ */ + window_size = rte_align32pow2( + queue_conf->nb_atomic_order_sequences); + + qid->window_size = window_size - 1; + + if (!window_size) { + SW_LOG_DBG( + "invalid reorder_window_size for ordered queue\n" + ); + goto cleanup; + } + + snprintf(buf, sizeof(buf), "sw%d_iq_%d_rob", dev_id, i); + qid->reorder_buffer = rte_zmalloc_socket(buf, + window_size * sizeof(qid->reorder_buffer[0]), + 0, socket_id); + if (!qid->reorder_buffer) { + SW_LOG_DBG("reorder_buffer malloc failed\n"); + goto cleanup; + } + + memset(&qid->reorder_buffer[0], + 0, + window_size * sizeof(qid->reorder_buffer[0])); + + snprintf(ring_name, sizeof(ring_name), "sw%d_q%d_freelist", + dev_id, idx); + + /* lookup the ring, and if it already exists, free it */ + struct rte_ring *cleanup = rte_ring_lookup(ring_name); + if (cleanup) + rte_ring_free(cleanup); + + qid->reorder_buffer_freelist = rte_ring_create(ring_name, + window_size, + socket_id, + RING_F_SP_ENQ | RING_F_SC_DEQ); + if (!qid->reorder_buffer_freelist) { + SW_LOG_DBG("freelist ring create failed"); + goto cleanup; + } + + /* Populate the freelist with reorder buffer entries. Enqueue + * 'window_size - 1' entries because the rte_ring holds only + * that many. + */ + for (i = 0; i < window_size - 1; i++) { + if (rte_ring_sp_enqueue(qid->reorder_buffer_freelist, + &qid->reorder_buffer[i]) < 0) + goto cleanup; + } + + qid->reorder_buffer_index = 0; + qid->cq_next_tx = 0; + } + + qid->initialized = 1; + + return 0; + +cleanup: + if (qid->reorder_buffer) { + rte_free(qid->reorder_buffer); + qid->reorder_buffer = NULL; + } + + if (qid->reorder_buffer_freelist) { + rte_ring_free(qid->reorder_buffer_freelist); + qid->reorder_buffer_freelist = NULL; + } + + return -EINVAL; +} + +static void +sw_queue_release(struct rte_eventdev *dev, uint8_t id) +{ + struct sw_evdev *sw = sw_pmd_priv(dev); + struct sw_qid *qid = &sw->qids[id]; + + if (qid->type == RTE_SCHED_TYPE_ORDERED) { + rte_free(qid->reorder_buffer); + rte_ring_free(qid->reorder_buffer_freelist); + } + memset(qid, 0, sizeof(*qid)); +} + +static int +sw_queue_setup(struct rte_eventdev *dev, uint8_t queue_id, + const struct rte_event_queue_conf *conf) +{ + int type; + + type = conf->schedule_type; + + if (RTE_EVENT_QUEUE_CFG_SINGLE_LINK & conf->event_queue_cfg) { + type = SW_SCHED_TYPE_DIRECT; + } else if (RTE_EVENT_QUEUE_CFG_ALL_TYPES + & conf->event_queue_cfg) { + SW_LOG_ERR("QUEUE_CFG_ALL_TYPES not supported\n"); + return -ENOTSUP; + } + + struct sw_evdev *sw = sw_pmd_priv(dev); + + if (sw->qids[queue_id].initialized) + sw_queue_release(dev, queue_id); + + return qid_init(sw, queue_id, type, conf); +} + +static void +sw_init_qid_iqs(struct sw_evdev *sw) +{ + int i, j; + + /* Initialize the IQ memory of all configured qids */ + for (i = 0; i < RTE_EVENT_MAX_QUEUES_PER_DEV; i++) { + struct sw_qid *qid = &sw->qids[i]; + + if (!qid->initialized) + continue; + + for (j = 0; j < SW_IQS_MAX; j++) + iq_init(sw, &qid->iq[j]); + } +} + +static int +sw_qids_empty(struct sw_evdev *sw) +{ + unsigned int i, j; + + for (i = 0; i < sw->qid_count; i++) { + for (j = 0; j < SW_IQS_MAX; j++) { + if (iq_count(&sw->qids[i].iq[j])) + return 0; + } + } + + return 1; +} + +static int +sw_ports_empty(struct sw_evdev *sw) +{ + unsigned int i; + + for (i = 0; i < sw->port_count; i++) { + if ((rte_event_ring_count(sw->ports[i].rx_worker_ring)) || + rte_event_ring_count(sw->ports[i].cq_worker_ring)) + return 0; + } + + return 1; +} + +static void +sw_drain_ports(struct rte_eventdev *dev) +{ + struct sw_evdev *sw = sw_pmd_priv(dev); + 
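+	/* Pull every event still buffered in each port, hand it to the
+	 * application's dev_stop_flush callback (if one is registered) and
+	 * then enqueue it back as RTE_EVENT_OP_RELEASE so no credits are
+	 * leaked.
+	 */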
eventdev_stop_flush_t flush; + unsigned int i; + uint8_t dev_id; + void *arg; + + flush = dev->dev_ops->dev_stop_flush; + dev_id = dev->data->dev_id; + arg = dev->data->dev_stop_flush_arg; + + for (i = 0; i < sw->port_count; i++) { + struct rte_event ev; + + while (rte_event_dequeue_burst(dev_id, i, &ev, 1, 0)) { + if (flush) + flush(dev_id, ev, arg); + + ev.op = RTE_EVENT_OP_RELEASE; + rte_event_enqueue_burst(dev_id, i, &ev, 1); + } + } +} + +static void +sw_drain_queue(struct rte_eventdev *dev, struct sw_iq *iq) +{ + struct sw_evdev *sw = sw_pmd_priv(dev); + eventdev_stop_flush_t flush; + uint8_t dev_id; + void *arg; + + flush = dev->dev_ops->dev_stop_flush; + dev_id = dev->data->dev_id; + arg = dev->data->dev_stop_flush_arg; + + while (iq_count(iq) > 0) { + struct rte_event ev; + + iq_dequeue_burst(sw, iq, &ev, 1); + + if (flush) + flush(dev_id, ev, arg); + } +} + +static void +sw_drain_queues(struct rte_eventdev *dev) +{ + struct sw_evdev *sw = sw_pmd_priv(dev); + unsigned int i, j; + + for (i = 0; i < sw->qid_count; i++) { + for (j = 0; j < SW_IQS_MAX; j++) + sw_drain_queue(dev, &sw->qids[i].iq[j]); + } +} + +static void +sw_clean_qid_iqs(struct rte_eventdev *dev) +{ + struct sw_evdev *sw = sw_pmd_priv(dev); + int i, j; + + /* Release the IQ memory of all configured qids */ + for (i = 0; i < RTE_EVENT_MAX_QUEUES_PER_DEV; i++) { + struct sw_qid *qid = &sw->qids[i]; + + for (j = 0; j < SW_IQS_MAX; j++) { + if (!qid->iq[j].head) + continue; + iq_free_chunk_list(sw, qid->iq[j].head); + qid->iq[j].head = NULL; + } + } +} + +static void +sw_queue_def_conf(struct rte_eventdev *dev, uint8_t queue_id, + struct rte_event_queue_conf *conf) +{ + RTE_SET_USED(dev); + RTE_SET_USED(queue_id); + + static const struct rte_event_queue_conf default_conf = { + .nb_atomic_flows = 4096, + .nb_atomic_order_sequences = 1, + .schedule_type = RTE_SCHED_TYPE_ATOMIC, + .priority = RTE_EVENT_DEV_PRIORITY_NORMAL, + }; + + *conf = default_conf; +} + +static void +sw_port_def_conf(struct rte_eventdev *dev, uint8_t port_id, + struct rte_event_port_conf *port_conf) +{ + RTE_SET_USED(dev); + RTE_SET_USED(port_id); + + port_conf->new_event_threshold = 1024; + port_conf->dequeue_depth = 16; + port_conf->enqueue_depth = 16; + port_conf->disable_implicit_release = 0; +} + +static int +sw_dev_configure(const struct rte_eventdev *dev) +{ + struct sw_evdev *sw = sw_pmd_priv(dev); + const struct rte_eventdev_data *data = dev->data; + const struct rte_event_dev_config *conf = &data->dev_conf; + int num_chunks, i; + + sw->qid_count = conf->nb_event_queues; + sw->port_count = conf->nb_event_ports; + sw->nb_events_limit = conf->nb_events_limit; + rte_atomic32_set(&sw->inflights, 0); + + /* Number of chunks sized for worst-case spread of events across IQs */ + num_chunks = ((SW_INFLIGHT_EVENTS_TOTAL/SW_EVS_PER_Q_CHUNK)+1) + + sw->qid_count*SW_IQS_MAX*2; + + /* If this is a reconfiguration, free the previous IQ allocation. All + * IQ chunk references were cleaned out of the QIDs in sw_stop(), and + * will be reinitialized in sw_start(). 
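+	 * Freeing the old pool here is therefore safe: no QID still holds a
+	 * pointer into it, and the new pool is carved into chunks just below.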
+ */ + if (sw->chunks) + rte_free(sw->chunks); + + sw->chunks = rte_malloc_socket(NULL, + sizeof(struct sw_queue_chunk) * + num_chunks, + 0, + sw->data->socket_id); + if (!sw->chunks) + return -ENOMEM; + + sw->chunk_list_head = NULL; + for (i = 0; i < num_chunks; i++) + iq_free_chunk(sw, &sw->chunks[i]); + + if (conf->event_dev_cfg & RTE_EVENT_DEV_CFG_PER_DEQUEUE_TIMEOUT) + return -ENOTSUP; + + return 0; +} + +struct rte_eth_dev; + +static int +sw_eth_rx_adapter_caps_get(const struct rte_eventdev *dev, + const struct rte_eth_dev *eth_dev, + uint32_t *caps) +{ + RTE_SET_USED(dev); + RTE_SET_USED(eth_dev); + *caps = RTE_EVENT_ETH_RX_ADAPTER_SW_CAP; + return 0; +} + +static int +sw_timer_adapter_caps_get(const struct rte_eventdev *dev, + uint64_t flags, + uint32_t *caps, + const struct rte_event_timer_adapter_ops **ops) +{ + RTE_SET_USED(dev); + RTE_SET_USED(flags); + *caps = 0; + + /* Use default SW ops */ + *ops = NULL; + + return 0; +} + +static int +sw_crypto_adapter_caps_get(const struct rte_eventdev *dev, + const struct rte_cryptodev *cdev, + uint32_t *caps) +{ + RTE_SET_USED(dev); + RTE_SET_USED(cdev); + *caps = RTE_EVENT_CRYPTO_ADAPTER_SW_CAP; + return 0; +} + +static void +sw_info_get(struct rte_eventdev *dev, struct rte_event_dev_info *info) +{ + RTE_SET_USED(dev); + + static const struct rte_event_dev_info evdev_sw_info = { + .driver_name = SW_PMD_NAME, + .max_event_queues = RTE_EVENT_MAX_QUEUES_PER_DEV, + .max_event_queue_flows = SW_QID_NUM_FIDS, + .max_event_queue_priority_levels = SW_Q_PRIORITY_MAX, + .max_event_priority_levels = SW_IQS_MAX, + .max_event_ports = SW_PORTS_MAX, + .max_event_port_dequeue_depth = MAX_SW_CONS_Q_DEPTH, + .max_event_port_enqueue_depth = MAX_SW_PROD_Q_DEPTH, + .max_num_events = SW_INFLIGHT_EVENTS_TOTAL, + .event_dev_cap = ( + RTE_EVENT_DEV_CAP_QUEUE_QOS | + RTE_EVENT_DEV_CAP_BURST_MODE | + RTE_EVENT_DEV_CAP_EVENT_QOS | + RTE_EVENT_DEV_CAP_IMPLICIT_RELEASE_DISABLE| + RTE_EVENT_DEV_CAP_RUNTIME_PORT_LINK | + RTE_EVENT_DEV_CAP_MULTIPLE_QUEUE_PORT | + RTE_EVENT_DEV_CAP_NONSEQ_MODE), + }; + + *info = evdev_sw_info; +} + +static void +sw_dump(struct rte_eventdev *dev, FILE *f) +{ + const struct sw_evdev *sw = sw_pmd_priv(dev); + + static const char * const q_type_strings[] = { + "Ordered", "Atomic", "Parallel", "Directed" + }; + uint32_t i; + fprintf(f, "EventDev %s: ports %d, qids %d\n", "todo-fix-name", + sw->port_count, sw->qid_count); + + fprintf(f, "\trx %"PRIu64"\n\tdrop %"PRIu64"\n\ttx %"PRIu64"\n", + sw->stats.rx_pkts, sw->stats.rx_dropped, sw->stats.tx_pkts); + fprintf(f, "\tsched calls: %"PRIu64"\n", sw->sched_called); + fprintf(f, "\tsched cq/qid call: %"PRIu64"\n", sw->sched_cq_qid_called); + fprintf(f, "\tsched no IQ enq: %"PRIu64"\n", sw->sched_no_iq_enqueues); + fprintf(f, "\tsched no CQ enq: %"PRIu64"\n", sw->sched_no_cq_enqueues); + uint32_t inflights = rte_atomic32_read(&sw->inflights); + uint32_t credits = sw->nb_events_limit - inflights; + fprintf(f, "\tinflight %d, credits: %d\n", inflights, credits); + +#define COL_RED "\x1b[31m" +#define COL_RESET "\x1b[0m" + + for (i = 0; i < sw->port_count; i++) { + int max, j; + const struct sw_port *p = &sw->ports[i]; + if (!p->initialized) { + fprintf(f, " %sPort %d not initialized.%s\n", + COL_RED, i, COL_RESET); + continue; + } + fprintf(f, " Port %d %s\n", i, + p->is_directed ? 
" (SingleCons)" : ""); + fprintf(f, "\trx %"PRIu64"\tdrop %"PRIu64"\ttx %"PRIu64 + "\t%sinflight %d%s\n", sw->ports[i].stats.rx_pkts, + sw->ports[i].stats.rx_dropped, + sw->ports[i].stats.tx_pkts, + (p->inflights == p->inflight_max) ? + COL_RED : COL_RESET, + sw->ports[i].inflights, COL_RESET); + + fprintf(f, "\tMax New: %u" + "\tAvg cycles PP: %"PRIu64"\tCredits: %u\n", + sw->ports[i].inflight_max, + sw->ports[i].avg_pkt_ticks, + sw->ports[i].inflight_credits); + fprintf(f, "\tReceive burst distribution:\n"); + float zp_percent = p->zero_polls * 100.0 / p->total_polls; + fprintf(f, zp_percent < 10 ? "\t\t0:%.02f%% " : "\t\t0:%.0f%% ", + zp_percent); + for (max = (int)RTE_DIM(p->poll_buckets); max-- > 0;) + if (p->poll_buckets[max] != 0) + break; + for (j = 0; j <= max; j++) { + if (p->poll_buckets[j] != 0) { + float poll_pc = p->poll_buckets[j] * 100.0 / + p->total_polls; + fprintf(f, "%u-%u:%.02f%% ", + ((j << SW_DEQ_STAT_BUCKET_SHIFT) + 1), + ((j+1) << SW_DEQ_STAT_BUCKET_SHIFT), + poll_pc); + } + } + fprintf(f, "\n"); + + if (p->rx_worker_ring) { + uint64_t used = rte_event_ring_count(p->rx_worker_ring); + uint64_t space = rte_event_ring_free_count( + p->rx_worker_ring); + const char *col = (space == 0) ? COL_RED : COL_RESET; + fprintf(f, "\t%srx ring used: %4"PRIu64"\tfree: %4" + PRIu64 COL_RESET"\n", col, used, space); + } else + fprintf(f, "\trx ring not initialized.\n"); + + if (p->cq_worker_ring) { + uint64_t used = rte_event_ring_count(p->cq_worker_ring); + uint64_t space = rte_event_ring_free_count( + p->cq_worker_ring); + const char *col = (space == 0) ? COL_RED : COL_RESET; + fprintf(f, "\t%scq ring used: %4"PRIu64"\tfree: %4" + PRIu64 COL_RESET"\n", col, used, space); + } else + fprintf(f, "\tcq ring not initialized.\n"); + } + + for (i = 0; i < sw->qid_count; i++) { + const struct sw_qid *qid = &sw->qids[i]; + if (!qid->initialized) { + fprintf(f, " %sQueue %d not initialized.%s\n", + COL_RED, i, COL_RESET); + continue; + } + int affinities_per_port[SW_PORTS_MAX] = {0}; + uint32_t inflights = 0; + + fprintf(f, " Queue %d (%s)\n", i, q_type_strings[qid->type]); + fprintf(f, "\trx %"PRIu64"\tdrop %"PRIu64"\ttx %"PRIu64"\n", + qid->stats.rx_pkts, qid->stats.rx_dropped, + qid->stats.tx_pkts); + if (qid->type == RTE_SCHED_TYPE_ORDERED) { + struct rte_ring *rob_buf_free = + qid->reorder_buffer_freelist; + if (rob_buf_free) + fprintf(f, "\tReorder entries in use: %u\n", + rte_ring_free_count(rob_buf_free)); + else + fprintf(f, + "\tReorder buffer not initialized\n"); + } + + uint32_t flow; + for (flow = 0; flow < RTE_DIM(qid->fids); flow++) + if (qid->fids[flow].cq != -1) { + affinities_per_port[qid->fids[flow].cq]++; + inflights += qid->fids[flow].pcount; + } + + uint32_t port; + fprintf(f, "\tPer Port Stats:\n"); + for (port = 0; port < sw->port_count; port++) { + fprintf(f, "\t Port %d: Pkts: %"PRIu64, port, + qid->to_port[port]); + fprintf(f, "\tFlows: %d\n", affinities_per_port[port]); + } + + uint32_t iq; + uint32_t iq_printed = 0; + for (iq = 0; iq < SW_IQS_MAX; iq++) { + if (!qid->iq[iq].head) { + fprintf(f, "\tiq %d is not initialized.\n", iq); + iq_printed = 1; + continue; + } + uint32_t used = iq_count(&qid->iq[iq]); + const char *col = COL_RESET; + if (used > 0) { + fprintf(f, "\t%siq %d: Used %d" + COL_RESET"\n", col, iq, used); + iq_printed = 1; + } + } + if (iq_printed == 0) + fprintf(f, "\t-- iqs empty --\n"); + } +} + +static int +sw_start(struct rte_eventdev *dev) +{ + unsigned int i, j; + struct sw_evdev *sw = sw_pmd_priv(dev); + + 
rte_service_component_runstate_set(sw->service_id, 1); + + /* check a service core is mapped to this service */ + if (!rte_service_runstate_get(sw->service_id)) { + SW_LOG_ERR("Warning: No Service core enabled on service %s\n", + sw->service_name); + return -ENOENT; + } + + /* check all ports are set up */ + for (i = 0; i < sw->port_count; i++) + if (sw->ports[i].rx_worker_ring == NULL) { + SW_LOG_ERR("Port %d not configured\n", i); + return -ESTALE; + } + + /* check all queues are configured and mapped to ports*/ + for (i = 0; i < sw->qid_count; i++) + if (!sw->qids[i].initialized || + sw->qids[i].cq_num_mapped_cqs == 0) { + SW_LOG_ERR("Queue %d not configured\n", i); + return -ENOLINK; + } + + /* build up our prioritized array of qids */ + /* We don't use qsort here, as if all/multiple entries have the same + * priority, the result is non-deterministic. From "man 3 qsort": + * "If two members compare as equal, their order in the sorted + * array is undefined." + */ + uint32_t qidx = 0; + for (j = 0; j <= RTE_EVENT_DEV_PRIORITY_LOWEST; j++) { + for (i = 0; i < sw->qid_count; i++) { + if (sw->qids[i].priority == j) { + sw->qids_prioritized[qidx] = &sw->qids[i]; + qidx++; + } + } + } + + sw_init_qid_iqs(sw); + + if (sw_xstats_init(sw) < 0) + return -EINVAL; + + rte_smp_wmb(); + sw->started = 1; + + return 0; +} + +static void +sw_stop(struct rte_eventdev *dev) +{ + struct sw_evdev *sw = sw_pmd_priv(dev); + int32_t runstate; + + /* Stop the scheduler if it's running */ + runstate = rte_service_runstate_get(sw->service_id); + if (runstate == 1) + rte_service_runstate_set(sw->service_id, 0); + + while (rte_service_may_be_active(sw->service_id)) + rte_pause(); + + /* Flush all events out of the device */ + while (!(sw_qids_empty(sw) && sw_ports_empty(sw))) { + sw_event_schedule(dev); + sw_drain_ports(dev); + sw_drain_queues(dev); + } + + sw_clean_qid_iqs(dev); + sw_xstats_uninit(sw); + sw->started = 0; + rte_smp_wmb(); + + if (runstate == 1) + rte_service_runstate_set(sw->service_id, 1); +} + +static int +sw_close(struct rte_eventdev *dev) +{ + struct sw_evdev *sw = sw_pmd_priv(dev); + uint32_t i; + + for (i = 0; i < sw->qid_count; i++) + sw_queue_release(dev, i); + sw->qid_count = 0; + + for (i = 0; i < sw->port_count; i++) + sw_port_release(&sw->ports[i]); + sw->port_count = 0; + + memset(&sw->stats, 0, sizeof(sw->stats)); + sw->sched_called = 0; + sw->sched_no_iq_enqueues = 0; + sw->sched_no_cq_enqueues = 0; + sw->sched_cq_qid_called = 0; + + return 0; +} + +static int +assign_numa_node(const char *key __rte_unused, const char *value, void *opaque) +{ + int *socket_id = opaque; + *socket_id = atoi(value); + if (*socket_id >= RTE_MAX_NUMA_NODES) + return -1; + return 0; +} + +static int +set_sched_quanta(const char *key __rte_unused, const char *value, void *opaque) +{ + int *quanta = opaque; + *quanta = atoi(value); + if (*quanta < 0 || *quanta >= 4096) + return -1; + return 0; +} + +static int +set_credit_quanta(const char *key __rte_unused, const char *value, void *opaque) +{ + int *credit = opaque; + *credit = atoi(value); + if (*credit < 0 || *credit >= 128) + return -1; + return 0; +} + + +static int32_t sw_sched_service_func(void *args) +{ + struct rte_eventdev *dev = args; + sw_event_schedule(dev); + return 0; +} + +static int +sw_probe(struct rte_vdev_device *vdev) +{ + static struct rte_eventdev_ops evdev_sw_ops = { + .dev_configure = sw_dev_configure, + .dev_infos_get = sw_info_get, + .dev_close = sw_close, + .dev_start = sw_start, + .dev_stop = sw_stop, + .dump = sw_dump, + + 
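+		/* queue/port lifecycle, link management, adapter capability,
+		 * xstats and selftest callbacks
+		 */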
.queue_def_conf = sw_queue_def_conf, + .queue_setup = sw_queue_setup, + .queue_release = sw_queue_release, + .port_def_conf = sw_port_def_conf, + .port_setup = sw_port_setup, + .port_release = sw_port_release, + .port_link = sw_port_link, + .port_unlink = sw_port_unlink, + .port_unlinks_in_progress = sw_port_unlinks_in_progress, + + .eth_rx_adapter_caps_get = sw_eth_rx_adapter_caps_get, + + .timer_adapter_caps_get = sw_timer_adapter_caps_get, + + .crypto_adapter_caps_get = sw_crypto_adapter_caps_get, + + .xstats_get = sw_xstats_get, + .xstats_get_names = sw_xstats_get_names, + .xstats_get_by_name = sw_xstats_get_by_name, + .xstats_reset = sw_xstats_reset, + + .dev_selftest = test_sw_eventdev, + }; + + static const char *const args[] = { + NUMA_NODE_ARG, + SCHED_QUANTA_ARG, + CREDIT_QUANTA_ARG, + NULL + }; + const char *name; + const char *params; + struct rte_eventdev *dev; + struct sw_evdev *sw; + int socket_id = rte_socket_id(); + int sched_quanta = SW_DEFAULT_SCHED_QUANTA; + int credit_quanta = SW_DEFAULT_CREDIT_QUANTA; + + name = rte_vdev_device_name(vdev); + params = rte_vdev_device_args(vdev); + if (params != NULL && params[0] != '\0') { + struct rte_kvargs *kvlist = rte_kvargs_parse(params, args); + + if (!kvlist) { + SW_LOG_INFO( + "Ignoring unsupported parameters when creating device '%s'\n", + name); + } else { + int ret = rte_kvargs_process(kvlist, NUMA_NODE_ARG, + assign_numa_node, &socket_id); + if (ret != 0) { + SW_LOG_ERR( + "%s: Error parsing numa node parameter", + name); + rte_kvargs_free(kvlist); + return ret; + } + + ret = rte_kvargs_process(kvlist, SCHED_QUANTA_ARG, + set_sched_quanta, &sched_quanta); + if (ret != 0) { + SW_LOG_ERR( + "%s: Error parsing sched quanta parameter", + name); + rte_kvargs_free(kvlist); + return ret; + } + + ret = rte_kvargs_process(kvlist, CREDIT_QUANTA_ARG, + set_credit_quanta, &credit_quanta); + if (ret != 0) { + SW_LOG_ERR( + "%s: Error parsing credit quanta parameter", + name); + rte_kvargs_free(kvlist); + return ret; + } + + rte_kvargs_free(kvlist); + } + } + + SW_LOG_INFO( + "Creating eventdev sw device %s, numa_node=%d, sched_quanta=%d, credit_quanta=%d\n", + name, socket_id, sched_quanta, credit_quanta); + + dev = rte_event_pmd_vdev_init(name, + sizeof(struct sw_evdev), socket_id); + if (dev == NULL) { + SW_LOG_ERR("eventdev vdev init() failed"); + return -EFAULT; + } + dev->dev_ops = &evdev_sw_ops; + dev->enqueue = sw_event_enqueue; + dev->enqueue_burst = sw_event_enqueue_burst; + dev->enqueue_new_burst = sw_event_enqueue_burst; + dev->enqueue_forward_burst = sw_event_enqueue_burst; + dev->dequeue = sw_event_dequeue; + dev->dequeue_burst = sw_event_dequeue_burst; + + if (rte_eal_process_type() != RTE_PROC_PRIMARY) + return 0; + + sw = dev->data->dev_private; + sw->data = dev->data; + + /* copy values passed from vdev command line to instance */ + sw->credit_update_quanta = credit_quanta; + sw->sched_quanta = sched_quanta; + + /* register service with EAL */ + struct rte_service_spec service; + memset(&service, 0, sizeof(struct rte_service_spec)); + snprintf(service.name, sizeof(service.name), "%s_service", name); + snprintf(sw->service_name, sizeof(sw->service_name), "%s_service", + name); + service.socket_id = socket_id; + service.callback = sw_sched_service_func; + service.callback_userdata = (void *)dev; + + int32_t ret = rte_service_component_register(&service, &sw->service_id); + if (ret) { + SW_LOG_ERR("service register() failed"); + return -ENOEXEC; + } + + dev->data->service_inited = 1; + dev->data->service_id = 
sw->service_id; + + return 0; +} + +static int +sw_remove(struct rte_vdev_device *vdev) +{ + const char *name; + + name = rte_vdev_device_name(vdev); + if (name == NULL) + return -EINVAL; + + SW_LOG_INFO("Closing eventdev sw device %s\n", name); + + return rte_event_pmd_vdev_uninit(name); +} + +static struct rte_vdev_driver evdev_sw_pmd_drv = { + .probe = sw_probe, + .remove = sw_remove +}; + +RTE_PMD_REGISTER_VDEV(EVENTDEV_NAME_SW_PMD, evdev_sw_pmd_drv); +RTE_PMD_REGISTER_PARAM_STRING(event_sw, NUMA_NODE_ARG "=<int> " + SCHED_QUANTA_ARG "=<int>" CREDIT_QUANTA_ARG "=<int>"); + +/* declared extern in header, for access from other .c files */ +int eventdev_sw_log_level; + +RTE_INIT(evdev_sw_init_log) +{ + eventdev_sw_log_level = rte_log_register("pmd.event.sw"); + if (eventdev_sw_log_level >= 0) + rte_log_set_level(eventdev_sw_log_level, RTE_LOG_NOTICE); +} diff --git a/src/spdk/dpdk/drivers/event/sw/sw_evdev.h b/src/spdk/dpdk/drivers/event/sw/sw_evdev.h new file mode 100644 index 000000000..7c77b2495 --- /dev/null +++ b/src/spdk/dpdk/drivers/event/sw/sw_evdev.h @@ -0,0 +1,300 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2016-2017 Intel Corporation + */ + +#ifndef _SW_EVDEV_H_ +#define _SW_EVDEV_H_ + +#include "sw_evdev_log.h" +#include <rte_eventdev.h> +#include <rte_eventdev_pmd_vdev.h> +#include <rte_atomic.h> + +#define SW_DEFAULT_CREDIT_QUANTA 32 +#define SW_DEFAULT_SCHED_QUANTA 128 +#define SW_QID_NUM_FIDS 16384 +#define SW_IQS_MAX 4 +#define SW_Q_PRIORITY_MAX 255 +#define SW_PORTS_MAX 64 +#define MAX_SW_CONS_Q_DEPTH 128 +#define SW_INFLIGHT_EVENTS_TOTAL 4096 +/* allow for lots of over-provisioning */ +#define MAX_SW_PROD_Q_DEPTH 4096 +#define SW_FRAGMENTS_MAX 16 + +/* Should be power-of-two minus one, to leave room for the next pointer */ +#define SW_EVS_PER_Q_CHUNK 255 +#define SW_Q_CHUNK_SIZE ((SW_EVS_PER_Q_CHUNK + 1) * sizeof(struct rte_event)) + +/* report dequeue burst sizes in buckets */ +#define SW_DEQ_STAT_BUCKET_SHIFT 2 +/* how many packets pulled from port by sched */ +#define SCHED_DEQUEUE_BURST_SIZE 32 + +#define SW_PORT_HIST_LIST (MAX_SW_PROD_Q_DEPTH) /* size of our history list */ +#define NUM_SAMPLES 64 /* how many data points use for average stats */ + +#define EVENTDEV_NAME_SW_PMD event_sw +#define SW_PMD_NAME RTE_STR(event_sw) +#define SW_PMD_NAME_MAX 64 + +#define SW_SCHED_TYPE_DIRECT (RTE_SCHED_TYPE_PARALLEL + 1) + +#define SW_NUM_POLL_BUCKETS (MAX_SW_CONS_Q_DEPTH >> SW_DEQ_STAT_BUCKET_SHIFT) + +enum { + QE_FLAG_VALID_SHIFT = 0, + QE_FLAG_COMPLETE_SHIFT, + QE_FLAG_NOT_EOP_SHIFT, + _QE_FLAG_COUNT +}; + +#define QE_FLAG_VALID (1 << QE_FLAG_VALID_SHIFT) /* for NEW FWD, FRAG */ +#define QE_FLAG_COMPLETE (1 << QE_FLAG_COMPLETE_SHIFT) /* set for FWD, DROP */ +#define QE_FLAG_NOT_EOP (1 << QE_FLAG_NOT_EOP_SHIFT) /* set for FRAG only */ + +static const uint8_t sw_qe_flag_map[] = { + QE_FLAG_VALID /* NEW Event */, + QE_FLAG_VALID | QE_FLAG_COMPLETE /* FWD Event */, + QE_FLAG_COMPLETE /* RELEASE Event */, + + /* Values which can be used for future support for partial + * events, i.e. where one event comes back to the scheduler + * as multiple which need to be tracked together + */ + QE_FLAG_VALID | QE_FLAG_COMPLETE | QE_FLAG_NOT_EOP, +}; + +/* Records basic event stats at a given point. 
Used in port and qid structs */ +struct sw_point_stats { + uint64_t rx_pkts; + uint64_t rx_dropped; + uint64_t tx_pkts; +}; + +/* structure used to track what port a flow (FID) is pinned to */ +struct sw_fid_t { + /* which CQ this FID is currently pinned to */ + int32_t cq; + /* number of packets gone to the CQ with this FID */ + uint32_t pcount; +}; + +struct reorder_buffer_entry { + uint16_t num_fragments; /**< Number of packet fragments */ + uint16_t fragment_index; /**< Points to the oldest valid frag */ + uint8_t ready; /**< Entry is ready to be reordered */ + struct rte_event fragments[SW_FRAGMENTS_MAX]; +}; + +struct sw_iq { + struct sw_queue_chunk *head; + struct sw_queue_chunk *tail; + uint16_t head_idx; + uint16_t tail_idx; + uint16_t count; +}; + +struct sw_qid { + /* set when the QID has been initialized */ + uint8_t initialized; + /* The type of this QID */ + int8_t type; + /* Integer ID representing the queue. This is used in history lists, + * to identify the stage of processing. + */ + uint32_t id; + struct sw_point_stats stats; + + /* Internal priority rings for packets */ + struct sw_iq iq[SW_IQS_MAX]; + uint32_t iq_pkt_mask; /* A mask to indicate packets in an IQ */ + uint64_t iq_pkt_count[SW_IQS_MAX]; + + /* Information on what CQs are polling this IQ */ + uint32_t cq_num_mapped_cqs; + uint32_t cq_next_tx; /* cq to write next (non-atomic) packet */ + uint32_t cq_map[SW_PORTS_MAX]; + uint64_t to_port[SW_PORTS_MAX]; + + /* Track flow ids for atomic load balancing */ + struct sw_fid_t fids[SW_QID_NUM_FIDS]; + + /* Track packet order for reordering when needed */ + struct reorder_buffer_entry *reorder_buffer; /*< pkts await reorder */ + struct rte_ring *reorder_buffer_freelist; /* available reorder slots */ + uint32_t reorder_buffer_index; /* oldest valid reorder buffer entry */ + uint32_t window_size; /* Used to wrap reorder_buffer_index */ + + uint8_t priority; +}; + +struct sw_hist_list_entry { + int32_t qid; + int32_t fid; + struct reorder_buffer_entry *rob_entry; +}; + +struct sw_evdev; + +struct sw_port { + /* new enqueue / dequeue API doesn't have an instance pointer, only the + * pointer to the port being enqueue/dequeued from + */ + struct sw_evdev *sw; + + /* set when the port is initialized */ + uint8_t initialized; + /* A numeric ID for the port */ + uint8_t id; + + /* An atomic counter for when the port has been unlinked, and the + * scheduler has not yet acked this unlink - hence there may still be + * events in the buffers going to the port. When the unlinks in + * progress is read by the scheduler, no more events will be pushed to + * the port - hence the scheduler core can just assign zero. 
+ */ + uint8_t unlinks_in_progress; + + int16_t is_directed; /** Takes from a single directed QID */ + /** + * For loadbalanced we can optimise pulling packets from + * producers if there is no reordering involved + */ + int16_t num_ordered_qids; + + /** Ring and buffer for pulling events from workers for scheduling */ + struct rte_event_ring *rx_worker_ring __rte_cache_aligned; + /** Ring and buffer for pushing packets to workers after scheduling */ + struct rte_event_ring *cq_worker_ring; + + /* hole */ + + /* num releases yet to be completed on this port */ + uint16_t outstanding_releases __rte_cache_aligned; + uint16_t inflight_max; /* app requested max inflights for this port */ + uint16_t inflight_credits; /* num credits this port has right now */ + uint8_t implicit_release; /* release events before dequeueing */ + + uint16_t last_dequeue_burst_sz; /* how big the burst was */ + uint64_t last_dequeue_ticks; /* used to track burst processing time */ + uint64_t avg_pkt_ticks; /* tracks average over NUM_SAMPLES burst */ + uint64_t total_polls; /* how many polls were counted in stats */ + uint64_t zero_polls; /* tracks polls returning nothing */ + uint32_t poll_buckets[SW_NUM_POLL_BUCKETS]; + /* bucket values in 4s for shorter reporting */ + + /* History list structs, containing info on pkts egressed to worker */ + uint16_t hist_head __rte_cache_aligned; + uint16_t hist_tail; + uint16_t inflights; + struct sw_hist_list_entry hist_list[SW_PORT_HIST_LIST]; + + /* track packets in and out of this port */ + struct sw_point_stats stats; + + + uint32_t pp_buf_start; + uint32_t pp_buf_count; + uint16_t cq_buf_count; + struct rte_event pp_buf[SCHED_DEQUEUE_BURST_SIZE]; + struct rte_event cq_buf[MAX_SW_CONS_Q_DEPTH]; + + uint8_t num_qids_mapped; +}; + +struct sw_evdev { + struct rte_eventdev_data *data; + + uint32_t port_count; + uint32_t qid_count; + uint32_t xstats_count; + struct sw_xstats_entry *xstats; + uint32_t xstats_count_mode_dev; + uint32_t xstats_count_mode_port; + uint32_t xstats_count_mode_queue; + + /* Contains all ports - load balanced and directed */ + struct sw_port ports[SW_PORTS_MAX] __rte_cache_aligned; + + rte_atomic32_t inflights __rte_cache_aligned; + + /* + * max events in this instance. Cached here for performance. 
+ * (also available in data->conf.nb_events_limit) + */ + uint32_t nb_events_limit; + + /* Internal queues - one per logical queue */ + struct sw_qid qids[RTE_EVENT_MAX_QUEUES_PER_DEV] __rte_cache_aligned; + struct sw_queue_chunk *chunk_list_head; + struct sw_queue_chunk *chunks; + + /* Cache how many packets are in each cq */ + uint16_t cq_ring_space[SW_PORTS_MAX] __rte_cache_aligned; + + /* Array of pointers to load-balanced QIDs sorted by priority level */ + struct sw_qid *qids_prioritized[RTE_EVENT_MAX_QUEUES_PER_DEV]; + + /* Stats */ + struct sw_point_stats stats __rte_cache_aligned; + uint64_t sched_called; + int32_t sched_quanta; + uint64_t sched_no_iq_enqueues; + uint64_t sched_no_cq_enqueues; + uint64_t sched_cq_qid_called; + + uint8_t started; + uint32_t credit_update_quanta; + + /* store num stats and offset of the stats for each port */ + uint16_t xstats_count_per_port[SW_PORTS_MAX]; + uint16_t xstats_offset_for_port[SW_PORTS_MAX]; + /* store num stats and offset of the stats for each queue */ + uint16_t xstats_count_per_qid[RTE_EVENT_MAX_QUEUES_PER_DEV]; + uint16_t xstats_offset_for_qid[RTE_EVENT_MAX_QUEUES_PER_DEV]; + + uint32_t service_id; + char service_name[SW_PMD_NAME_MAX]; +}; + +static inline struct sw_evdev * +sw_pmd_priv(const struct rte_eventdev *eventdev) +{ + return eventdev->data->dev_private; +} + +static inline const struct sw_evdev * +sw_pmd_priv_const(const struct rte_eventdev *eventdev) +{ + return eventdev->data->dev_private; +} + +uint16_t sw_event_enqueue(void *port, const struct rte_event *ev); +uint16_t sw_event_enqueue_burst(void *port, const struct rte_event ev[], + uint16_t num); + +uint16_t sw_event_dequeue(void *port, struct rte_event *ev, uint64_t wait); +uint16_t sw_event_dequeue_burst(void *port, struct rte_event *ev, uint16_t num, + uint64_t wait); +void sw_event_schedule(struct rte_eventdev *dev); +int sw_xstats_init(struct sw_evdev *dev); +int sw_xstats_uninit(struct sw_evdev *dev); +int sw_xstats_get_names(const struct rte_eventdev *dev, + enum rte_event_dev_xstats_mode mode, uint8_t queue_port_id, + struct rte_event_dev_xstats_name *xstats_names, + unsigned int *ids, unsigned int size); +int sw_xstats_get(const struct rte_eventdev *dev, + enum rte_event_dev_xstats_mode mode, uint8_t queue_port_id, + const unsigned int ids[], uint64_t values[], unsigned int n); +uint64_t sw_xstats_get_by_name(const struct rte_eventdev *dev, + const char *name, unsigned int *id); +int sw_xstats_reset(struct rte_eventdev *dev, + enum rte_event_dev_xstats_mode mode, + int16_t queue_port_id, + const uint32_t ids[], + uint32_t nb_ids); + +int test_sw_eventdev(void); + +#endif /* _SW_EVDEV_H_ */ diff --git a/src/spdk/dpdk/drivers/event/sw/sw_evdev_log.h b/src/spdk/dpdk/drivers/event/sw/sw_evdev_log.h new file mode 100644 index 000000000..f76825abc --- /dev/null +++ b/src/spdk/dpdk/drivers/event/sw/sw_evdev_log.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation + */ + +#ifndef _SW_EVDEV_LOG_H_ +#define _SW_EVDEV_LOG_H_ + +extern int eventdev_sw_log_level; + +#define SW_LOG_IMPL(level, fmt, args...) \ + rte_log(RTE_LOG_ ## level, eventdev_sw_log_level, "%s" fmt "\n", \ + __func__, ##args) + +#define SW_LOG_INFO(fmt, args...) \ + SW_LOG_IMPL(INFO, fmt, ## args) + +#define SW_LOG_DBG(fmt, args...) \ + SW_LOG_IMPL(DEBUG, fmt, ## args) + +#define SW_LOG_ERR(fmt, args...) 
\ + SW_LOG_IMPL(ERR, fmt, ## args) + +#endif /* _SW_EVDEV_LOG_H_ */ diff --git a/src/spdk/dpdk/drivers/event/sw/sw_evdev_scheduler.c b/src/spdk/dpdk/drivers/event/sw/sw_evdev_scheduler.c new file mode 100644 index 000000000..cff747da8 --- /dev/null +++ b/src/spdk/dpdk/drivers/event/sw/sw_evdev_scheduler.c @@ -0,0 +1,568 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2016-2017 Intel Corporation + */ + +#include <rte_ring.h> +#include <rte_hash_crc.h> +#include <rte_event_ring.h> +#include "sw_evdev.h" +#include "iq_chunk.h" + +#define SW_IQS_MASK (SW_IQS_MAX-1) + +/* Retrieve the highest priority IQ or -1 if no pkts available. Doing the + * CLZ twice is faster than caching the value due to data dependencies + */ +#define PKT_MASK_TO_IQ(pkts) \ + (__builtin_ctz(pkts | (1 << SW_IQS_MAX))) + +#if SW_IQS_MAX != 4 +#error Misconfigured PRIO_TO_IQ caused by SW_IQS_MAX value change +#endif +#define PRIO_TO_IQ(prio) (prio >> 6) + +#define MAX_PER_IQ_DEQUEUE 48 +#define FLOWID_MASK (SW_QID_NUM_FIDS-1) +/* use cheap bit mixing, we only need to lose a few bits */ +#define SW_HASH_FLOWID(f) (((f) ^ (f >> 10)) & FLOWID_MASK) + +static inline uint32_t +sw_schedule_atomic_to_cq(struct sw_evdev *sw, struct sw_qid * const qid, + uint32_t iq_num, unsigned int count) +{ + struct rte_event qes[MAX_PER_IQ_DEQUEUE]; /* count <= MAX */ + struct rte_event blocked_qes[MAX_PER_IQ_DEQUEUE]; + uint32_t nb_blocked = 0; + uint32_t i; + + if (count > MAX_PER_IQ_DEQUEUE) + count = MAX_PER_IQ_DEQUEUE; + + /* This is the QID ID. The QID ID is static, hence it can be + * used to identify the stage of processing in history lists etc + */ + uint32_t qid_id = qid->id; + + iq_dequeue_burst(sw, &qid->iq[iq_num], qes, count); + for (i = 0; i < count; i++) { + const struct rte_event *qe = &qes[i]; + const uint16_t flow_id = SW_HASH_FLOWID(qes[i].flow_id); + struct sw_fid_t *fid = &qid->fids[flow_id]; + int cq = fid->cq; + + if (cq < 0) { + uint32_t cq_idx; + if (qid->cq_next_tx >= qid->cq_num_mapped_cqs) + qid->cq_next_tx = 0; + cq_idx = qid->cq_next_tx++; + + cq = qid->cq_map[cq_idx]; + + /* find least used */ + int cq_free_cnt = sw->cq_ring_space[cq]; + for (cq_idx = 0; cq_idx < qid->cq_num_mapped_cqs; + cq_idx++) { + int test_cq = qid->cq_map[cq_idx]; + int test_cq_free = sw->cq_ring_space[test_cq]; + if (test_cq_free > cq_free_cnt) { + cq = test_cq; + cq_free_cnt = test_cq_free; + } + } + + fid->cq = cq; /* this pins early */ + } + + if (sw->cq_ring_space[cq] == 0 || + sw->ports[cq].inflights == SW_PORT_HIST_LIST) { + blocked_qes[nb_blocked++] = *qe; + continue; + } + + struct sw_port *p = &sw->ports[cq]; + + /* at this point we can queue up the packet on the cq_buf */ + fid->pcount++; + p->cq_buf[p->cq_buf_count++] = *qe; + p->inflights++; + sw->cq_ring_space[cq]--; + + int head = (p->hist_head++ & (SW_PORT_HIST_LIST-1)); + p->hist_list[head].fid = flow_id; + p->hist_list[head].qid = qid_id; + + p->stats.tx_pkts++; + qid->stats.tx_pkts++; + qid->to_port[cq]++; + + /* if we just filled in the last slot, flush the buffer */ + if (sw->cq_ring_space[cq] == 0) { + struct rte_event_ring *worker = p->cq_worker_ring; + rte_event_ring_enqueue_burst(worker, p->cq_buf, + p->cq_buf_count, + &sw->cq_ring_space[cq]); + p->cq_buf_count = 0; + } + } + iq_put_back(sw, &qid->iq[iq_num], blocked_qes, nb_blocked); + + return count - nb_blocked; +} + +static inline uint32_t +sw_schedule_parallel_to_cq(struct sw_evdev *sw, struct sw_qid * const qid, + uint32_t iq_num, unsigned int count, int keep_order) +{ + uint32_t i; + uint32_t 
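/*
 * Editor's note: each QID keeps SW_IQS_MAX (4) internal priority queues, and
 * PRIO_TO_IQ() above maps the event's 8-bit priority onto them:
 * 0..63 -> IQ 0, 64..127 -> IQ 1, 128..191 -> IQ 2, 192..255 -> IQ 3.
 * PKT_MASK_TO_IQ() uses __builtin_ctz(), so the lowest-numbered non-empty IQ
 * is drained first; since RTE_EVENT_DEV_PRIORITY_HIGHEST is 0, higher
 * priority events overtake lower priority ones within a queue, which is what
 * run_prio_packet_test() in the selftest later in this diff depends on.
 */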
cq_idx = qid->cq_next_tx; + + /* This is the QID ID. The QID ID is static, hence it can be + * used to identify the stage of processing in history lists etc + */ + uint32_t qid_id = qid->id; + + if (count > MAX_PER_IQ_DEQUEUE) + count = MAX_PER_IQ_DEQUEUE; + + if (keep_order) + /* only schedule as many as we have reorder buffer entries */ + count = RTE_MIN(count, + rte_ring_count(qid->reorder_buffer_freelist)); + + for (i = 0; i < count; i++) { + const struct rte_event *qe = iq_peek(&qid->iq[iq_num]); + uint32_t cq_check_count = 0; + uint32_t cq; + + /* + * for parallel, just send to next available CQ in round-robin + * fashion. So scan for an available CQ. If all CQs are full + * just return and move on to next QID + */ + do { + if (++cq_check_count > qid->cq_num_mapped_cqs) + goto exit; + if (cq_idx >= qid->cq_num_mapped_cqs) + cq_idx = 0; + cq = qid->cq_map[cq_idx++]; + + } while (rte_event_ring_free_count( + sw->ports[cq].cq_worker_ring) == 0 || + sw->ports[cq].inflights == SW_PORT_HIST_LIST); + + struct sw_port *p = &sw->ports[cq]; + if (sw->cq_ring_space[cq] == 0 || + p->inflights == SW_PORT_HIST_LIST) + break; + + sw->cq_ring_space[cq]--; + + qid->stats.tx_pkts++; + + const int head = (p->hist_head & (SW_PORT_HIST_LIST-1)); + p->hist_list[head].fid = SW_HASH_FLOWID(qe->flow_id); + p->hist_list[head].qid = qid_id; + + if (keep_order) + rte_ring_sc_dequeue(qid->reorder_buffer_freelist, + (void *)&p->hist_list[head].rob_entry); + + sw->ports[cq].cq_buf[sw->ports[cq].cq_buf_count++] = *qe; + iq_pop(sw, &qid->iq[iq_num]); + + rte_compiler_barrier(); + p->inflights++; + p->stats.tx_pkts++; + p->hist_head++; + } +exit: + qid->cq_next_tx = cq_idx; + return i; +} + +static uint32_t +sw_schedule_dir_to_cq(struct sw_evdev *sw, struct sw_qid * const qid, + uint32_t iq_num, unsigned int count __rte_unused) +{ + uint32_t cq_id = qid->cq_map[0]; + struct sw_port *port = &sw->ports[cq_id]; + + /* get max burst enq size for cq_ring */ + uint32_t count_free = sw->cq_ring_space[cq_id]; + if (count_free == 0) + return 0; + + /* burst dequeue from the QID IQ ring */ + struct sw_iq *iq = &qid->iq[iq_num]; + uint32_t ret = iq_dequeue_burst(sw, iq, + &port->cq_buf[port->cq_buf_count], count_free); + port->cq_buf_count += ret; + + /* Update QID, Port and Total TX stats */ + qid->stats.tx_pkts += ret; + port->stats.tx_pkts += ret; + + /* Subtract credits from cached value */ + sw->cq_ring_space[cq_id] -= ret; + + return ret; +} + +static uint32_t +sw_schedule_qid_to_cq(struct sw_evdev *sw) +{ + uint32_t pkts = 0; + uint32_t qid_idx; + + sw->sched_cq_qid_called++; + + for (qid_idx = 0; qid_idx < sw->qid_count; qid_idx++) { + struct sw_qid *qid = sw->qids_prioritized[qid_idx]; + + int type = qid->type; + int iq_num = PKT_MASK_TO_IQ(qid->iq_pkt_mask); + + /* zero mapped CQs indicates directed */ + if (iq_num >= SW_IQS_MAX || qid->cq_num_mapped_cqs == 0) + continue; + + uint32_t pkts_done = 0; + uint32_t count = iq_count(&qid->iq[iq_num]); + + if (count > 0) { + if (type == SW_SCHED_TYPE_DIRECT) + pkts_done += sw_schedule_dir_to_cq(sw, qid, + iq_num, count); + else if (type == RTE_SCHED_TYPE_ATOMIC) + pkts_done += sw_schedule_atomic_to_cq(sw, qid, + iq_num, count); + else + pkts_done += sw_schedule_parallel_to_cq(sw, qid, + iq_num, count, + type == RTE_SCHED_TYPE_ORDERED); + } + + /* Check if the IQ that was polled is now empty, and unset it + * in the IQ mask if its empty. 
+ */ + int all_done = (pkts_done == count); + + qid->iq_pkt_mask &= ~(all_done << (iq_num)); + pkts += pkts_done; + } + + return pkts; +} + +/* This function will perform re-ordering of packets, and injecting into + * the appropriate QID IQ. As LB and DIR QIDs are in the same array, but *NOT* + * contiguous in that array, this function accepts a "range" of QIDs to scan. + */ +static uint16_t +sw_schedule_reorder(struct sw_evdev *sw, int qid_start, int qid_end) +{ + /* Perform egress reordering */ + struct rte_event *qe; + uint32_t pkts_iter = 0; + + for (; qid_start < qid_end; qid_start++) { + struct sw_qid *qid = &sw->qids[qid_start]; + int i, num_entries_in_use; + + if (qid->type != RTE_SCHED_TYPE_ORDERED) + continue; + + num_entries_in_use = rte_ring_free_count( + qid->reorder_buffer_freelist); + + for (i = 0; i < num_entries_in_use; i++) { + struct reorder_buffer_entry *entry; + int j; + + entry = &qid->reorder_buffer[qid->reorder_buffer_index]; + + if (!entry->ready) + break; + + for (j = 0; j < entry->num_fragments; j++) { + uint16_t dest_qid; + uint16_t dest_iq; + + int idx = entry->fragment_index + j; + qe = &entry->fragments[idx]; + + dest_qid = qe->queue_id; + dest_iq = PRIO_TO_IQ(qe->priority); + + if (dest_qid >= sw->qid_count) { + sw->stats.rx_dropped++; + continue; + } + + pkts_iter++; + + struct sw_qid *q = &sw->qids[dest_qid]; + struct sw_iq *iq = &q->iq[dest_iq]; + + /* we checked for space above, so enqueue must + * succeed + */ + iq_enqueue(sw, iq, qe); + q->iq_pkt_mask |= (1 << (dest_iq)); + q->iq_pkt_count[dest_iq]++; + q->stats.rx_pkts++; + } + + entry->ready = (j != entry->num_fragments); + entry->num_fragments -= j; + entry->fragment_index += j; + + if (!entry->ready) { + entry->fragment_index = 0; + + rte_ring_sp_enqueue( + qid->reorder_buffer_freelist, + entry); + + qid->reorder_buffer_index++; + qid->reorder_buffer_index %= qid->window_size; + } + } + } + return pkts_iter; +} + +static __rte_always_inline void +sw_refill_pp_buf(struct sw_evdev *sw, struct sw_port *port) +{ + RTE_SET_USED(sw); + struct rte_event_ring *worker = port->rx_worker_ring; + port->pp_buf_start = 0; + port->pp_buf_count = rte_event_ring_dequeue_burst(worker, port->pp_buf, + RTE_DIM(port->pp_buf), NULL); +} + +static __rte_always_inline uint32_t +__pull_port_lb(struct sw_evdev *sw, uint32_t port_id, int allow_reorder) +{ + static struct reorder_buffer_entry dummy_rob; + uint32_t pkts_iter = 0; + struct sw_port *port = &sw->ports[port_id]; + + /* If shadow ring has 0 pkts, pull from worker ring */ + if (port->pp_buf_count == 0) + sw_refill_pp_buf(sw, port); + + while (port->pp_buf_count) { + const struct rte_event *qe = &port->pp_buf[port->pp_buf_start]; + struct sw_hist_list_entry *hist_entry = NULL; + uint8_t flags = qe->op; + const uint16_t eop = !(flags & QE_FLAG_NOT_EOP); + int needs_reorder = 0; + /* if no-reordering, having PARTIAL == NEW */ + if (!allow_reorder && !eop) + flags = QE_FLAG_VALID; + + /* + * if we don't have space for this packet in an IQ, + * then move on to next queue. Technically, for a + * packet that needs reordering, we don't need to check + * here, but it simplifies things not to special-case + */ + uint32_t iq_num = PRIO_TO_IQ(qe->priority); + struct sw_qid *qid = &sw->qids[qe->queue_id]; + + /* now process based on flags. Note that for directed + * queues, the enqueue_flush masks off all but the + * valid flag. This makes FWD and PARTIAL enqueues just + * NEW type, and makes DROPS no-op calls. 
+ */ + if ((flags & QE_FLAG_COMPLETE) && port->inflights > 0) { + const uint32_t hist_tail = port->hist_tail & + (SW_PORT_HIST_LIST - 1); + + hist_entry = &port->hist_list[hist_tail]; + const uint32_t hist_qid = hist_entry->qid; + const uint32_t hist_fid = hist_entry->fid; + + struct sw_fid_t *fid = + &sw->qids[hist_qid].fids[hist_fid]; + fid->pcount -= eop; + if (fid->pcount == 0) + fid->cq = -1; + + if (allow_reorder) { + /* set reorder ready if an ordered QID */ + uintptr_t rob_ptr = + (uintptr_t)hist_entry->rob_entry; + const uintptr_t valid = (rob_ptr != 0); + needs_reorder = valid; + rob_ptr |= + ((valid - 1) & (uintptr_t)&dummy_rob); + struct reorder_buffer_entry *tmp_rob_ptr = + (struct reorder_buffer_entry *)rob_ptr; + tmp_rob_ptr->ready = eop * needs_reorder; + } + + port->inflights -= eop; + port->hist_tail += eop; + } + if (flags & QE_FLAG_VALID) { + port->stats.rx_pkts++; + + if (allow_reorder && needs_reorder) { + struct reorder_buffer_entry *rob_entry = + hist_entry->rob_entry; + + hist_entry->rob_entry = NULL; + /* Although fragmentation not currently + * supported by eventdev API, we support it + * here. Open: How do we alert the user that + * they've exceeded max frags? + */ + int num_frag = rob_entry->num_fragments; + if (num_frag == SW_FRAGMENTS_MAX) + sw->stats.rx_dropped++; + else { + int idx = rob_entry->num_fragments++; + rob_entry->fragments[idx] = *qe; + } + goto end_qe; + } + + /* Use the iq_num from above to push the QE + * into the qid at the right priority + */ + + qid->iq_pkt_mask |= (1 << (iq_num)); + iq_enqueue(sw, &qid->iq[iq_num], qe); + qid->iq_pkt_count[iq_num]++; + qid->stats.rx_pkts++; + pkts_iter++; + } + +end_qe: + port->pp_buf_start++; + port->pp_buf_count--; + } /* while (avail_qes) */ + + return pkts_iter; +} + +static uint32_t +sw_schedule_pull_port_lb(struct sw_evdev *sw, uint32_t port_id) +{ + return __pull_port_lb(sw, port_id, 1); +} + +static uint32_t +sw_schedule_pull_port_no_reorder(struct sw_evdev *sw, uint32_t port_id) +{ + return __pull_port_lb(sw, port_id, 0); +} + +static uint32_t +sw_schedule_pull_port_dir(struct sw_evdev *sw, uint32_t port_id) +{ + uint32_t pkts_iter = 0; + struct sw_port *port = &sw->ports[port_id]; + + /* If shadow ring has 0 pkts, pull from worker ring */ + if (port->pp_buf_count == 0) + sw_refill_pp_buf(sw, port); + + while (port->pp_buf_count) { + const struct rte_event *qe = &port->pp_buf[port->pp_buf_start]; + uint8_t flags = qe->op; + + if ((flags & QE_FLAG_VALID) == 0) + goto end_qe; + + uint32_t iq_num = PRIO_TO_IQ(qe->priority); + struct sw_qid *qid = &sw->qids[qe->queue_id]; + struct sw_iq *iq = &qid->iq[iq_num]; + + port->stats.rx_pkts++; + + /* Use the iq_num from above to push the QE + * into the qid at the right priority + */ + qid->iq_pkt_mask |= (1 << (iq_num)); + iq_enqueue(sw, iq, qe); + qid->iq_pkt_count[iq_num]++; + qid->stats.rx_pkts++; + pkts_iter++; + +end_qe: + port->pp_buf_start++; + port->pp_buf_count--; + } /* while port->pp_buf_count */ + + return pkts_iter; +} + +void +sw_event_schedule(struct rte_eventdev *dev) +{ + struct sw_evdev *sw = sw_pmd_priv(dev); + uint32_t in_pkts, out_pkts; + uint32_t out_pkts_total = 0, in_pkts_total = 0; + int32_t sched_quanta = sw->sched_quanta; + uint32_t i; + + sw->sched_called++; + if (unlikely(!sw->started)) + return; + + do { + uint32_t in_pkts_this_iteration = 0; + + /* Pull from rx_ring for ports */ + do { + in_pkts = 0; + for (i = 0; i < sw->port_count; i++) { + /* ack the unlinks in progress as done */ + if (sw->ports[i].unlinks_in_progress) + 
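/*
 * Editor's note: this is the acknowledgement half of unlinks_in_progress.
 * rte_event_port_unlink() only requests the unlink; it is complete once the
 * scheduler has observed it and zeroed the counter here, so an application
 * reconfiguring a port would typically poll for completion. A minimal
 * sketch, assuming hypothetical dev_id, port_id and queue_id:
 *
 *     uint8_t q = queue_id;
 *     rte_event_port_unlink(dev_id, port_id, &q, 1);
 *     while (rte_event_port_unlinks_in_progress(dev_id, port_id) > 0)
 *             rte_pause();
 */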
sw->ports[i].unlinks_in_progress = 0; + + if (sw->ports[i].is_directed) + in_pkts += sw_schedule_pull_port_dir(sw, i); + else if (sw->ports[i].num_ordered_qids > 0) + in_pkts += sw_schedule_pull_port_lb(sw, i); + else + in_pkts += sw_schedule_pull_port_no_reorder(sw, i); + } + + /* QID scan for re-ordered */ + in_pkts += sw_schedule_reorder(sw, 0, + sw->qid_count); + in_pkts_this_iteration += in_pkts; + } while (in_pkts > 4 && + (int)in_pkts_this_iteration < sched_quanta); + + out_pkts = sw_schedule_qid_to_cq(sw); + out_pkts_total += out_pkts; + in_pkts_total += in_pkts_this_iteration; + + if (in_pkts == 0 && out_pkts == 0) + break; + } while ((int)out_pkts_total < sched_quanta); + + sw->stats.tx_pkts += out_pkts_total; + sw->stats.rx_pkts += in_pkts_total; + + sw->sched_no_iq_enqueues += (in_pkts_total == 0); + sw->sched_no_cq_enqueues += (out_pkts_total == 0); + + /* push all the internal buffered QEs in port->cq_ring to the + * worker cores: aka, do the ring transfers batched. + */ + for (i = 0; i < sw->port_count; i++) { + struct rte_event_ring *worker = sw->ports[i].cq_worker_ring; + rte_event_ring_enqueue_burst(worker, sw->ports[i].cq_buf, + sw->ports[i].cq_buf_count, + &sw->cq_ring_space[i]); + sw->ports[i].cq_buf_count = 0; + } + +} diff --git a/src/spdk/dpdk/drivers/event/sw/sw_evdev_selftest.c b/src/spdk/dpdk/drivers/event/sw/sw_evdev_selftest.c new file mode 100644 index 000000000..38c21fa0f --- /dev/null +++ b/src/spdk/dpdk/drivers/event/sw/sw_evdev_selftest.c @@ -0,0 +1,3401 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2016-2017 Intel Corporation + */ + +#include <stdio.h> +#include <string.h> +#include <stdint.h> +#include <errno.h> +#include <unistd.h> +#include <sys/queue.h> + +#include <rte_memory.h> +#include <rte_launch.h> +#include <rte_eal.h> +#include <rte_per_lcore.h> +#include <rte_lcore.h> +#include <rte_debug.h> +#include <rte_ethdev.h> +#include <rte_cycles.h> +#include <rte_eventdev.h> +#include <rte_pause.h> +#include <rte_service.h> +#include <rte_service_component.h> +#include <rte_bus_vdev.h> + +#include "sw_evdev.h" + +#define MAX_PORTS 16 +#define MAX_QIDS 16 +#define NUM_PACKETS (1<<18) +#define DEQUEUE_DEPTH 128 + +static int evdev; + +struct test { + struct rte_mempool *mbuf_pool; + uint8_t port[MAX_PORTS]; + uint8_t qid[MAX_QIDS]; + int nb_qids; + uint32_t service_id; +}; + +static struct rte_event release_ev; + +static inline struct rte_mbuf * +rte_gen_arp(int portid, struct rte_mempool *mp) +{ + /* + * len = 14 + 46 + * ARP, Request who-has 10.0.0.1 tell 10.0.0.2, length 46 + */ + static const uint8_t arp_request[] = { + /*0x0000:*/ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xec, 0xa8, + 0x6b, 0xfd, 0x02, 0x29, 0x08, 0x06, 0x00, 0x01, + /*0x0010:*/ 0x08, 0x00, 0x06, 0x04, 0x00, 0x01, 0xec, 0xa8, + 0x6b, 0xfd, 0x02, 0x29, 0x0a, 0x00, 0x00, 0x01, + /*0x0020:*/ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x00, + 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + /*0x0030:*/ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00 + }; + struct rte_mbuf *m; + int pkt_len = sizeof(arp_request) - 1; + + m = rte_pktmbuf_alloc(mp); + if (!m) + return 0; + + memcpy((void *)((uintptr_t)m->buf_addr + m->data_off), + arp_request, pkt_len); + rte_pktmbuf_pkt_len(m) = pkt_len; + rte_pktmbuf_data_len(m) = pkt_len; + + RTE_SET_USED(portid); + + return m; +} + +static void +xstats_print(void) +{ + const uint32_t XSTATS_MAX = 1024; + uint32_t i; + uint32_t ids[XSTATS_MAX]; + uint64_t values[XSTATS_MAX]; + struct rte_event_dev_xstats_name 
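/*
 * Editor's note: everything from here to the end of this file is the PMD
 * selftest that the ops table earlier in this diff wires up as
 * .dev_selftest = test_sw_eventdev. It is normally invoked through the
 * generic API, e.g. (hypothetical dev_id):
 *
 *     int rc = rte_event_dev_selftest(dev_id);
 *
 * The tests drive the scheduler manually with
 * rte_service_run_iter_on_app_lcore() rather than a dedicated service core.
 */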
xstats_names[XSTATS_MAX]; + + for (i = 0; i < XSTATS_MAX; i++) + ids[i] = i; + + /* Device names / values */ + int ret = rte_event_dev_xstats_names_get(evdev, + RTE_EVENT_DEV_XSTATS_DEVICE, 0, + xstats_names, ids, XSTATS_MAX); + if (ret < 0) { + printf("%d: xstats names get() returned error\n", + __LINE__); + return; + } + ret = rte_event_dev_xstats_get(evdev, + RTE_EVENT_DEV_XSTATS_DEVICE, + 0, ids, values, ret); + if (ret > (signed int)XSTATS_MAX) + printf("%s %d: more xstats available than space\n", + __func__, __LINE__); + for (i = 0; (signed int)i < ret; i++) { + printf("%d : %s : %"PRIu64"\n", + i, xstats_names[i].name, values[i]); + } + + /* Port names / values */ + ret = rte_event_dev_xstats_names_get(evdev, + RTE_EVENT_DEV_XSTATS_PORT, 0, + xstats_names, ids, XSTATS_MAX); + ret = rte_event_dev_xstats_get(evdev, + RTE_EVENT_DEV_XSTATS_PORT, 1, + ids, values, ret); + if (ret > (signed int)XSTATS_MAX) + printf("%s %d: more xstats available than space\n", + __func__, __LINE__); + for (i = 0; (signed int)i < ret; i++) { + printf("%d : %s : %"PRIu64"\n", + i, xstats_names[i].name, values[i]); + } + + /* Queue names / values */ + ret = rte_event_dev_xstats_names_get(evdev, + RTE_EVENT_DEV_XSTATS_QUEUE, 0, + xstats_names, ids, XSTATS_MAX); + ret = rte_event_dev_xstats_get(evdev, + RTE_EVENT_DEV_XSTATS_QUEUE, + 1, ids, values, ret); + if (ret > (signed int)XSTATS_MAX) + printf("%s %d: more xstats available than space\n", + __func__, __LINE__); + for (i = 0; (signed int)i < ret; i++) { + printf("%d : %s : %"PRIu64"\n", + i, xstats_names[i].name, values[i]); + } +} + +/* initialization and config */ +static inline int +init(struct test *t, int nb_queues, int nb_ports) +{ + struct rte_event_dev_config config = { + .nb_event_queues = nb_queues, + .nb_event_ports = nb_ports, + .nb_event_queue_flows = 1024, + .nb_events_limit = 4096, + .nb_event_port_dequeue_depth = DEQUEUE_DEPTH, + .nb_event_port_enqueue_depth = 128, + }; + int ret; + + void *temp = t->mbuf_pool; /* save and restore mbuf pool */ + + memset(t, 0, sizeof(*t)); + t->mbuf_pool = temp; + + ret = rte_event_dev_configure(evdev, &config); + if (ret < 0) + printf("%d: Error configuring device\n", __LINE__); + return ret; +}; + +static inline int +create_ports(struct test *t, int num_ports) +{ + int i; + static const struct rte_event_port_conf conf = { + .new_event_threshold = 1024, + .dequeue_depth = 32, + .enqueue_depth = 64, + .disable_implicit_release = 0, + }; + if (num_ports > MAX_PORTS) + return -1; + + for (i = 0; i < num_ports; i++) { + if (rte_event_port_setup(evdev, i, &conf) < 0) { + printf("Error setting up port %d\n", i); + return -1; + } + t->port[i] = i; + } + + return 0; +} + +static inline int +create_lb_qids(struct test *t, int num_qids, uint32_t flags) +{ + int i; + + /* Q creation */ + const struct rte_event_queue_conf conf = { + .schedule_type = flags, + .priority = RTE_EVENT_DEV_PRIORITY_NORMAL, + .nb_atomic_flows = 1024, + .nb_atomic_order_sequences = 1024, + }; + + for (i = t->nb_qids; i < t->nb_qids + num_qids; i++) { + if (rte_event_queue_setup(evdev, i, &conf) < 0) { + printf("%d: error creating qid %d\n", __LINE__, i); + return -1; + } + t->qid[i] = i; + } + t->nb_qids += num_qids; + if (t->nb_qids > MAX_QIDS) + return -1; + + return 0; +} + +static inline int +create_atomic_qids(struct test *t, int num_qids) +{ + return create_lb_qids(t, num_qids, RTE_SCHED_TYPE_ATOMIC); +} + +static inline int +create_ordered_qids(struct test *t, int num_qids) +{ + return create_lb_qids(t, num_qids, RTE_SCHED_TYPE_ORDERED); 
+} + + +static inline int +create_unordered_qids(struct test *t, int num_qids) +{ + return create_lb_qids(t, num_qids, RTE_SCHED_TYPE_PARALLEL); +} + +static inline int +create_directed_qids(struct test *t, int num_qids, const uint8_t ports[]) +{ + int i; + + /* Q creation */ + static const struct rte_event_queue_conf conf = { + .priority = RTE_EVENT_DEV_PRIORITY_NORMAL, + .event_queue_cfg = RTE_EVENT_QUEUE_CFG_SINGLE_LINK, + }; + + for (i = t->nb_qids; i < t->nb_qids + num_qids; i++) { + if (rte_event_queue_setup(evdev, i, &conf) < 0) { + printf("%d: error creating qid %d\n", __LINE__, i); + return -1; + } + t->qid[i] = i; + + if (rte_event_port_link(evdev, ports[i - t->nb_qids], + &t->qid[i], NULL, 1) != 1) { + printf("%d: error creating link for qid %d\n", + __LINE__, i); + return -1; + } + } + t->nb_qids += num_qids; + if (t->nb_qids > MAX_QIDS) + return -1; + + return 0; +} + +/* destruction */ +static inline int +cleanup(struct test *t __rte_unused) +{ + rte_event_dev_stop(evdev); + rte_event_dev_close(evdev); + return 0; +}; + +struct test_event_dev_stats { + uint64_t rx_pkts; /**< Total packets received */ + uint64_t rx_dropped; /**< Total packets dropped (Eg Invalid QID) */ + uint64_t tx_pkts; /**< Total packets transmitted */ + + /** Packets received on this port */ + uint64_t port_rx_pkts[MAX_PORTS]; + /** Packets dropped on this port */ + uint64_t port_rx_dropped[MAX_PORTS]; + /** Packets inflight on this port */ + uint64_t port_inflight[MAX_PORTS]; + /** Packets transmitted on this port */ + uint64_t port_tx_pkts[MAX_PORTS]; + /** Packets received on this qid */ + uint64_t qid_rx_pkts[MAX_QIDS]; + /** Packets dropped on this qid */ + uint64_t qid_rx_dropped[MAX_QIDS]; + /** Packets transmitted on this qid */ + uint64_t qid_tx_pkts[MAX_QIDS]; +}; + +static inline int +test_event_dev_stats_get(int dev_id, struct test_event_dev_stats *stats) +{ + static uint32_t i; + static uint32_t total_ids[3]; /* rx, tx and drop */ + static uint32_t port_rx_pkts_ids[MAX_PORTS]; + static uint32_t port_rx_dropped_ids[MAX_PORTS]; + static uint32_t port_inflight_ids[MAX_PORTS]; + static uint32_t port_tx_pkts_ids[MAX_PORTS]; + static uint32_t qid_rx_pkts_ids[MAX_QIDS]; + static uint32_t qid_rx_dropped_ids[MAX_QIDS]; + static uint32_t qid_tx_pkts_ids[MAX_QIDS]; + + + stats->rx_pkts = rte_event_dev_xstats_by_name_get(dev_id, + "dev_rx", &total_ids[0]); + stats->rx_dropped = rte_event_dev_xstats_by_name_get(dev_id, + "dev_drop", &total_ids[1]); + stats->tx_pkts = rte_event_dev_xstats_by_name_get(dev_id, + "dev_tx", &total_ids[2]); + for (i = 0; i < MAX_PORTS; i++) { + char name[32]; + snprintf(name, sizeof(name), "port_%u_rx", i); + stats->port_rx_pkts[i] = rte_event_dev_xstats_by_name_get( + dev_id, name, &port_rx_pkts_ids[i]); + snprintf(name, sizeof(name), "port_%u_drop", i); + stats->port_rx_dropped[i] = rte_event_dev_xstats_by_name_get( + dev_id, name, &port_rx_dropped_ids[i]); + snprintf(name, sizeof(name), "port_%u_inflight", i); + stats->port_inflight[i] = rte_event_dev_xstats_by_name_get( + dev_id, name, &port_inflight_ids[i]); + snprintf(name, sizeof(name), "port_%u_tx", i); + stats->port_tx_pkts[i] = rte_event_dev_xstats_by_name_get( + dev_id, name, &port_tx_pkts_ids[i]); + } + for (i = 0; i < MAX_QIDS; i++) { + char name[32]; + snprintf(name, sizeof(name), "qid_%u_rx", i); + stats->qid_rx_pkts[i] = rte_event_dev_xstats_by_name_get( + dev_id, name, &qid_rx_pkts_ids[i]); + snprintf(name, sizeof(name), "qid_%u_drop", i); + stats->qid_rx_dropped[i] = rte_event_dev_xstats_by_name_get( + dev_id, 
name, &qid_rx_dropped_ids[i]); + snprintf(name, sizeof(name), "qid_%u_tx", i); + stats->qid_tx_pkts[i] = rte_event_dev_xstats_by_name_get( + dev_id, name, &qid_tx_pkts_ids[i]); + } + + return 0; +} + +/* run_prio_packet_test + * This performs a basic packet priority check on the test instance passed in. + * It is factored out of the main priority tests as the same tests must be + * performed to ensure prioritization of each type of QID. + * + * Requirements: + * - An initialized test structure, including mempool + * - t->port[0] is initialized for both Enq / Deq of packets to the QID + * - t->qid[0] is the QID to be tested + * - if LB QID, the CQ must be mapped to the QID. + */ +static int +run_prio_packet_test(struct test *t) +{ + int err; + const uint32_t MAGIC_SEQN[] = {4711, 1234}; + const uint32_t PRIORITY[] = { + RTE_EVENT_DEV_PRIORITY_NORMAL, + RTE_EVENT_DEV_PRIORITY_HIGHEST + }; + unsigned int i; + for (i = 0; i < RTE_DIM(MAGIC_SEQN); i++) { + /* generate pkt and enqueue */ + struct rte_event ev; + struct rte_mbuf *arp = rte_gen_arp(0, t->mbuf_pool); + if (!arp) { + printf("%d: gen of pkt failed\n", __LINE__); + return -1; + } + arp->seqn = MAGIC_SEQN[i]; + + ev = (struct rte_event){ + .priority = PRIORITY[i], + .op = RTE_EVENT_OP_NEW, + .queue_id = t->qid[0], + .mbuf = arp + }; + err = rte_event_enqueue_burst(evdev, t->port[0], &ev, 1); + if (err != 1) { + printf("%d: error failed to enqueue\n", __LINE__); + return -1; + } + } + + rte_service_run_iter_on_app_lcore(t->service_id, 1); + + struct test_event_dev_stats stats; + err = test_event_dev_stats_get(evdev, &stats); + if (err) { + printf("%d: error failed to get stats\n", __LINE__); + return -1; + } + + if (stats.port_rx_pkts[t->port[0]] != 2) { + printf("%d: error stats incorrect for directed port\n", + __LINE__); + rte_event_dev_dump(evdev, stdout); + return -1; + } + + struct rte_event ev, ev2; + uint32_t deq_pkts; + deq_pkts = rte_event_dequeue_burst(evdev, t->port[0], &ev, 1, 0); + if (deq_pkts != 1) { + printf("%d: error failed to deq\n", __LINE__); + rte_event_dev_dump(evdev, stdout); + return -1; + } + if (ev.mbuf->seqn != MAGIC_SEQN[1]) { + printf("%d: first packet out not highest priority\n", + __LINE__); + rte_event_dev_dump(evdev, stdout); + return -1; + } + rte_pktmbuf_free(ev.mbuf); + + deq_pkts = rte_event_dequeue_burst(evdev, t->port[0], &ev2, 1, 0); + if (deq_pkts != 1) { + printf("%d: error failed to deq\n", __LINE__); + rte_event_dev_dump(evdev, stdout); + return -1; + } + if (ev2.mbuf->seqn != MAGIC_SEQN[0]) { + printf("%d: second packet out not lower priority\n", + __LINE__); + rte_event_dev_dump(evdev, stdout); + return -1; + } + rte_pktmbuf_free(ev2.mbuf); + + cleanup(t); + return 0; +} + +static int +test_single_directed_packet(struct test *t) +{ + const int rx_enq = 0; + const int wrk_enq = 2; + int err; + + /* Create instance with 3 directed QIDs going to 3 ports */ + if (init(t, 3, 3) < 0 || + create_ports(t, 3) < 0 || + create_directed_qids(t, 3, t->port) < 0) + return -1; + + if (rte_event_dev_start(evdev) < 0) { + printf("%d: Error with start call\n", __LINE__); + return -1; + } + + /************** FORWARD ****************/ + struct rte_mbuf *arp = rte_gen_arp(0, t->mbuf_pool); + struct rte_event ev = { + .op = RTE_EVENT_OP_NEW, + .queue_id = wrk_enq, + .mbuf = arp, + }; + + if (!arp) { + printf("%d: gen of pkt failed\n", __LINE__); + return -1; + } + + const uint32_t MAGIC_SEQN = 4711; + arp->seqn = MAGIC_SEQN; + + /* generate pkt and enqueue */ + err = rte_event_enqueue_burst(evdev, rx_enq, &ev, 
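/*
 * Editor's note on test_single_directed_packet(): a single-link (directed)
 * queue takes the SW_SCHED_TYPE_DIRECT path in the scheduler,
 * sw_schedule_dir_to_cq() earlier in this diff, which burst-copies the IQ
 * straight into the one linked port's CQ with no flow pinning and no reorder
 * tracking. The scheduler service still has to run once, hence the
 * rte_service_run_iter_on_app_lcore() call just below, before the event is
 * visible to rte_event_dequeue_burst() on the destination port.
 */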
1); + if (err != 1) { + printf("%d: error failed to enqueue\n", __LINE__); + return -1; + } + + /* Run schedule() as dir packets may need to be re-ordered */ + rte_service_run_iter_on_app_lcore(t->service_id, 1); + + struct test_event_dev_stats stats; + err = test_event_dev_stats_get(evdev, &stats); + if (err) { + printf("%d: error failed to get stats\n", __LINE__); + return -1; + } + + if (stats.port_rx_pkts[rx_enq] != 1) { + printf("%d: error stats incorrect for directed port\n", + __LINE__); + return -1; + } + + uint32_t deq_pkts; + deq_pkts = rte_event_dequeue_burst(evdev, wrk_enq, &ev, 1, 0); + if (deq_pkts != 1) { + printf("%d: error failed to deq\n", __LINE__); + return -1; + } + + err = test_event_dev_stats_get(evdev, &stats); + if (stats.port_rx_pkts[wrk_enq] != 0 && + stats.port_rx_pkts[wrk_enq] != 1) { + printf("%d: error directed stats post-dequeue\n", __LINE__); + return -1; + } + + if (ev.mbuf->seqn != MAGIC_SEQN) { + printf("%d: error magic sequence number not dequeued\n", + __LINE__); + return -1; + } + + rte_pktmbuf_free(ev.mbuf); + cleanup(t); + return 0; +} + +static int +test_directed_forward_credits(struct test *t) +{ + uint32_t i; + int32_t err; + + if (init(t, 1, 1) < 0 || + create_ports(t, 1) < 0 || + create_directed_qids(t, 1, t->port) < 0) + return -1; + + if (rte_event_dev_start(evdev) < 0) { + printf("%d: Error with start call\n", __LINE__); + return -1; + } + + struct rte_event ev = { + .op = RTE_EVENT_OP_NEW, + .queue_id = 0, + }; + + for (i = 0; i < 1000; i++) { + err = rte_event_enqueue_burst(evdev, 0, &ev, 1); + if (err != 1) { + printf("%d: error failed to enqueue\n", __LINE__); + return -1; + } + rte_service_run_iter_on_app_lcore(t->service_id, 1); + + uint32_t deq_pkts; + deq_pkts = rte_event_dequeue_burst(evdev, 0, &ev, 1, 0); + if (deq_pkts != 1) { + printf("%d: error failed to deq\n", __LINE__); + return -1; + } + + /* re-write event to be a forward, and continue looping it */ + ev.op = RTE_EVENT_OP_FORWARD; + } + + cleanup(t); + return 0; +} + + +static int +test_priority_directed(struct test *t) +{ + if (init(t, 1, 1) < 0 || + create_ports(t, 1) < 0 || + create_directed_qids(t, 1, t->port) < 0) { + printf("%d: Error initializing device\n", __LINE__); + return -1; + } + + if (rte_event_dev_start(evdev) < 0) { + printf("%d: Error with start call\n", __LINE__); + return -1; + } + + return run_prio_packet_test(t); +} + +static int +test_priority_atomic(struct test *t) +{ + if (init(t, 1, 1) < 0 || + create_ports(t, 1) < 0 || + create_atomic_qids(t, 1) < 0) { + printf("%d: Error initializing device\n", __LINE__); + return -1; + } + + /* map the QID */ + if (rte_event_port_link(evdev, t->port[0], &t->qid[0], NULL, 1) != 1) { + printf("%d: error mapping qid to port\n", __LINE__); + return -1; + } + if (rte_event_dev_start(evdev) < 0) { + printf("%d: Error with start call\n", __LINE__); + return -1; + } + + return run_prio_packet_test(t); +} + +static int +test_priority_ordered(struct test *t) +{ + if (init(t, 1, 1) < 0 || + create_ports(t, 1) < 0 || + create_ordered_qids(t, 1) < 0) { + printf("%d: Error initializing device\n", __LINE__); + return -1; + } + + /* map the QID */ + if (rte_event_port_link(evdev, t->port[0], &t->qid[0], NULL, 1) != 1) { + printf("%d: error mapping qid to port\n", __LINE__); + return -1; + } + if (rte_event_dev_start(evdev) < 0) { + printf("%d: Error with start call\n", __LINE__); + return -1; + } + + return run_prio_packet_test(t); +} + +static int +test_priority_unordered(struct test *t) +{ + if (init(t, 1, 1) < 0 || + 
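/*
 * Editor's note on test_directed_forward_credits() above: in this PMD only
 * RTE_EVENT_OP_NEW draws a credit from the device inflight pool, while
 * RTE_EVENT_OP_FORWARD re-uses the slot of the event that was just dequeued
 * (see inflight_credits and credit_update_quanta in sw_evdev.h). The test
 * loops one event through enqueue and dequeue 1000 times, rewriting it to
 * OP_FORWARD on every pass, to check that forwarding does not leak credits.
 */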
create_ports(t, 1) < 0 || + create_unordered_qids(t, 1) < 0) { + printf("%d: Error initializing device\n", __LINE__); + return -1; + } + + /* map the QID */ + if (rte_event_port_link(evdev, t->port[0], &t->qid[0], NULL, 1) != 1) { + printf("%d: error mapping qid to port\n", __LINE__); + return -1; + } + if (rte_event_dev_start(evdev) < 0) { + printf("%d: Error with start call\n", __LINE__); + return -1; + } + + return run_prio_packet_test(t); +} + +static int +burst_packets(struct test *t) +{ + /************** CONFIG ****************/ + uint32_t i; + int err; + int ret; + + /* Create instance with 2 ports and 2 queues */ + if (init(t, 2, 2) < 0 || + create_ports(t, 2) < 0 || + create_atomic_qids(t, 2) < 0) { + printf("%d: Error initializing device\n", __LINE__); + return -1; + } + + /* CQ mapping to QID */ + ret = rte_event_port_link(evdev, t->port[0], &t->qid[0], NULL, 1); + if (ret != 1) { + printf("%d: error mapping lb qid0\n", __LINE__); + return -1; + } + ret = rte_event_port_link(evdev, t->port[1], &t->qid[1], NULL, 1); + if (ret != 1) { + printf("%d: error mapping lb qid1\n", __LINE__); + return -1; + } + + if (rte_event_dev_start(evdev) < 0) { + printf("%d: Error with start call\n", __LINE__); + return -1; + } + + /************** FORWARD ****************/ + const uint32_t rx_port = 0; + const uint32_t NUM_PKTS = 2; + + for (i = 0; i < NUM_PKTS; i++) { + struct rte_mbuf *arp = rte_gen_arp(0, t->mbuf_pool); + if (!arp) { + printf("%d: error generating pkt\n", __LINE__); + return -1; + } + + struct rte_event ev = { + .op = RTE_EVENT_OP_NEW, + .queue_id = i % 2, + .flow_id = i % 3, + .mbuf = arp, + }; + /* generate pkt and enqueue */ + err = rte_event_enqueue_burst(evdev, t->port[rx_port], &ev, 1); + if (err != 1) { + printf("%d: Failed to enqueue\n", __LINE__); + return -1; + } + } + rte_service_run_iter_on_app_lcore(t->service_id, 1); + + /* Check stats for all NUM_PKTS arrived to sched core */ + struct test_event_dev_stats stats; + + err = test_event_dev_stats_get(evdev, &stats); + if (err) { + printf("%d: failed to get stats\n", __LINE__); + return -1; + } + if (stats.rx_pkts != NUM_PKTS || stats.tx_pkts != NUM_PKTS) { + printf("%d: Sched core didn't receive all %d pkts\n", + __LINE__, NUM_PKTS); + rte_event_dev_dump(evdev, stdout); + return -1; + } + + uint32_t deq_pkts; + int p; + + deq_pkts = 0; + /******** DEQ QID 1 *******/ + do { + struct rte_event ev; + p = rte_event_dequeue_burst(evdev, t->port[0], &ev, 1, 0); + deq_pkts += p; + rte_pktmbuf_free(ev.mbuf); + } while (p); + + if (deq_pkts != NUM_PKTS/2) { + printf("%d: Half of NUM_PKTS didn't arrive at port 1\n", + __LINE__); + return -1; + } + + /******** DEQ QID 2 *******/ + deq_pkts = 0; + do { + struct rte_event ev; + p = rte_event_dequeue_burst(evdev, t->port[1], &ev, 1, 0); + deq_pkts += p; + rte_pktmbuf_free(ev.mbuf); + } while (p); + if (deq_pkts != NUM_PKTS/2) { + printf("%d: Half of NUM_PKTS didn't arrive at port 2\n", + __LINE__); + return -1; + } + + cleanup(t); + return 0; +} + +static int +abuse_inflights(struct test *t) +{ + const int rx_enq = 0; + const int wrk_enq = 2; + int err; + + /* Create instance with 4 ports */ + if (init(t, 1, 4) < 0 || + create_ports(t, 4) < 0 || + create_atomic_qids(t, 1) < 0) { + printf("%d: Error initializing device\n", __LINE__); + return -1; + } + + /* CQ mapping to QID */ + err = rte_event_port_link(evdev, t->port[wrk_enq], NULL, NULL, 0); + if (err != 1) { + printf("%d: error mapping lb qid\n", __LINE__); + cleanup(t); + return -1; + } + + if (rte_event_dev_start(evdev) < 0) { 
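/*
 * Editor's note: abuse_inflights() hands the scheduler the bare release_ev
 * ("Enqueue op only" below) with nothing actually in flight. A release is
 * only meaningful for a previously dequeued event, so the checks that follow
 * expect rx_pkts, tx_pkts and port_inflight to all remain zero, i.e. the
 * bogus release must be swallowed without disturbing the inflight or credit
 * accounting.
 */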
+ printf("%d: Error with start call\n", __LINE__); + return -1; + } + + /* Enqueue op only */ + err = rte_event_enqueue_burst(evdev, t->port[rx_enq], &release_ev, 1); + if (err != 1) { + printf("%d: Failed to enqueue\n", __LINE__); + return -1; + } + + /* schedule */ + rte_service_run_iter_on_app_lcore(t->service_id, 1); + + struct test_event_dev_stats stats; + + err = test_event_dev_stats_get(evdev, &stats); + if (err) { + printf("%d: failed to get stats\n", __LINE__); + return -1; + } + + if (stats.rx_pkts != 0 || + stats.tx_pkts != 0 || + stats.port_inflight[wrk_enq] != 0) { + printf("%d: Sched core didn't handle pkt as expected\n", + __LINE__); + return -1; + } + + cleanup(t); + return 0; +} + +static int +xstats_tests(struct test *t) +{ + const int wrk_enq = 2; + int err; + + /* Create instance with 4 ports */ + if (init(t, 1, 4) < 0 || + create_ports(t, 4) < 0 || + create_atomic_qids(t, 1) < 0) { + printf("%d: Error initializing device\n", __LINE__); + return -1; + } + + /* CQ mapping to QID */ + err = rte_event_port_link(evdev, t->port[wrk_enq], NULL, NULL, 0); + if (err != 1) { + printf("%d: error mapping lb qid\n", __LINE__); + cleanup(t); + return -1; + } + + if (rte_event_dev_start(evdev) < 0) { + printf("%d: Error with start call\n", __LINE__); + return -1; + } + + const uint32_t XSTATS_MAX = 1024; + + uint32_t i; + uint32_t ids[XSTATS_MAX]; + uint64_t values[XSTATS_MAX]; + struct rte_event_dev_xstats_name xstats_names[XSTATS_MAX]; + + for (i = 0; i < XSTATS_MAX; i++) + ids[i] = i; + + /* Device names / values */ + int ret = rte_event_dev_xstats_names_get(evdev, + RTE_EVENT_DEV_XSTATS_DEVICE, + 0, xstats_names, ids, XSTATS_MAX); + if (ret != 6) { + printf("%d: expected 6 stats, got return %d\n", __LINE__, ret); + return -1; + } + ret = rte_event_dev_xstats_get(evdev, + RTE_EVENT_DEV_XSTATS_DEVICE, + 0, ids, values, ret); + if (ret != 6) { + printf("%d: expected 6 stats, got return %d\n", __LINE__, ret); + return -1; + } + + /* Port names / values */ + ret = rte_event_dev_xstats_names_get(evdev, + RTE_EVENT_DEV_XSTATS_PORT, 0, + xstats_names, ids, XSTATS_MAX); + if (ret != 21) { + printf("%d: expected 21 stats, got return %d\n", __LINE__, ret); + return -1; + } + ret = rte_event_dev_xstats_get(evdev, + RTE_EVENT_DEV_XSTATS_PORT, 0, + ids, values, ret); + if (ret != 21) { + printf("%d: expected 21 stats, got return %d\n", __LINE__, ret); + return -1; + } + + /* Queue names / values */ + ret = rte_event_dev_xstats_names_get(evdev, + RTE_EVENT_DEV_XSTATS_QUEUE, + 0, xstats_names, ids, XSTATS_MAX); + if (ret != 16) { + printf("%d: expected 16 stats, got return %d\n", __LINE__, ret); + return -1; + } + + /* NEGATIVE TEST: with wrong queue passed, 0 stats should be returned */ + ret = rte_event_dev_xstats_get(evdev, + RTE_EVENT_DEV_XSTATS_QUEUE, + 1, ids, values, ret); + if (ret != -EINVAL) { + printf("%d: expected 0 stats, got return %d\n", __LINE__, ret); + return -1; + } + + ret = rte_event_dev_xstats_get(evdev, + RTE_EVENT_DEV_XSTATS_QUEUE, + 0, ids, values, ret); + if (ret != 16) { + printf("%d: expected 16 stats, got return %d\n", __LINE__, ret); + return -1; + } + + /* enqueue packets to check values */ + for (i = 0; i < 3; i++) { + struct rte_event ev; + struct rte_mbuf *arp = rte_gen_arp(0, t->mbuf_pool); + if (!arp) { + printf("%d: gen of pkt failed\n", __LINE__); + return -1; + } + ev.queue_id = t->qid[i]; + ev.op = RTE_EVENT_OP_NEW; + ev.mbuf = arp; + ev.flow_id = 7; + arp->seqn = i; + + int err = rte_event_enqueue_burst(evdev, t->port[0], &ev, 1); + if (err != 1) { + 
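/*
 * Editor's note: xstats_tests() exercises the generic eventdev xstats
 * contract: rte_event_dev_xstats_names_get() reports how many statistics
 * exist for a mode (device, port or queue) and id,
 * rte_event_dev_xstats_get() fills values for an explicit list of ids, and
 * an out-of-range queue id is rejected with -EINVAL (the negative test
 * above). The hard-coded counts (6 device, 21 port and 16 queue stats) are
 * specific to this PMD and will change whenever a statistic is added.
 */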
printf("%d: Failed to enqueue\n", __LINE__); + return -1; + } + } + + rte_service_run_iter_on_app_lcore(t->service_id, 1); + + /* Device names / values */ + int num_stats = rte_event_dev_xstats_names_get(evdev, + RTE_EVENT_DEV_XSTATS_DEVICE, 0, + xstats_names, ids, XSTATS_MAX); + if (num_stats < 0) + goto fail; + ret = rte_event_dev_xstats_get(evdev, + RTE_EVENT_DEV_XSTATS_DEVICE, + 0, ids, values, num_stats); + static const uint64_t expected[] = {3, 3, 0, 1, 0, 0}; + for (i = 0; (signed int)i < ret; i++) { + if (expected[i] != values[i]) { + printf( + "%d Error xstat %d (id %d) %s : %"PRIu64 + ", expect %"PRIu64"\n", + __LINE__, i, ids[i], xstats_names[i].name, + values[i], expected[i]); + goto fail; + } + } + + ret = rte_event_dev_xstats_reset(evdev, RTE_EVENT_DEV_XSTATS_DEVICE, + 0, NULL, 0); + + /* ensure reset statistics are zero-ed */ + static const uint64_t expected_zero[] = {0, 0, 0, 0, 0, 0}; + ret = rte_event_dev_xstats_get(evdev, + RTE_EVENT_DEV_XSTATS_DEVICE, + 0, ids, values, num_stats); + for (i = 0; (signed int)i < ret; i++) { + if (expected_zero[i] != values[i]) { + printf( + "%d Error, xstat %d (id %d) %s : %"PRIu64 + ", expect %"PRIu64"\n", + __LINE__, i, ids[i], xstats_names[i].name, + values[i], expected_zero[i]); + goto fail; + } + } + + /* port reset checks */ + num_stats = rte_event_dev_xstats_names_get(evdev, + RTE_EVENT_DEV_XSTATS_PORT, 0, + xstats_names, ids, XSTATS_MAX); + if (num_stats < 0) + goto fail; + ret = rte_event_dev_xstats_get(evdev, RTE_EVENT_DEV_XSTATS_PORT, + 0, ids, values, num_stats); + + static const uint64_t port_expected[] = { + 3 /* rx */, + 0 /* tx */, + 0 /* drop */, + 0 /* inflights */, + 0 /* avg pkt cycles */, + 29 /* credits */, + 0 /* rx ring used */, + 4096 /* rx ring free */, + 0 /* cq ring used */, + 32 /* cq ring free */, + 0 /* dequeue calls */, + /* 10 dequeue burst buckets */ + 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, + }; + if (ret != RTE_DIM(port_expected)) { + printf( + "%s %d: wrong number of port stats (%d), expected %zu\n", + __func__, __LINE__, ret, RTE_DIM(port_expected)); + } + + for (i = 0; (signed int)i < ret; i++) { + if (port_expected[i] != values[i]) { + printf( + "%s : %d: Error stat %s is %"PRIu64 + ", expected %"PRIu64"\n", + __func__, __LINE__, xstats_names[i].name, + values[i], port_expected[i]); + goto fail; + } + } + + ret = rte_event_dev_xstats_reset(evdev, RTE_EVENT_DEV_XSTATS_PORT, + 0, NULL, 0); + + /* ensure reset statistics are zero-ed */ + static const uint64_t port_expected_zero[] = { + 0 /* rx */, + 0 /* tx */, + 0 /* drop */, + 0 /* inflights */, + 0 /* avg pkt cycles */, + 29 /* credits */, + 0 /* rx ring used */, + 4096 /* rx ring free */, + 0 /* cq ring used */, + 32 /* cq ring free */, + 0 /* dequeue calls */, + /* 10 dequeue burst buckets */ + 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, + }; + ret = rte_event_dev_xstats_get(evdev, + RTE_EVENT_DEV_XSTATS_PORT, + 0, ids, values, num_stats); + for (i = 0; (signed int)i < ret; i++) { + if (port_expected_zero[i] != values[i]) { + printf( + "%d, Error, xstat %d (id %d) %s : %"PRIu64 + ", expect %"PRIu64"\n", + __LINE__, i, ids[i], xstats_names[i].name, + values[i], port_expected_zero[i]); + goto fail; + } + } + + /* QUEUE STATS TESTS */ + num_stats = rte_event_dev_xstats_names_get(evdev, + RTE_EVENT_DEV_XSTATS_QUEUE, 0, + xstats_names, ids, XSTATS_MAX); + ret = rte_event_dev_xstats_get(evdev, RTE_EVENT_DEV_XSTATS_QUEUE, + 0, ids, values, num_stats); + if (ret < 0) { + printf("xstats get returned %d\n", ret); + goto fail; + } + if ((unsigned int)ret > XSTATS_MAX) + 
printf("%s %d: more xstats available than space\n", + __func__, __LINE__); + + static const uint64_t queue_expected[] = { + 3 /* rx */, + 3 /* tx */, + 0 /* drop */, + 3 /* inflights */, + 0, 0, 0, 0, /* iq 0, 1, 2, 3 used */ + /* QID-to-Port: pinned_flows, packets */ + 0, 0, + 0, 0, + 1, 3, + 0, 0, + }; + for (i = 0; (signed int)i < ret; i++) { + if (queue_expected[i] != values[i]) { + printf( + "%d, Error, xstat %d (id %d) %s : %"PRIu64 + ", expect %"PRIu64"\n", + __LINE__, i, ids[i], xstats_names[i].name, + values[i], queue_expected[i]); + goto fail; + } + } + + /* Reset the queue stats here */ + ret = rte_event_dev_xstats_reset(evdev, + RTE_EVENT_DEV_XSTATS_QUEUE, 0, + NULL, + 0); + + /* Verify that the resetable stats are reset, and others are not */ + static const uint64_t queue_expected_zero[] = { + 0 /* rx */, + 0 /* tx */, + 0 /* drop */, + 3 /* inflight */, + 0, 0, 0, 0, /* 4 iq used */ + /* QID-to-Port: pinned_flows, packets */ + 0, 0, + 0, 0, + 1, 0, + 0, 0, + }; + + ret = rte_event_dev_xstats_get(evdev, RTE_EVENT_DEV_XSTATS_QUEUE, 0, + ids, values, num_stats); + int fails = 0; + for (i = 0; (signed int)i < ret; i++) { + if (queue_expected_zero[i] != values[i]) { + printf( + "%d, Error, xstat %d (id %d) %s : %"PRIu64 + ", expect %"PRIu64"\n", + __LINE__, i, ids[i], xstats_names[i].name, + values[i], queue_expected_zero[i]); + fails++; + } + } + if (fails) { + printf("%d : %d of values were not as expected above\n", + __LINE__, fails); + goto fail; + } + + cleanup(t); + return 0; + +fail: + rte_event_dev_dump(0, stdout); + cleanup(t); + return -1; +} + + +static int +xstats_id_abuse_tests(struct test *t) +{ + int err; + const uint32_t XSTATS_MAX = 1024; + const uint32_t link_port = 2; + + uint32_t ids[XSTATS_MAX]; + struct rte_event_dev_xstats_name xstats_names[XSTATS_MAX]; + + /* Create instance with 4 ports */ + if (init(t, 1, 4) < 0 || + create_ports(t, 4) < 0 || + create_atomic_qids(t, 1) < 0) { + printf("%d: Error initializing device\n", __LINE__); + goto fail; + } + + err = rte_event_port_link(evdev, t->port[link_port], NULL, NULL, 0); + if (err != 1) { + printf("%d: error mapping lb qid\n", __LINE__); + goto fail; + } + + if (rte_event_dev_start(evdev) < 0) { + printf("%d: Error with start call\n", __LINE__); + goto fail; + } + + /* no test for device, as it ignores the port/q number */ + int num_stats = rte_event_dev_xstats_names_get(evdev, + RTE_EVENT_DEV_XSTATS_PORT, + UINT8_MAX-1, xstats_names, ids, + XSTATS_MAX); + if (num_stats != 0) { + printf("%d: expected %d stats, got return %d\n", __LINE__, + 0, num_stats); + goto fail; + } + + num_stats = rte_event_dev_xstats_names_get(evdev, + RTE_EVENT_DEV_XSTATS_QUEUE, + UINT8_MAX-1, xstats_names, ids, + XSTATS_MAX); + if (num_stats != 0) { + printf("%d: expected %d stats, got return %d\n", __LINE__, + 0, num_stats); + goto fail; + } + + cleanup(t); + return 0; +fail: + cleanup(t); + return -1; +} + +static int +port_reconfig_credits(struct test *t) +{ + if (init(t, 1, 1) < 0) { + printf("%d: Error initializing device\n", __LINE__); + return -1; + } + + uint32_t i; + const uint32_t NUM_ITERS = 32; + for (i = 0; i < NUM_ITERS; i++) { + const struct rte_event_queue_conf conf = { + .schedule_type = RTE_SCHED_TYPE_ATOMIC, + .priority = RTE_EVENT_DEV_PRIORITY_NORMAL, + .nb_atomic_flows = 1024, + .nb_atomic_order_sequences = 1024, + }; + if (rte_event_queue_setup(evdev, 0, &conf) < 0) { + printf("%d: error creating qid\n", __LINE__); + return -1; + } + t->qid[0] = 0; + + static const struct rte_event_port_conf port_conf = { + 
.new_event_threshold = 128, + .dequeue_depth = 32, + .enqueue_depth = 64, + .disable_implicit_release = 0, + }; + if (rte_event_port_setup(evdev, 0, &port_conf) < 0) { + printf("%d Error setting up port\n", __LINE__); + return -1; + } + + int links = rte_event_port_link(evdev, 0, NULL, NULL, 0); + if (links != 1) { + printf("%d: error mapping lb qid\n", __LINE__); + goto fail; + } + + if (rte_event_dev_start(evdev) < 0) { + printf("%d: Error with start call\n", __LINE__); + goto fail; + } + + const uint32_t NPKTS = 1; + uint32_t j; + for (j = 0; j < NPKTS; j++) { + struct rte_event ev; + struct rte_mbuf *arp = rte_gen_arp(0, t->mbuf_pool); + if (!arp) { + printf("%d: gen of pkt failed\n", __LINE__); + goto fail; + } + ev.queue_id = t->qid[0]; + ev.op = RTE_EVENT_OP_NEW; + ev.mbuf = arp; + int err = rte_event_enqueue_burst(evdev, 0, &ev, 1); + if (err != 1) { + printf("%d: Failed to enqueue\n", __LINE__); + rte_event_dev_dump(0, stdout); + goto fail; + } + } + + rte_service_run_iter_on_app_lcore(t->service_id, 1); + + struct rte_event ev[NPKTS]; + int deq = rte_event_dequeue_burst(evdev, t->port[0], ev, + NPKTS, 0); + if (deq != 1) + printf("%d error; no packet dequeued\n", __LINE__); + + /* let cleanup below stop the device on last iter */ + if (i != NUM_ITERS-1) + rte_event_dev_stop(evdev); + } + + cleanup(t); + return 0; +fail: + cleanup(t); + return -1; +} + +static int +port_single_lb_reconfig(struct test *t) +{ + if (init(t, 2, 2) < 0) { + printf("%d: Error initializing device\n", __LINE__); + goto fail; + } + + static const struct rte_event_queue_conf conf_lb_atomic = { + .priority = RTE_EVENT_DEV_PRIORITY_NORMAL, + .schedule_type = RTE_SCHED_TYPE_ATOMIC, + .nb_atomic_flows = 1024, + .nb_atomic_order_sequences = 1024, + }; + if (rte_event_queue_setup(evdev, 0, &conf_lb_atomic) < 0) { + printf("%d: error creating qid\n", __LINE__); + goto fail; + } + + static const struct rte_event_queue_conf conf_single_link = { + .priority = RTE_EVENT_DEV_PRIORITY_NORMAL, + .event_queue_cfg = RTE_EVENT_QUEUE_CFG_SINGLE_LINK, + }; + if (rte_event_queue_setup(evdev, 1, &conf_single_link) < 0) { + printf("%d: error creating qid\n", __LINE__); + goto fail; + } + + struct rte_event_port_conf port_conf = { + .new_event_threshold = 128, + .dequeue_depth = 32, + .enqueue_depth = 64, + .disable_implicit_release = 0, + }; + if (rte_event_port_setup(evdev, 0, &port_conf) < 0) { + printf("%d Error setting up port\n", __LINE__); + goto fail; + } + if (rte_event_port_setup(evdev, 1, &port_conf) < 0) { + printf("%d Error setting up port\n", __LINE__); + goto fail; + } + + /* link port to lb queue */ + uint8_t queue_id = 0; + if (rte_event_port_link(evdev, 0, &queue_id, NULL, 1) != 1) { + printf("%d: error creating link for qid\n", __LINE__); + goto fail; + } + + int ret = rte_event_port_unlink(evdev, 0, &queue_id, 1); + if (ret != 1) { + printf("%d: Error unlinking lb port\n", __LINE__); + goto fail; + } + + queue_id = 1; + if (rte_event_port_link(evdev, 0, &queue_id, NULL, 1) != 1) { + printf("%d: error creating link for qid\n", __LINE__); + goto fail; + } + + queue_id = 0; + int err = rte_event_port_link(evdev, 1, &queue_id, NULL, 1); + if (err != 1) { + printf("%d: error mapping lb qid\n", __LINE__); + goto fail; + } + + if (rte_event_dev_start(evdev) < 0) { + printf("%d: Error with start call\n", __LINE__); + goto fail; + } + + cleanup(t); + return 0; +fail: + cleanup(t); + return -1; +} + +static int +xstats_brute_force(struct test *t) +{ + uint32_t i; + const uint32_t XSTATS_MAX = 1024; + uint32_t 
ids[XSTATS_MAX]; + uint64_t values[XSTATS_MAX]; + struct rte_event_dev_xstats_name xstats_names[XSTATS_MAX]; + + + /* Create instance with 4 ports */ + if (init(t, 1, 4) < 0 || + create_ports(t, 4) < 0 || + create_atomic_qids(t, 1) < 0) { + printf("%d: Error initializing device\n", __LINE__); + return -1; + } + + int err = rte_event_port_link(evdev, t->port[0], NULL, NULL, 0); + if (err != 1) { + printf("%d: error mapping lb qid\n", __LINE__); + goto fail; + } + + if (rte_event_dev_start(evdev) < 0) { + printf("%d: Error with start call\n", __LINE__); + goto fail; + } + + for (i = 0; i < XSTATS_MAX; i++) + ids[i] = i; + + for (i = 0; i < 3; i++) { + uint32_t mode = RTE_EVENT_DEV_XSTATS_DEVICE + i; + uint32_t j; + for (j = 0; j < UINT8_MAX; j++) { + rte_event_dev_xstats_names_get(evdev, mode, + j, xstats_names, ids, XSTATS_MAX); + + rte_event_dev_xstats_get(evdev, mode, j, ids, + values, XSTATS_MAX); + } + } + + cleanup(t); + return 0; +fail: + cleanup(t); + return -1; +} + +static int +xstats_id_reset_tests(struct test *t) +{ + const int wrk_enq = 2; + int err; + + /* Create instance with 4 ports */ + if (init(t, 1, 4) < 0 || + create_ports(t, 4) < 0 || + create_atomic_qids(t, 1) < 0) { + printf("%d: Error initializing device\n", __LINE__); + return -1; + } + + /* CQ mapping to QID */ + err = rte_event_port_link(evdev, t->port[wrk_enq], NULL, NULL, 0); + if (err != 1) { + printf("%d: error mapping lb qid\n", __LINE__); + goto fail; + } + + if (rte_event_dev_start(evdev) < 0) { + printf("%d: Error with start call\n", __LINE__); + goto fail; + } + +#define XSTATS_MAX 1024 + int ret; + uint32_t i; + uint32_t ids[XSTATS_MAX]; + uint64_t values[XSTATS_MAX]; + struct rte_event_dev_xstats_name xstats_names[XSTATS_MAX]; + + for (i = 0; i < XSTATS_MAX; i++) + ids[i] = i; + +#define NUM_DEV_STATS 6 + /* Device names / values */ + int num_stats = rte_event_dev_xstats_names_get(evdev, + RTE_EVENT_DEV_XSTATS_DEVICE, + 0, xstats_names, ids, XSTATS_MAX); + if (num_stats != NUM_DEV_STATS) { + printf("%d: expected %d stats, got return %d\n", __LINE__, + NUM_DEV_STATS, num_stats); + goto fail; + } + ret = rte_event_dev_xstats_get(evdev, + RTE_EVENT_DEV_XSTATS_DEVICE, + 0, ids, values, num_stats); + if (ret != NUM_DEV_STATS) { + printf("%d: expected %d stats, got return %d\n", __LINE__, + NUM_DEV_STATS, ret); + goto fail; + } + +#define NPKTS 7 + for (i = 0; i < NPKTS; i++) { + struct rte_event ev; + struct rte_mbuf *arp = rte_gen_arp(0, t->mbuf_pool); + if (!arp) { + printf("%d: gen of pkt failed\n", __LINE__); + goto fail; + } + ev.queue_id = t->qid[i]; + ev.op = RTE_EVENT_OP_NEW; + ev.mbuf = arp; + arp->seqn = i; + + int err = rte_event_enqueue_burst(evdev, t->port[0], &ev, 1); + if (err != 1) { + printf("%d: Failed to enqueue\n", __LINE__); + goto fail; + } + } + + rte_service_run_iter_on_app_lcore(t->service_id, 1); + + static const char * const dev_names[] = { + "dev_rx", "dev_tx", "dev_drop", "dev_sched_calls", + "dev_sched_no_iq_enq", "dev_sched_no_cq_enq", + }; + uint64_t dev_expected[] = {NPKTS, NPKTS, 0, 1, 0, 0}; + for (i = 0; (int)i < ret; i++) { + unsigned int id; + uint64_t val = rte_event_dev_xstats_by_name_get(evdev, + dev_names[i], + &id); + if (id != i) { + printf("%d: %s id incorrect, expected %d got %d\n", + __LINE__, dev_names[i], i, id); + goto fail; + } + if (val != dev_expected[i]) { + printf("%d: %s value incorrect, expected %" + PRIu64" got %d\n", __LINE__, dev_names[i], + dev_expected[i], id); + goto fail; + } + /* reset to zero */ + int reset_ret = 
rte_event_dev_xstats_reset(evdev, + RTE_EVENT_DEV_XSTATS_DEVICE, 0, + &id, + 1); + if (reset_ret) { + printf("%d: failed to reset successfully\n", __LINE__); + goto fail; + } + dev_expected[i] = 0; + /* check value again */ + val = rte_event_dev_xstats_by_name_get(evdev, dev_names[i], 0); + if (val != dev_expected[i]) { + printf("%d: %s value incorrect, expected %"PRIu64 + " got %"PRIu64"\n", __LINE__, dev_names[i], + dev_expected[i], val); + goto fail; + } + }; + +/* 48 is stat offset from start of the devices whole xstats. + * This WILL break every time we add a statistic to a port + * or the device, but there is no other way to test + */ +#define PORT_OFF 48 +/* num stats for the tested port. CQ size adds more stats to a port */ +#define NUM_PORT_STATS 21 +/* the port to test. */ +#define PORT 2 + num_stats = rte_event_dev_xstats_names_get(evdev, + RTE_EVENT_DEV_XSTATS_PORT, PORT, + xstats_names, ids, XSTATS_MAX); + if (num_stats != NUM_PORT_STATS) { + printf("%d: expected %d stats, got return %d\n", + __LINE__, NUM_PORT_STATS, num_stats); + goto fail; + } + ret = rte_event_dev_xstats_get(evdev, RTE_EVENT_DEV_XSTATS_PORT, PORT, + ids, values, num_stats); + + if (ret != NUM_PORT_STATS) { + printf("%d: expected %d stats, got return %d\n", + __LINE__, NUM_PORT_STATS, ret); + goto fail; + } + static const char * const port_names[] = { + "port_2_rx", + "port_2_tx", + "port_2_drop", + "port_2_inflight", + "port_2_avg_pkt_cycles", + "port_2_credits", + "port_2_rx_ring_used", + "port_2_rx_ring_free", + "port_2_cq_ring_used", + "port_2_cq_ring_free", + "port_2_dequeue_calls", + "port_2_dequeues_returning_0", + "port_2_dequeues_returning_1-4", + "port_2_dequeues_returning_5-8", + "port_2_dequeues_returning_9-12", + "port_2_dequeues_returning_13-16", + "port_2_dequeues_returning_17-20", + "port_2_dequeues_returning_21-24", + "port_2_dequeues_returning_25-28", + "port_2_dequeues_returning_29-32", + "port_2_dequeues_returning_33-36", + }; + uint64_t port_expected[] = { + 0, /* rx */ + NPKTS, /* tx */ + 0, /* drop */ + NPKTS, /* inflight */ + 0, /* avg pkt cycles */ + 0, /* credits */ + 0, /* rx ring used */ + 4096, /* rx ring free */ + NPKTS, /* cq ring used */ + 25, /* cq ring free */ + 0, /* dequeue zero calls */ + 0, 0, 0, 0, 0, /* 10 dequeue buckets */ + 0, 0, 0, 0, 0, + }; + uint64_t port_expected_zero[] = { + 0, /* rx */ + 0, /* tx */ + 0, /* drop */ + NPKTS, /* inflight */ + 0, /* avg pkt cycles */ + 0, /* credits */ + 0, /* rx ring used */ + 4096, /* rx ring free */ + NPKTS, /* cq ring used */ + 25, /* cq ring free */ + 0, /* dequeue zero calls */ + 0, 0, 0, 0, 0, /* 10 dequeue buckets */ + 0, 0, 0, 0, 0, + }; + if (RTE_DIM(port_expected) != NUM_PORT_STATS || + RTE_DIM(port_names) != NUM_PORT_STATS) { + printf("%d: port array of wrong size\n", __LINE__); + goto fail; + } + + int failed = 0; + for (i = 0; (int)i < ret; i++) { + unsigned int id; + uint64_t val = rte_event_dev_xstats_by_name_get(evdev, + port_names[i], + &id); + if (id != i + PORT_OFF) { + printf("%d: %s id incorrect, expected %d got %d\n", + __LINE__, port_names[i], i+PORT_OFF, + id); + failed = 1; + } + if (val != port_expected[i]) { + printf("%d: %s value incorrect, expected %"PRIu64 + " got %d\n", __LINE__, port_names[i], + port_expected[i], id); + failed = 1; + } + /* reset to zero */ + int reset_ret = rte_event_dev_xstats_reset(evdev, + RTE_EVENT_DEV_XSTATS_PORT, PORT, + &id, + 1); + if (reset_ret) { + printf("%d: failed to reset successfully\n", __LINE__); + failed = 1; + } + /* check value again */ + val = 
rte_event_dev_xstats_by_name_get(evdev, port_names[i], 0); + if (val != port_expected_zero[i]) { + printf("%d: %s value incorrect, expected %"PRIu64 + " got %"PRIu64"\n", __LINE__, port_names[i], + port_expected_zero[i], val); + failed = 1; + } + }; + if (failed) + goto fail; + +/* num queue stats */ +#define NUM_Q_STATS 16 +/* queue offset from start of the devices whole xstats. + * This will break every time we add a statistic to a device/port/queue + */ +#define QUEUE_OFF 90 + const uint32_t queue = 0; + num_stats = rte_event_dev_xstats_names_get(evdev, + RTE_EVENT_DEV_XSTATS_QUEUE, queue, + xstats_names, ids, XSTATS_MAX); + if (num_stats != NUM_Q_STATS) { + printf("%d: expected %d stats, got return %d\n", + __LINE__, NUM_Q_STATS, num_stats); + goto fail; + } + ret = rte_event_dev_xstats_get(evdev, RTE_EVENT_DEV_XSTATS_QUEUE, + queue, ids, values, num_stats); + if (ret != NUM_Q_STATS) { + printf("%d: expected 21 stats, got return %d\n", __LINE__, ret); + goto fail; + } + static const char * const queue_names[] = { + "qid_0_rx", + "qid_0_tx", + "qid_0_drop", + "qid_0_inflight", + "qid_0_iq_0_used", + "qid_0_iq_1_used", + "qid_0_iq_2_used", + "qid_0_iq_3_used", + "qid_0_port_0_pinned_flows", + "qid_0_port_0_packets", + "qid_0_port_1_pinned_flows", + "qid_0_port_1_packets", + "qid_0_port_2_pinned_flows", + "qid_0_port_2_packets", + "qid_0_port_3_pinned_flows", + "qid_0_port_3_packets", + }; + uint64_t queue_expected[] = { + 7, /* rx */ + 7, /* tx */ + 0, /* drop */ + 7, /* inflight */ + 0, /* iq 0 used */ + 0, /* iq 1 used */ + 0, /* iq 2 used */ + 0, /* iq 3 used */ + /* QID-to-Port: pinned_flows, packets */ + 0, 0, + 0, 0, + 1, 7, + 0, 0, + }; + uint64_t queue_expected_zero[] = { + 0, /* rx */ + 0, /* tx */ + 0, /* drop */ + 7, /* inflight */ + 0, /* iq 0 used */ + 0, /* iq 1 used */ + 0, /* iq 2 used */ + 0, /* iq 3 used */ + /* QID-to-Port: pinned_flows, packets */ + 0, 0, + 0, 0, + 1, 0, + 0, 0, + }; + if (RTE_DIM(queue_expected) != NUM_Q_STATS || + RTE_DIM(queue_expected_zero) != NUM_Q_STATS || + RTE_DIM(queue_names) != NUM_Q_STATS) { + printf("%d : queue array of wrong size\n", __LINE__); + goto fail; + } + + failed = 0; + for (i = 0; (int)i < ret; i++) { + unsigned int id; + uint64_t val = rte_event_dev_xstats_by_name_get(evdev, + queue_names[i], + &id); + if (id != i + QUEUE_OFF) { + printf("%d: %s id incorrect, expected %d got %d\n", + __LINE__, queue_names[i], i+QUEUE_OFF, + id); + failed = 1; + } + if (val != queue_expected[i]) { + printf("%d: %d: %s value , expected %"PRIu64 + " got %"PRIu64"\n", i, __LINE__, + queue_names[i], queue_expected[i], val); + failed = 1; + } + /* reset to zero */ + int reset_ret = rte_event_dev_xstats_reset(evdev, + RTE_EVENT_DEV_XSTATS_QUEUE, + queue, &id, 1); + if (reset_ret) { + printf("%d: failed to reset successfully\n", __LINE__); + failed = 1; + } + /* check value again */ + val = rte_event_dev_xstats_by_name_get(evdev, queue_names[i], + 0); + if (val != queue_expected_zero[i]) { + printf("%d: %s value incorrect, expected %"PRIu64 + " got %"PRIu64"\n", __LINE__, queue_names[i], + queue_expected_zero[i], val); + failed = 1; + } + }; + + if (failed) + goto fail; + + cleanup(t); + return 0; +fail: + cleanup(t); + return -1; +} + +static int +ordered_reconfigure(struct test *t) +{ + if (init(t, 1, 1) < 0 || + create_ports(t, 1) < 0) { + printf("%d: Error initializing device\n", __LINE__); + return -1; + } + + const struct rte_event_queue_conf conf = { + .schedule_type = RTE_SCHED_TYPE_ORDERED, + .priority = RTE_EVENT_DEV_PRIORITY_NORMAL, + 
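		/*
		 * (Note: for an ORDERED queue nb_atomic_order_sequences bounds
		 * how many events may be outstanding in the reorder window,
		 * while nb_atomic_flows is only meaningful for ATOMIC queues;
		 * the test fills in both.)
		 */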
.nb_atomic_flows = 1024, + .nb_atomic_order_sequences = 1024, + }; + + if (rte_event_queue_setup(evdev, 0, &conf) < 0) { + printf("%d: error creating qid\n", __LINE__); + goto failed; + } + + if (rte_event_queue_setup(evdev, 0, &conf) < 0) { + printf("%d: error creating qid, for 2nd time\n", __LINE__); + goto failed; + } + + rte_event_port_link(evdev, t->port[0], NULL, NULL, 0); + if (rte_event_dev_start(evdev) < 0) { + printf("%d: Error with start call\n", __LINE__); + return -1; + } + + cleanup(t); + return 0; +failed: + cleanup(t); + return -1; +} + +static int +qid_priorities(struct test *t) +{ + /* Test works by having a CQ with enough empty space for all packets, + * and enqueueing 3 packets to 3 QIDs. They must return based on the + * priority of the QID, not the ingress order, to pass the test + */ + unsigned int i; + /* Create instance with 1 ports, and 3 qids */ + if (init(t, 3, 1) < 0 || + create_ports(t, 1) < 0) { + printf("%d: Error initializing device\n", __LINE__); + return -1; + } + + for (i = 0; i < 3; i++) { + /* Create QID */ + const struct rte_event_queue_conf conf = { + .schedule_type = RTE_SCHED_TYPE_ATOMIC, + /* increase priority (0 == highest), as we go */ + .priority = RTE_EVENT_DEV_PRIORITY_NORMAL - i, + .nb_atomic_flows = 1024, + .nb_atomic_order_sequences = 1024, + }; + + if (rte_event_queue_setup(evdev, i, &conf) < 0) { + printf("%d: error creating qid %d\n", __LINE__, i); + return -1; + } + t->qid[i] = i; + } + t->nb_qids = i; + /* map all QIDs to port */ + rte_event_port_link(evdev, t->port[0], NULL, NULL, 0); + + if (rte_event_dev_start(evdev) < 0) { + printf("%d: Error with start call\n", __LINE__); + return -1; + } + + /* enqueue 3 packets, setting seqn and QID to check priority */ + for (i = 0; i < 3; i++) { + struct rte_event ev; + struct rte_mbuf *arp = rte_gen_arp(0, t->mbuf_pool); + if (!arp) { + printf("%d: gen of pkt failed\n", __LINE__); + return -1; + } + ev.queue_id = t->qid[i]; + ev.op = RTE_EVENT_OP_NEW; + ev.mbuf = arp; + arp->seqn = i; + + int err = rte_event_enqueue_burst(evdev, t->port[0], &ev, 1); + if (err != 1) { + printf("%d: Failed to enqueue\n", __LINE__); + return -1; + } + } + + rte_service_run_iter_on_app_lcore(t->service_id, 1); + + /* dequeue packets, verify priority was upheld */ + struct rte_event ev[32]; + uint32_t deq_pkts = + rte_event_dequeue_burst(evdev, t->port[0], ev, 32, 0); + if (deq_pkts != 3) { + printf("%d: failed to deq packets\n", __LINE__); + rte_event_dev_dump(evdev, stdout); + return -1; + } + for (i = 0; i < 3; i++) { + if (ev[i].mbuf->seqn != 2-i) { + printf( + "%d: qid priority test: seqn %d incorrectly prioritized\n", + __LINE__, i); + } + } + + cleanup(t); + return 0; +} + +static int +unlink_in_progress(struct test *t) +{ + /* Test unlinking API, in particular that when an unlink request has + * not yet been seen by the scheduler thread, that the + * unlink_in_progress() function returns the number of unlinks. 
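	 * A rough usage sketch of the API exercised here (with the sw PMD the
	 * scheduler service must run before the unlinks are acknowledged;
	 * service_id as obtained from rte_event_dev_service_id_get()):
	 *
	 *   rte_event_port_unlink(evdev, port, NULL, 0);
	 *   while (rte_event_port_unlinks_in_progress(evdev, port) > 0)
	 *           rte_service_run_iter_on_app_lcore(service_id, 1);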
+ */ + unsigned int i; + /* Create instance with 1 ports, and 3 qids */ + if (init(t, 3, 1) < 0 || + create_ports(t, 1) < 0) { + printf("%d: Error initializing device\n", __LINE__); + return -1; + } + + for (i = 0; i < 3; i++) { + /* Create QID */ + const struct rte_event_queue_conf conf = { + .schedule_type = RTE_SCHED_TYPE_ATOMIC, + /* increase priority (0 == highest), as we go */ + .priority = RTE_EVENT_DEV_PRIORITY_NORMAL - i, + .nb_atomic_flows = 1024, + .nb_atomic_order_sequences = 1024, + }; + + if (rte_event_queue_setup(evdev, i, &conf) < 0) { + printf("%d: error creating qid %d\n", __LINE__, i); + return -1; + } + t->qid[i] = i; + } + t->nb_qids = i; + /* map all QIDs to port */ + rte_event_port_link(evdev, t->port[0], NULL, NULL, 0); + + if (rte_event_dev_start(evdev) < 0) { + printf("%d: Error with start call\n", __LINE__); + return -1; + } + + /* unlink all ports to have outstanding unlink requests */ + int ret = rte_event_port_unlink(evdev, t->port[0], NULL, 0); + if (ret < 0) { + printf("%d: Failed to unlink queues\n", __LINE__); + return -1; + } + + /* get active unlinks here, expect 3 */ + int unlinks_in_progress = + rte_event_port_unlinks_in_progress(evdev, t->port[0]); + if (unlinks_in_progress != 3) { + printf("%d: Expected num unlinks in progress == 3, got %d\n", + __LINE__, unlinks_in_progress); + return -1; + } + + /* run scheduler service on this thread to ack the unlinks */ + rte_service_run_iter_on_app_lcore(t->service_id, 1); + + /* active unlinks expected as 0 as scheduler thread has acked */ + unlinks_in_progress = + rte_event_port_unlinks_in_progress(evdev, t->port[0]); + if (unlinks_in_progress != 0) { + printf("%d: Expected num unlinks in progress == 0, got %d\n", + __LINE__, unlinks_in_progress); + } + + cleanup(t); + return 0; +} + +static int +load_balancing(struct test *t) +{ + const int rx_enq = 0; + int err; + uint32_t i; + + if (init(t, 1, 4) < 0 || + create_ports(t, 4) < 0 || + create_atomic_qids(t, 1) < 0) { + printf("%d: Error initializing device\n", __LINE__); + return -1; + } + + for (i = 0; i < 3; i++) { + /* map port 1 - 3 inclusive */ + if (rte_event_port_link(evdev, t->port[i+1], &t->qid[0], + NULL, 1) != 1) { + printf("%d: error mapping qid to port %d\n", + __LINE__, i); + return -1; + } + } + + if (rte_event_dev_start(evdev) < 0) { + printf("%d: Error with start call\n", __LINE__); + return -1; + } + + /************** FORWARD ****************/ + /* + * Create a set of flows that test the load-balancing operation of the + * implementation. 
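	 * (Each atomic flow stays pinned to the CQ that first receives it while
	 * it has events inflight; in the flow list below flow 0 appears four
	 * times, flow 1 twice and flow 2 three times, which is what the
	 * port_inflight checks of 4, 2 and 3 at the end of the test rely on.)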
Fill CQ 0 and 1 with flows 0 and 1, and test + * with a new flow, which should be sent to the 3rd mapped CQ + */ + static uint32_t flows[] = {0, 1, 1, 0, 0, 2, 2, 0, 2}; + + for (i = 0; i < RTE_DIM(flows); i++) { + struct rte_mbuf *arp = rte_gen_arp(0, t->mbuf_pool); + if (!arp) { + printf("%d: gen of pkt failed\n", __LINE__); + return -1; + } + + struct rte_event ev = { + .op = RTE_EVENT_OP_NEW, + .queue_id = t->qid[0], + .flow_id = flows[i], + .mbuf = arp, + }; + /* generate pkt and enqueue */ + err = rte_event_enqueue_burst(evdev, t->port[rx_enq], &ev, 1); + if (err != 1) { + printf("%d: Failed to enqueue\n", __LINE__); + return -1; + } + } + + rte_service_run_iter_on_app_lcore(t->service_id, 1); + + struct test_event_dev_stats stats; + err = test_event_dev_stats_get(evdev, &stats); + if (err) { + printf("%d: failed to get stats\n", __LINE__); + return -1; + } + + if (stats.port_inflight[1] != 4) { + printf("%d:%s: port 1 inflight not correct\n", __LINE__, + __func__); + return -1; + } + if (stats.port_inflight[2] != 2) { + printf("%d:%s: port 2 inflight not correct\n", __LINE__, + __func__); + return -1; + } + if (stats.port_inflight[3] != 3) { + printf("%d:%s: port 3 inflight not correct\n", __LINE__, + __func__); + return -1; + } + + cleanup(t); + return 0; +} + +static int +load_balancing_history(struct test *t) +{ + struct test_event_dev_stats stats = {0}; + const int rx_enq = 0; + int err; + uint32_t i; + + /* Create instance with 1 atomic QID going to 3 ports + 1 prod port */ + if (init(t, 1, 4) < 0 || + create_ports(t, 4) < 0 || + create_atomic_qids(t, 1) < 0) + return -1; + + /* CQ mapping to QID */ + if (rte_event_port_link(evdev, t->port[1], &t->qid[0], NULL, 1) != 1) { + printf("%d: error mapping port 1 qid\n", __LINE__); + return -1; + } + if (rte_event_port_link(evdev, t->port[2], &t->qid[0], NULL, 1) != 1) { + printf("%d: error mapping port 2 qid\n", __LINE__); + return -1; + } + if (rte_event_port_link(evdev, t->port[3], &t->qid[0], NULL, 1) != 1) { + printf("%d: error mapping port 3 qid\n", __LINE__); + return -1; + } + if (rte_event_dev_start(evdev) < 0) { + printf("%d: Error with start call\n", __LINE__); + return -1; + } + + /* + * Create a set of flows that test the load-balancing operation of the + * implementation. Fill CQ 0, 1 and 2 with flows 0, 1 and 2, drop + * the packet from CQ 0, send in a new set of flows. Ensure that: + * 1. The new flow 3 gets into the empty CQ0 + * 2. packets for existing flow gets added into CQ1 + * 3. Next flow 0 pkt is now onto CQ2, since CQ0 and CQ1 now contain + * more outstanding pkts + * + * This test makes sure that when a flow ends (i.e. all packets + * have been completed for that flow), that the flow can be moved + * to a different CQ when new packets come in for that flow. 
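	 * (With the two flow sequences used below this works out to final
	 * inflight counts of 3, 4 and 2 on ports 1, 2 and 3 respectively,
	 * which is what the checks at the end of the test assert.)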
+ */ + static uint32_t flows1[] = {0, 1, 1, 2}; + + for (i = 0; i < RTE_DIM(flows1); i++) { + struct rte_mbuf *arp = rte_gen_arp(0, t->mbuf_pool); + struct rte_event ev = { + .flow_id = flows1[i], + .op = RTE_EVENT_OP_NEW, + .queue_id = t->qid[0], + .event_type = RTE_EVENT_TYPE_CPU, + .priority = RTE_EVENT_DEV_PRIORITY_NORMAL, + .mbuf = arp + }; + + if (!arp) { + printf("%d: gen of pkt failed\n", __LINE__); + return -1; + } + arp->hash.rss = flows1[i]; + err = rte_event_enqueue_burst(evdev, t->port[rx_enq], &ev, 1); + if (err != 1) { + printf("%d: Failed to enqueue\n", __LINE__); + return -1; + } + } + + /* call the scheduler */ + rte_service_run_iter_on_app_lcore(t->service_id, 1); + + /* Dequeue the flow 0 packet from port 1, so that we can then drop */ + struct rte_event ev; + if (!rte_event_dequeue_burst(evdev, t->port[1], &ev, 1, 0)) { + printf("%d: failed to dequeue\n", __LINE__); + return -1; + } + if (ev.mbuf->hash.rss != flows1[0]) { + printf("%d: unexpected flow received\n", __LINE__); + return -1; + } + + /* drop the flow 0 packet from port 1 */ + rte_event_enqueue_burst(evdev, t->port[1], &release_ev, 1); + + /* call the scheduler */ + rte_service_run_iter_on_app_lcore(t->service_id, 1); + + /* + * Set up the next set of flows, first a new flow to fill up + * CQ 0, so that the next flow 0 packet should go to CQ2 + */ + static uint32_t flows2[] = { 3, 3, 3, 1, 1, 0 }; + + for (i = 0; i < RTE_DIM(flows2); i++) { + struct rte_mbuf *arp = rte_gen_arp(0, t->mbuf_pool); + struct rte_event ev = { + .flow_id = flows2[i], + .op = RTE_EVENT_OP_NEW, + .queue_id = t->qid[0], + .event_type = RTE_EVENT_TYPE_CPU, + .priority = RTE_EVENT_DEV_PRIORITY_NORMAL, + .mbuf = arp + }; + + if (!arp) { + printf("%d: gen of pkt failed\n", __LINE__); + return -1; + } + arp->hash.rss = flows2[i]; + + err = rte_event_enqueue_burst(evdev, t->port[rx_enq], &ev, 1); + if (err != 1) { + printf("%d: Failed to enqueue\n", __LINE__); + return -1; + } + } + + /* schedule */ + rte_service_run_iter_on_app_lcore(t->service_id, 1); + + err = test_event_dev_stats_get(evdev, &stats); + if (err) { + printf("%d:failed to get stats\n", __LINE__); + return -1; + } + + /* + * Now check the resulting inflights on each port. 
+ */ + if (stats.port_inflight[1] != 3) { + printf("%d:%s: port 1 inflight not correct\n", __LINE__, + __func__); + printf("Inflights, ports 1, 2, 3: %u, %u, %u\n", + (unsigned int)stats.port_inflight[1], + (unsigned int)stats.port_inflight[2], + (unsigned int)stats.port_inflight[3]); + return -1; + } + if (stats.port_inflight[2] != 4) { + printf("%d:%s: port 2 inflight not correct\n", __LINE__, + __func__); + printf("Inflights, ports 1, 2, 3: %u, %u, %u\n", + (unsigned int)stats.port_inflight[1], + (unsigned int)stats.port_inflight[2], + (unsigned int)stats.port_inflight[3]); + return -1; + } + if (stats.port_inflight[3] != 2) { + printf("%d:%s: port 3 inflight not correct\n", __LINE__, + __func__); + printf("Inflights, ports 1, 2, 3: %u, %u, %u\n", + (unsigned int)stats.port_inflight[1], + (unsigned int)stats.port_inflight[2], + (unsigned int)stats.port_inflight[3]); + return -1; + } + + for (i = 1; i <= 3; i++) { + struct rte_event ev; + while (rte_event_dequeue_burst(evdev, i, &ev, 1, 0)) + rte_event_enqueue_burst(evdev, i, &release_ev, 1); + } + rte_service_run_iter_on_app_lcore(t->service_id, 1); + + cleanup(t); + return 0; +} + +static int +invalid_qid(struct test *t) +{ + struct test_event_dev_stats stats; + const int rx_enq = 0; + int err; + uint32_t i; + + if (init(t, 1, 4) < 0 || + create_ports(t, 4) < 0 || + create_atomic_qids(t, 1) < 0) { + printf("%d: Error initializing device\n", __LINE__); + return -1; + } + + /* CQ mapping to QID */ + for (i = 0; i < 4; i++) { + err = rte_event_port_link(evdev, t->port[i], &t->qid[0], + NULL, 1); + if (err != 1) { + printf("%d: error mapping port 1 qid\n", __LINE__); + return -1; + } + } + + if (rte_event_dev_start(evdev) < 0) { + printf("%d: Error with start call\n", __LINE__); + return -1; + } + + /* + * Send in a packet with an invalid qid to the scheduler. + * We should see the packed enqueued OK, but the inflights for + * that packet should not be incremented, and the rx_dropped + * should be incremented. + */ + static uint32_t flows1[] = {20}; + + for (i = 0; i < RTE_DIM(flows1); i++) { + struct rte_mbuf *arp = rte_gen_arp(0, t->mbuf_pool); + if (!arp) { + printf("%d: gen of pkt failed\n", __LINE__); + return -1; + } + + struct rte_event ev = { + .op = RTE_EVENT_OP_NEW, + .queue_id = t->qid[0] + flows1[i], + .flow_id = i, + .mbuf = arp, + }; + /* generate pkt and enqueue */ + err = rte_event_enqueue_burst(evdev, t->port[rx_enq], &ev, 1); + if (err != 1) { + printf("%d: Failed to enqueue\n", __LINE__); + return -1; + } + } + + /* call the scheduler */ + rte_service_run_iter_on_app_lcore(t->service_id, 1); + + err = test_event_dev_stats_get(evdev, &stats); + if (err) { + printf("%d: failed to get stats\n", __LINE__); + return -1; + } + + /* + * Now check the resulting inflights on the port, and the rx_dropped. 
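	 * (The drop for the out-of-range queue_id is charged to the enqueuing
	 * port's rx_dropped counter rather than to the device-wide rx_dropped,
	 * so each dropped event is counted in exactly one place.)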
+ */ + if (stats.port_inflight[0] != 0) { + printf("%d:%s: port 1 inflight count not correct\n", __LINE__, + __func__); + rte_event_dev_dump(evdev, stdout); + return -1; + } + if (stats.port_rx_dropped[0] != 1) { + printf("%d:%s: port 1 drops\n", __LINE__, __func__); + rte_event_dev_dump(evdev, stdout); + return -1; + } + /* each packet drop should only be counted in one place - port or dev */ + if (stats.rx_dropped != 0) { + printf("%d:%s: port 1 dropped count not correct\n", __LINE__, + __func__); + rte_event_dev_dump(evdev, stdout); + return -1; + } + + cleanup(t); + return 0; +} + +static int +single_packet(struct test *t) +{ + const uint32_t MAGIC_SEQN = 7321; + struct rte_event ev; + struct test_event_dev_stats stats; + const int rx_enq = 0; + const int wrk_enq = 2; + int err; + + /* Create instance with 4 ports */ + if (init(t, 1, 4) < 0 || + create_ports(t, 4) < 0 || + create_atomic_qids(t, 1) < 0) { + printf("%d: Error initializing device\n", __LINE__); + return -1; + } + + /* CQ mapping to QID */ + err = rte_event_port_link(evdev, t->port[wrk_enq], NULL, NULL, 0); + if (err != 1) { + printf("%d: error mapping lb qid\n", __LINE__); + cleanup(t); + return -1; + } + + if (rte_event_dev_start(evdev) < 0) { + printf("%d: Error with start call\n", __LINE__); + return -1; + } + + /************** Gen pkt and enqueue ****************/ + struct rte_mbuf *arp = rte_gen_arp(0, t->mbuf_pool); + if (!arp) { + printf("%d: gen of pkt failed\n", __LINE__); + return -1; + } + + ev.op = RTE_EVENT_OP_NEW; + ev.priority = RTE_EVENT_DEV_PRIORITY_NORMAL; + ev.mbuf = arp; + ev.queue_id = 0; + ev.flow_id = 3; + arp->seqn = MAGIC_SEQN; + + err = rte_event_enqueue_burst(evdev, t->port[rx_enq], &ev, 1); + if (err != 1) { + printf("%d: Failed to enqueue\n", __LINE__); + return -1; + } + + rte_service_run_iter_on_app_lcore(t->service_id, 1); + + err = test_event_dev_stats_get(evdev, &stats); + if (err) { + printf("%d: failed to get stats\n", __LINE__); + return -1; + } + + if (stats.rx_pkts != 1 || + stats.tx_pkts != 1 || + stats.port_inflight[wrk_enq] != 1) { + printf("%d: Sched core didn't handle pkt as expected\n", + __LINE__); + rte_event_dev_dump(evdev, stdout); + return -1; + } + + uint32_t deq_pkts; + + deq_pkts = rte_event_dequeue_burst(evdev, t->port[wrk_enq], &ev, 1, 0); + if (deq_pkts < 1) { + printf("%d: Failed to deq\n", __LINE__); + return -1; + } + + err = test_event_dev_stats_get(evdev, &stats); + if (err) { + printf("%d: failed to get stats\n", __LINE__); + return -1; + } + + err = test_event_dev_stats_get(evdev, &stats); + if (ev.mbuf->seqn != MAGIC_SEQN) { + printf("%d: magic sequence number not dequeued\n", __LINE__); + return -1; + } + + rte_pktmbuf_free(ev.mbuf); + err = rte_event_enqueue_burst(evdev, t->port[wrk_enq], &release_ev, 1); + if (err != 1) { + printf("%d: Failed to enqueue\n", __LINE__); + return -1; + } + rte_service_run_iter_on_app_lcore(t->service_id, 1); + + err = test_event_dev_stats_get(evdev, &stats); + if (stats.port_inflight[wrk_enq] != 0) { + printf("%d: port inflight not correct\n", __LINE__); + return -1; + } + + cleanup(t); + return 0; +} + +static int +inflight_counts(struct test *t) +{ + struct rte_event ev; + struct test_event_dev_stats stats; + const int rx_enq = 0; + const int p1 = 1; + const int p2 = 2; + int err; + int i; + + /* Create instance with 4 ports */ + if (init(t, 2, 3) < 0 || + create_ports(t, 3) < 0 || + create_atomic_qids(t, 2) < 0) { + printf("%d: Error initializing device\n", __LINE__); + return -1; + } + + /* CQ mapping to QID */ + err = 
rte_event_port_link(evdev, t->port[p1], &t->qid[0], NULL, 1); + if (err != 1) { + printf("%d: error mapping lb qid\n", __LINE__); + cleanup(t); + return -1; + } + err = rte_event_port_link(evdev, t->port[p2], &t->qid[1], NULL, 1); + if (err != 1) { + printf("%d: error mapping lb qid\n", __LINE__); + cleanup(t); + return -1; + } + + if (rte_event_dev_start(evdev) < 0) { + printf("%d: Error with start call\n", __LINE__); + return -1; + } + + /************** FORWARD ****************/ +#define QID1_NUM 5 + for (i = 0; i < QID1_NUM; i++) { + struct rte_mbuf *arp = rte_gen_arp(0, t->mbuf_pool); + + if (!arp) { + printf("%d: gen of pkt failed\n", __LINE__); + goto err; + } + + ev.queue_id = t->qid[0]; + ev.op = RTE_EVENT_OP_NEW; + ev.mbuf = arp; + err = rte_event_enqueue_burst(evdev, t->port[rx_enq], &ev, 1); + if (err != 1) { + printf("%d: Failed to enqueue\n", __LINE__); + goto err; + } + } +#define QID2_NUM 3 + for (i = 0; i < QID2_NUM; i++) { + struct rte_mbuf *arp = rte_gen_arp(0, t->mbuf_pool); + + if (!arp) { + printf("%d: gen of pkt failed\n", __LINE__); + goto err; + } + ev.queue_id = t->qid[1]; + ev.op = RTE_EVENT_OP_NEW; + ev.mbuf = arp; + err = rte_event_enqueue_burst(evdev, t->port[rx_enq], &ev, 1); + if (err != 1) { + printf("%d: Failed to enqueue\n", __LINE__); + goto err; + } + } + + /* schedule */ + rte_service_run_iter_on_app_lcore(t->service_id, 1); + + err = test_event_dev_stats_get(evdev, &stats); + if (err) { + printf("%d: failed to get stats\n", __LINE__); + goto err; + } + + if (stats.rx_pkts != QID1_NUM + QID2_NUM || + stats.tx_pkts != QID1_NUM + QID2_NUM) { + printf("%d: Sched core didn't handle pkt as expected\n", + __LINE__); + goto err; + } + + if (stats.port_inflight[p1] != QID1_NUM) { + printf("%d: %s port 1 inflight not correct\n", __LINE__, + __func__); + goto err; + } + if (stats.port_inflight[p2] != QID2_NUM) { + printf("%d: %s port 2 inflight not correct\n", __LINE__, + __func__); + goto err; + } + + /************** DEQUEUE INFLIGHT COUNT CHECKS ****************/ + /* port 1 */ + struct rte_event events[QID1_NUM + QID2_NUM]; + uint32_t deq_pkts = rte_event_dequeue_burst(evdev, t->port[p1], events, + RTE_DIM(events), 0); + + if (deq_pkts != QID1_NUM) { + printf("%d: Port 1: DEQUEUE inflight failed\n", __LINE__); + goto err; + } + err = test_event_dev_stats_get(evdev, &stats); + if (stats.port_inflight[p1] != QID1_NUM) { + printf("%d: port 1 inflight decrement after DEQ != 0\n", + __LINE__); + goto err; + } + for (i = 0; i < QID1_NUM; i++) { + err = rte_event_enqueue_burst(evdev, t->port[p1], &release_ev, + 1); + if (err != 1) { + printf("%d: %s rte enqueue of inf release failed\n", + __LINE__, __func__); + goto err; + } + } + + /* + * As the scheduler core decrements inflights, it needs to run to + * process packets to act on the drop messages + */ + rte_service_run_iter_on_app_lcore(t->service_id, 1); + + err = test_event_dev_stats_get(evdev, &stats); + if (stats.port_inflight[p1] != 0) { + printf("%d: port 1 inflight NON NULL after DROP\n", __LINE__); + goto err; + } + + /* port2 */ + deq_pkts = rte_event_dequeue_burst(evdev, t->port[p2], events, + RTE_DIM(events), 0); + if (deq_pkts != QID2_NUM) { + printf("%d: Port 2: DEQUEUE inflight failed\n", __LINE__); + goto err; + } + err = test_event_dev_stats_get(evdev, &stats); + if (stats.port_inflight[p2] != QID2_NUM) { + printf("%d: port 1 inflight decrement after DEQ != 0\n", + __LINE__); + goto err; + } + for (i = 0; i < QID2_NUM; i++) { + err = rte_event_enqueue_burst(evdev, t->port[p2], &release_ev, + 1); + 
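		/*
		 * (Note: release_ev has op = RTE_EVENT_OP_RELEASE, so each
		 * enqueue here completes one previously dequeued event without
		 * forwarding it; the inflight count only drops once the
		 * scheduler service has processed the release, hence the
		 * service run below before the stats are re-checked.)
		 */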
if (err != 1) { + printf("%d: %s rte enqueue of inf release failed\n", + __LINE__, __func__); + goto err; + } + } + + /* + * As the scheduler core decrements inflights, it needs to run to + * process packets to act on the drop messages + */ + rte_service_run_iter_on_app_lcore(t->service_id, 1); + + err = test_event_dev_stats_get(evdev, &stats); + if (stats.port_inflight[p2] != 0) { + printf("%d: port 2 inflight NON NULL after DROP\n", __LINE__); + goto err; + } + cleanup(t); + return 0; + +err: + rte_event_dev_dump(evdev, stdout); + cleanup(t); + return -1; +} + +static int +parallel_basic(struct test *t, int check_order) +{ + const uint8_t rx_port = 0; + const uint8_t w1_port = 1; + const uint8_t w3_port = 3; + const uint8_t tx_port = 4; + int err; + int i; + uint32_t deq_pkts, j; + struct rte_mbuf *mbufs[3]; + struct rte_mbuf *mbufs_out[3] = { 0 }; + const uint32_t MAGIC_SEQN = 1234; + + /* Create instance with 4 ports */ + if (init(t, 2, tx_port + 1) < 0 || + create_ports(t, tx_port + 1) < 0 || + (check_order ? create_ordered_qids(t, 1) : + create_unordered_qids(t, 1)) < 0 || + create_directed_qids(t, 1, &tx_port)) { + printf("%d: Error initializing device\n", __LINE__); + return -1; + } + + /* + * CQ mapping to QID + * We need three ports, all mapped to the same ordered qid0. Then we'll + * take a packet out to each port, re-enqueue in reverse order, + * then make sure the reordering has taken place properly when we + * dequeue from the tx_port. + * + * Simplified test setup diagram: + * + * rx_port w1_port + * \ / \ + * qid0 - w2_port - qid1 + * \ / \ + * w3_port tx_port + */ + /* CQ mapping to QID for LB ports (directed mapped on create) */ + for (i = w1_port; i <= w3_port; i++) { + err = rte_event_port_link(evdev, t->port[i], &t->qid[0], NULL, + 1); + if (err != 1) { + printf("%d: error mapping lb qid\n", __LINE__); + cleanup(t); + return -1; + } + } + + if (rte_event_dev_start(evdev) < 0) { + printf("%d: Error with start call\n", __LINE__); + return -1; + } + + /* Enqueue 3 packets to the rx port */ + for (i = 0; i < 3; i++) { + struct rte_event ev; + mbufs[i] = rte_gen_arp(0, t->mbuf_pool); + if (!mbufs[i]) { + printf("%d: gen of pkt failed\n", __LINE__); + return -1; + } + + ev.queue_id = t->qid[0]; + ev.op = RTE_EVENT_OP_NEW; + ev.mbuf = mbufs[i]; + mbufs[i]->seqn = MAGIC_SEQN + i; + + /* generate pkt and enqueue */ + err = rte_event_enqueue_burst(evdev, t->port[rx_port], &ev, 1); + if (err != 1) { + printf("%d: Failed to enqueue pkt %u, retval = %u\n", + __LINE__, i, err); + return -1; + } + } + + rte_service_run_iter_on_app_lcore(t->service_id, 1); + + /* use extra slot to make logic in loops easier */ + struct rte_event deq_ev[w3_port + 1]; + + /* Dequeue the 3 packets, one from each worker port */ + for (i = w1_port; i <= w3_port; i++) { + deq_pkts = rte_event_dequeue_burst(evdev, t->port[i], + &deq_ev[i], 1, 0); + if (deq_pkts != 1) { + printf("%d: Failed to deq\n", __LINE__); + rte_event_dev_dump(evdev, stdout); + return -1; + } + } + + /* Enqueue each packet in reverse order, flushing after each one */ + for (i = w3_port; i >= w1_port; i--) { + + deq_ev[i].op = RTE_EVENT_OP_FORWARD; + deq_ev[i].queue_id = t->qid[1]; + err = rte_event_enqueue_burst(evdev, t->port[i], &deq_ev[i], 1); + if (err != 1) { + printf("%d: Failed to enqueue\n", __LINE__); + return -1; + } + } + rte_service_run_iter_on_app_lcore(t->service_id, 1); + + /* dequeue from the tx ports, we should get 3 packets */ + deq_pkts = rte_event_dequeue_burst(evdev, t->port[tx_port], deq_ev, + 3, 0); + + /* Check 
to see if we've got all 3 packets */ + if (deq_pkts != 3) { + printf("%d: expected 3 pkts at tx port got %d from port %d\n", + __LINE__, deq_pkts, tx_port); + rte_event_dev_dump(evdev, stdout); + return 1; + } + + /* Check to see if the sequence numbers are in expected order */ + if (check_order) { + for (j = 0 ; j < deq_pkts ; j++) { + if (deq_ev[j].mbuf->seqn != MAGIC_SEQN + j) { + printf( + "%d: Incorrect sequence number(%d) from port %d\n", + __LINE__, mbufs_out[j]->seqn, tx_port); + return -1; + } + } + } + + /* Destroy the instance */ + cleanup(t); + return 0; +} + +static int +ordered_basic(struct test *t) +{ + return parallel_basic(t, 1); +} + +static int +unordered_basic(struct test *t) +{ + return parallel_basic(t, 0); +} + +static int +holb(struct test *t) /* test to check we avoid basic head-of-line blocking */ +{ + const struct rte_event new_ev = { + .op = RTE_EVENT_OP_NEW + /* all other fields zero */ + }; + struct rte_event ev = new_ev; + unsigned int rx_port = 0; /* port we get the first flow on */ + char rx_port_used_stat[64]; + char rx_port_free_stat[64]; + char other_port_used_stat[64]; + + if (init(t, 1, 2) < 0 || + create_ports(t, 2) < 0 || + create_atomic_qids(t, 1) < 0) { + printf("%d: Error initializing device\n", __LINE__); + return -1; + } + int nb_links = rte_event_port_link(evdev, t->port[1], NULL, NULL, 0); + if (rte_event_port_link(evdev, t->port[0], NULL, NULL, 0) != 1 || + nb_links != 1) { + printf("%d: Error links queue to ports\n", __LINE__); + goto err; + } + if (rte_event_dev_start(evdev) < 0) { + printf("%d: Error with start call\n", __LINE__); + goto err; + } + + /* send one packet and see where it goes, port 0 or 1 */ + if (rte_event_enqueue_burst(evdev, t->port[0], &ev, 1) != 1) { + printf("%d: Error doing first enqueue\n", __LINE__); + goto err; + } + rte_service_run_iter_on_app_lcore(t->service_id, 1); + + if (rte_event_dev_xstats_by_name_get(evdev, "port_0_cq_ring_used", NULL) + != 1) + rx_port = 1; + + snprintf(rx_port_used_stat, sizeof(rx_port_used_stat), + "port_%u_cq_ring_used", rx_port); + snprintf(rx_port_free_stat, sizeof(rx_port_free_stat), + "port_%u_cq_ring_free", rx_port); + snprintf(other_port_used_stat, sizeof(other_port_used_stat), + "port_%u_cq_ring_used", rx_port ^ 1); + if (rte_event_dev_xstats_by_name_get(evdev, rx_port_used_stat, NULL) + != 1) { + printf("%d: Error, first event not scheduled\n", __LINE__); + goto err; + } + + /* now fill up the rx port's queue with one flow to cause HOLB */ + do { + ev = new_ev; + if (rte_event_enqueue_burst(evdev, t->port[0], &ev, 1) != 1) { + printf("%d: Error with enqueue\n", __LINE__); + goto err; + } + rte_service_run_iter_on_app_lcore(t->service_id, 1); + } while (rte_event_dev_xstats_by_name_get(evdev, + rx_port_free_stat, NULL) != 0); + + /* one more packet, which needs to stay in IQ - i.e. 
HOLB */ + ev = new_ev; + if (rte_event_enqueue_burst(evdev, t->port[0], &ev, 1) != 1) { + printf("%d: Error with enqueue\n", __LINE__); + goto err; + } + rte_service_run_iter_on_app_lcore(t->service_id, 1); + + /* check that the other port still has an empty CQ */ + if (rte_event_dev_xstats_by_name_get(evdev, other_port_used_stat, NULL) + != 0) { + printf("%d: Error, second port CQ is not empty\n", __LINE__); + goto err; + } + /* check IQ now has one packet */ + if (rte_event_dev_xstats_by_name_get(evdev, "qid_0_iq_0_used", NULL) + != 1) { + printf("%d: Error, QID does not have exactly 1 packet\n", + __LINE__); + goto err; + } + + /* send another flow, which should pass the other IQ entry */ + ev = new_ev; + ev.flow_id = 1; + if (rte_event_enqueue_burst(evdev, t->port[0], &ev, 1) != 1) { + printf("%d: Error with enqueue\n", __LINE__); + goto err; + } + rte_service_run_iter_on_app_lcore(t->service_id, 1); + + if (rte_event_dev_xstats_by_name_get(evdev, other_port_used_stat, NULL) + != 1) { + printf("%d: Error, second flow did not pass out first\n", + __LINE__); + goto err; + } + + if (rte_event_dev_xstats_by_name_get(evdev, "qid_0_iq_0_used", NULL) + != 1) { + printf("%d: Error, QID does not have exactly 1 packet\n", + __LINE__); + goto err; + } + cleanup(t); + return 0; +err: + rte_event_dev_dump(evdev, stdout); + cleanup(t); + return -1; +} + +static void +flush(uint8_t dev_id __rte_unused, struct rte_event event, void *arg) +{ + *((uint8_t *) arg) += (event.u64 == 0xCA11BACC) ? 1 : 0; +} + +static int +dev_stop_flush(struct test *t) /* test to check we can properly flush events */ +{ + const struct rte_event new_ev = { + .op = RTE_EVENT_OP_NEW, + .u64 = 0xCA11BACC, + .queue_id = 0 + }; + struct rte_event ev = new_ev; + uint8_t count = 0; + int i; + + if (init(t, 1, 1) < 0 || + create_ports(t, 1) < 0 || + create_atomic_qids(t, 1) < 0) { + printf("%d: Error initializing device\n", __LINE__); + return -1; + } + + /* Link the queue so *_start() doesn't error out */ + if (rte_event_port_link(evdev, t->port[0], NULL, NULL, 0) != 1) { + printf("%d: Error linking queue to port\n", __LINE__); + goto err; + } + + if (rte_event_dev_start(evdev) < 0) { + printf("%d: Error with start call\n", __LINE__); + goto err; + } + + for (i = 0; i < DEQUEUE_DEPTH + 1; i++) { + if (rte_event_enqueue_burst(evdev, t->port[0], &ev, 1) != 1) { + printf("%d: Error enqueuing events\n", __LINE__); + goto err; + } + } + + /* Schedule the events from the port to the IQ. At least one event + * should be remaining in the queue. + */ + rte_service_run_iter_on_app_lcore(t->service_id, 1); + + if (rte_event_dev_stop_flush_callback_register(evdev, flush, &count)) { + printf("%d: Error installing the flush callback\n", __LINE__); + goto err; + } + + cleanup(t); + + if (count == 0) { + printf("%d: Error executing the flush callback\n", __LINE__); + goto err; + } + + if (rte_event_dev_stop_flush_callback_register(evdev, NULL, NULL)) { + printf("%d: Error uninstalling the flush callback\n", __LINE__); + goto err; + } + + return 0; +err: + rte_event_dev_dump(evdev, stdout); + cleanup(t); + return -1; +} + +static int +worker_loopback_worker_fn(void *arg) +{ + struct test *t = arg; + uint8_t port = t->port[1]; + int count = 0; + int enqd; + + /* + * Takes packets from the input port and then loops them back through + * the Eventdev. Each packet gets looped through QIDs 0-8, 16 times + * so each packet goes through 8*16 = 128 times. 
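	 * (There are 8 atomic QIDs, 0-7; when queue_id is bumped to 8 it wraps
	 * back to 0 and mbuf->udata64 counts the pass, so after 16 passes the
	 * mbuf is freed and a RELEASE is enqueued instead of a FORWARD.)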
+ */ + printf("%d: \tWorker function started\n", __LINE__); + while (count < NUM_PACKETS) { +#define BURST_SIZE 32 + struct rte_event ev[BURST_SIZE]; + uint16_t i, nb_rx = rte_event_dequeue_burst(evdev, port, ev, + BURST_SIZE, 0); + if (nb_rx == 0) { + rte_pause(); + continue; + } + + for (i = 0; i < nb_rx; i++) { + ev[i].queue_id++; + if (ev[i].queue_id != 8) { + ev[i].op = RTE_EVENT_OP_FORWARD; + enqd = rte_event_enqueue_burst(evdev, port, + &ev[i], 1); + if (enqd != 1) { + printf("%d: Can't enqueue FWD!!\n", + __LINE__); + return -1; + } + continue; + } + + ev[i].queue_id = 0; + ev[i].mbuf->udata64++; + if (ev[i].mbuf->udata64 != 16) { + ev[i].op = RTE_EVENT_OP_FORWARD; + enqd = rte_event_enqueue_burst(evdev, port, + &ev[i], 1); + if (enqd != 1) { + printf("%d: Can't enqueue FWD!!\n", + __LINE__); + return -1; + } + continue; + } + /* we have hit 16 iterations through system - drop */ + rte_pktmbuf_free(ev[i].mbuf); + count++; + ev[i].op = RTE_EVENT_OP_RELEASE; + enqd = rte_event_enqueue_burst(evdev, port, &ev[i], 1); + if (enqd != 1) { + printf("%d drop enqueue failed\n", __LINE__); + return -1; + } + } + } + + return 0; +} + +static int +worker_loopback_producer_fn(void *arg) +{ + struct test *t = arg; + uint8_t port = t->port[0]; + uint64_t count = 0; + + printf("%d: \tProducer function started\n", __LINE__); + while (count < NUM_PACKETS) { + struct rte_mbuf *m = 0; + do { + m = rte_pktmbuf_alloc(t->mbuf_pool); + } while (m == NULL); + + m->udata64 = 0; + + struct rte_event ev = { + .op = RTE_EVENT_OP_NEW, + .queue_id = t->qid[0], + .flow_id = (uintptr_t)m & 0xFFFF, + .mbuf = m, + }; + + if (rte_event_enqueue_burst(evdev, port, &ev, 1) != 1) { + while (rte_event_enqueue_burst(evdev, port, &ev, 1) != + 1) + rte_pause(); + } + + count++; + } + + return 0; +} + +static int +worker_loopback(struct test *t, uint8_t disable_implicit_release) +{ + /* use a single producer core, and a worker core to see what happens + * if the worker loops packets back multiple times + */ + struct test_event_dev_stats stats; + uint64_t print_cycles = 0, cycles = 0; + uint64_t tx_pkts = 0; + int err; + int w_lcore, p_lcore; + + if (init(t, 8, 2) < 0 || + create_atomic_qids(t, 8) < 0) { + printf("%d: Error initializing device\n", __LINE__); + return -1; + } + + /* RX with low max events */ + static struct rte_event_port_conf conf = { + .dequeue_depth = 32, + .enqueue_depth = 64, + }; + /* beware: this cannot be initialized in the static above as it would + * only be initialized once - and this needs to be set for multiple runs + */ + conf.new_event_threshold = 512; + conf.disable_implicit_release = disable_implicit_release; + + if (rte_event_port_setup(evdev, 0, &conf) < 0) { + printf("Error setting up RX port\n"); + return -1; + } + t->port[0] = 0; + /* TX with higher max events */ + conf.new_event_threshold = 4096; + if (rte_event_port_setup(evdev, 1, &conf) < 0) { + printf("Error setting up TX port\n"); + return -1; + } + t->port[1] = 1; + + /* CQ mapping to QID */ + err = rte_event_port_link(evdev, t->port[1], NULL, NULL, 0); + if (err != 8) { /* should have mapped all queues*/ + printf("%d: error mapping port 2 to all qids\n", __LINE__); + return -1; + } + + if (rte_event_dev_start(evdev) < 0) { + printf("%d: Error with start call\n", __LINE__); + return -1; + } + + p_lcore = rte_get_next_lcore( + /* start core */ -1, + /* skip master */ 1, + /* wrap */ 0); + w_lcore = rte_get_next_lcore(p_lcore, 1, 0); + + rte_eal_remote_launch(worker_loopback_producer_fn, t, p_lcore); + 
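	/*
	 * (Note: the producer and worker each run on their own lcore, while
	 * this thread drives the sw PMD scheduler service in the loop below
	 * and treats roughly three seconds without tx progress as a
	 * deadlock.)
	 */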
rte_eal_remote_launch(worker_loopback_worker_fn, t, w_lcore); + + print_cycles = cycles = rte_get_timer_cycles(); + while (rte_eal_get_lcore_state(p_lcore) != FINISHED || + rte_eal_get_lcore_state(w_lcore) != FINISHED) { + + rte_service_run_iter_on_app_lcore(t->service_id, 1); + + uint64_t new_cycles = rte_get_timer_cycles(); + + if (new_cycles - print_cycles > rte_get_timer_hz()) { + test_event_dev_stats_get(evdev, &stats); + printf( + "%d: \tSched Rx = %"PRIu64", Tx = %"PRIu64"\n", + __LINE__, stats.rx_pkts, stats.tx_pkts); + + print_cycles = new_cycles; + } + if (new_cycles - cycles > rte_get_timer_hz() * 3) { + test_event_dev_stats_get(evdev, &stats); + if (stats.tx_pkts == tx_pkts) { + rte_event_dev_dump(evdev, stdout); + printf("Dumping xstats:\n"); + xstats_print(); + printf( + "%d: No schedules for seconds, deadlock\n", + __LINE__); + return -1; + } + tx_pkts = stats.tx_pkts; + cycles = new_cycles; + } + } + rte_service_run_iter_on_app_lcore(t->service_id, 1); + /* ensure all completions are flushed */ + + rte_eal_mp_wait_lcore(); + + cleanup(t); + return 0; +} + +static struct rte_mempool *eventdev_func_mempool; + +int +test_sw_eventdev(void) +{ + struct test *t; + int ret; + + t = malloc(sizeof(struct test)); + if (t == NULL) + return -1; + /* manually initialize the op, older gcc's complain on static + * initialization of struct elements that are a bitfield. + */ + release_ev.op = RTE_EVENT_OP_RELEASE; + + const char *eventdev_name = "event_sw"; + evdev = rte_event_dev_get_dev_id(eventdev_name); + if (evdev < 0) { + printf("%d: Eventdev %s not found - creating.\n", + __LINE__, eventdev_name); + if (rte_vdev_init(eventdev_name, NULL) < 0) { + printf("Error creating eventdev\n"); + goto test_fail; + } + evdev = rte_event_dev_get_dev_id(eventdev_name); + if (evdev < 0) { + printf("Error finding newly created eventdev\n"); + goto test_fail; + } + } + + if (rte_event_dev_service_id_get(evdev, &t->service_id) < 0) { + printf("Failed to get service ID for software event dev\n"); + goto test_fail; + } + + rte_service_runstate_set(t->service_id, 1); + rte_service_set_runstate_mapped_check(t->service_id, 0); + + /* Only create mbuf pool once, reuse for each test run */ + if (!eventdev_func_mempool) { + eventdev_func_mempool = rte_pktmbuf_pool_create( + "EVENTDEV_SW_SA_MBUF_POOL", + (1<<12), /* 4k buffers */ + 32 /*MBUF_CACHE_SIZE*/, + 0, + 512, /* use very small mbufs */ + rte_socket_id()); + if (!eventdev_func_mempool) { + printf("ERROR creating mempool\n"); + goto test_fail; + } + } + t->mbuf_pool = eventdev_func_mempool; + printf("*** Running Single Directed Packet test...\n"); + ret = test_single_directed_packet(t); + if (ret != 0) { + printf("ERROR - Single Directed Packet test FAILED.\n"); + goto test_fail; + } + printf("*** Running Directed Forward Credit test...\n"); + ret = test_directed_forward_credits(t); + if (ret != 0) { + printf("ERROR - Directed Forward Credit test FAILED.\n"); + goto test_fail; + } + printf("*** Running Single Load Balanced Packet test...\n"); + ret = single_packet(t); + if (ret != 0) { + printf("ERROR - Single Packet test FAILED.\n"); + goto test_fail; + } + printf("*** Running Unordered Basic test...\n"); + ret = unordered_basic(t); + if (ret != 0) { + printf("ERROR - Unordered Basic test FAILED.\n"); + goto test_fail; + } + printf("*** Running Ordered Basic test...\n"); + ret = ordered_basic(t); + if (ret != 0) { + printf("ERROR - Ordered Basic test FAILED.\n"); + goto test_fail; + } + printf("*** Running Burst Packets test...\n"); + ret = 
burst_packets(t); + if (ret != 0) { + printf("ERROR - Burst Packets test FAILED.\n"); + goto test_fail; + } + printf("*** Running Load Balancing test...\n"); + ret = load_balancing(t); + if (ret != 0) { + printf("ERROR - Load Balancing test FAILED.\n"); + goto test_fail; + } + printf("*** Running Prioritized Directed test...\n"); + ret = test_priority_directed(t); + if (ret != 0) { + printf("ERROR - Prioritized Directed test FAILED.\n"); + goto test_fail; + } + printf("*** Running Prioritized Atomic test...\n"); + ret = test_priority_atomic(t); + if (ret != 0) { + printf("ERROR - Prioritized Atomic test FAILED.\n"); + goto test_fail; + } + + printf("*** Running Prioritized Ordered test...\n"); + ret = test_priority_ordered(t); + if (ret != 0) { + printf("ERROR - Prioritized Ordered test FAILED.\n"); + goto test_fail; + } + printf("*** Running Prioritized Unordered test...\n"); + ret = test_priority_unordered(t); + if (ret != 0) { + printf("ERROR - Prioritized Unordered test FAILED.\n"); + goto test_fail; + } + printf("*** Running Invalid QID test...\n"); + ret = invalid_qid(t); + if (ret != 0) { + printf("ERROR - Invalid QID test FAILED.\n"); + goto test_fail; + } + printf("*** Running Load Balancing History test...\n"); + ret = load_balancing_history(t); + if (ret != 0) { + printf("ERROR - Load Balancing History test FAILED.\n"); + goto test_fail; + } + printf("*** Running Inflight Count test...\n"); + ret = inflight_counts(t); + if (ret != 0) { + printf("ERROR - Inflight Count test FAILED.\n"); + goto test_fail; + } + printf("*** Running Abuse Inflights test...\n"); + ret = abuse_inflights(t); + if (ret != 0) { + printf("ERROR - Abuse Inflights test FAILED.\n"); + goto test_fail; + } + printf("*** Running XStats test...\n"); + ret = xstats_tests(t); + if (ret != 0) { + printf("ERROR - XStats test FAILED.\n"); + goto test_fail; + } + printf("*** Running XStats ID Reset test...\n"); + ret = xstats_id_reset_tests(t); + if (ret != 0) { + printf("ERROR - XStats ID Reset test FAILED.\n"); + goto test_fail; + } + printf("*** Running XStats Brute Force test...\n"); + ret = xstats_brute_force(t); + if (ret != 0) { + printf("ERROR - XStats Brute Force test FAILED.\n"); + goto test_fail; + } + printf("*** Running XStats ID Abuse test...\n"); + ret = xstats_id_abuse_tests(t); + if (ret != 0) { + printf("ERROR - XStats ID Abuse test FAILED.\n"); + goto test_fail; + } + printf("*** Running QID Priority test...\n"); + ret = qid_priorities(t); + if (ret != 0) { + printf("ERROR - QID Priority test FAILED.\n"); + goto test_fail; + } + printf("*** Running Unlink-in-progress test...\n"); + ret = unlink_in_progress(t); + if (ret != 0) { + printf("ERROR - Unlink in progress test FAILED.\n"); + goto test_fail; + } + printf("*** Running Ordered Reconfigure test...\n"); + ret = ordered_reconfigure(t); + if (ret != 0) { + printf("ERROR - Ordered Reconfigure test FAILED.\n"); + goto test_fail; + } + printf("*** Running Port LB Single Reconfig test...\n"); + ret = port_single_lb_reconfig(t); + if (ret != 0) { + printf("ERROR - Port LB Single Reconfig test FAILED.\n"); + goto test_fail; + } + printf("*** Running Port Reconfig Credits test...\n"); + ret = port_reconfig_credits(t); + if (ret != 0) { + printf("ERROR - Port Reconfig Credits Reset test FAILED.\n"); + goto test_fail; + } + printf("*** Running Head-of-line-blocking test...\n"); + ret = holb(t); + if (ret != 0) { + printf("ERROR - Head-of-line-blocking test FAILED.\n"); + goto test_fail; + } + printf("*** Running Stop Flush test...\n"); + ret = 
dev_stop_flush(t); + if (ret != 0) { + printf("ERROR - Stop Flush test FAILED.\n"); + goto test_fail; + } + if (rte_lcore_count() >= 3) { + printf("*** Running Worker loopback test...\n"); + ret = worker_loopback(t, 0); + if (ret != 0) { + printf("ERROR - Worker loopback test FAILED.\n"); + return ret; + } + + printf("*** Running Worker loopback test (implicit release disabled)...\n"); + ret = worker_loopback(t, 1); + if (ret != 0) { + printf("ERROR - Worker loopback test FAILED.\n"); + goto test_fail; + } + } else { + printf("### Not enough cores for worker loopback tests.\n"); + printf("### Need at least 3 cores for the tests.\n"); + } + + /* + * Free test instance, leaving mempool initialized, and a pointer to it + * in static eventdev_func_mempool, as it is re-used on re-runs + */ + free(t); + + printf("SW Eventdev Selftest Successful.\n"); + return 0; +test_fail: + free(t); + printf("SW Eventdev Selftest Failed.\n"); + return -1; +} diff --git a/src/spdk/dpdk/drivers/event/sw/sw_evdev_worker.c b/src/spdk/dpdk/drivers/event/sw/sw_evdev_worker.c new file mode 100644 index 000000000..063b919c7 --- /dev/null +++ b/src/spdk/dpdk/drivers/event/sw/sw_evdev_worker.c @@ -0,0 +1,186 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2016-2017 Intel Corporation + */ + +#include <rte_atomic.h> +#include <rte_cycles.h> +#include <rte_event_ring.h> + +#include "sw_evdev.h" + +#define PORT_ENQUEUE_MAX_BURST_SIZE 64 + +static inline void +sw_event_release(struct sw_port *p, uint8_t index) +{ + /* + * Drops the next outstanding event in our history. Used on dequeue + * to clear any history before dequeuing more events. + */ + RTE_SET_USED(index); + + /* create drop message */ + struct rte_event ev; + ev.op = sw_qe_flag_map[RTE_EVENT_OP_RELEASE]; + + uint16_t free_count; + rte_event_ring_enqueue_burst(p->rx_worker_ring, &ev, 1, &free_count); + + /* each release returns one credit */ + p->outstanding_releases--; + p->inflight_credits++; +} + +/* + * special-case of rte_event_ring enqueue, with overriding the ops member on + * the events that get written to the ring. + */ +static inline unsigned int +enqueue_burst_with_ops(struct rte_event_ring *r, const struct rte_event *events, + unsigned int n, uint8_t *ops) +{ + struct rte_event tmp_evs[PORT_ENQUEUE_MAX_BURST_SIZE]; + unsigned int i; + + memcpy(tmp_evs, events, n * sizeof(events[0])); + for (i = 0; i < n; i++) + tmp_evs[i].op = ops[i]; + + return rte_event_ring_enqueue_burst(r, tmp_evs, n, NULL); +} + +uint16_t +sw_event_enqueue_burst(void *port, const struct rte_event ev[], uint16_t num) +{ + int32_t i; + uint8_t new_ops[PORT_ENQUEUE_MAX_BURST_SIZE]; + struct sw_port *p = port; + struct sw_evdev *sw = (void *)p->sw; + uint32_t sw_inflights = rte_atomic32_read(&sw->inflights); + uint32_t credit_update_quanta = sw->credit_update_quanta; + int new = 0; + + if (num > PORT_ENQUEUE_MAX_BURST_SIZE) + num = PORT_ENQUEUE_MAX_BURST_SIZE; + + for (i = 0; i < num; i++) + new += (ev[i].op == RTE_EVENT_OP_NEW); + + if (unlikely(new > 0 && p->inflight_max < sw_inflights)) + return 0; + + if (p->inflight_credits < new) { + /* check if event enqueue brings port over max threshold */ + if (sw_inflights + credit_update_quanta > sw->nb_events_limit) + return 0; + + rte_atomic32_add(&sw->inflights, credit_update_quanta); + p->inflight_credits += (credit_update_quanta); + + /* If there are fewer inflight credits than new events, limit + * the number of enqueued events. + */ + num = (p->inflight_credits < new) ? 
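		/*
		 * (Note: credits are taken from the device-wide sw->inflights
		 * counter in blocks of credit_update_quanta and cached per
		 * port; each NEW event consumes one cached credit and each
		 * RELEASE of an outstanding event returns one, so if the port
		 * still holds fewer credits than there are NEW events the
		 * burst is truncated to the credits available.)
		 */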
p->inflight_credits : new; + } + + for (i = 0; i < num; i++) { + int op = ev[i].op; + int outstanding = p->outstanding_releases > 0; + const uint8_t invalid_qid = (ev[i].queue_id >= sw->qid_count); + + p->inflight_credits -= (op == RTE_EVENT_OP_NEW); + p->inflight_credits += (op == RTE_EVENT_OP_RELEASE) * + outstanding; + + new_ops[i] = sw_qe_flag_map[op]; + new_ops[i] &= ~(invalid_qid << QE_FLAG_VALID_SHIFT); + + /* FWD and RELEASE packets will both resolve to taken (assuming + * correct usage of the API), providing very high correct + * prediction rate. + */ + if ((new_ops[i] & QE_FLAG_COMPLETE) && outstanding) + p->outstanding_releases--; + + /* error case: branch to avoid touching p->stats */ + if (unlikely(invalid_qid && op != RTE_EVENT_OP_RELEASE)) { + p->stats.rx_dropped++; + p->inflight_credits++; + } + } + + /* returns number of events actually enqueued */ + uint32_t enq = enqueue_burst_with_ops(p->rx_worker_ring, ev, i, + new_ops); + if (p->outstanding_releases == 0 && p->last_dequeue_burst_sz != 0) { + uint64_t burst_ticks = rte_get_timer_cycles() - + p->last_dequeue_ticks; + uint64_t burst_pkt_ticks = + burst_ticks / p->last_dequeue_burst_sz; + p->avg_pkt_ticks -= p->avg_pkt_ticks / NUM_SAMPLES; + p->avg_pkt_ticks += burst_pkt_ticks / NUM_SAMPLES; + p->last_dequeue_ticks = 0; + } + + /* Replenish credits if enough releases are performed */ + if (p->inflight_credits >= credit_update_quanta * 2) { + rte_atomic32_sub(&sw->inflights, credit_update_quanta); + p->inflight_credits -= credit_update_quanta; + } + + return enq; +} + +uint16_t +sw_event_enqueue(void *port, const struct rte_event *ev) +{ + return sw_event_enqueue_burst(port, ev, 1); +} + +uint16_t +sw_event_dequeue_burst(void *port, struct rte_event *ev, uint16_t num, + uint64_t wait) +{ + RTE_SET_USED(wait); + struct sw_port *p = (void *)port; + struct rte_event_ring *ring = p->cq_worker_ring; + + /* check that all previous dequeues have been released */ + if (p->implicit_release) { + struct sw_evdev *sw = (void *)p->sw; + uint32_t credit_update_quanta = sw->credit_update_quanta; + uint16_t out_rels = p->outstanding_releases; + uint16_t i; + for (i = 0; i < out_rels; i++) + sw_event_release(p, i); + + /* Replenish credits if enough releases are performed */ + if (p->inflight_credits >= credit_update_quanta * 2) { + rte_atomic32_sub(&sw->inflights, credit_update_quanta); + p->inflight_credits -= credit_update_quanta; + } + } + + /* returns number of events actually dequeued */ + uint16_t ndeq = rte_event_ring_dequeue_burst(ring, ev, num, NULL); + if (unlikely(ndeq == 0)) { + p->zero_polls++; + p->total_polls++; + goto end; + } + + p->outstanding_releases += ndeq; + p->last_dequeue_burst_sz = ndeq; + p->last_dequeue_ticks = rte_get_timer_cycles(); + p->poll_buckets[(ndeq - 1) >> SW_DEQ_STAT_BUCKET_SHIFT]++; + p->total_polls++; + +end: + return ndeq; +} + +uint16_t +sw_event_dequeue(void *port, struct rte_event *ev, uint64_t wait) +{ + return sw_event_dequeue_burst(port, ev, 1, wait); +} diff --git a/src/spdk/dpdk/drivers/event/sw/sw_evdev_xstats.c b/src/spdk/dpdk/drivers/event/sw/sw_evdev_xstats.c new file mode 100644 index 000000000..02f787418 --- /dev/null +++ b/src/spdk/dpdk/drivers/event/sw/sw_evdev_xstats.c @@ -0,0 +1,649 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2016-2017 Intel Corporation + */ + +#include <rte_event_ring.h> +#include "sw_evdev.h" +#include "iq_chunk.h" + +enum xstats_type { + /* common stats */ + rx, + tx, + dropped, + inflight, + calls, + credits, + /* device instance specific */ 
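	/* (Roughly: these count scheduler iterations in which no events could
	 * be moved into any IQ, or into any port CQ, respectively.)
	 */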
+ no_iq_enq, + no_cq_enq, + /* port_specific */ + rx_used, + rx_free, + tx_used, + tx_free, + pkt_cycles, + poll_return, /* for zero-count and used also for port bucket loop */ + /* qid_specific */ + iq_used, + /* qid port mapping specific */ + pinned, + pkts, /* note: qid-to-port pkts */ +}; + +typedef uint64_t (*xstats_fn)(const struct sw_evdev *dev, + uint16_t obj_idx, /* port or queue id */ + enum xstats_type stat, int extra_arg); + +struct sw_xstats_entry { + struct rte_event_dev_xstats_name name; + xstats_fn fn; + uint16_t obj_idx; + enum xstats_type stat; + enum rte_event_dev_xstats_mode mode; + int extra_arg; + uint8_t reset_allowed; /* when set, this value can be reset */ + uint64_t reset_value; /* an offset to be taken away to emulate resets */ +}; + +static uint64_t +get_dev_stat(const struct sw_evdev *sw, uint16_t obj_idx __rte_unused, + enum xstats_type type, int extra_arg __rte_unused) +{ + switch (type) { + case rx: return sw->stats.rx_pkts; + case tx: return sw->stats.tx_pkts; + case dropped: return sw->stats.rx_dropped; + case calls: return sw->sched_called; + case no_iq_enq: return sw->sched_no_iq_enqueues; + case no_cq_enq: return sw->sched_no_cq_enqueues; + default: return -1; + } +} + +static uint64_t +get_port_stat(const struct sw_evdev *sw, uint16_t obj_idx, + enum xstats_type type, int extra_arg __rte_unused) +{ + const struct sw_port *p = &sw->ports[obj_idx]; + + switch (type) { + case rx: return p->stats.rx_pkts; + case tx: return p->stats.tx_pkts; + case dropped: return p->stats.rx_dropped; + case inflight: return p->inflights; + case pkt_cycles: return p->avg_pkt_ticks; + case calls: return p->total_polls; + case credits: return p->inflight_credits; + case poll_return: return p->zero_polls; + case rx_used: return rte_event_ring_count(p->rx_worker_ring); + case rx_free: return rte_event_ring_free_count(p->rx_worker_ring); + case tx_used: return rte_event_ring_count(p->cq_worker_ring); + case tx_free: return rte_event_ring_free_count(p->cq_worker_ring); + default: return -1; + } +} + +static uint64_t +get_port_bucket_stat(const struct sw_evdev *sw, uint16_t obj_idx, + enum xstats_type type, int extra_arg) +{ + const struct sw_port *p = &sw->ports[obj_idx]; + + switch (type) { + case poll_return: return p->poll_buckets[extra_arg]; + default: return -1; + } +} + +static uint64_t +get_qid_stat(const struct sw_evdev *sw, uint16_t obj_idx, + enum xstats_type type, int extra_arg __rte_unused) +{ + const struct sw_qid *qid = &sw->qids[obj_idx]; + + switch (type) { + case rx: return qid->stats.rx_pkts; + case tx: return qid->stats.tx_pkts; + case dropped: return qid->stats.rx_dropped; + case inflight: + do { + uint64_t infl = 0; + unsigned int i; + for (i = 0; i < RTE_DIM(qid->fids); i++) + infl += qid->fids[i].pcount; + return infl; + } while (0); + break; + default: return -1; + } +} + +static uint64_t +get_qid_iq_stat(const struct sw_evdev *sw, uint16_t obj_idx, + enum xstats_type type, int extra_arg) +{ + const struct sw_qid *qid = &sw->qids[obj_idx]; + const int iq_idx = extra_arg; + + switch (type) { + case iq_used: return iq_count(&qid->iq[iq_idx]); + default: return -1; + } +} + +static uint64_t +get_qid_port_stat(const struct sw_evdev *sw, uint16_t obj_idx, + enum xstats_type type, int extra_arg) +{ + const struct sw_qid *qid = &sw->qids[obj_idx]; + uint16_t port = extra_arg; + + switch (type) { + case pinned: + do { + uint64_t pin = 0; + unsigned int i; + for (i = 0; i < RTE_DIM(qid->fids); i++) + if (qid->fids[i].cq == port) + pin++; + return pin; + } while 
(0); + break; + case pkts: + return qid->to_port[port]; + default: return -1; + } +} + +int +sw_xstats_init(struct sw_evdev *sw) +{ + /* + * define the stats names and types. Used to build up the device + * xstats array + * There are multiple set of stats: + * - device-level, + * - per-port, + * - per-port-dequeue-burst-sizes + * - per-qid, + * - per-iq + * - per-port-per-qid + * + * For each of these sets, we have three parallel arrays, one for the + * names, the other for the stat type parameter to be passed in the fn + * call to get that stat. The third array allows resetting or not. + * All these arrays must be kept in sync + */ + static const char * const dev_stats[] = { "rx", "tx", "drop", + "sched_calls", "sched_no_iq_enq", "sched_no_cq_enq", + }; + static const enum xstats_type dev_types[] = { rx, tx, dropped, + calls, no_iq_enq, no_cq_enq, + }; + /* all device stats are allowed to be reset */ + + static const char * const port_stats[] = {"rx", "tx", "drop", + "inflight", "avg_pkt_cycles", "credits", + "rx_ring_used", "rx_ring_free", + "cq_ring_used", "cq_ring_free", + "dequeue_calls", "dequeues_returning_0", + }; + static const enum xstats_type port_types[] = { rx, tx, dropped, + inflight, pkt_cycles, credits, + rx_used, rx_free, tx_used, tx_free, + calls, poll_return, + }; + static const uint8_t port_reset_allowed[] = {1, 1, 1, + 0, 1, 0, + 0, 0, 0, 0, + 1, 1, + }; + + static const char * const port_bucket_stats[] = { + "dequeues_returning" }; + static const enum xstats_type port_bucket_types[] = { poll_return }; + /* all bucket dequeues are allowed to be reset, handled in loop below */ + + static const char * const qid_stats[] = {"rx", "tx", "drop", + "inflight" + }; + static const enum xstats_type qid_types[] = { rx, tx, dropped, + inflight + }; + static const uint8_t qid_reset_allowed[] = {1, 1, 1, + 0 + }; + + static const char * const qid_iq_stats[] = { "used" }; + static const enum xstats_type qid_iq_types[] = { iq_used }; + /* reset allowed */ + + static const char * const qid_port_stats[] = { "pinned_flows", + "packets" + }; + static const enum xstats_type qid_port_types[] = { pinned, pkts }; + static const uint8_t qid_port_reset_allowed[] = {0, 1}; + /* reset allowed */ + /* ---- end of stat definitions ---- */ + + /* check sizes, since a missed comma can lead to strings being + * joined by the compiler. 
+ */ + RTE_BUILD_BUG_ON(RTE_DIM(dev_stats) != RTE_DIM(dev_types)); + RTE_BUILD_BUG_ON(RTE_DIM(port_stats) != RTE_DIM(port_types)); + RTE_BUILD_BUG_ON(RTE_DIM(qid_stats) != RTE_DIM(qid_types)); + RTE_BUILD_BUG_ON(RTE_DIM(qid_iq_stats) != RTE_DIM(qid_iq_types)); + RTE_BUILD_BUG_ON(RTE_DIM(qid_port_stats) != RTE_DIM(qid_port_types)); + RTE_BUILD_BUG_ON(RTE_DIM(port_bucket_stats) != + RTE_DIM(port_bucket_types)); + + RTE_BUILD_BUG_ON(RTE_DIM(port_stats) != RTE_DIM(port_reset_allowed)); + RTE_BUILD_BUG_ON(RTE_DIM(qid_stats) != RTE_DIM(qid_reset_allowed)); + + /* other vars */ + const uint32_t cons_bkt_shift = + (MAX_SW_CONS_Q_DEPTH >> SW_DEQ_STAT_BUCKET_SHIFT); + const unsigned int count = RTE_DIM(dev_stats) + + sw->port_count * RTE_DIM(port_stats) + + sw->port_count * RTE_DIM(port_bucket_stats) * + (cons_bkt_shift + 1) + + sw->qid_count * RTE_DIM(qid_stats) + + sw->qid_count * SW_IQS_MAX * RTE_DIM(qid_iq_stats) + + sw->qid_count * sw->port_count * + RTE_DIM(qid_port_stats); + unsigned int i, port, qid, iq, bkt, stat = 0; + + sw->xstats = rte_zmalloc_socket(NULL, sizeof(sw->xstats[0]) * count, 0, + sw->data->socket_id); + if (sw->xstats == NULL) + return -ENOMEM; + +#define sname sw->xstats[stat].name.name + for (i = 0; i < RTE_DIM(dev_stats); i++, stat++) { + sw->xstats[stat] = (struct sw_xstats_entry){ + .fn = get_dev_stat, + .stat = dev_types[i], + .mode = RTE_EVENT_DEV_XSTATS_DEVICE, + .reset_allowed = 1, + }; + snprintf(sname, sizeof(sname), "dev_%s", dev_stats[i]); + } + sw->xstats_count_mode_dev = stat; + + for (port = 0; port < sw->port_count; port++) { + sw->xstats_offset_for_port[port] = stat; + + uint32_t count_offset = stat; + + for (i = 0; i < RTE_DIM(port_stats); i++, stat++) { + sw->xstats[stat] = (struct sw_xstats_entry){ + .fn = get_port_stat, + .obj_idx = port, + .stat = port_types[i], + .mode = RTE_EVENT_DEV_XSTATS_PORT, + .reset_allowed = port_reset_allowed[i], + }; + snprintf(sname, sizeof(sname), "port_%u_%s", + port, port_stats[i]); + } + + for (bkt = 0; bkt < (rte_event_ring_get_capacity( + sw->ports[port].cq_worker_ring) >> + SW_DEQ_STAT_BUCKET_SHIFT) + 1; bkt++) { + for (i = 0; i < RTE_DIM(port_bucket_stats); i++) { + sw->xstats[stat] = (struct sw_xstats_entry){ + .fn = get_port_bucket_stat, + .obj_idx = port, + .stat = port_bucket_types[i], + .mode = RTE_EVENT_DEV_XSTATS_PORT, + .extra_arg = bkt, + .reset_allowed = 1, + }; + snprintf(sname, sizeof(sname), + "port_%u_%s_%u-%u", + port, port_bucket_stats[i], + (bkt << SW_DEQ_STAT_BUCKET_SHIFT) + 1, + (bkt + 1) << SW_DEQ_STAT_BUCKET_SHIFT); + stat++; + } + } + + sw->xstats_count_per_port[port] = stat - count_offset; + } + + sw->xstats_count_mode_port = stat - sw->xstats_count_mode_dev; + + for (qid = 0; qid < sw->qid_count; qid++) { + uint32_t count_offset = stat; + sw->xstats_offset_for_qid[qid] = stat; + + for (i = 0; i < RTE_DIM(qid_stats); i++, stat++) { + sw->xstats[stat] = (struct sw_xstats_entry){ + .fn = get_qid_stat, + .obj_idx = qid, + .stat = qid_types[i], + .mode = RTE_EVENT_DEV_XSTATS_QUEUE, + .reset_allowed = qid_reset_allowed[i], + }; + snprintf(sname, sizeof(sname), "qid_%u_%s", + qid, qid_stats[i]); + } + for (iq = 0; iq < SW_IQS_MAX; iq++) + for (i = 0; i < RTE_DIM(qid_iq_stats); i++, stat++) { + sw->xstats[stat] = (struct sw_xstats_entry){ + .fn = get_qid_iq_stat, + .obj_idx = qid, + .stat = qid_iq_types[i], + .mode = RTE_EVENT_DEV_XSTATS_QUEUE, + .extra_arg = iq, + .reset_allowed = 0, + }; + snprintf(sname, sizeof(sname), + "qid_%u_iq_%u_%s", + qid, iq, + qid_iq_stats[i]); + } + + for (port = 0; port 
< sw->port_count; port++) + for (i = 0; i < RTE_DIM(qid_port_stats); i++, stat++) { + sw->xstats[stat] = (struct sw_xstats_entry){ + .fn = get_qid_port_stat, + .obj_idx = qid, + .stat = qid_port_types[i], + .mode = RTE_EVENT_DEV_XSTATS_QUEUE, + .extra_arg = port, + .reset_allowed = + qid_port_reset_allowed[i], + }; + snprintf(sname, sizeof(sname), + "qid_%u_port_%u_%s", + qid, port, + qid_port_stats[i]); + } + + sw->xstats_count_per_qid[qid] = stat - count_offset; + } + + sw->xstats_count_mode_queue = stat - + (sw->xstats_count_mode_dev + sw->xstats_count_mode_port); +#undef sname + + sw->xstats_count = stat; + + return stat; +} + +int +sw_xstats_uninit(struct sw_evdev *sw) +{ + rte_free(sw->xstats); + sw->xstats_count = 0; + return 0; +} + +int +sw_xstats_get_names(const struct rte_eventdev *dev, + enum rte_event_dev_xstats_mode mode, uint8_t queue_port_id, + struct rte_event_dev_xstats_name *xstats_names, + unsigned int *ids, unsigned int size) +{ + const struct sw_evdev *sw = sw_pmd_priv_const(dev); + unsigned int i; + unsigned int xidx = 0; + + uint32_t xstats_mode_count = 0; + uint32_t start_offset = 0; + + switch (mode) { + case RTE_EVENT_DEV_XSTATS_DEVICE: + xstats_mode_count = sw->xstats_count_mode_dev; + break; + case RTE_EVENT_DEV_XSTATS_PORT: + if (queue_port_id >= (signed int)sw->port_count) + break; + xstats_mode_count = sw->xstats_count_per_port[queue_port_id]; + start_offset = sw->xstats_offset_for_port[queue_port_id]; + break; + case RTE_EVENT_DEV_XSTATS_QUEUE: + if (queue_port_id >= (signed int)sw->qid_count) + break; + xstats_mode_count = sw->xstats_count_per_qid[queue_port_id]; + start_offset = sw->xstats_offset_for_qid[queue_port_id]; + break; + default: + SW_LOG_ERR("Invalid mode received in sw_xstats_get_names()\n"); + return -EINVAL; + }; + + if (xstats_mode_count > size || !ids || !xstats_names) + return xstats_mode_count; + + for (i = 0; i < sw->xstats_count && xidx < size; i++) { + if (sw->xstats[i].mode != mode) + continue; + + if (mode != RTE_EVENT_DEV_XSTATS_DEVICE && + queue_port_id != sw->xstats[i].obj_idx) + continue; + + xstats_names[xidx] = sw->xstats[i].name; + if (ids) + ids[xidx] = start_offset + xidx; + xidx++; + } + return xidx; +} + +static int +sw_xstats_update(struct sw_evdev *sw, enum rte_event_dev_xstats_mode mode, + uint8_t queue_port_id, const unsigned int ids[], + uint64_t values[], unsigned int n, const uint32_t reset, + const uint32_t ret_if_n_lt_nstats) +{ + unsigned int i; + unsigned int xidx = 0; + RTE_SET_USED(mode); + RTE_SET_USED(queue_port_id); + + uint32_t xstats_mode_count = 0; + + switch (mode) { + case RTE_EVENT_DEV_XSTATS_DEVICE: + xstats_mode_count = sw->xstats_count_mode_dev; + break; + case RTE_EVENT_DEV_XSTATS_PORT: + if (queue_port_id >= (signed int)sw->port_count) + goto invalid_value; + xstats_mode_count = sw->xstats_count_per_port[queue_port_id]; + break; + case RTE_EVENT_DEV_XSTATS_QUEUE: + if (queue_port_id >= (signed int)sw->qid_count) + goto invalid_value; + xstats_mode_count = sw->xstats_count_per_qid[queue_port_id]; + break; + default: + SW_LOG_ERR("Invalid mode received in sw_xstats_get()\n"); + goto invalid_value; + }; + + /* this function can check num stats and return them (xstats_get() style + * behaviour) or ignore n for reset() of a single stat style behaviour. 
+ */ + if (ret_if_n_lt_nstats && xstats_mode_count > n) + return xstats_mode_count; + + for (i = 0; i < n && xidx < xstats_mode_count; i++) { + struct sw_xstats_entry *xs = &sw->xstats[ids[i]]; + if (ids[i] > sw->xstats_count || xs->mode != mode) + continue; + + if (mode != RTE_EVENT_DEV_XSTATS_DEVICE && + queue_port_id != xs->obj_idx) + continue; + + uint64_t val = xs->fn(sw, xs->obj_idx, xs->stat, xs->extra_arg) + - xs->reset_value; + + if (values) + values[xidx] = val; + + if (xs->reset_allowed && reset) + xs->reset_value += val; + + xidx++; + } + + return xidx; +invalid_value: + return -EINVAL; +} + +int +sw_xstats_get(const struct rte_eventdev *dev, + enum rte_event_dev_xstats_mode mode, uint8_t queue_port_id, + const unsigned int ids[], uint64_t values[], unsigned int n) +{ + struct sw_evdev *sw = sw_pmd_priv(dev); + const uint32_t reset = 0; + const uint32_t ret_n_lt_stats = 0; + return sw_xstats_update(sw, mode, queue_port_id, ids, values, n, + reset, ret_n_lt_stats); +} + +uint64_t +sw_xstats_get_by_name(const struct rte_eventdev *dev, + const char *name, unsigned int *id) +{ + const struct sw_evdev *sw = sw_pmd_priv_const(dev); + unsigned int i; + + for (i = 0; i < sw->xstats_count; i++) { + struct sw_xstats_entry *xs = &sw->xstats[i]; + if (strncmp(xs->name.name, name, + RTE_EVENT_DEV_XSTATS_NAME_SIZE) == 0){ + if (id != NULL) + *id = i; + return xs->fn(sw, xs->obj_idx, xs->stat, xs->extra_arg) + - xs->reset_value; + } + } + if (id != NULL) + *id = (uint32_t)-1; + return (uint64_t)-1; +} + +static void +sw_xstats_reset_range(struct sw_evdev *sw, uint32_t start, uint32_t num) +{ + uint32_t i; + for (i = start; i < start + num; i++) { + struct sw_xstats_entry *xs = &sw->xstats[i]; + if (!xs->reset_allowed) + continue; + + uint64_t val = xs->fn(sw, xs->obj_idx, xs->stat, xs->extra_arg); + xs->reset_value = val; + } +} + +static int +sw_xstats_reset_queue(struct sw_evdev *sw, uint8_t queue_id, + const uint32_t ids[], uint32_t nb_ids) +{ + const uint32_t reset = 1; + const uint32_t ret_n_lt_stats = 0; + if (ids) { + uint32_t nb_reset = sw_xstats_update(sw, + RTE_EVENT_DEV_XSTATS_QUEUE, + queue_id, ids, NULL, nb_ids, + reset, ret_n_lt_stats); + return nb_reset == nb_ids ? 0 : -EINVAL; + } + + if (ids == NULL) + sw_xstats_reset_range(sw, sw->xstats_offset_for_qid[queue_id], + sw->xstats_count_per_qid[queue_id]); + + return 0; +} + +static int +sw_xstats_reset_port(struct sw_evdev *sw, uint8_t port_id, + const uint32_t ids[], uint32_t nb_ids) +{ + const uint32_t reset = 1; + const uint32_t ret_n_lt_stats = 0; + int offset = sw->xstats_offset_for_port[port_id]; + int nb_stat = sw->xstats_count_per_port[port_id]; + + if (ids) { + uint32_t nb_reset = sw_xstats_update(sw, + RTE_EVENT_DEV_XSTATS_PORT, port_id, + ids, NULL, nb_ids, + reset, ret_n_lt_stats); + return nb_reset == nb_ids ? 
0 : -EINVAL;
+	}
+
+	sw_xstats_reset_range(sw, offset, nb_stat);
+	return 0;
+}
+
+static int
+sw_xstats_reset_dev(struct sw_evdev *sw, const uint32_t ids[], uint32_t nb_ids)
+{
+	uint32_t i;
+	if (ids) {
+		for (i = 0; i < nb_ids; i++) {
+			uint32_t id = ids[i];
+			if (id >= sw->xstats_count_mode_dev)
+				return -EINVAL;
+			sw_xstats_reset_range(sw, id, 1);
+		}
+	} else {
+		for (i = 0; i < sw->xstats_count_mode_dev; i++)
+			sw_xstats_reset_range(sw, i, 1);
+	}
+
+	return 0;
+}
+
+int
+sw_xstats_reset(struct rte_eventdev *dev,
+		enum rte_event_dev_xstats_mode mode,
+		int16_t queue_port_id,
+		const uint32_t ids[],
+		uint32_t nb_ids)
+{
+	struct sw_evdev *sw = sw_pmd_priv(dev);
+	uint32_t i, err;
+
+	/* handle -1 for queue_port_id here, looping over all ports/queues */
+	switch (mode) {
+	case RTE_EVENT_DEV_XSTATS_DEVICE:
+		sw_xstats_reset_dev(sw, ids, nb_ids);
+		break;
+	case RTE_EVENT_DEV_XSTATS_PORT:
+		if (queue_port_id == -1) {
+			for (i = 0; i < sw->port_count; i++) {
+				err = sw_xstats_reset_port(sw, i, ids, nb_ids);
+				if (err)
+					return -EINVAL;
+			}
+		} else if (queue_port_id < (int16_t)sw->port_count)
+			sw_xstats_reset_port(sw, queue_port_id, ids, nb_ids);
+		break;
+	case RTE_EVENT_DEV_XSTATS_QUEUE:
+		if (queue_port_id == -1) {
+			for (i = 0; i < sw->qid_count; i++) {
+				err = sw_xstats_reset_queue(sw, i, ids, nb_ids);
+				if (err)
+					return -EINVAL;
+			}
+		} else if (queue_port_id < (int16_t)sw->qid_count)
+			sw_xstats_reset_queue(sw, queue_port_id, ids, nb_ids);
+		break;
+	};
+
+	return 0;
+}
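
The sw_event_enqueue_burst()/sw_event_dequeue_burst() paths at the top of this section are only reached through the generic rte_event_*_burst() API. The following is a minimal application-side sketch of the loop they service; it is not part of the diff, it assumes EAL and the event_sw device are already configured with a valid dev_id/port_id, and BURST and NEXT_QID are invented for illustration. Forwarding every dequeued event with RTE_EVENT_OP_FORWARD is what resolves the outstanding releases and credit accounting tracked above.

#include <rte_eventdev.h>

#define BURST 32      /* illustrative burst size */
#define NEXT_QID 1    /* hypothetical downstream queue id */

/* Dequeue a burst, process it, and forward each event to the next
 * stage on the same port. OP_FORWARD (and OP_RELEASE on drop paths)
 * drives the outstanding_releases/inflight_credits bookkeeping in
 * the PMD code above.
 */
static void
worker_loop(uint8_t dev_id, uint8_t port_id)
{
	struct rte_event ev[BURST];

	for (;;) {
		uint16_t i, nb_rx, nb_tx;

		nb_rx = rte_event_dequeue_burst(dev_id, port_id, ev, BURST, 0);
		if (nb_rx == 0)
			continue;	/* counted as a zero poll by the PMD */

		for (i = 0; i < nb_rx; i++) {
			/* ... touch ev[i].mbuf / ev[i].event_ptr here ... */
			ev[i].queue_id = NEXT_QID;
			ev[i].op = RTE_EVENT_OP_FORWARD;
		}

		nb_tx = rte_event_enqueue_burst(dev_id, port_id, ev, nb_rx);
		while (nb_tx < nb_rx)	/* retry on back-pressure */
			nb_tx += rte_event_enqueue_burst(dev_id, port_id,
					ev + nb_tx, nb_rx - nb_tx);
	}
}

With implicit release enabled on the port, events that are dequeued but never forwarded are released automatically on the next dequeue, which is the out_rels loop visible in sw_event_dequeue_burst() above.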
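
The counters registered by sw_xstats_init() above are exposed through the generic eventdev xstats API rather than consumed from this file directly. Below is a rough sketch of dumping the device-scope statistics (named dev_rx, dev_tx, dev_drop, dev_sched_calls, and so on by the code above), assuming EAL is initialised and dev_id identifies the event_sw instance; the helper name is invented for the example.

#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>

#include <rte_eventdev.h>

/* Query the number of device-scope xstats, fetch their names and ids,
 * then read and print the current values.
 */
static void
dump_dev_xstats(uint8_t dev_id)
{
	int i, nb = rte_event_dev_xstats_names_get(dev_id,
			RTE_EVENT_DEV_XSTATS_DEVICE, 0, NULL, NULL, 0);
	if (nb <= 0)
		return;

	struct rte_event_dev_xstats_name *names = calloc(nb, sizeof(*names));
	unsigned int *ids = calloc(nb, sizeof(*ids));
	uint64_t *values = calloc(nb, sizeof(*values));

	if (names != NULL && ids != NULL && values != NULL) {
		rte_event_dev_xstats_names_get(dev_id,
				RTE_EVENT_DEV_XSTATS_DEVICE, 0, names, ids, nb);
		nb = rte_event_dev_xstats_get(dev_id,
				RTE_EVENT_DEV_XSTATS_DEVICE, 0, ids, values, nb);
		for (i = 0; i < nb; i++)
			printf("%s: %" PRIu64 "\n", names[i].name, values[i]);
	}

	free(names);
	free(ids);
	free(values);
}

A single counter can also be read by name with rte_event_dev_xstats_by_name_get(), which is the path that ends up in sw_xstats_get_by_name() above.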