diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
commit | e6918187568dbd01842d8d1d2c808ce16a894239 (patch) | |
tree | 64f88b554b444a49f656b6c656111a145cbbaa28 /src/seastar/dpdk/examples/quota_watermark/qw | |
parent | Initial commit. (diff) | |
download | ceph-e6918187568dbd01842d8d1d2c808ce16a894239.tar.xz ceph-e6918187568dbd01842d8d1d2c808ce16a894239.zip |
Adding upstream version 18.2.2. [upstream/18.2.2]
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/seastar/dpdk/examples/quota_watermark/qw')
-rw-r--r-- | src/seastar/dpdk/examples/quota_watermark/qw/Makefile | 22 | ||||
-rw-r--r-- | src/seastar/dpdk/examples/quota_watermark/qw/args.c | 78 | ||||
-rw-r--r-- | src/seastar/dpdk/examples/quota_watermark/qw/args.h | 12 | ||||
-rw-r--r-- | src/seastar/dpdk/examples/quota_watermark/qw/init.c | 164 | ||||
-rw-r--r-- | src/seastar/dpdk/examples/quota_watermark/qw/init.h | 14 | ||||
-rw-r--r-- | src/seastar/dpdk/examples/quota_watermark/qw/main.c | 365 | ||||
-rw-r--r-- | src/seastar/dpdk/examples/quota_watermark/qw/main.h | 31 |
7 files changed, 686 insertions, 0 deletions
diff --git a/src/seastar/dpdk/examples/quota_watermark/qw/Makefile b/src/seastar/dpdk/examples/quota_watermark/qw/Makefile new file mode 100644 index 000000000..3f10f01c3 --- /dev/null +++ b/src/seastar/dpdk/examples/quota_watermark/qw/Makefile @@ -0,0 +1,22 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2010-2014 Intel Corporation + +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif + +# Default target, detect a build directory, by looking for a path with a .config +RTE_TARGET ?= $(notdir $(abspath $(dir $(firstword $(wildcard $(RTE_SDK)/*/.config))))) + +include $(RTE_SDK)/mk/rte.vars.mk + +# binary name +APP = qw + +# all source are stored in SRCS-y +SRCS-y := args.c init.c main.c + +CFLAGS += -O3 -DQW_SOFTWARE_FC +CFLAGS += $(WERROR_FLAGS) + +include $(RTE_SDK)/mk/rte.extapp.mk diff --git a/src/seastar/dpdk/examples/quota_watermark/qw/args.c b/src/seastar/dpdk/examples/quota_watermark/qw/args.c new file mode 100644 index 000000000..a750ec258 --- /dev/null +++ b/src/seastar/dpdk/examples/quota_watermark/qw/args.c @@ -0,0 +1,78 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2014 Intel Corporation + */ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +#include <rte_common.h> +#include <rte_lcore.h> + +#include "args.h" + + +unsigned int portmask = 0; + + +static void +usage(const char *prgname) +{ + fprintf(stderr, "Usage: %s [EAL args] -- -p <portmask>\n" + "-p PORTMASK: hexadecimal bitmask of NIC ports to configure\n", + prgname); +} + +static unsigned long +parse_portmask(const char *portmask_str) +{ + return strtoul(portmask_str, NULL, 16); +} + +static void +check_core_count(void) +{ + if (rte_lcore_count() < 3) + rte_exit(EXIT_FAILURE, + "At least 3 cores need to be passed in the coremask\n"); +} + +static void +check_portmask_value(unsigned int portmask) +{ + unsigned int port_nb = 0; + + port_nb = __builtin_popcount(portmask); + + if (port_nb == 0) + rte_exit(EXIT_FAILURE, + 
"At least 2 ports need to be passed in the portmask\n"); + + if (port_nb % 2 != 0) + rte_exit(EXIT_FAILURE, + "An even number of ports is required in the portmask\n"); +} + +int +parse_qw_args(int argc, char **argv) +{ + int opt; + + while ((opt = getopt(argc, argv, "h:p:")) != -1) { + switch (opt) { + case 'h': + usage(argv[0]); + break; + case 'p': + portmask = parse_portmask(optarg); + break; + default: + usage(argv[0]); + } + } + + check_core_count(); + check_portmask_value(portmask); + + return 0; +} diff --git a/src/seastar/dpdk/examples/quota_watermark/qw/args.h b/src/seastar/dpdk/examples/quota_watermark/qw/args.h new file mode 100644 index 000000000..ab777db01 --- /dev/null +++ b/src/seastar/dpdk/examples/quota_watermark/qw/args.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2014 Intel Corporation + */ + +#ifndef _ARGS_H_ +#define _ARGS_H_ + +extern unsigned int portmask; + +int parse_qw_args(int argc, char **argv); + +#endif /* _ARGS_H_ */ diff --git a/src/seastar/dpdk/examples/quota_watermark/qw/init.c b/src/seastar/dpdk/examples/quota_watermark/qw/init.c new file mode 100644 index 000000000..5a0f64f45 --- /dev/null +++ b/src/seastar/dpdk/examples/quota_watermark/qw/init.c @@ -0,0 +1,164 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2014 Intel Corporation + */ + +#include <fcntl.h> +#include <unistd.h> +#include <sys/mman.h> + +#include <rte_eal.h> + +#include <rte_common.h> +#include <rte_errno.h> +#include <rte_ethdev.h> +#include <rte_memzone.h> +#include <rte_ring.h> +#include <rte_string_fns.h> + +#include "args.h" +#include "init.h" +#include "main.h" +#include "../include/conf.h" + + +static struct rte_eth_conf port_conf = { + .rxmode = { + .split_hdr_size = 0, + }, + .txmode = { + .mq_mode = ETH_DCB_NONE, + }, +}; + +static struct rte_eth_fc_conf fc_conf = { + .mode = RTE_FC_TX_PAUSE, + .high_water = 80 * 510 / 100, + .low_water = 60 * 510 / 100, + .pause_time = 1337, + .send_xon = 0, +}; + 
+ +void configure_eth_port(uint16_t port_id) +{ + int ret; + uint16_t nb_rxd = RX_DESC_PER_QUEUE; + uint16_t nb_txd = TX_DESC_PER_QUEUE; + struct rte_eth_rxconf rxq_conf; + struct rte_eth_txconf txq_conf; + struct rte_eth_dev_info dev_info; + struct rte_eth_conf local_port_conf = port_conf; + + rte_eth_dev_stop(port_id); + + rte_eth_dev_info_get(port_id, &dev_info); + if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE) + local_port_conf.txmode.offloads |= + DEV_TX_OFFLOAD_MBUF_FAST_FREE; + ret = rte_eth_dev_configure(port_id, 1, 1, &local_port_conf); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Cannot configure port %u (error %d)\n", + (unsigned int) port_id, ret); + + ret = rte_eth_dev_adjust_nb_rx_tx_desc(port_id, &nb_rxd, &nb_txd); + if (ret < 0) + rte_exit(EXIT_FAILURE, + "Cannot adjust number of descriptors for port %u (error %d)\n", + (unsigned int) port_id, ret); + + /* Initialize the port's RX queue */ + rxq_conf = dev_info.default_rxconf; + rxq_conf.offloads = local_port_conf.rxmode.offloads; + ret = rte_eth_rx_queue_setup(port_id, 0, nb_rxd, + rte_eth_dev_socket_id(port_id), + &rxq_conf, + mbuf_pool); + if (ret < 0) + rte_exit(EXIT_FAILURE, + "Failed to setup RX queue on port %u (error %d)\n", + (unsigned int) port_id, ret); + + /* Initialize the port's TX queue */ + txq_conf = dev_info.default_txconf; + txq_conf.offloads = local_port_conf.txmode.offloads; + ret = rte_eth_tx_queue_setup(port_id, 0, nb_txd, + rte_eth_dev_socket_id(port_id), + &txq_conf); + if (ret < 0) + rte_exit(EXIT_FAILURE, + "Failed to setup TX queue on port %u (error %d)\n", + (unsigned int) port_id, ret); + + /* Initialize the port's flow control */ + ret = rte_eth_dev_flow_ctrl_set(port_id, &fc_conf); + if (ret < 0) + rte_exit(EXIT_FAILURE, + "Failed to setup hardware flow control on port %u (error %d)\n", + (unsigned int) port_id, ret); + + /* Start the port */ + ret = rte_eth_dev_start(port_id); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Failed to start port %u (error %d)\n", 
+ (unsigned int) port_id, ret); + + /* Put it in promiscuous mode */ + rte_eth_promiscuous_enable(port_id); +} + +void +init_dpdk(void) +{ + if (rte_eth_dev_count_avail() < 2) + rte_exit(EXIT_FAILURE, "Not enough ethernet port available\n"); +} + +void init_ring(int lcore_id, uint16_t port_id) +{ + struct rte_ring *ring; + char ring_name[RTE_RING_NAMESIZE]; + + snprintf(ring_name, RTE_RING_NAMESIZE, + "core%d_port%d", lcore_id, port_id); + ring = rte_ring_create(ring_name, RING_SIZE, rte_socket_id(), + RING_F_SP_ENQ | RING_F_SC_DEQ); + + if (ring == NULL) + rte_exit(EXIT_FAILURE, "%s\n", rte_strerror(rte_errno)); + + *high_watermark = 80 * RING_SIZE / 100; + + rings[lcore_id][port_id] = ring; +} + +void +pair_ports(void) +{ + uint16_t i, j; + + /* Pair ports with their "closest neighbour" in the portmask */ + for (i = 0; i < RTE_MAX_ETHPORTS; i++) + if (is_bit_set(i, portmask)) + for (j = i + 1; j < RTE_MAX_ETHPORTS; j++) + if (is_bit_set(j, portmask)) { + port_pairs[i] = j; + port_pairs[j] = i; + i = j; + break; + } +} + +void +setup_shared_variables(void) +{ + const struct rte_memzone *qw_memzone; + + qw_memzone = rte_memzone_reserve(QUOTA_WATERMARK_MEMZONE_NAME, + 3 * sizeof(int), rte_socket_id(), 0); + if (qw_memzone == NULL) + rte_exit(EXIT_FAILURE, "%s\n", rte_strerror(rte_errno)); + + quota = qw_memzone->addr; + low_watermark = (unsigned int *) qw_memzone->addr + 1; + high_watermark = (unsigned int *) qw_memzone->addr + 2; +} diff --git a/src/seastar/dpdk/examples/quota_watermark/qw/init.h b/src/seastar/dpdk/examples/quota_watermark/qw/init.h new file mode 100644 index 000000000..e0c90df72 --- /dev/null +++ b/src/seastar/dpdk/examples/quota_watermark/qw/init.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2014 Intel Corporation + */ + +#ifndef _INIT_H_ +#define _INIT_H_ + +void configure_eth_port(uint16_t port_id); +void init_dpdk(void); +void init_ring(int lcore_id, uint16_t port_id); +void pair_ports(void); +void 
setup_shared_variables(void); + +#endif /* _INIT_H_ */ diff --git a/src/seastar/dpdk/examples/quota_watermark/qw/main.c b/src/seastar/dpdk/examples/quota_watermark/qw/main.c new file mode 100644 index 000000000..c55d38744 --- /dev/null +++ b/src/seastar/dpdk/examples/quota_watermark/qw/main.c @@ -0,0 +1,365 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2014 Intel Corporation + */ + +#include <rte_eal.h> + +#include <rte_common.h> +#include <rte_debug.h> +#include <rte_errno.h> +#include <rte_ethdev.h> +#include <rte_launch.h> +#include <rte_lcore.h> +#include <rte_log.h> +#include <rte_mbuf.h> +#include <rte_ring.h> + +#include <rte_byteorder.h> + +#include "args.h" +#include "main.h" +#include "init.h" +#include "../include/conf.h" + + +#ifdef QW_SOFTWARE_FC +#define SEND_PAUSE_FRAME(port_id, duration) send_pause_frame(port_id, duration) +#else +#define SEND_PAUSE_FRAME(port_id, duration) do { } while(0) +#endif + +#define ETHER_TYPE_FLOW_CONTROL 0x8808 + +struct ether_fc_frame { + uint16_t opcode; + uint16_t param; +} __attribute__((__packed__)); + + +int *quota; +unsigned int *low_watermark; +unsigned int *high_watermark; + +uint16_t port_pairs[RTE_MAX_ETHPORTS]; + +struct rte_ring *rings[RTE_MAX_LCORE][RTE_MAX_ETHPORTS]; +struct rte_mempool *mbuf_pool; + + +static void send_pause_frame(uint16_t port_id, uint16_t duration) +{ + struct rte_mbuf *mbuf; + struct ether_fc_frame *pause_frame; + struct ether_hdr *hdr; + struct ether_addr mac_addr; + + RTE_LOG_DP(DEBUG, USER1, + "Sending PAUSE frame (duration=%d) on port %d\n", + duration, port_id); + + /* Get a mbuf from the pool */ + mbuf = rte_pktmbuf_alloc(mbuf_pool); + if (unlikely(mbuf == NULL)) + return; + + /* Prepare a PAUSE frame */ + hdr = rte_pktmbuf_mtod(mbuf, struct ether_hdr *); + pause_frame = (struct ether_fc_frame *) &hdr[1]; + + rte_eth_macaddr_get(port_id, &mac_addr); + ether_addr_copy(&mac_addr, &hdr->s_addr); + + void *tmp = &hdr->d_addr.addr_bytes[0]; + *((uint64_t *)tmp) = 
0x010000C28001ULL; + + hdr->ether_type = rte_cpu_to_be_16(ETHER_TYPE_FLOW_CONTROL); + + pause_frame->opcode = rte_cpu_to_be_16(0x0001); + pause_frame->param = rte_cpu_to_be_16(duration); + + mbuf->pkt_len = 60; + mbuf->data_len = 60; + + rte_eth_tx_burst(port_id, 0, &mbuf, 1); +} + +/** + * Get the previous enabled lcore ID + * + * @param lcore_id + * The current lcore ID. + * @return + * The previous enabled lcore_id or -1 if not found. + */ +static unsigned int +get_previous_lcore_id(unsigned int lcore_id) +{ + int i; + + for (i = lcore_id - 1; i >= 0; i--) + if (rte_lcore_is_enabled(i)) + return i; + + return -1; +} + +/** + * Get the last enabled lcore ID + * + * @return + * The last enabled lcore_id. + */ +static unsigned int +get_last_lcore_id(void) +{ + int i; + + for (i = RTE_MAX_LCORE; i >= 0; i--) + if (rte_lcore_is_enabled(i)) + return i; + + return 0; +} + +static void +receive_stage(__attribute__((unused)) void *args) +{ + int i, ret; + + uint16_t port_id; + uint16_t nb_rx_pkts; + + unsigned int lcore_id; + unsigned int free; + + struct rte_mbuf *pkts[MAX_PKT_QUOTA]; + struct rte_ring *ring; + enum ring_state ring_state[RTE_MAX_ETHPORTS] = { RING_READY }; + + lcore_id = rte_lcore_id(); + + RTE_LOG(INFO, USER1, + "%s() started on core %u\n", __func__, lcore_id); + + while (1) { + + /* Process each port round robin style */ + for (port_id = 0; port_id < RTE_MAX_ETHPORTS; port_id++) { + + if (!is_bit_set(port_id, portmask)) + continue; + + ring = rings[lcore_id][port_id]; + + if (ring_state[port_id] != RING_READY) { + if (rte_ring_count(ring) > *low_watermark) + continue; + else + ring_state[port_id] = RING_READY; + } + + /* Enqueue received packets on the RX ring */ + nb_rx_pkts = rte_eth_rx_burst(port_id, 0, pkts, + (uint16_t) *quota); + ret = rte_ring_enqueue_bulk(ring, (void *) pkts, + nb_rx_pkts, &free); + if (RING_SIZE - free > *high_watermark) { + ring_state[port_id] = RING_OVERLOADED; + send_pause_frame(port_id, 1337); + } + + if (ret == 0) { + + 
/* + * Return mbufs to the pool, + * effectively dropping packets + */ + for (i = 0; i < nb_rx_pkts; i++) + rte_pktmbuf_free(pkts[i]); + } + } + } +} + +static int +pipeline_stage(__attribute__((unused)) void *args) +{ + int i, ret; + int nb_dq_pkts; + + uint16_t port_id; + + unsigned int lcore_id, previous_lcore_id; + unsigned int free; + + void *pkts[MAX_PKT_QUOTA]; + struct rte_ring *rx, *tx; + enum ring_state ring_state[RTE_MAX_ETHPORTS] = { RING_READY }; + + lcore_id = rte_lcore_id(); + previous_lcore_id = get_previous_lcore_id(lcore_id); + + RTE_LOG(INFO, USER1, + "%s() started on core %u - processing packets from core %u\n", + __func__, lcore_id, previous_lcore_id); + + while (1) { + + for (port_id = 0; port_id < RTE_MAX_ETHPORTS; port_id++) { + + if (!is_bit_set(port_id, portmask)) + continue; + + tx = rings[lcore_id][port_id]; + rx = rings[previous_lcore_id][port_id]; + + if (ring_state[port_id] != RING_READY) { + if (rte_ring_count(tx) > *low_watermark) + continue; + else + ring_state[port_id] = RING_READY; + } + + /* Dequeue up to quota mbuf from rx */ + nb_dq_pkts = rte_ring_dequeue_burst(rx, pkts, + *quota, NULL); + if (unlikely(nb_dq_pkts < 0)) + continue; + + /* Enqueue them on tx */ + ret = rte_ring_enqueue_bulk(tx, pkts, + nb_dq_pkts, &free); + if (RING_SIZE - free > *high_watermark) + ring_state[port_id] = RING_OVERLOADED; + + if (ret == 0) { + + /* + * Return mbufs to the pool, + * effectively dropping packets + */ + for (i = 0; i < nb_dq_pkts; i++) + rte_pktmbuf_free(pkts[i]); + } + } + } + + return 0; +} + +static int +send_stage(__attribute__((unused)) void *args) +{ + uint16_t nb_dq_pkts; + + uint16_t port_id; + uint16_t dest_port_id; + + unsigned int lcore_id, previous_lcore_id; + + struct rte_ring *tx; + struct rte_mbuf *tx_pkts[MAX_PKT_QUOTA]; + + lcore_id = rte_lcore_id(); + previous_lcore_id = get_previous_lcore_id(lcore_id); + + RTE_LOG(INFO, USER1, + "%s() started on core %u - processing packets from core %u\n", + __func__, lcore_id, 
previous_lcore_id); + + while (1) { + + /* Process each ring round robin style */ + for (port_id = 0; port_id < RTE_MAX_ETHPORTS; port_id++) { + + if (!is_bit_set(port_id, portmask)) + continue; + + dest_port_id = port_pairs[port_id]; + tx = rings[previous_lcore_id][port_id]; + + if (rte_ring_empty(tx)) + continue; + + /* Dequeue packets from tx and send them */ + nb_dq_pkts = (uint16_t) rte_ring_dequeue_burst(tx, + (void *) tx_pkts, *quota, NULL); + rte_eth_tx_burst(dest_port_id, 0, tx_pkts, nb_dq_pkts); + + /* TODO: Check if nb_dq_pkts == nb_tx_pkts? */ + } + } + + return 0; +} + +int +main(int argc, char **argv) +{ + int ret; + unsigned int lcore_id, master_lcore_id, last_lcore_id; + + uint16_t port_id; + + rte_log_set_global_level(RTE_LOG_INFO); + + ret = rte_eal_init(argc, argv); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Cannot initialize EAL\n"); + + argc -= ret; + argv += ret; + + init_dpdk(); + setup_shared_variables(); + + *quota = 32; + *low_watermark = 60 * RING_SIZE / 100; + + last_lcore_id = get_last_lcore_id(); + master_lcore_id = rte_get_master_lcore(); + + /* Parse the application's arguments */ + ret = parse_qw_args(argc, argv); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Invalid quota/watermark argument(s)\n"); + + /* Create a pool of mbuf to store packets */ + mbuf_pool = rte_pktmbuf_pool_create("mbuf_pool", MBUF_PER_POOL, 32, 0, + MBUF_DATA_SIZE, rte_socket_id()); + if (mbuf_pool == NULL) + rte_panic("%s\n", rte_strerror(rte_errno)); + + for (port_id = 0; port_id < RTE_MAX_ETHPORTS; port_id++) + if (is_bit_set(port_id, portmask)) { + configure_eth_port(port_id); + init_ring(master_lcore_id, port_id); + } + + pair_ports(); + + /* + * Start pipeline_connect() on all the available slave lcores + * but the last + */ + for (lcore_id = 0 ; lcore_id < last_lcore_id; lcore_id++) { + if (rte_lcore_is_enabled(lcore_id) && + lcore_id != master_lcore_id) { + + for (port_id = 0; port_id < RTE_MAX_ETHPORTS; port_id++) + if (is_bit_set(port_id, portmask)) + 
init_ring(lcore_id, port_id); + + rte_eal_remote_launch(pipeline_stage, + NULL, lcore_id); + } + } + + /* Start send_stage() on the last slave core */ + rte_eal_remote_launch(send_stage, NULL, last_lcore_id); + + /* Start receive_stage() on the master core */ + receive_stage(NULL); + + return 0; +} diff --git a/src/seastar/dpdk/examples/quota_watermark/qw/main.h b/src/seastar/dpdk/examples/quota_watermark/qw/main.h new file mode 100644 index 000000000..9903ddc8c --- /dev/null +++ b/src/seastar/dpdk/examples/quota_watermark/qw/main.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2014 Intel Corporation + */ + +#ifndef _MAIN_H_ +#define _MAIN_H_ + +#include "../include/conf.h" + +enum ring_state { + RING_READY, + RING_OVERLOADED, +}; + +extern int *quota; +extern unsigned int *low_watermark; +extern unsigned int *high_watermark; + +extern uint16_t port_pairs[RTE_MAX_ETHPORTS]; + +extern struct rte_ring *rings[RTE_MAX_LCORE][RTE_MAX_ETHPORTS]; +extern struct rte_mempool *mbuf_pool; + + +static inline int +is_bit_set(int i, unsigned int mask) +{ + return (1 << i) & mask; +} + +#endif /* _MAIN_H_ */ |