diff options
Diffstat (limited to 'src/spdk/dpdk/lib/librte_node')
19 files changed, 2002 insertions, 0 deletions
diff --git a/src/spdk/dpdk/lib/librte_node/Makefile b/src/spdk/dpdk/lib/librte_node/Makefile new file mode 100644 index 000000000..9dee8b4eb --- /dev/null +++ b/src/spdk/dpdk/lib/librte_node/Makefile @@ -0,0 +1,32 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(C) 2020 Marvell International Ltd. +# + +include $(RTE_SDK)/mk/rte.vars.mk + +# library name +LIB = librte_node.a + +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) +# Strict-aliasing rules are violated by uint8_t[] to context size casts. +CFLAGS += -fno-strict-aliasing +LDLIBS += -lrte_eal -lrte_graph -lrte_mbuf -lrte_lpm -lrte_ethdev -lrte_mempool + +EXPORT_MAP := rte_node_version.map + +# all source are stored in SRCS-y +SRCS-$(CONFIG_RTE_LIBRTE_NODE) += null.c +SRCS-$(CONFIG_RTE_LIBRTE_NODE) += log.c +SRCS-$(CONFIG_RTE_LIBRTE_NODE) += ethdev_rx.c +SRCS-$(CONFIG_RTE_LIBRTE_NODE) += ethdev_tx.c +SRCS-$(CONFIG_RTE_LIBRTE_NODE) += ethdev_ctrl.c +SRCS-$(CONFIG_RTE_LIBRTE_NODE) += ip4_lookup.c +SRCS-$(CONFIG_RTE_LIBRTE_NODE) += ip4_rewrite.c +SRCS-$(CONFIG_RTE_LIBRTE_NODE) += pkt_drop.c + +# install header files +SYMLINK-$(CONFIG_RTE_LIBRTE_NODE)-include += rte_node_ip4_api.h +SYMLINK-$(CONFIG_RTE_LIBRTE_NODE)-include += rte_node_eth_api.h + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/src/spdk/dpdk/lib/librte_node/ethdev_ctrl.c b/src/spdk/dpdk/lib/librte_node/ethdev_ctrl.c new file mode 100644 index 000000000..13b8b705f --- /dev/null +++ b/src/spdk/dpdk/lib/librte_node/ethdev_ctrl.c @@ -0,0 +1,115 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2020 Marvell International Ltd. 
+ */ + +#include <rte_debug.h> +#include <rte_ethdev.h> +#include <rte_ether.h> +#include <rte_graph.h> + +#include "rte_node_eth_api.h" + +#include "ethdev_rx_priv.h" +#include "ethdev_tx_priv.h" +#include "ip4_rewrite_priv.h" +#include "node_private.h" + +static struct ethdev_ctrl { + uint16_t nb_graphs; +} ctrl; + +int +rte_node_eth_config(struct rte_node_ethdev_config *conf, uint16_t nb_confs, + uint16_t nb_graphs) +{ + struct rte_node_register *ip4_rewrite_node; + struct ethdev_tx_node_main *tx_node_data; + uint16_t tx_q_used, rx_q_used, port_id; + struct rte_node_register *tx_node; + char name[RTE_NODE_NAMESIZE]; + const char *next_nodes = name; + struct rte_mempool *mp; + int i, j, rc; + uint32_t id; + + ip4_rewrite_node = ip4_rewrite_node_get(); + tx_node_data = ethdev_tx_node_data_get(); + tx_node = ethdev_tx_node_get(); + for (i = 0; i < nb_confs; i++) { + port_id = conf[i].port_id; + + if (!rte_eth_dev_is_valid_port(port_id)) + return -EINVAL; + + /* Check for mbuf minimum private size requirement */ + for (j = 0; j < conf[i].mp_count; j++) { + mp = conf[i].mp[j]; + if (!mp) + continue; + /* Check for minimum private space */ + if (rte_pktmbuf_priv_size(mp) < NODE_MBUF_PRIV2_SIZE) { + node_err("ethdev", + "Minimum mbuf priv size requirement not met by mp %s", + mp->name); + return -EINVAL; + } + } + + rx_q_used = conf[i].num_rx_queues; + tx_q_used = conf[i].num_tx_queues; + /* Check if we have a txq for each worker */ + if (tx_q_used < nb_graphs) + return -EINVAL; + + /* Create node for each rx port queue pair */ + for (j = 0; j < rx_q_used; j++) { + struct ethdev_rx_node_main *rx_node_data; + struct rte_node_register *rx_node; + ethdev_rx_node_elem_t *elem; + + rx_node_data = ethdev_rx_get_node_data_get(); + rx_node = ethdev_rx_node_get(); + snprintf(name, sizeof(name), "%u-%u", port_id, j); + /* Clone a new rx node with same edges as parent */ + id = rte_node_clone(rx_node->id, name); + if (id == RTE_NODE_ID_INVALID) + return -EIO; + + /* Add it to 
list of ethdev rx nodes for lookup */ + elem = malloc(sizeof(ethdev_rx_node_elem_t)); + memset(elem, 0, sizeof(ethdev_rx_node_elem_t)); + elem->ctx.port_id = port_id; + elem->ctx.queue_id = j; + elem->nid = id; + elem->next = rx_node_data->head; + rx_node_data->head = elem; + + node_dbg("ethdev", "Rx node %s-%s: is at %u", + rx_node->name, name, id); + } + + /* Create a per port tx node from base node */ + snprintf(name, sizeof(name), "%u", port_id); + /* Clone a new node with same edges as parent */ + id = rte_node_clone(tx_node->id, name); + tx_node_data->nodes[port_id] = id; + + node_dbg("ethdev", "Tx node %s-%s: is at %u", tx_node->name, + name, id); + + /* Prepare the actual name of the cloned node */ + snprintf(name, sizeof(name), "ethdev_tx-%u", port_id); + + /* Add this tx port node as next to ip4_rewrite_node */ + rte_node_edge_update(ip4_rewrite_node->id, RTE_EDGE_ID_INVALID, + &next_nodes, 1); + /* Assuming edge id is the last one alloc'ed */ + rc = ip4_rewrite_set_next( + port_id, rte_node_edge_count(ip4_rewrite_node->id) - 1); + if (rc < 0) + return rc; + } + + ctrl.nb_graphs = nb_graphs; + return 0; +} diff --git a/src/spdk/dpdk/lib/librte_node/ethdev_rx.c b/src/spdk/dpdk/lib/librte_node/ethdev_rx.c new file mode 100644 index 000000000..5cc736598 --- /dev/null +++ b/src/spdk/dpdk/lib/librte_node/ethdev_rx.c @@ -0,0 +1,221 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2020 Marvell International Ltd. 
+ */ + +#include <rte_debug.h> +#include <rte_ethdev.h> +#include <rte_ether.h> +#include <rte_graph.h> +#include <rte_graph_worker.h> +#include <rte_mbuf.h> + +#include "ethdev_rx_priv.h" +#include "node_private.h" + +static struct ethdev_rx_node_main ethdev_rx_main; + +static __rte_always_inline uint16_t +ethdev_rx_node_process_inline(struct rte_graph *graph, struct rte_node *node, + uint16_t port, uint16_t queue) +{ + uint16_t count, next_index = ETHDEV_RX_NEXT_IP4_LOOKUP; + + /* Get pkts from port */ + count = rte_eth_rx_burst(port, queue, (struct rte_mbuf **)node->objs, + RTE_GRAPH_BURST_SIZE); + + if (!count) + return 0; + node->idx = count; + /* Enqueue to next node */ + rte_node_next_stream_move(graph, node, next_index); + + return count; +} + +static __rte_always_inline uint16_t +ethdev_rx_node_process(struct rte_graph *graph, struct rte_node *node, + void **objs, uint16_t cnt) +{ + ethdev_rx_node_ctx_t *ctx = (ethdev_rx_node_ctx_t *)node->ctx; + uint16_t n_pkts = 0; + + RTE_SET_USED(objs); + RTE_SET_USED(cnt); + + n_pkts = ethdev_rx_node_process_inline(graph, node, ctx->port_id, + ctx->queue_id); + return n_pkts; +} + +static inline uint32_t +l3_ptype(uint16_t etype, uint32_t ptype) +{ + ptype = ptype & ~RTE_PTYPE_L3_MASK; + if (etype == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4)) + ptype |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN; + else if (etype == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6)) + ptype |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN; + return ptype; +} + +/* Callback for soft ptype parsing */ +static uint16_t +eth_pkt_parse_cb(uint16_t port, uint16_t queue, struct rte_mbuf **mbufs, + uint16_t nb_pkts, uint16_t max_pkts, void *user_param) +{ + struct rte_mbuf *mbuf0, *mbuf1, *mbuf2, *mbuf3; + struct rte_ether_hdr *eth_hdr; + uint16_t etype, n_left; + struct rte_mbuf **pkts; + + RTE_SET_USED(port); + RTE_SET_USED(queue); + RTE_SET_USED(max_pkts); + RTE_SET_USED(user_param); + + pkts = mbufs; + n_left = nb_pkts; + while (n_left >= 12) { + + /* Prefetch next-next mbufs */ + 
rte_prefetch0(pkts[8]); + rte_prefetch0(pkts[9]); + rte_prefetch0(pkts[10]); + rte_prefetch0(pkts[11]); + + /* Prefetch next mbuf data */ + rte_prefetch0( + rte_pktmbuf_mtod(pkts[4], struct rte_ether_hdr *)); + rte_prefetch0( + rte_pktmbuf_mtod(pkts[5], struct rte_ether_hdr *)); + rte_prefetch0( + rte_pktmbuf_mtod(pkts[6], struct rte_ether_hdr *)); + rte_prefetch0( + rte_pktmbuf_mtod(pkts[7], struct rte_ether_hdr *)); + + mbuf0 = pkts[0]; + mbuf1 = pkts[1]; + mbuf2 = pkts[2]; + mbuf3 = pkts[3]; + pkts += 4; + n_left -= 4; + + /* Extract ptype of mbuf0 */ + eth_hdr = rte_pktmbuf_mtod(mbuf0, struct rte_ether_hdr *); + etype = eth_hdr->ether_type; + mbuf0->packet_type = l3_ptype(etype, 0); + + /* Extract ptype of mbuf1 */ + eth_hdr = rte_pktmbuf_mtod(mbuf1, struct rte_ether_hdr *); + etype = eth_hdr->ether_type; + mbuf1->packet_type = l3_ptype(etype, 0); + + /* Extract ptype of mbuf2 */ + eth_hdr = rte_pktmbuf_mtod(mbuf2, struct rte_ether_hdr *); + etype = eth_hdr->ether_type; + mbuf2->packet_type = l3_ptype(etype, 0); + + /* Extract ptype of mbuf3 */ + eth_hdr = rte_pktmbuf_mtod(mbuf3, struct rte_ether_hdr *); + etype = eth_hdr->ether_type; + mbuf3->packet_type = l3_ptype(etype, 0); + } + + while (n_left > 0) { + mbuf0 = pkts[0]; + + pkts += 1; + n_left -= 1; + + /* Extract ptype of mbuf0 */ + eth_hdr = rte_pktmbuf_mtod(mbuf0, struct rte_ether_hdr *); + etype = eth_hdr->ether_type; + mbuf0->packet_type = l3_ptype(etype, 0); + } + + return nb_pkts; +} + +#define MAX_PTYPES 16 +static int +ethdev_ptype_setup(uint16_t port, uint16_t queue) +{ + uint8_t l3_ipv4 = 0, l3_ipv6 = 0; + uint32_t ptypes[MAX_PTYPES]; + int i, rc; + + /* Check IPv4 & IPv6 ptype support */ + rc = rte_eth_dev_get_supported_ptypes(port, RTE_PTYPE_L3_MASK, ptypes, + MAX_PTYPES); + for (i = 0; i < rc; i++) { + if (ptypes[i] & RTE_PTYPE_L3_IPV4) + l3_ipv4 = 1; + if (ptypes[i] & RTE_PTYPE_L3_IPV6) + l3_ipv6 = 1; + } + + if (!l3_ipv4 || !l3_ipv6) { + node_info("ethdev_rx", + "Enabling ptype callback for 
required ptypes on port %u\n", + port); + + if (!rte_eth_add_rx_callback(port, queue, eth_pkt_parse_cb, + NULL)) { + node_err("ethdev_rx", + "Failed to add rx ptype cb: port=%d, queue=%d\n", + port, queue); + return -EINVAL; + } + } + + return 0; +} + +static int +ethdev_rx_node_init(const struct rte_graph *graph, struct rte_node *node) +{ + ethdev_rx_node_ctx_t *ctx = (ethdev_rx_node_ctx_t *)node->ctx; + ethdev_rx_node_elem_t *elem = ethdev_rx_main.head; + + RTE_SET_USED(graph); + + while (elem) { + if (elem->nid == node->id) { + /* Update node specific context */ + memcpy(ctx, &elem->ctx, sizeof(ethdev_rx_node_ctx_t)); + break; + } + elem = elem->next; + } + + RTE_VERIFY(elem != NULL); + + /* Check and setup ptype */ + return ethdev_ptype_setup(ctx->port_id, ctx->queue_id); +} + +struct ethdev_rx_node_main * +ethdev_rx_get_node_data_get(void) +{ + return ðdev_rx_main; +} + +static struct rte_node_register ethdev_rx_node_base = { + .process = ethdev_rx_node_process, + .flags = RTE_NODE_SOURCE_F, + .name = "ethdev_rx", + + .init = ethdev_rx_node_init, + + .nb_edges = ETHDEV_RX_NEXT_MAX, + .next_nodes = {[ETHDEV_RX_NEXT_IP4_LOOKUP] = "ip4_lookup"}, +}; + +struct rte_node_register * +ethdev_rx_node_get(void) +{ + return ðdev_rx_node_base; +} + +RTE_NODE_REGISTER(ethdev_rx_node_base); diff --git a/src/spdk/dpdk/lib/librte_node/ethdev_rx_priv.h b/src/spdk/dpdk/lib/librte_node/ethdev_rx_priv.h new file mode 100644 index 000000000..2d7195a36 --- /dev/null +++ b/src/spdk/dpdk/lib/librte_node/ethdev_rx_priv.h @@ -0,0 +1,81 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2020 Marvell International Ltd. 
+ */
+#ifndef __INCLUDE_ETHDEV_RX_PRIV_H__
+#define __INCLUDE_ETHDEV_RX_PRIV_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <rte_common.h>
+
+struct ethdev_rx_node_elem;
+struct ethdev_rx_node_ctx;
+typedef struct ethdev_rx_node_elem ethdev_rx_node_elem_t;
+typedef struct ethdev_rx_node_ctx ethdev_rx_node_ctx_t;
+
+/**
+ * @internal
+ *
+ * Ethernet device Rx node context structure.
+ */
+struct ethdev_rx_node_ctx {
+	uint16_t port_id;  /**< Port identifier of the Rx node. */
+	uint16_t queue_id; /**< Queue identifier of the Rx node. */
+};
+
+/**
+ * @internal
+ *
+ * Ethernet device Rx node list element structure.
+ */
+struct ethdev_rx_node_elem {
+	struct ethdev_rx_node_elem *next;
+	/**< Pointer to the next Rx node element. */
+	struct ethdev_rx_node_ctx ctx;
+	/**< Rx node context. */
+	rte_node_t nid;
+	/**< Node identifier of the Rx node. */
+};
+
+/**
+ * @internal
+ *
+ * Edge indices of the Ethernet Rx node; ETHDEV_RX_NEXT_MAX is the edge count.
+ */
+enum ethdev_rx_next_nodes {
+	ETHDEV_RX_NEXT_IP4_LOOKUP,
+	ETHDEV_RX_NEXT_MAX,
+};
+
+/**
+ * @internal
+ *
+ * Ethernet Rx node main structure.
+ */
+struct ethdev_rx_node_main {
+	ethdev_rx_node_elem_t *head;
+	/**< Pointer to the head Rx node element. */
+};
+
+/**
+ * @internal
+ *
+ * Get the Ethernet Rx node data.
+ *
+ * @return
+ *   Pointer to Ethernet Rx node data.
+ */
+struct ethdev_rx_node_main *ethdev_rx_get_node_data_get(void);
+
+/**
+ * @internal
+ *
+ * Get the Ethernet Rx node.
+ *
+ * @return
+ *   Pointer to the Ethernet Rx node.
+ */
+struct rte_node_register *ethdev_rx_node_get(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __INCLUDE_ETHDEV_RX_PRIV_H__ */
diff --git a/src/spdk/dpdk/lib/librte_node/ethdev_tx.c b/src/spdk/dpdk/lib/librte_node/ethdev_tx.c
new file mode 100644
index 000000000..075149089
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_node/ethdev_tx.c
@@ -0,0 +1,86 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2020 Marvell International Ltd.
+ */ + +#include <rte_debug.h> +#include <rte_ethdev.h> +#include <rte_graph.h> +#include <rte_graph_worker.h> +#include <rte_mbuf.h> + +#include "ethdev_tx_priv.h" + +static struct ethdev_tx_node_main ethdev_tx_main; + +static uint16_t +ethdev_tx_node_process(struct rte_graph *graph, struct rte_node *node, + void **objs, uint16_t nb_objs) +{ + ethdev_tx_node_ctx_t *ctx = (ethdev_tx_node_ctx_t *)node->ctx; + uint16_t port, queue; + uint16_t count; + + /* Get Tx port id */ + port = ctx->port; + queue = ctx->queue; + + count = rte_eth_tx_burst(port, queue, (struct rte_mbuf **)objs, + nb_objs); + + /* Redirect unsent pkts to drop node */ + if (count != nb_objs) { + rte_node_enqueue(graph, node, ETHDEV_TX_NEXT_PKT_DROP, + &objs[count], nb_objs - count); + } + + return count; +} + +static int +ethdev_tx_node_init(const struct rte_graph *graph, struct rte_node *node) +{ + ethdev_tx_node_ctx_t *ctx = (ethdev_tx_node_ctx_t *)node->ctx; + uint64_t port_id = RTE_MAX_ETHPORTS; + int i; + + /* Find our port id */ + for (i = 0; i < RTE_MAX_ETHPORTS; i++) { + if (ethdev_tx_main.nodes[i] == node->id) { + port_id = i; + break; + } + } + RTE_VERIFY(port_id < RTE_MAX_ETHPORTS); + + /* Update port and queue */ + ctx->port = port_id; + ctx->queue = graph->id; + + return 0; +} + +struct ethdev_tx_node_main * +ethdev_tx_node_data_get(void) +{ + return ðdev_tx_main; +} + +static struct rte_node_register ethdev_tx_node_base = { + .process = ethdev_tx_node_process, + .name = "ethdev_tx", + + .init = ethdev_tx_node_init, + + .nb_edges = ETHDEV_TX_NEXT_MAX, + .next_nodes = { + [ETHDEV_TX_NEXT_PKT_DROP] = "pkt_drop", + }, +}; + +struct rte_node_register * +ethdev_tx_node_get(void) +{ + return ðdev_tx_node_base; +} + +RTE_NODE_REGISTER(ethdev_tx_node_base); diff --git a/src/spdk/dpdk/lib/librte_node/ethdev_tx_priv.h b/src/spdk/dpdk/lib/librte_node/ethdev_tx_priv.h new file mode 100644 index 000000000..586bff44a --- /dev/null +++ b/src/spdk/dpdk/lib/librte_node/ethdev_tx_priv.h @@ -0,0 +1,62 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2020 Marvell International Ltd.
+ */
+#ifndef __INCLUDE_ETHDEV_TX_PRIV_H__
+#define __INCLUDE_ETHDEV_TX_PRIV_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct ethdev_tx_node_ctx;
+typedef struct ethdev_tx_node_ctx ethdev_tx_node_ctx_t;
+
+/**
+ * @internal
+ *
+ * Edge indices of the Ethernet Tx node; ETHDEV_TX_NEXT_MAX is the edge count.
+ */
+enum ethdev_tx_next_nodes {
+	ETHDEV_TX_NEXT_PKT_DROP,
+	ETHDEV_TX_NEXT_MAX,
+};
+
+/**
+ * @internal
+ *
+ * Ethernet Tx node context structure.
+ */
+struct ethdev_tx_node_ctx {
+	uint16_t port;	/**< Port identifier of the Ethernet Tx node. */
+	uint16_t queue; /**< Queue identifier of the Ethernet Tx node. */
+};
+
+/**
+ * @internal
+ *
+ * Ethernet Tx node main structure.
+ */
+struct ethdev_tx_node_main {
+	uint32_t nodes[RTE_MAX_ETHPORTS]; /**< Tx nodes for each ethdev port. */
+};
+
+/**
+ * @internal
+ *
+ * Get the Ethernet Tx node data.
+ *
+ * @return
+ *   Pointer to Ethernet Tx node data.
+ */
+struct ethdev_tx_node_main *ethdev_tx_node_data_get(void);
+
+/**
+ * @internal
+ *
+ * Get the Ethernet Tx node.
+ *
+ * @return
+ *   Pointer to the Ethernet Tx node.
+ */
+struct rte_node_register *ethdev_tx_node_get(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __INCLUDE_ETHDEV_TX_PRIV_H__ */
diff --git a/src/spdk/dpdk/lib/librte_node/ip4_lookup.c b/src/spdk/dpdk/lib/librte_node/ip4_lookup.c
new file mode 100644
index 000000000..8e6379457
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_node/ip4_lookup.c
@@ -0,0 +1,215 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2020 Marvell International Ltd.
+ */
+
+#include <arpa/inet.h>
+#include <sys/socket.h>
+
+#include <rte_debug.h>
+#include <rte_ethdev.h>
+#include <rte_ether.h>
+#include <rte_graph.h>
+#include <rte_graph_worker.h>
+#include <rte_ip.h>
+#include <rte_lpm.h>
+#include <rte_mbuf.h>
+#include <rte_tcp.h>
+#include <rte_udp.h>
+
+#include "rte_node_ip4_api.h"
+
+#include "node_private.h"
+
+#define IPV4_L3FWD_LPM_MAX_RULES 1024
+#define IPV4_L3FWD_LPM_NUMBER_TBL8S (1 << 8)
+
+/* IP4 Lookup global data struct */
+struct ip4_lookup_node_main {
+	struct rte_lpm *lpm_tbl[RTE_MAX_NUMA_NODES];
+};
+
+static struct ip4_lookup_node_main ip4_lookup_nm;
+
+/* The NEON and SSE headers each provide their own ip4_lookup_node_process();
+ * the scalar version below is used on every other target.
+ */
+#if defined(RTE_MACHINE_CPUFLAG_NEON)
+#include "ip4_lookup_neon.h"
+#elif defined(RTE_ARCH_X86)
+#include "ip4_lookup_sse.h"
+#else
+
+/*
+ * Scalar node process callback.
+ *
+ * For every mbuf: read the IPv4 header that follows the Ethernet header,
+ * save its checksum and TTL in the mbuf private area, look the destination
+ * address up in the LPM table taken from the node context and store the low
+ * 16 bits of the result as next hop. The upper 16 bits select the edge; a
+ * failed lookup selects the pkt_drop edge.
+ *
+ * Packets are speculated to go to the ip4_rewrite edge: matching packets
+ * are counted in last_spec and copied to that edge's stream in bulk, others
+ * are enqueued one by one. Always returns nb_objs.
+ */
+static uint16_t
+ip4_lookup_node_process(struct rte_graph *graph, struct rte_node *node,
+			void **objs, uint16_t nb_objs)
+{
+	struct rte_ipv4_hdr *ipv4_hdr;
+	void **to_next, **from;
+	uint16_t last_spec = 0;
+	struct rte_mbuf *mbuf;
+	rte_edge_t next_index;
+	struct rte_lpm *lpm;
+	uint16_t held = 0;
+	uint32_t drop_nh;
+	int i, rc;
+
+	/* Speculative next */
+	next_index = RTE_NODE_IP4_LOOKUP_NEXT_REWRITE;
+	/* Drop node */
+	drop_nh = ((uint32_t)RTE_NODE_IP4_LOOKUP_NEXT_PKT_DROP) << 16;
+
+	/* Get socket specific LPM from ctx */
+	lpm = *((struct rte_lpm **)node->ctx);
+	from = objs;
+
+	/* Get stream for the speculated next node */
+	to_next = rte_node_next_stream_get(graph, node, next_index, nb_objs);
+	for (i = 0; i < nb_objs; i++) {
+		uint32_t next_hop;
+		uint16_t next;
+
+		mbuf = (struct rte_mbuf *)objs[i];
+
+		/* Extract DIP of mbuf0 */
+		ipv4_hdr = rte_pktmbuf_mtod_offset(mbuf, struct rte_ipv4_hdr *,
+				sizeof(struct rte_ether_hdr));
+		/* Extract cksum, ttl as ipv4 hdr is in cache */
+		node_mbuf_priv1(mbuf)->cksum = ipv4_hdr->hdr_checksum;
+		node_mbuf_priv1(mbuf)->ttl = ipv4_hdr->time_to_live;
+
+		rc = rte_lpm_lookup(lpm, rte_be_to_cpu_32(ipv4_hdr->dst_addr),
+				    &next_hop);
+		next_hop = (rc == 0) ? next_hop : drop_nh;
+
+		/* Low 16 bits: next hop id; high 16 bits: edge to take */
+		node_mbuf_priv1(mbuf)->nh = (uint16_t)next_hop;
+		next_hop = next_hop >> 16;
+		next = (uint16_t)next_hop;
+
+		if (unlikely(next_index != next)) {
+			/* Copy things successfully speculated till now */
+			rte_memcpy(to_next, from, last_spec * sizeof(from[0]));
+			from += last_spec;
+			to_next += last_spec;
+			held += last_spec;
+			last_spec = 0;
+
+			rte_node_enqueue_x1(graph, node, next, from[0]);
+			from += 1;
+		} else {
+			last_spec += 1;
+		}
+	}
+
+	/* !!! Home run !!! */
+	if (likely(last_spec == nb_objs)) {
+		rte_node_next_stream_move(graph, node, next_index);
+		return nb_objs;
+	}
+	/* Flush the tail of the speculated run and publish the held count */
+	held += last_spec;
+	rte_memcpy(to_next, from, last_spec * sizeof(from[0]));
+	rte_node_next_stream_put(graph, node, next_index, held);
+
+	return nb_objs;
+}
+
+#endif
+
+/*
+ * Add a route to the LPM table of every socket that has one.
+ *
+ * The stored value packs next_node into the bits above bit 15 and next_hop
+ * into the low 16 bits, truncated to 24 bits. Returns 0 on success or the
+ * negative value returned by rte_lpm_add() for the first table that fails;
+ * tables updated before the failure are not rolled back.
+ */
+int
+rte_node_ip4_route_add(uint32_t ip, uint8_t depth, uint16_t next_hop,
+		       enum rte_node_ip4_lookup_next next_node)
+{
+	char abuf[INET6_ADDRSTRLEN];
+	struct in_addr in;
+	uint8_t socket;
+	uint32_t val;
+	int ret;
+
+	/* abuf is only used for the log messages below */
+	in.s_addr = htonl(ip);
+	inet_ntop(AF_INET, &in, abuf, sizeof(abuf));
+	/* Embedded next node id into 24 bit next hop */
+	val = ((next_node << 16) | next_hop) & ((1ull << 24) - 1);
+	node_dbg("ip4_lookup", "LPM: Adding route %s / %d nh (0x%x)", abuf,
+		 depth, val);
+
+	for (socket = 0; socket < RTE_MAX_NUMA_NODES; socket++) {
+		if (!ip4_lookup_nm.lpm_tbl[socket])
+			continue;
+
+		ret = rte_lpm_add(ip4_lookup_nm.lpm_tbl[socket],
+				  ip, depth, val);
+		if (ret < 0) {
+			node_err("ip4_lookup",
+				 "Unable to add entry %s / %d nh (%x) to LPM table on sock %d, rc=%d\n",
+				 abuf, depth, val, socket, ret);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Create the LPM table for the given socket if it does not exist yet.
+ * Returns 0 on success (or if the table already exists), -rte_errno when
+ * rte_lpm_create() fails.
+ */
+static int
+setup_lpm(struct ip4_lookup_node_main *nm, int socket)
+{
+	struct rte_lpm_config config_ipv4;
+	char s[RTE_LPM_NAMESIZE];
+
+	/* One LPM table per socket */
+	if (nm->lpm_tbl[socket])
+		return 0;
+
+	/* create the LPM table */
+	config_ipv4.max_rules = IPV4_L3FWD_LPM_MAX_RULES;
+	config_ipv4.number_tbl8s = IPV4_L3FWD_LPM_NUMBER_TBL8S;
+	config_ipv4.flags = 0;
+	snprintf(s, sizeof(s), "IPV4_L3FWD_LPM_%d", socket);
+	nm->lpm_tbl[socket] = rte_lpm_create(s, socket, &config_ipv4);
+	if (nm->lpm_tbl[socket] == NULL)
+		return -rte_errno;
+
+	return 0;
+}
+
+/*
+ * Node init callback. The first invocation creates one LPM table for the
+ * socket of every lcore; every invocation stores the table of the graph's
+ * socket at the start of the node context.
+ */
+static int
+ip4_lookup_node_init(const struct rte_graph *graph, struct rte_node *node)
+{
+	struct rte_lpm **lpm_p = (struct rte_lpm **)&node->ctx;
+	uint16_t socket, lcore_id;
+	static uint8_t init_once;
+	int rc;
+
+	RTE_SET_USED(graph);
+	RTE_SET_USED(node);
+
+	if (!init_once) {
+		/* Setup LPM tables for all sockets */
+		RTE_LCORE_FOREACH(lcore_id)
+		{
+			socket = rte_lcore_to_socket_id(lcore_id);
+			rc = setup_lpm(&ip4_lookup_nm, socket);
+			if (rc) {
+				node_err("ip4_lookup",
+					 "Failed to setup lpm tbl for sock %u, rc=%d",
+					 socket, rc);
+				return rc;
+			}
+		}
+		init_once = 1;
+	}
+	*lpm_p = ip4_lookup_nm.lpm_tbl[graph->socket];
+	node_dbg("ip4_lookup", "Initialized ip4_lookup node");
+
+	return 0;
+}
+
+static struct rte_node_register ip4_lookup_node = {
+	.process = ip4_lookup_node_process,
+	.name = "ip4_lookup",
+
+	.init = ip4_lookup_node_init,
+
+	.nb_edges = RTE_NODE_IP4_LOOKUP_NEXT_MAX,
+	.next_nodes = {
+		[RTE_NODE_IP4_LOOKUP_NEXT_REWRITE] = "ip4_rewrite",
+		[RTE_NODE_IP4_LOOKUP_NEXT_PKT_DROP] = "pkt_drop",
+	},
+};
+
+RTE_NODE_REGISTER(ip4_lookup_node);
diff --git a/src/spdk/dpdk/lib/librte_node/ip4_lookup_neon.h b/src/spdk/dpdk/lib/librte_node/ip4_lookup_neon.h
new file mode 100644
index 000000000..dd21cb28a
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_node/ip4_lookup_neon.h
@@ -0,0 +1,239 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2020 Marvell International Ltd.
+ */
+
+#ifndef __INCLUDE_IP4_LOOKUP_NEON_H__
+#define __INCLUDE_IP4_LOOKUP_NEON_H__
+
+/* ARM64 NEON */
+/*
+ * Node process callback, four packets per iteration plus a scalar tail.
+ *
+ * For every mbuf the IPv4 header following the Ethernet header is read, its
+ * TTL and checksum are saved in the mbuf private area together with the low
+ * 16 bits of the LPM result (next hop); the upper 16 bits of the result
+ * select the outgoing edge, with drop_nh used for lookup misses.
+ *
+ * Packets are speculated to go to the ip4_rewrite edge: runs that match are
+ * counted in last_spec and copied to that edge's stream in bulk, the rest
+ * are enqueued individually. Always returns nb_objs.
+ */
+static uint16_t
+ip4_lookup_node_process(struct rte_graph *graph, struct rte_node *node,
+			void **objs, uint16_t nb_objs)
+{
+	struct rte_mbuf *mbuf0, *mbuf1, *mbuf2, *mbuf3, **pkts;
+	struct rte_ipv4_hdr *ipv4_hdr;
+	void **to_next, **from;
+	uint16_t last_spec = 0;
+	rte_edge_t next_index;
+	uint16_t n_left_from;
+	struct rte_lpm *lpm;
+	uint16_t held = 0;
+	uint32_t drop_nh;
+	rte_xmm_t result;
+	rte_xmm_t priv01;
+	rte_xmm_t priv23;
+	int32x4_t dip;
+	int rc, i;
+
+	/* Speculative next */
+	next_index = RTE_NODE_IP4_LOOKUP_NEXT_REWRITE;
+	/* Drop node */
+	drop_nh = ((uint32_t)RTE_NODE_IP4_LOOKUP_NEXT_PKT_DROP) << 16;
+
+	/* Get socket specific LPM from ctx */
+	lpm = *((struct rte_lpm **)node->ctx);
+
+	pkts = (struct rte_mbuf **)objs;
+	from = objs;
+	n_left_from = nb_objs;
+
+	/* Prefetch the objs[] pointer array itself, one cache line per step */
+#define OBJS_PER_CLINE (RTE_CACHE_LINE_SIZE / sizeof(void *))
+	for (i = OBJS_PER_CLINE; i < RTE_GRAPH_BURST_SIZE; i += OBJS_PER_CLINE)
+		rte_prefetch0(&objs[i]);
+
+	/* Prefetch the IPv4 headers of the first (up to) four packets */
+	for (i = 0; i < 4 && i < n_left_from; i++)
+		rte_prefetch0(rte_pktmbuf_mtod_offset(pkts[i], void *,
+					sizeof(struct rte_ether_hdr)));
+
+	dip = vdupq_n_s32(0);
+	/* Get stream for the speculated next node */
+	to_next = rte_node_next_stream_get(graph, node, next_index, nb_objs);
+	while (n_left_from >= 4) {
+#if RTE_GRAPH_BURST_SIZE > 64
+		/* Prefetch next-next mbufs */
+		if (likely(n_left_from > 11)) {
+			rte_prefetch0(pkts[8]);
+			rte_prefetch0(pkts[9]);
+			rte_prefetch0(pkts[10]);
+			rte_prefetch0(pkts[11]);
+		}
+#endif
+		/* Prefetch next mbuf data */
+		if (likely(n_left_from > 7)) {
+			rte_prefetch0(rte_pktmbuf_mtod_offset(pkts[4], void *,
+					sizeof(struct rte_ether_hdr)));
+			rte_prefetch0(rte_pktmbuf_mtod_offset(pkts[5], void *,
+					sizeof(struct rte_ether_hdr)));
+			rte_prefetch0(rte_pktmbuf_mtod_offset(pkts[6], void *,
+					sizeof(struct rte_ether_hdr)));
+			rte_prefetch0(rte_pktmbuf_mtod_offset(pkts[7], void *,
+					sizeof(struct rte_ether_hdr)));
+		}
+
+		mbuf0 = pkts[0];
+		mbuf1 = pkts[1];
+		mbuf2 = pkts[2];
+		mbuf3 = pkts[3];
+
+		pkts += 4;
+		n_left_from -= 4;
+
+		/* priv01/priv23 each hold the 64-bit private words of two
+		 * packets: per packet, 16-bit lane 0 gets the next hop (set
+		 * after the lookup), lane 1 the TTL and the upper 32 bits
+		 * the checksum.
+		 */
+
+		/* Extract DIP of mbuf0 */
+		ipv4_hdr = rte_pktmbuf_mtod_offset(mbuf0, struct rte_ipv4_hdr *,
+				sizeof(struct rte_ether_hdr));
+		dip = vsetq_lane_s32(ipv4_hdr->dst_addr, dip, 0);
+		/* Extract cksum, ttl as ipv4 hdr is in cache */
+		priv01.u16[1] = ipv4_hdr->time_to_live;
+		priv01.u32[1] = ipv4_hdr->hdr_checksum;
+
+		/* Extract DIP of mbuf1 */
+		ipv4_hdr = rte_pktmbuf_mtod_offset(mbuf1, struct rte_ipv4_hdr *,
+				sizeof(struct rte_ether_hdr));
+		dip = vsetq_lane_s32(ipv4_hdr->dst_addr, dip, 1);
+		/* Extract cksum, ttl as ipv4 hdr is in cache */
+		priv01.u16[5] = ipv4_hdr->time_to_live;
+		priv01.u32[3] = ipv4_hdr->hdr_checksum;
+
+		/* Extract DIP of mbuf2 */
+		ipv4_hdr = rte_pktmbuf_mtod_offset(mbuf2, struct rte_ipv4_hdr *,
+				sizeof(struct rte_ether_hdr));
+		dip = vsetq_lane_s32(ipv4_hdr->dst_addr, dip, 2);
+		/* Extract cksum, ttl as ipv4 hdr is in cache */
+		priv23.u16[1] = ipv4_hdr->time_to_live;
+		priv23.u32[1] = ipv4_hdr->hdr_checksum;
+
+		/* Extract DIP of mbuf3 */
+		ipv4_hdr = rte_pktmbuf_mtod_offset(mbuf3, struct rte_ipv4_hdr *,
+				sizeof(struct rte_ether_hdr));
+		dip = vsetq_lane_s32(ipv4_hdr->dst_addr, dip, 3);
+
+		/* Reverse the bytes within each 32-bit address lane */
+		dip = vreinterpretq_s32_u8(
+			vrev32q_u8(vreinterpretq_u8_s32(dip)));
+		/* Extract cksum, ttl as ipv4 hdr is in cache */
+		priv23.u16[5] = ipv4_hdr->time_to_live;
+		priv23.u32[3] = ipv4_hdr->hdr_checksum;
+
+		/* Perform LPM lookup to get NH and next node */
+		rte_lpm_lookupx4(lpm, dip, result.u32, drop_nh);
+		/* Even 16-bit lanes of result carry the next hop ids */
+		priv01.u16[0] = result.u16[0];
+		priv01.u16[4] = result.u16[2];
+		priv23.u16[0] = result.u16[4];
+		priv23.u16[4] = result.u16[6];
+
+		node_mbuf_priv1(mbuf0)->u = priv01.u64[0];
+		node_mbuf_priv1(mbuf1)->u = priv01.u64[1];
+		node_mbuf_priv1(mbuf2)->u = priv23.u64[0];
+		node_mbuf_priv1(mbuf3)->u = priv23.u64[1];
+
+		/* Enqueue four to next node */
+		/* Odd 16-bit lanes carry the edge; fix_spec is 1 only when
+		 * all four equal the speculated edge.
+		 */
+		rte_edge_t fix_spec = ((next_index == result.u16[1]) &&
+				       (result.u16[1] == result.u16[3]) &&
+				       (result.u16[3] == result.u16[5]) &&
+				       (result.u16[5] == result.u16[7]));
+
+		if (unlikely(fix_spec == 0)) {
+			/* Copy things successfully speculated till now */
+			rte_memcpy(to_next, from, last_spec * sizeof(from[0]));
+			from += last_spec;
+			to_next += last_spec;
+			held += last_spec;
+			last_spec = 0;
+
+			/* Next0 */
+			if (next_index == result.u16[1]) {
+				to_next[0] = from[0];
+				to_next++;
+				held++;
+			} else {
+				rte_node_enqueue_x1(graph, node, result.u16[1],
+						    from[0]);
+			}
+
+			/* Next1 */
+			if (next_index == result.u16[3]) {
+				to_next[0] = from[1];
+				to_next++;
+				held++;
+			} else {
+				rte_node_enqueue_x1(graph, node, result.u16[3],
+						    from[1]);
+			}
+
+			/* Next2 */
+			if (next_index == result.u16[5]) {
+				to_next[0] = from[2];
+				to_next++;
+				held++;
+			} else {
+				rte_node_enqueue_x1(graph, node, result.u16[5],
+						    from[2]);
+			}
+
+			/* Next3 */
+			if (next_index == result.u16[7]) {
+				to_next[0] = from[3];
+				to_next++;
+				held++;
+			} else {
+				rte_node_enqueue_x1(graph, node, result.u16[7],
+						    from[3]);
+			}
+
+			from += 4;
+		} else {
+			last_spec += 4;
+		}
+	}
+
+	/* Scalar tail for the remaining (fewer than four) packets */
+	while (n_left_from > 0) {
+		uint32_t next_hop;
+		uint16_t next0;
+
+		mbuf0 = pkts[0];
+
+		pkts += 1;
+		n_left_from -= 1;
+
+		/* Extract DIP of mbuf0 */
+		ipv4_hdr = rte_pktmbuf_mtod_offset(mbuf0, struct rte_ipv4_hdr *,
+				sizeof(struct rte_ether_hdr));
+		/* Extract cksum, ttl as ipv4 hdr is in cache */
+		node_mbuf_priv1(mbuf0)->cksum = ipv4_hdr->hdr_checksum;
+		node_mbuf_priv1(mbuf0)->ttl = ipv4_hdr->time_to_live;
+
+		rc = rte_lpm_lookup(lpm, rte_be_to_cpu_32(ipv4_hdr->dst_addr),
+				    &next_hop);
+		next_hop = (rc == 0) ? next_hop : drop_nh;
+
+		node_mbuf_priv1(mbuf0)->nh = (uint16_t)next_hop;
+		next_hop = next_hop >> 16;
+		next0 = (uint16_t)next_hop;
+
+		if (unlikely(next_index ^ next0)) {
+			/* Copy things successfully speculated till now */
+			rte_memcpy(to_next, from, last_spec * sizeof(from[0]));
+			from += last_spec;
+			to_next += last_spec;
+			held += last_spec;
+			last_spec = 0;
+
+			rte_node_enqueue_x1(graph, node, next0, from[0]);
+			from += 1;
+		} else {
+			last_spec += 1;
+		}
+	}
+
+	/* !!! Home run !!! */
+	if (likely(last_spec == nb_objs)) {
+		rte_node_next_stream_move(graph, node, next_index);
+		return nb_objs;
+	}
+	/* Flush the tail of the speculated run and publish the held count */
+	held += last_spec;
+	rte_memcpy(to_next, from, last_spec * sizeof(from[0]));
+	rte_node_next_stream_put(graph, node, next_index, held);
+
+	return nb_objs;
+}
+
+#endif /* __INCLUDE_IP4_LOOKUP_NEON_H__ */
diff --git a/src/spdk/dpdk/lib/librte_node/ip4_lookup_sse.h b/src/spdk/dpdk/lib/librte_node/ip4_lookup_sse.h
new file mode 100644
index 000000000..a071cc591
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_node/ip4_lookup_sse.h
@@ -0,0 +1,244 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2020 Marvell International Ltd.
+ */
+
+#ifndef __INCLUDE_IP4_LOOKUP_SSE_H__
+#define __INCLUDE_IP4_LOOKUP_SSE_H__
+
+/* X86 SSE */
+/*
+ * Node process callback, four packets per iteration plus a scalar tail.
+ *
+ * For every mbuf the IPv4 header following the Ethernet header is read, its
+ * checksum and TTL are saved in the mbuf private area, and the destination
+ * address is looked up in the LPM table taken from the node context. The
+ * low 16 bits of the result are stored as next hop, the upper 16 bits
+ * select the outgoing edge; drop_nh is used for lookup misses.
+ *
+ * Packets are speculated to go to the ip4_rewrite edge: runs that match are
+ * counted in last_spec and copied to that edge's stream in bulk, the rest
+ * are enqueued individually. Always returns nb_objs.
+ */
+static uint16_t
+ip4_lookup_node_process(struct rte_graph *graph, struct rte_node *node,
+			void **objs, uint16_t nb_objs)
+{
+	struct rte_mbuf *mbuf0, *mbuf1, *mbuf2, *mbuf3, **pkts;
+	rte_edge_t next0, next1, next2, next3, next_index;
+	struct rte_ipv4_hdr *ipv4_hdr;
+	uint32_t ip0, ip1, ip2, ip3;
+	void **to_next, **from;
+	uint16_t last_spec = 0;
+	uint16_t n_left_from;
+	struct rte_lpm *lpm;
+	uint16_t held = 0;
+	uint32_t drop_nh;
+	rte_xmm_t dst;
+	__m128i dip; /* SSE register */
+	int rc, i;
+
+	/* Speculative next */
+	next_index = RTE_NODE_IP4_LOOKUP_NEXT_REWRITE;
+	/* Drop node */
+	drop_nh = ((uint32_t)RTE_NODE_IP4_LOOKUP_NEXT_PKT_DROP) << 16;
+
+	/* Get socket specific LPM from ctx */
+	lpm = *((struct rte_lpm **)node->ctx);
+
+	pkts = (struct rte_mbuf **)objs;
+	from = objs;
+	n_left_from = nb_objs;
+
+	/* Prefetch the IPv4 headers of the first four packets */
+	if (n_left_from >= 4) {
+		for (i = 0; i < 4; i++)
+			rte_prefetch0(rte_pktmbuf_mtod_offset(pkts[i], void *,
+						sizeof(struct rte_ether_hdr)));
+	}
+
+	/* Get stream for the speculated next node */
+	to_next = rte_node_next_stream_get(graph, node, next_index, nb_objs);
+	while (n_left_from >= 4) {
+		/* Prefetch next-next mbufs */
+		if (likely(n_left_from > 11)) {
+			rte_prefetch0(pkts[8]);
+			rte_prefetch0(pkts[9]);
+			rte_prefetch0(pkts[10]);
+			rte_prefetch0(pkts[11]);
+		}
+
+		/* Prefetch next mbuf data */
+		if (likely(n_left_from > 7)) {
+			rte_prefetch0(rte_pktmbuf_mtod_offset(pkts[4], void *,
+					sizeof(struct rte_ether_hdr)));
+			rte_prefetch0(rte_pktmbuf_mtod_offset(pkts[5], void *,
+					sizeof(struct rte_ether_hdr)));
+			rte_prefetch0(rte_pktmbuf_mtod_offset(pkts[6], void *,
+					sizeof(struct rte_ether_hdr)));
+			rte_prefetch0(rte_pktmbuf_mtod_offset(pkts[7], void *,
+					sizeof(struct rte_ether_hdr)));
+		}
+
+		mbuf0 = pkts[0];
+		mbuf1 = pkts[1];
+		mbuf2 = pkts[2];
+		mbuf3 = pkts[3];
+
+		pkts += 4;
+		n_left_from -= 4;
+
+		/* Extract DIP of mbuf0 */
+		ipv4_hdr = rte_pktmbuf_mtod_offset(mbuf0, struct rte_ipv4_hdr *,
+				sizeof(struct rte_ether_hdr));
+		ip0 = ipv4_hdr->dst_addr;
+		/* Extract cksum, ttl as ipv4 hdr is in cache */
+		node_mbuf_priv1(mbuf0)->cksum = ipv4_hdr->hdr_checksum;
+		node_mbuf_priv1(mbuf0)->ttl = ipv4_hdr->time_to_live;
+
+		/* Extract DIP of mbuf1 */
+		ipv4_hdr = rte_pktmbuf_mtod_offset(mbuf1, struct rte_ipv4_hdr *,
+				sizeof(struct rte_ether_hdr));
+		ip1 = ipv4_hdr->dst_addr;
+		/* Extract cksum, ttl as ipv4 hdr is in cache */
+		node_mbuf_priv1(mbuf1)->cksum = ipv4_hdr->hdr_checksum;
+		node_mbuf_priv1(mbuf1)->ttl = ipv4_hdr->time_to_live;
+
+		/* Extract DIP of mbuf2 */
+		ipv4_hdr = rte_pktmbuf_mtod_offset(mbuf2, struct rte_ipv4_hdr *,
+				sizeof(struct rte_ether_hdr));
+		ip2 = ipv4_hdr->dst_addr;
+		/* Extract cksum, ttl as ipv4 hdr is in cache */
+		node_mbuf_priv1(mbuf2)->cksum = ipv4_hdr->hdr_checksum;
+		node_mbuf_priv1(mbuf2)->ttl = ipv4_hdr->time_to_live;
+
+		/* Extract DIP of mbuf3 */
+		ipv4_hdr = rte_pktmbuf_mtod_offset(mbuf3, struct rte_ipv4_hdr *,
+				sizeof(struct rte_ether_hdr));
+		ip3 = ipv4_hdr->dst_addr;
+
+		/* Prepare for lookup x4 */
+		dip = _mm_set_epi32(ip3, ip2, ip1, ip0);
+
+		/* Byte swap 4 IPV4 addresses. */
+		const __m128i bswap_mask = _mm_set_epi8(
+			12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3);
+		dip = _mm_shuffle_epi8(dip, bswap_mask);
+
+		/* Extract cksum, ttl as ipv4 hdr is in cache */
+		node_mbuf_priv1(mbuf3)->cksum = ipv4_hdr->hdr_checksum;
+		node_mbuf_priv1(mbuf3)->ttl = ipv4_hdr->time_to_live;
+
+		/* Perform LPM lookup to get NH and next node */
+		rte_lpm_lookupx4(lpm, dip, dst.u32, drop_nh);
+
+		/* Extract next node id and NH */
+		node_mbuf_priv1(mbuf0)->nh = dst.u32[0] & 0xFFFF;
+		next0 = (dst.u32[0] >> 16);
+
+		node_mbuf_priv1(mbuf1)->nh = dst.u32[1] & 0xFFFF;
+		next1 = (dst.u32[1] >> 16);
+
+		node_mbuf_priv1(mbuf2)->nh = dst.u32[2] & 0xFFFF;
+		next2 = (dst.u32[2] >> 16);
+
+		node_mbuf_priv1(mbuf3)->nh = dst.u32[3] & 0xFFFF;
+		next3 = (dst.u32[3] >> 16);
+
+		/* Enqueue four to next node */
+		/* fix_spec is non-zero when any of the four edges differs
+		 * from the speculated one.
+		 */
+		rte_edge_t fix_spec =
+			(next_index ^ next0) | (next_index ^ next1) |
+			(next_index ^ next2) | (next_index ^ next3);
+
+		if (unlikely(fix_spec)) {
+			/* Copy things successfully speculated till now */
+			rte_memcpy(to_next, from, last_spec * sizeof(from[0]));
+			from += last_spec;
+			to_next += last_spec;
+			held += last_spec;
+			last_spec = 0;
+
+			/* Next0 */
+			if (next_index == next0) {
+				to_next[0] = from[0];
+				to_next++;
+				held++;
+			} else {
+				rte_node_enqueue_x1(graph, node, next0,
+						    from[0]);
+			}
+
+			/* Next1 */
+			if (next_index == next1) {
+				to_next[0] = from[1];
+				to_next++;
+				held++;
+			} else {
+				rte_node_enqueue_x1(graph, node, next1,
+						    from[1]);
+			}
+
+			/* Next2 */
+			if (next_index == next2) {
+				to_next[0] = from[2];
+				to_next++;
+				held++;
+			} else {
+				rte_node_enqueue_x1(graph, node, next2,
+						    from[2]);
+			}
+
+			/* Next3 */
+			if (next_index == next3) {
+				to_next[0] = from[3];
+				to_next++;
+				held++;
+			} else {
+				rte_node_enqueue_x1(graph, node, next3,
+						    from[3]);
+			}
+
+			from += 4;
+
+		} else {
+			last_spec += 4;
+		}
+	}
+
+	/* Scalar tail for the remaining (fewer than four) packets */
+	while (n_left_from > 0) {
+		uint32_t next_hop;
+
+		mbuf0 = pkts[0];
+
+		pkts += 1;
+		n_left_from -= 1;
+
+		/* Extract DIP of mbuf0 */
+		ipv4_hdr = rte_pktmbuf_mtod_offset(mbuf0, struct rte_ipv4_hdr *,
+				sizeof(struct rte_ether_hdr));
+		/* Extract cksum, ttl as ipv4 hdr is in cache */
+		node_mbuf_priv1(mbuf0)->cksum = ipv4_hdr->hdr_checksum;
+		node_mbuf_priv1(mbuf0)->ttl = ipv4_hdr->time_to_live;
+
+		rc = rte_lpm_lookup(lpm, rte_be_to_cpu_32(ipv4_hdr->dst_addr),
+				    &next_hop);
+		next_hop = (rc == 0) ? next_hop : drop_nh;
+
+		node_mbuf_priv1(mbuf0)->nh = next_hop & 0xFFFF;
+		next0 = (next_hop >> 16);
+
+		if (unlikely(next_index ^ next0)) {
+			/* Copy things successfully speculated till now */
+			rte_memcpy(to_next, from, last_spec * sizeof(from[0]));
+			from += last_spec;
+			to_next += last_spec;
+			held += last_spec;
+			last_spec = 0;
+
+			rte_node_enqueue_x1(graph, node, next0, from[0]);
+			from += 1;
+		} else {
+			last_spec += 1;
+		}
+	}
+
+	/* !!! Home run !!! */
+	if (likely(last_spec == nb_objs)) {
+		rte_node_next_stream_move(graph, node, next_index);
+		return nb_objs;
+	}
+
+	held += last_spec;
+	/* Copy things successfully speculated till now */
+	rte_memcpy(to_next, from, last_spec * sizeof(from[0]));
+	rte_node_next_stream_put(graph, node, next_index, held);
+
+	return nb_objs;
+}
+
+#endif /* __INCLUDE_IP4_LOOKUP_SSE_H__ */
diff --git a/src/spdk/dpdk/lib/librte_node/ip4_rewrite.c b/src/spdk/dpdk/lib/librte_node/ip4_rewrite.c
new file mode 100644
index 000000000..bb7f671b5
--- /dev/null
+++ b/src/spdk/dpdk/lib/librte_node/ip4_rewrite.c
@@ -0,0 +1,326 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2020 Marvell International Ltd.
+ */ + +#include <rte_debug.h> +#include <rte_ethdev.h> +#include <rte_ether.h> +#include <rte_graph.h> +#include <rte_graph_worker.h> +#include <rte_ip.h> +#include <rte_malloc.h> +#include <rte_mbuf.h> +#include <rte_tcp.h> +#include <rte_udp.h> +#include <rte_vect.h> + +#include "rte_node_ip4_api.h" + +#include "ip4_rewrite_priv.h" +#include "node_private.h" + +static struct ip4_rewrite_node_main *ip4_rewrite_nm; + +static uint16_t +ip4_rewrite_node_process(struct rte_graph *graph, struct rte_node *node, + void **objs, uint16_t nb_objs) +{ + struct rte_mbuf *mbuf0, *mbuf1, *mbuf2, *mbuf3, **pkts; + struct ip4_rewrite_nh_header *nh = ip4_rewrite_nm->nh; + uint16_t next0, next1, next2, next3, next_index; + struct rte_ipv4_hdr *ip0, *ip1, *ip2, *ip3; + uint16_t n_left_from, held = 0, last_spec = 0; + void *d0, *d1, *d2, *d3; + void **to_next, **from; + rte_xmm_t priv01; + rte_xmm_t priv23; + int i; + + /* Speculative next as last next */ + next_index = *(uint16_t *)node->ctx; + rte_prefetch0(nh); + + pkts = (struct rte_mbuf **)objs; + from = objs; + n_left_from = nb_objs; + + for (i = 0; i < 4 && i < n_left_from; i++) + rte_prefetch0(pkts[i]); + + /* Get stream for the speculated next node */ + to_next = rte_node_next_stream_get(graph, node, next_index, nb_objs); + /* Update Ethernet header of pkts */ + while (n_left_from >= 4) { + if (likely(n_left_from > 7)) { + /* Prefetch only next-mbuf struct and priv area. + * Data need not be prefetched as we only write. + */ + rte_prefetch0(pkts[4]); + rte_prefetch0(pkts[5]); + rte_prefetch0(pkts[6]); + rte_prefetch0(pkts[7]); + } + + mbuf0 = pkts[0]; + mbuf1 = pkts[1]; + mbuf2 = pkts[2]; + mbuf3 = pkts[3]; + + pkts += 4; + n_left_from -= 4; + priv01.u64[0] = node_mbuf_priv1(mbuf0)->u; + priv01.u64[1] = node_mbuf_priv1(mbuf1)->u; + priv23.u64[0] = node_mbuf_priv1(mbuf2)->u; + priv23.u64[1] = node_mbuf_priv1(mbuf3)->u; + + /* Increment checksum by one. 
*/ + priv01.u32[1] += rte_cpu_to_be_16(0x0100); + priv01.u32[3] += rte_cpu_to_be_16(0x0100); + priv23.u32[1] += rte_cpu_to_be_16(0x0100); + priv23.u32[3] += rte_cpu_to_be_16(0x0100); + + /* Update ttl,cksum rewrite ethernet hdr on mbuf0 */ + d0 = rte_pktmbuf_mtod(mbuf0, void *); + rte_memcpy(d0, nh[priv01.u16[0]].rewrite_data, + nh[priv01.u16[0]].rewrite_len); + + next0 = nh[priv01.u16[0]].tx_node; + ip0 = (struct rte_ipv4_hdr *)((uint8_t *)d0 + + sizeof(struct rte_ether_hdr)); + ip0->time_to_live = priv01.u16[1] - 1; + ip0->hdr_checksum = priv01.u16[2] + priv01.u16[3]; + + /* Update ttl,cksum rewrite ethernet hdr on mbuf1 */ + d1 = rte_pktmbuf_mtod(mbuf1, void *); + rte_memcpy(d1, nh[priv01.u16[4]].rewrite_data, + nh[priv01.u16[4]].rewrite_len); + + next1 = nh[priv01.u16[4]].tx_node; + ip1 = (struct rte_ipv4_hdr *)((uint8_t *)d1 + + sizeof(struct rte_ether_hdr)); + ip1->time_to_live = priv01.u16[5] - 1; + ip1->hdr_checksum = priv01.u16[6] + priv01.u16[7]; + + /* Update ttl,cksum rewrite ethernet hdr on mbuf2 */ + d2 = rte_pktmbuf_mtod(mbuf2, void *); + rte_memcpy(d2, nh[priv23.u16[0]].rewrite_data, + nh[priv23.u16[0]].rewrite_len); + next2 = nh[priv23.u16[0]].tx_node; + ip2 = (struct rte_ipv4_hdr *)((uint8_t *)d2 + + sizeof(struct rte_ether_hdr)); + ip2->time_to_live = priv23.u16[1] - 1; + ip2->hdr_checksum = priv23.u16[2] + priv23.u16[3]; + + /* Update ttl,cksum rewrite ethernet hdr on mbuf3 */ + d3 = rte_pktmbuf_mtod(mbuf3, void *); + rte_memcpy(d3, nh[priv23.u16[4]].rewrite_data, + nh[priv23.u16[4]].rewrite_len); + + next3 = nh[priv23.u16[4]].tx_node; + ip3 = (struct rte_ipv4_hdr *)((uint8_t *)d3 + + sizeof(struct rte_ether_hdr)); + ip3->time_to_live = priv23.u16[5] - 1; + ip3->hdr_checksum = priv23.u16[6] + priv23.u16[7]; + + /* Enqueue four to next node */ + rte_edge_t fix_spec = + ((next_index == next0) && (next0 == next1) && + (next1 == next2) && (next2 == next3)); + + if (unlikely(fix_spec == 0)) { + /* Copy things successfully speculated till now */ + 
rte_memcpy(to_next, from, last_spec * sizeof(from[0])); + from += last_spec; + to_next += last_spec; + held += last_spec; + last_spec = 0; + + /* next0 */ + if (next_index == next0) { + to_next[0] = from[0]; + to_next++; + held++; + } else { + rte_node_enqueue_x1(graph, node, next0, + from[0]); + } + + /* next1 */ + if (next_index == next1) { + to_next[0] = from[1]; + to_next++; + held++; + } else { + rte_node_enqueue_x1(graph, node, next1, + from[1]); + } + + /* next2 */ + if (next_index == next2) { + to_next[0] = from[2]; + to_next++; + held++; + } else { + rte_node_enqueue_x1(graph, node, next2, + from[2]); + } + + /* next3 */ + if (next_index == next3) { + to_next[0] = from[3]; + to_next++; + held++; + } else { + rte_node_enqueue_x1(graph, node, next3, + from[3]); + } + + from += 4; + + /* Change speculation if last two are same */ + if ((next_index != next3) && (next2 == next3)) { + /* Put the current speculated node */ + rte_node_next_stream_put(graph, node, + next_index, held); + held = 0; + + /* Get next speculated stream */ + next_index = next3; + to_next = rte_node_next_stream_get( + graph, node, next_index, nb_objs); + } + } else { + last_spec += 4; + } + } + + /* Handle the remaining (fewer than four) packets one at a time */ + while (n_left_from > 0) { + uint32_t chksum; + + mbuf0 = pkts[0]; + + pkts += 1; + n_left_from -= 1; + + d0 = rte_pktmbuf_mtod(mbuf0, void *); + rte_memcpy(d0, nh[node_mbuf_priv1(mbuf0)->nh].rewrite_data, + nh[node_mbuf_priv1(mbuf0)->nh].rewrite_len); + + next0 = nh[node_mbuf_priv1(mbuf0)->nh].tx_node; + ip0 = (struct rte_ipv4_hdr *)((uint8_t *)d0 + + sizeof(struct rte_ether_hdr)); + /* TTL drops by one, so the checksum grows by 0x0100 (network order). + * Sum in 32 bits and fold the carry back in, as the 4-packet path + * above does; a 16-bit sum would silently drop the carry. + */ + chksum = node_mbuf_priv1(mbuf0)->cksum + + rte_cpu_to_be_16(0x0100); + chksum = (chksum & 0xffff) + (chksum >> 16); + chksum += chksum >= 0xffff; + ip0->hdr_checksum = (uint16_t)chksum; + ip0->time_to_live = node_mbuf_priv1(mbuf0)->ttl - 1; + + if (unlikely(next_index ^ next0)) { + /* Copy things successfully speculated till now */ + rte_memcpy(to_next, from, last_spec * sizeof(from[0])); + from += last_spec; + to_next += last_spec; + held += last_spec; + last_spec = 0; + + 
rte_node_enqueue_x1(graph, node, next0, from[0]); + from += 1; + } else { + last_spec += 1; + } + } + + /* !!! Home run !!! */ + if (likely(last_spec == nb_objs)) { + rte_node_next_stream_move(graph, node, next_index); + return nb_objs; + } + + held += last_spec; + rte_memcpy(to_next, from, last_spec * sizeof(from[0])); + rte_node_next_stream_put(graph, node, next_index, held); + /* Save the last next used */ + *(uint16_t *)node->ctx = next_index; + + return nb_objs; +} + +/* Node init callback: nothing to set up, only emits a debug log. */ +static int +ip4_rewrite_node_init(const struct rte_graph *graph, struct rte_node *node) +{ + + RTE_SET_USED(graph); + RTE_SET_USED(node); + node_dbg("ip4_rewrite", "Initialized ip4_rewrite node initialized"); + + return 0; +} + +/* + * Record next_index as the ip4_rewrite edge used for port_id, allocating the + * shared node data on first use. + * + * Returns 0 on success, -EINVAL if port_id is outside the next_index[] table, + * -ENOMEM if the node data cannot be allocated. + */ +int +ip4_rewrite_set_next(uint16_t port_id, uint16_t next_index) +{ + /* next_index[] has RTE_MAX_ETHPORTS entries; reject anything beyond */ + if (port_id >= RTE_MAX_ETHPORTS) + return -EINVAL; + + if (ip4_rewrite_nm == NULL) { + ip4_rewrite_nm = rte_zmalloc( + "ip4_rewrite", sizeof(struct ip4_rewrite_node_main), + RTE_CACHE_LINE_SIZE); + if (ip4_rewrite_nm == NULL) + return -ENOMEM; + } + ip4_rewrite_nm->next_index[port_id] = next_index; + + return 0; +} + +/* + * Store the rewrite data and the Tx edge of dst_port for next hop id next_hop. + * + * Returns 0 on success, -EINVAL when next_hop, rewrite_len or dst_port is out + * of range, when a non-empty rewrite has no source buffer, or when dst_port + * has no edge recorded; -ENOMEM if the node data cannot be allocated. + */ +int +rte_node_ip4_rewrite_add(uint16_t next_hop, uint8_t *rewrite_data, + uint8_t rewrite_len, uint16_t dst_port) +{ + struct ip4_rewrite_nh_header *nh; + + if (next_hop >= RTE_GRAPH_IP4_REWRITE_MAX_NH) + return -EINVAL; + + if (rewrite_len > RTE_GRAPH_IP4_REWRITE_MAX_LEN) + return -EINVAL; + + /* A non-empty rewrite needs a source buffer to copy from */ + if (rewrite_data == NULL && rewrite_len != 0) + return -EINVAL; + + /* dst_port indexes next_index[], which has RTE_MAX_ETHPORTS entries */ + if (dst_port >= RTE_MAX_ETHPORTS) + return -EINVAL; + + if (ip4_rewrite_nm == NULL) { + ip4_rewrite_nm = rte_zmalloc( + "ip4_rewrite", sizeof(struct ip4_rewrite_node_main), + RTE_CACHE_LINE_SIZE); + if (ip4_rewrite_nm == NULL) + return -ENOMEM; + } + + /* Check if dst port doesn't exist as edge */ + if (!ip4_rewrite_nm->next_index[dst_port]) + return -EINVAL; + + /* Update next hop */ + nh = &ip4_rewrite_nm->nh[next_hop]; + + if (rewrite_len != 0) + memcpy(nh->rewrite_data, rewrite_data, rewrite_len); + nh->tx_node = ip4_rewrite_nm->next_index[dst_port]; + nh->rewrite_len = rewrite_len; + nh->enabled = true; + + return 0; +} + +static struct rte_node_register ip4_rewrite_node = { + .process = ip4_rewrite_node_process, + .name = 
"ip4_rewrite", + /* Default edge i.e '0' is pkt drop */ + .nb_edges = 1, + .next_nodes = { + [0] = "pkt_drop", + }, + .init = ip4_rewrite_node_init, +}; + +struct rte_node_register * +ip4_rewrite_node_get(void) +{ + return &ip4_rewrite_node; +} + +RTE_NODE_REGISTER(ip4_rewrite_node); diff --git a/src/spdk/dpdk/lib/librte_node/ip4_rewrite_priv.h b/src/spdk/dpdk/lib/librte_node/ip4_rewrite_priv.h new file mode 100644 index 000000000..80f0abdc9 --- /dev/null +++ b/src/spdk/dpdk/lib/librte_node/ip4_rewrite_priv.h @@ -0,0 +1,77 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2020 Marvell International Ltd. + */ +#ifndef __INCLUDE_IP4_REWRITE_PRIV_H__ +#define __INCLUDE_IP4_REWRITE_PRIV_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +#include <rte_common.h> + +#define RTE_GRAPH_IP4_REWRITE_MAX_NH 64 +#define RTE_GRAPH_IP4_REWRITE_MAX_LEN 56 + +/** + * @internal + * + * Ipv4 rewrite next hop header data structure. Used to store port specific + * rewrite data. + */ +struct ip4_rewrite_nh_header { + uint16_t rewrite_len; /**< Header rewrite length. */ + uint16_t tx_node; /**< Tx node next index identifier. */ + uint16_t enabled; /**< NH enable flag */ + uint16_t rsvd; + union { + struct { + struct rte_ether_addr dst; + /**< Destination mac address. */ + struct rte_ether_addr src; + /**< Source mac address. */ + }; + uint8_t rewrite_data[RTE_GRAPH_IP4_REWRITE_MAX_LEN]; + /**< Generic rewrite data */ + }; +}; + +/** + * @internal + * + * Ipv4 node main data structure. + */ +struct ip4_rewrite_node_main { + struct ip4_rewrite_nh_header nh[RTE_GRAPH_IP4_REWRITE_MAX_NH]; + /**< Array of next hop header data */ + uint16_t next_index[RTE_MAX_ETHPORTS]; + /**< Next index of each configured port. */ +}; + +/** + * @internal + * + * Get the ipv4 rewrite node. + * + * @return + * Pointer to the ipv4 rewrite node. + */ +struct rte_node_register *ip4_rewrite_node_get(void); + +/** + * @internal + * + * Set the Edge index of a given port_id. 
+ * + * @param port_id + * Ethernet port identifier. + * @param next_index + * Edge index of the Given Tx node. + */ +int ip4_rewrite_set_next(uint16_t port_id, uint16_t next_index); + +#ifdef __cplusplus +} +#endif + +#endif /* __INCLUDE_IP4_REWRITE_PRIV_H__ */ diff --git a/src/spdk/dpdk/lib/librte_node/log.c b/src/spdk/dpdk/lib/librte_node/log.c new file mode 100644 index 000000000..f035f91e8 --- /dev/null +++ b/src/spdk/dpdk/lib/librte_node/log.c @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2020 Marvell International Ltd. + */ + +#include "node_private.h" + +int rte_node_logtype; + +RTE_INIT(rte_node_init_log) +{ + rte_node_logtype = rte_log_register("lib.node"); + if (rte_node_logtype >= 0) + rte_log_set_level(rte_node_logtype, RTE_LOG_INFO); +} diff --git a/src/spdk/dpdk/lib/librte_node/meson.build b/src/spdk/dpdk/lib/librte_node/meson.build new file mode 100644 index 000000000..c8b70befb --- /dev/null +++ b/src/spdk/dpdk/lib/librte_node/meson.build @@ -0,0 +1,11 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(C) 2020 Marvell International Ltd. + +sources = files('null.c', 'log.c', 'ethdev_rx.c', 'ethdev_tx.c', 'ip4_lookup.c', + 'ip4_rewrite.c', 'pkt_drop.c', 'ethdev_ctrl.c') +headers = files('rte_node_ip4_api.h', 'rte_node_eth_api.h') +# Strict-aliasing rules are violated by uint8_t[] to context size casts. +cflags += '-fno-strict-aliasing' +deps += ['graph', 'mbuf', 'lpm', 'ethdev', 'mempool', 'cryptodev'] +build = false +reason = 'not needed by SPDK' diff --git a/src/spdk/dpdk/lib/librte_node/node_private.h b/src/spdk/dpdk/lib/librte_node/node_private.h new file mode 100644 index 000000000..975b9aa45 --- /dev/null +++ b/src/spdk/dpdk/lib/librte_node/node_private.h @@ -0,0 +1,79 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2020 Marvell International Ltd. 
+ */ + +#ifndef __NODE_PRIVATE_H__ +#define __NODE_PRIVATE_H__ + +#include <rte_common.h> +#include <rte_log.h> +#include <rte_mbuf.h> + +extern int rte_node_logtype; +#define NODE_LOG(level, node_name, ...) \ + rte_log(RTE_LOG_##level, rte_node_logtype, \ + RTE_FMT("NODE %s: %s():%u " RTE_FMT_HEAD(__VA_ARGS__, ) "\n", \ + node_name, __func__, __LINE__, \ + RTE_FMT_TAIL(__VA_ARGS__, ))) + +#define node_err(node_name, ...) NODE_LOG(ERR, node_name, __VA_ARGS__) +#define node_info(node_name, ...) NODE_LOG(INFO, node_name, __VA_ARGS__) +#define node_dbg(node_name, ...) NODE_LOG(DEBUG, node_name, __VA_ARGS__) + +/** + * + * Node mbuf private data to store next hop, ttl and checksum. + */ +struct node_mbuf_priv1 { + union { + /* IP4 rewrite */ + struct { + uint16_t nh; + uint16_t ttl; + uint32_t cksum; + }; + + uint64_t u; /**< nh, ttl and cksum viewed as one 64-bit word. */ + }; +}; + +/** + * Node mbuf private area 2. + */ +struct node_mbuf_priv2 { + uint64_t priv_data; +} __rte_cache_aligned; + +#define NODE_MBUF_PRIV2_SIZE sizeof(struct node_mbuf_priv2) + +/** + * Get mbuf_priv1 pointer from rte_mbuf. + * + * @param m + * Pointer to the rte_mbuf. + * + * @return + * Pointer to the mbuf_priv1, which overlays the udata64 field of the mbuf. + */ +static __rte_always_inline struct node_mbuf_priv1 * +node_mbuf_priv1(struct rte_mbuf *m) +{ + return (struct node_mbuf_priv1 *)&m->udata64; +} + +/** + * Get mbuf_priv2 pointer from rte_mbuf. + * + * @param m + * Pointer to the rte_mbuf. + * + * @return + * Pointer to the mbuf_priv2, as returned by rte_mbuf_to_priv() for the mbuf. + */ +static __rte_always_inline struct node_mbuf_priv2 * +node_mbuf_priv2(struct rte_mbuf *m) +{ + return (struct node_mbuf_priv2 *)rte_mbuf_to_priv(m); +} + +#endif /* __NODE_PRIVATE_H__ */ diff --git a/src/spdk/dpdk/lib/librte_node/null.c b/src/spdk/dpdk/lib/librte_node/null.c new file mode 100644 index 000000000..c7cd8b6df --- /dev/null +++ b/src/spdk/dpdk/lib/librte_node/null.c @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2020 Marvell International Ltd. 
+ */ + +#include <rte_graph.h> + +static uint16_t +null(struct rte_graph *graph, struct rte_node *node, void **objs, + uint16_t nb_objs) +{ + RTE_SET_USED(node); + RTE_SET_USED(objs); + RTE_SET_USED(graph); + + return nb_objs; +} + +static struct rte_node_register null_node = { + .name = "null", + .process = null, +}; + +RTE_NODE_REGISTER(null_node); diff --git a/src/spdk/dpdk/lib/librte_node/pkt_drop.c b/src/spdk/dpdk/lib/librte_node/pkt_drop.c new file mode 100644 index 000000000..c35001323 --- /dev/null +++ b/src/spdk/dpdk/lib/librte_node/pkt_drop.c @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2020 Marvell International Ltd. + */ + +#include <rte_debug.h> +#include <rte_graph.h> +#include <rte_mbuf.h> + +static uint16_t +pkt_drop_process(struct rte_graph *graph, struct rte_node *node, void **objs, + uint16_t nb_objs) +{ + RTE_SET_USED(node); + RTE_SET_USED(graph); + + rte_pktmbuf_free_bulk((struct rte_mbuf **)objs, nb_objs); + + return nb_objs; +} + +static struct rte_node_register pkt_drop_node = { + .process = pkt_drop_process, + .name = "pkt_drop", +}; + +RTE_NODE_REGISTER(pkt_drop_node); diff --git a/src/spdk/dpdk/lib/librte_node/rte_node_eth_api.h b/src/spdk/dpdk/lib/librte_node/rte_node_eth_api.h new file mode 100644 index 000000000..e9a53afe5 --- /dev/null +++ b/src/spdk/dpdk/lib/librte_node/rte_node_eth_api.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2020 Marvell International Ltd. + */ + +#ifndef __INCLUDE_RTE_NODE_ETH_API_H__ +#define __INCLUDE_RTE_NODE_ETH_API_H__ + +/** + * @file rte_node_eth_api.h + * + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * This API allows to setup ethdev_rx and ethdev_tx nodes + * and its queue associations. + * + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include <rte_common.h> +#include <rte_mempool.h> + +/** + * Port config for ethdev_rx and ethdev_tx node. 
+ */ +struct rte_node_ethdev_config { + uint16_t port_id; + /**< Port identifier */ + uint16_t num_rx_queues; + /**< Number of Rx queues. */ + uint16_t num_tx_queues; + /**< Number of Tx queues. */ + struct rte_mempool **mp; + /**< Array of mempools associated to Rx queue. */ + uint16_t mp_count; + /**< Size of mp array. */ +}; + +/** + * Initializes ethdev nodes. + * + * @param cfg + * Array of ethdev config that identifies which port's + * ethdev_rx and ethdev_tx nodes need to be created + * and queue association. + * @param cnt + * Size of cfg array. + * @param nb_graphs + * Number of graphs that will be used. + * + * @return + * 0 on successful initialization, negative otherwise. + */ +__rte_experimental +int rte_node_eth_config(struct rte_node_ethdev_config *cfg, + uint16_t cnt, uint16_t nb_graphs); +#ifdef __cplusplus +} +#endif + +#endif /* __INCLUDE_RTE_NODE_ETH_API_H__ */ diff --git a/src/spdk/dpdk/lib/librte_node/rte_node_ip4_api.h b/src/spdk/dpdk/lib/librte_node/rte_node_ip4_api.h new file mode 100644 index 000000000..31a752b00 --- /dev/null +++ b/src/spdk/dpdk/lib/librte_node/rte_node_ip4_api.h @@ -0,0 +1,78 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(C) 2020 Marvell International Ltd. + */ + +#ifndef __INCLUDE_RTE_NODE_IP4_API_H__ +#define __INCLUDE_RTE_NODE_IP4_API_H__ + +/** + * @file rte_node_ip4_api.h + * + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * This API allows to do control path functions of ip4_* nodes + * like ip4_lookup, ip4_rewrite. + * + */ +#ifdef __cplusplus +extern "C" { +#endif + +#include <rte_common.h> + +/** + * IP4 lookup next nodes. + */ +enum rte_node_ip4_lookup_next { + RTE_NODE_IP4_LOOKUP_NEXT_REWRITE, + /**< Rewrite node. */ + RTE_NODE_IP4_LOOKUP_NEXT_PKT_DROP, + /**< Packet drop node. */ + RTE_NODE_IP4_LOOKUP_NEXT_MAX, + /**< Number of next nodes of lookup node. */ +}; + +/** + * Add ipv4 route to lookup table. + * + * @param ip + * IP address of route to be added. 
+ * @param depth + * Depth of the rule to be added. + * @param next_hop + * Next hop id of the rule result to be added. + * @param next_node + * Next node to redirect traffic to. + * + * @return + * 0 on success, negative otherwise. + */ +__rte_experimental +int rte_node_ip4_route_add(uint32_t ip, uint8_t depth, uint16_t next_hop, + enum rte_node_ip4_lookup_next next_node); + +/** + * Add a next hop's rewrite data. + * + * @param next_hop + * Next hop id to add rewrite data to. + * @param rewrite_data + * Rewrite data. + * @param rewrite_len + * Length of rewrite data. + * @param dst_port + * Destination port to redirect traffic to. + * + * @return + * 0 on success, negative otherwise. + */ +__rte_experimental +int rte_node_ip4_rewrite_add(uint16_t next_hop, uint8_t *rewrite_data, + uint8_t rewrite_len, uint16_t dst_port); + +#ifdef __cplusplus +} +#endif + +#endif /* __INCLUDE_RTE_NODE_IP4_API_H__ */ diff --git a/src/spdk/dpdk/lib/librte_node/rte_node_version.map b/src/spdk/dpdk/lib/librte_node/rte_node_version.map new file mode 100644 index 000000000..a799b0d38 --- /dev/null +++ b/src/spdk/dpdk/lib/librte_node/rte_node_version.map @@ -0,0 +1,9 @@ +EXPERIMENTAL { + global: + + rte_node_eth_config; + rte_node_ip4_route_add; + rte_node_ip4_rewrite_add; + rte_node_logtype; + local: *; +}; |