diff options
Diffstat (limited to 'src/seastar/dpdk/lib/librte_port')
19 files changed, 5281 insertions, 0 deletions
diff --git a/src/seastar/dpdk/lib/librte_port/Makefile b/src/seastar/dpdk/lib/librte_port/Makefile new file mode 100644 index 00000000..76629a13 --- /dev/null +++ b/src/seastar/dpdk/lib/librte_port/Makefile @@ -0,0 +1,80 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2016 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +include $(RTE_SDK)/mk/rte.vars.mk + +# +# library name +# +LIB = librte_port.a +ifeq ($(CONFIG_RTE_PORT_PCAP),y) +LDLIBS += -lpcap +endif + +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) + +EXPORT_MAP := rte_port_version.map + +LIBABIVER := 3 + +# +# all source are stored in SRCS-y +# +SRCS-$(CONFIG_RTE_LIBRTE_PORT) += rte_port_ethdev.c +SRCS-$(CONFIG_RTE_LIBRTE_PORT) += rte_port_ring.c +ifeq ($(CONFIG_RTE_LIBRTE_IP_FRAG),y) +SRCS-$(CONFIG_RTE_LIBRTE_PORT) += rte_port_frag.c +SRCS-$(CONFIG_RTE_LIBRTE_PORT) += rte_port_ras.c +endif +SRCS-$(CONFIG_RTE_LIBRTE_PORT) += rte_port_sched.c +SRCS-$(CONFIG_RTE_LIBRTE_PORT) += rte_port_fd.c +ifeq ($(CONFIG_RTE_LIBRTE_KNI),y) +SRCS-$(CONFIG_RTE_LIBRTE_PORT) += rte_port_kni.c +endif +SRCS-$(CONFIG_RTE_LIBRTE_PORT) += rte_port_source_sink.c + +# install includes +SYMLINK-$(CONFIG_RTE_LIBRTE_PORT)-include += rte_port.h +SYMLINK-$(CONFIG_RTE_LIBRTE_PORT)-include += rte_port_ethdev.h +SYMLINK-$(CONFIG_RTE_LIBRTE_PORT)-include += rte_port_ring.h +ifeq ($(CONFIG_RTE_LIBRTE_IP_FRAG),y) +SYMLINK-$(CONFIG_RTE_LIBRTE_PORT)-include += rte_port_frag.h +SYMLINK-$(CONFIG_RTE_LIBRTE_PORT)-include += rte_port_ras.h +endif +SYMLINK-$(CONFIG_RTE_LIBRTE_PORT)-include += rte_port_sched.h +SYMLINK-$(CONFIG_RTE_LIBRTE_PORT)-include += rte_port_fd.h +ifeq ($(CONFIG_RTE_LIBRTE_KNI),y) +SYMLINK-$(CONFIG_RTE_LIBRTE_PORT)-include += rte_port_kni.h +endif +SYMLINK-$(CONFIG_RTE_LIBRTE_PORT)-include += rte_port_source_sink.h + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/src/seastar/dpdk/lib/librte_port/rte_port.h b/src/seastar/dpdk/lib/librte_port/rte_port.h new file mode 100644 index 00000000..c3c53487 --- /dev/null +++ b/src/seastar/dpdk/lib/librte_port/rte_port.h @@ -0,0 +1,263 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __INCLUDE_RTE_PORT_H__ +#define __INCLUDE_RTE_PORT_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @file + * RTE Port + * + * This tool is part of the DPDK Packet Framework tool suite and provides + * a standard interface to implement different types of packet ports. 
+ * + ***/ + +#include <stdint.h> +#include <rte_mbuf.h> + +/**@{ + * Macros to allow accessing metadata stored in the mbuf headroom + * just beyond the end of the mbuf data structure returned by a port + */ +#define RTE_MBUF_METADATA_UINT8_PTR(mbuf, offset) \ + (&((uint8_t *)(mbuf))[offset]) +#define RTE_MBUF_METADATA_UINT16_PTR(mbuf, offset) \ + ((uint16_t *) RTE_MBUF_METADATA_UINT8_PTR(mbuf, offset)) +#define RTE_MBUF_METADATA_UINT32_PTR(mbuf, offset) \ + ((uint32_t *) RTE_MBUF_METADATA_UINT8_PTR(mbuf, offset)) +#define RTE_MBUF_METADATA_UINT64_PTR(mbuf, offset) \ + ((uint64_t *) RTE_MBUF_METADATA_UINT8_PTR(mbuf, offset)) + +#define RTE_MBUF_METADATA_UINT8(mbuf, offset) \ + (*RTE_MBUF_METADATA_UINT8_PTR(mbuf, offset)) +#define RTE_MBUF_METADATA_UINT16(mbuf, offset) \ + (*RTE_MBUF_METADATA_UINT16_PTR(mbuf, offset)) +#define RTE_MBUF_METADATA_UINT32(mbuf, offset) \ + (*RTE_MBUF_METADATA_UINT32_PTR(mbuf, offset)) +#define RTE_MBUF_METADATA_UINT64(mbuf, offset) \ + (*RTE_MBUF_METADATA_UINT64_PTR(mbuf, offset)) +/**@}*/ + +/* + * Port IN + * + */ +/** Maximum number of packets read from any input port in a single burst. +Cannot be changed. */ +#define RTE_PORT_IN_BURST_SIZE_MAX 64 + +/** Input port statistics */ +struct rte_port_in_stats { + uint64_t n_pkts_in; + uint64_t n_pkts_drop; +}; + +/** + * Input port create + * + * @param params + * Parameters for input port creation + * @param socket_id + * CPU socket ID (e.g. 
for memory allocation purpose) + * @return + * Handle to input port instance + */ +typedef void* (*rte_port_in_op_create)(void *params, int socket_id); + +/** + * Input port free + * + * @param port + * Handle to input port instance + * @return + * 0 on success, error code otherwise + */ +typedef int (*rte_port_in_op_free)(void *port); + +/** + * Input port packet burst RX + * + * @param port + * Handle to input port instance + * @param pkts + * Array filled in with pointers to the packets read + * @param n_pkts + * Maximum number of packets to read in this burst + * @return + * Number of packets actually read into pkts (0 to n_pkts) + */ +typedef int (*rte_port_in_op_rx)( + void *port, + struct rte_mbuf **pkts, + uint32_t n_pkts); + +/** + * Input port stats get + * + * @param port + * Handle to input port instance + * @param stats + * Handle to port_in stats struct to copy data + * @param clear + * Flag indicating that stats should be cleared after read + * + * @return + * Error code or 0 on success. + */ +typedef int (*rte_port_in_op_stats_read)( + void *port, + struct rte_port_in_stats *stats, + int clear); + +/** Input port interface defining the input port operation */ +struct rte_port_in_ops { + rte_port_in_op_create f_create; /**< Create */ + rte_port_in_op_free f_free; /**< Free */ + rte_port_in_op_rx f_rx; /**< Packet RX (packet burst) */ + rte_port_in_op_stats_read f_stats; /**< Stats */ +}; + +/* + * Port OUT + * + */ +/** Output port statistics */ +struct rte_port_out_stats { + uint64_t n_pkts_in; /**< Packets submitted to the port for TX */ + uint64_t n_pkts_drop; /**< Packets the port freed without sending */ +}; + +/** + * Output port create + * + * @param params + * Parameters for output port creation + * @param socket_id + * CPU socket ID (e.g. 
for memory allocation purpose) + * @return + * Handle to output port instance + */ +typedef void* (*rte_port_out_op_create)(void *params, int socket_id); + +/** + * Output port free + * + * @param port + * Handle to output port instance + * @return + * 0 on success, error code otherwise + */ +typedef int (*rte_port_out_op_free)(void *port); + +/** + * Output port single packet TX + * + * @param port + * Handle to output port instance + * @param pkt + * Input packet + * @return + * 0 on success, error code otherwise + */ +typedef int (*rte_port_out_op_tx)( + void *port, + struct rte_mbuf *pkt); + +/** + * Output port packet burst TX + * + * @param port + * Handle to output port instance + * @param pkts + * Burst of input packets specified as array of up to 64 pointers to struct + * rte_mbuf + * @param pkts_mask + * 64-bit bitmask specifying which packets in the input burst are valid. When + * pkts_mask bit n is set, then element n of pkts array is pointing to a + * valid packet. Otherwise, element n of pkts array will not be accessed. + * @return + * 0 on success, error code otherwise + */ +typedef int (*rte_port_out_op_tx_bulk)( + void *port, + struct rte_mbuf **pkt, + uint64_t pkts_mask); + +/** + * Output port flush + * + * @param port + * Handle to output port instance + * @return + * 0 on success, error code otherwise + */ +typedef int (*rte_port_out_op_flush)(void *port); + +/** + * Output port stats read + * + * @param port + * Handle to output port instance + * @param stats + * Handle to port_out stats struct to copy data + * @param clear + * Flag indicating that stats should be cleared after read + * + * @return + * Error code or 0 on success. 
+ */ +typedef int (*rte_port_out_op_stats_read)( + void *port, + struct rte_port_out_stats *stats, + int clear); + +/** Output port interface defining the output port operation */ +struct rte_port_out_ops { + rte_port_out_op_create f_create; /**< Create */ + rte_port_out_op_free f_free; /**< Free */ + rte_port_out_op_tx f_tx; /**< Packet TX (single packet) */ + rte_port_out_op_tx_bulk f_tx_bulk; /**< Packet TX (packet burst) */ + rte_port_out_op_flush f_flush; /**< Flush */ + rte_port_out_op_stats_read f_stats; /**< Stats */ +}; + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/seastar/dpdk/lib/librte_port/rte_port_ethdev.c b/src/seastar/dpdk/lib/librte_port/rte_port_ethdev.c new file mode 100644 index 00000000..d5c5fba5 --- /dev/null +++ b/src/seastar/dpdk/lib/librte_port/rte_port_ethdev.c @@ -0,0 +1,553 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include <string.h> +#include <stdint.h> + +#include <rte_mbuf.h> +#include <rte_ethdev.h> +#include <rte_malloc.h> + +#include "rte_port_ethdev.h" + +/* + * Port ETHDEV Reader: input port that reads packet bursts from one NIC RX queue + */ +#ifdef RTE_PORT_STATS_COLLECT + +#define RTE_PORT_ETHDEV_READER_STATS_PKTS_IN_ADD(port, val) \ + port->stats.n_pkts_in += val +#define RTE_PORT_ETHDEV_READER_STATS_PKTS_DROP_ADD(port, val) \ + port->stats.n_pkts_drop += val + +#else /* RTE_PORT_STATS_COLLECT undefined: the stats macros expand to nothing */ + +#define RTE_PORT_ETHDEV_READER_STATS_PKTS_IN_ADD(port, val) +#define RTE_PORT_ETHDEV_READER_STATS_PKTS_DROP_ADD(port, val) + +#endif + +struct rte_port_ethdev_reader { + struct rte_port_in_stats stats; + + uint16_t queue_id; /* NIC RX queue read by this port */ + uint8_t port_id; /* NIC port that owns the queue */ +}; + +static void * +rte_port_ethdev_reader_create(void *params, int socket_id) +{ + struct rte_port_ethdev_reader_params *conf = + params; + struct rte_port_ethdev_reader *port; + + /* Check input parameters */ + if (conf == NULL) { + RTE_LOG(ERR, PORT, "%s: params is NULL\n", __func__); + return NULL; + } + + /* Memory allocation: cache-line aligned, on the requested socket */ + port = rte_zmalloc_socket("PORT", sizeof(*port), + RTE_CACHE_LINE_SIZE, socket_id); + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: Failed to allocate port\n", __func__); + return NULL; + } + + /* Initialization: copy the NIC port and queue IDs from the parameters */ + port->port_id = conf->port_id; + port->queue_id = conf->queue_id; + + return port; +} + +static int +rte_port_ethdev_reader_rx(void *port, struct rte_mbuf **pkts, uint32_t n_pkts) +{ + struct rte_port_ethdev_reader *p = + port; + 
uint16_t rx_pkt_cnt; + + rx_pkt_cnt = rte_eth_rx_burst(p->port_id, p->queue_id, pkts, n_pkts); + RTE_PORT_ETHDEV_READER_STATS_PKTS_IN_ADD(p, rx_pkt_cnt); + return rx_pkt_cnt; /* number of packets received, not an error code */ +} + +static int +rte_port_ethdev_reader_free(void *port) +{ + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: port is NULL\n", __func__); + return -EINVAL; + } + + rte_free(port); + + return 0; +} + +static int rte_port_ethdev_reader_stats_read(void *port, + struct rte_port_in_stats *stats, int clear) +{ + struct rte_port_ethdev_reader *p = + port; + + if (stats != NULL) /* a NULL stats pointer skips the copy; clear is still honoured */ + memcpy(stats, &p->stats, sizeof(p->stats)); + + if (clear) + memset(&p->stats, 0, sizeof(p->stats)); + + return 0; +} + +/* + * Port ETHDEV Writer: output port that buffers packets and sends them to one NIC TX queue + */ +#ifdef RTE_PORT_STATS_COLLECT + +#define RTE_PORT_ETHDEV_WRITER_STATS_PKTS_IN_ADD(port, val) \ + port->stats.n_pkts_in += val +#define RTE_PORT_ETHDEV_WRITER_STATS_PKTS_DROP_ADD(port, val) \ + port->stats.n_pkts_drop += val + +#else /* RTE_PORT_STATS_COLLECT undefined: the stats macros expand to nothing */ + +#define RTE_PORT_ETHDEV_WRITER_STATS_PKTS_IN_ADD(port, val) +#define RTE_PORT_ETHDEV_WRITER_STATS_PKTS_DROP_ADD(port, val) + +#endif + +struct rte_port_ethdev_writer { + struct rte_port_out_stats stats; + + struct rte_mbuf *tx_buf[2 * RTE_PORT_IN_BURST_SIZE_MAX]; /* staging buffer: room for a partly filled buffer (fewer than tx_burst_sz packets) plus one full 64-packet bulk */ + uint32_t tx_burst_sz; /* flush threshold: non-zero power of 2, at most RTE_PORT_IN_BURST_SIZE_MAX */ + uint16_t tx_buf_count; /* packets currently held in tx_buf */ + uint64_t bsz_mask; /* only bit (tx_burst_sz - 1) set; used by the tx_bulk fast-path test */ + uint16_t queue_id; /* NIC TX queue */ + uint8_t port_id; /* NIC port that owns the queue */ +}; + +static void * +rte_port_ethdev_writer_create(void *params, int socket_id) +{ + struct rte_port_ethdev_writer_params *conf = + params; + struct rte_port_ethdev_writer *port; + + /* Check input parameters: tx_burst_sz must be a power of 2 in 1..RTE_PORT_IN_BURST_SIZE_MAX */ + if ((conf == NULL) || + (conf->tx_burst_sz == 0) || + (conf->tx_burst_sz > RTE_PORT_IN_BURST_SIZE_MAX) || + (!rte_is_power_of_2(conf->tx_burst_sz))) { + RTE_LOG(ERR, PORT, "%s: Invalid input parameters\n", __func__); + return NULL; + } + + /* Memory allocation */ + port = rte_zmalloc_socket("PORT", sizeof(*port), + RTE_CACHE_LINE_SIZE, socket_id); + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: Failed to allocate port\n", __func__); + return NULL; + } + + /* 
Initialization */ + port->port_id = conf->port_id; + port->queue_id = conf->queue_id; + port->tx_burst_sz = conf->tx_burst_sz; + port->tx_buf_count = 0; + port->bsz_mask = 1LLU << (conf->tx_burst_sz - 1); /* single bit at position tx_burst_sz - 1 */ + + return port; +} + +static inline void +send_burst(struct rte_port_ethdev_writer *p) +{ + uint32_t nb_tx; + + nb_tx = rte_eth_tx_burst(p->port_id, p->queue_id, + p->tx_buf, p->tx_buf_count); /* nb_tx = packets accepted by the TX queue */ + + RTE_PORT_ETHDEV_WRITER_STATS_PKTS_DROP_ADD(p, p->tx_buf_count - nb_tx); + for ( ; nb_tx < p->tx_buf_count; nb_tx++) /* free the packets that were not accepted */ + rte_pktmbuf_free(p->tx_buf[nb_tx]); + + p->tx_buf_count = 0; /* the buffer is always empty on return */ +} + +static int +rte_port_ethdev_writer_tx(void *port, struct rte_mbuf *pkt) +{ + struct rte_port_ethdev_writer *p = + port; + + p->tx_buf[p->tx_buf_count++] = pkt; + RTE_PORT_ETHDEV_WRITER_STATS_PKTS_IN_ADD(p, 1); + if (p->tx_buf_count >= p->tx_burst_sz) /* flush once the burst threshold is reached */ + send_burst(p); + + return 0; +} + +static int +rte_port_ethdev_writer_tx_bulk(void *port, + struct rte_mbuf **pkts, + uint64_t pkts_mask) +{ + struct rte_port_ethdev_writer *p = + port; + uint64_t bsz_mask = p->bsz_mask; + uint32_t tx_buf_count = p->tx_buf_count; + uint64_t expr = (pkts_mask & (pkts_mask + 1)) | + ((pkts_mask & bsz_mask) ^ bsz_mask); /* 0 only when pkts_mask is a contiguous run of low bits that includes bit (tx_burst_sz - 1) */ + + if (expr == 0) { /* fast path: dense burst of at least tx_burst_sz packets, sent straight from pkts */ + uint64_t n_pkts = __builtin_popcountll(pkts_mask); + uint32_t n_pkts_ok; + + if (tx_buf_count) /* previously buffered packets go out before this burst */ + send_burst(p); + + RTE_PORT_ETHDEV_WRITER_STATS_PKTS_IN_ADD(p, n_pkts); + n_pkts_ok = rte_eth_tx_burst(p->port_id, p->queue_id, pkts, + n_pkts); + + RTE_PORT_ETHDEV_WRITER_STATS_PKTS_DROP_ADD(p, n_pkts - n_pkts_ok); + for ( ; n_pkts_ok < n_pkts; n_pkts_ok++) { /* free the packets that were not accepted */ + struct rte_mbuf *pkt = pkts[n_pkts_ok]; + + rte_pktmbuf_free(pkt); + } + } else { /* slow path: copy every valid packet into tx_buf */ + for ( ; pkts_mask; ) { + uint32_t pkt_index = __builtin_ctzll(pkts_mask); /* lowest set bit = next valid packet */ + uint64_t pkt_mask = 1LLU << pkt_index; + struct rte_mbuf *pkt = pkts[pkt_index]; + + p->tx_buf[tx_buf_count++] = pkt; + RTE_PORT_ETHDEV_WRITER_STATS_PKTS_IN_ADD(p, 1); + pkts_mask &= ~pkt_mask; /* clear the bit just handled */ + } + + p->tx_buf_count = tx_buf_count; + if (tx_buf_count >= 
p->tx_burst_sz) + send_burst(p); + } + + return 0; +} + +static int +rte_port_ethdev_writer_flush(void *port) +{ + struct rte_port_ethdev_writer *p = + port; + + if (p->tx_buf_count > 0) /* nothing to do when the buffer is empty */ + send_burst(p); + + return 0; +} + +static int +rte_port_ethdev_writer_free(void *port) +{ + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: Port is NULL\n", __func__); + return -EINVAL; + } + + rte_port_ethdev_writer_flush(port); /* send anything still buffered before releasing the port */ + rte_free(port); + + return 0; +} + +static int rte_port_ethdev_writer_stats_read(void *port, + struct rte_port_out_stats *stats, int clear) +{ + struct rte_port_ethdev_writer *p = + port; + + if (stats != NULL) /* a NULL stats pointer skips the copy; clear is still honoured */ + memcpy(stats, &p->stats, sizeof(p->stats)); + + if (clear) + memset(&p->stats, 0, sizeof(p->stats)); + + return 0; +} + +/* + * Port ETHDEV Writer Nodrop: writer that retries the TX burst up to n_retries times before freeing unsent packets + */ +#ifdef RTE_PORT_STATS_COLLECT + +#define RTE_PORT_ETHDEV_WRITER_NODROP_STATS_PKTS_IN_ADD(port, val) \ + port->stats.n_pkts_in += val +#define RTE_PORT_ETHDEV_WRITER_NODROP_STATS_PKTS_DROP_ADD(port, val) \ + port->stats.n_pkts_drop += val + +#else /* RTE_PORT_STATS_COLLECT undefined: the stats macros expand to nothing */ + +#define RTE_PORT_ETHDEV_WRITER_NODROP_STATS_PKTS_IN_ADD(port, val) +#define RTE_PORT_ETHDEV_WRITER_NODROP_STATS_PKTS_DROP_ADD(port, val) + +#endif + +struct rte_port_ethdev_writer_nodrop { + struct rte_port_out_stats stats; + + struct rte_mbuf *tx_buf[2 * RTE_PORT_IN_BURST_SIZE_MAX]; /* staging buffer for packets waiting to be sent */ + uint32_t tx_burst_sz; /* flush threshold: non-zero power of 2, at most RTE_PORT_IN_BURST_SIZE_MAX */ + uint16_t tx_buf_count; /* packets currently held in tx_buf */ + uint64_t bsz_mask; /* only bit (tx_burst_sz - 1) set; used by the tx_bulk fast-path test */ + uint64_t n_retries; /* retry budget per burst; UINT64_MAX when configured as 0 */ + uint16_t queue_id; /* NIC TX queue */ + uint8_t port_id; /* NIC port that owns the queue */ +}; + +static void * +rte_port_ethdev_writer_nodrop_create(void *params, int socket_id) +{ + struct rte_port_ethdev_writer_nodrop_params *conf = + params; + struct rte_port_ethdev_writer_nodrop *port; + + /* Check input parameters: tx_burst_sz must be a power of 2 in 1..RTE_PORT_IN_BURST_SIZE_MAX */ + if ((conf == NULL) || + (conf->tx_burst_sz == 0) || + (conf->tx_burst_sz > RTE_PORT_IN_BURST_SIZE_MAX) || + (!rte_is_power_of_2(conf->tx_burst_sz))) { + RTE_LOG(ERR, PORT, "%s: Invalid input parameters\n", __func__); + return NULL; + } + + /* Memory allocation */ + port = rte_zmalloc_socket("PORT", 
sizeof(*port), + RTE_CACHE_LINE_SIZE, socket_id); + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: Failed to allocate port\n", __func__); + return NULL; + } + + /* Initialization */ + port->port_id = conf->port_id; + port->queue_id = conf->queue_id; + port->tx_burst_sz = conf->tx_burst_sz; + port->tx_buf_count = 0; + port->bsz_mask = 1LLU << (conf->tx_burst_sz - 1); /* single bit at position tx_burst_sz - 1 */ + + /* + * A configured n_retries of 0 means: keep retrying until every packet + * is sent, no matter how many attempts it takes. To limit the number of + * branches in the fast path, UINT64_MAX is stored instead of branching. + */ + port->n_retries = (conf->n_retries == 0) ? UINT64_MAX : conf->n_retries; + + return port; +} + +static inline void +send_burst_nodrop(struct rte_port_ethdev_writer_nodrop *p) +{ + uint32_t nb_tx = 0, i; + + nb_tx = rte_eth_tx_burst(p->port_id, p->queue_id, p->tx_buf, + p->tx_buf_count); + + /* We sent all the packets on the first try */ + if (nb_tx >= p->tx_buf_count) { + p->tx_buf_count = 0; + return; + } + + for (i = 0; i < p->n_retries; i++) { /* i is 32-bit, so with n_retries == UINT64_MAX this loop only ends once everything is sent */ + nb_tx += rte_eth_tx_burst(p->port_id, p->queue_id, + p->tx_buf + nb_tx, p->tx_buf_count - nb_tx); /* resend only the unsent tail */ + + /* We sent all the packets in more than one try */ + if (nb_tx >= p->tx_buf_count) { + p->tx_buf_count = 0; + return; + } + } + + /* Retry budget exhausted: count and free the packets that are still unsent */ + RTE_PORT_ETHDEV_WRITER_NODROP_STATS_PKTS_DROP_ADD(p, p->tx_buf_count - nb_tx); + for ( ; nb_tx < p->tx_buf_count; nb_tx++) + rte_pktmbuf_free(p->tx_buf[nb_tx]); + + p->tx_buf_count = 0; +} + +static int +rte_port_ethdev_writer_nodrop_tx(void *port, struct rte_mbuf *pkt) +{ + struct rte_port_ethdev_writer_nodrop *p = + port; + + p->tx_buf[p->tx_buf_count++] = pkt; + RTE_PORT_ETHDEV_WRITER_NODROP_STATS_PKTS_IN_ADD(p, 1); + if (p->tx_buf_count >= p->tx_burst_sz) /* flush once the burst threshold is reached */ + send_burst_nodrop(p); + + return 0; +} + +static int +rte_port_ethdev_writer_nodrop_tx_bulk(void *port, + struct rte_mbuf **pkts, + uint64_t pkts_mask) +{ + struct rte_port_ethdev_writer_nodrop 
*p = + port; + + uint64_t bsz_mask = p->bsz_mask; + uint32_t tx_buf_count = p->tx_buf_count; + uint64_t expr = (pkts_mask & (pkts_mask + 1)) | + ((pkts_mask & bsz_mask) ^ bsz_mask); /* 0 only when pkts_mask is a contiguous run of low bits that includes bit (tx_burst_sz - 1) */ + + if (expr == 0) { /* fast path: dense burst of at least tx_burst_sz packets, sent straight from pkts */ + uint64_t n_pkts = __builtin_popcountll(pkts_mask); + uint32_t n_pkts_ok; + + if (tx_buf_count) /* previously buffered packets go out before this burst */ + send_burst_nodrop(p); + + RTE_PORT_ETHDEV_WRITER_NODROP_STATS_PKTS_IN_ADD(p, n_pkts); + n_pkts_ok = rte_eth_tx_burst(p->port_id, p->queue_id, pkts, + n_pkts); + + if (n_pkts_ok >= n_pkts) + return 0; + + /* + * If we did not manage to send all packets in a single burst, + * move the remaining packets to the buffer and let send_burst_nodrop() retry. + */ + for (; n_pkts_ok < n_pkts; n_pkts_ok++) { + struct rte_mbuf *pkt = pkts[n_pkts_ok]; + p->tx_buf[p->tx_buf_count++] = pkt; /* p->tx_buf_count is 0 here, so the leftovers start at slot 0 */ + } + send_burst_nodrop(p); + } else { /* slow path: copy every valid packet into tx_buf */ + for ( ; pkts_mask; ) { + uint32_t pkt_index = __builtin_ctzll(pkts_mask); /* lowest set bit = next valid packet */ + uint64_t pkt_mask = 1LLU << pkt_index; + struct rte_mbuf *pkt = pkts[pkt_index]; + + p->tx_buf[tx_buf_count++] = pkt; + RTE_PORT_ETHDEV_WRITER_NODROP_STATS_PKTS_IN_ADD(p, 1); + pkts_mask &= ~pkt_mask; /* clear the bit just handled */ + } + + p->tx_buf_count = tx_buf_count; + if (tx_buf_count >= p->tx_burst_sz) + send_burst_nodrop(p); + } + + return 0; +} + +static int +rte_port_ethdev_writer_nodrop_flush(void *port) +{ + struct rte_port_ethdev_writer_nodrop *p = + port; + + if (p->tx_buf_count > 0) /* nothing to do when the buffer is empty */ + send_burst_nodrop(p); + + return 0; +} + +static int +rte_port_ethdev_writer_nodrop_free(void *port) +{ + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: Port is NULL\n", __func__); + return -EINVAL; + } + + rte_port_ethdev_writer_nodrop_flush(port); /* send anything still buffered before releasing the port */ + rte_free(port); + + return 0; +} + +static int rte_port_ethdev_writer_nodrop_stats_read(void *port, + struct rte_port_out_stats *stats, int clear) +{ + struct rte_port_ethdev_writer_nodrop *p = + port; + + if (stats != NULL) /* a NULL stats pointer skips the copy; clear is still honoured */ + memcpy(stats, &p->stats, sizeof(p->stats)); + + if (clear) + memset(&p->stats, 0, sizeof(p->stats)); + + return 0; +} + +/* + * Summary of port operations + */ +struct 
rte_port_in_ops rte_port_ethdev_reader_ops = { + .f_create = rte_port_ethdev_reader_create, + .f_free = rte_port_ethdev_reader_free, + .f_rx = rte_port_ethdev_reader_rx, + .f_stats = rte_port_ethdev_reader_stats_read, +}; + +struct rte_port_out_ops rte_port_ethdev_writer_ops = { + .f_create = rte_port_ethdev_writer_create, + .f_free = rte_port_ethdev_writer_free, + .f_tx = rte_port_ethdev_writer_tx, + .f_tx_bulk = rte_port_ethdev_writer_tx_bulk, + .f_flush = rte_port_ethdev_writer_flush, + .f_stats = rte_port_ethdev_writer_stats_read, +}; + +struct rte_port_out_ops rte_port_ethdev_writer_nodrop_ops = { + .f_create = rte_port_ethdev_writer_nodrop_create, + .f_free = rte_port_ethdev_writer_nodrop_free, + .f_tx = rte_port_ethdev_writer_nodrop_tx, + .f_tx_bulk = rte_port_ethdev_writer_nodrop_tx_bulk, + .f_flush = rte_port_ethdev_writer_nodrop_flush, + .f_stats = rte_port_ethdev_writer_nodrop_stats_read, +}; diff --git a/src/seastar/dpdk/lib/librte_port/rte_port_ethdev.h b/src/seastar/dpdk/lib/librte_port/rte_port_ethdev.h new file mode 100644 index 00000000..201a79e4 --- /dev/null +++ b/src/seastar/dpdk/lib/librte_port/rte_port_ethdev.h @@ -0,0 +1,105 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __INCLUDE_RTE_PORT_ETHDEV_H__ +#define __INCLUDE_RTE_PORT_ETHDEV_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @file + * RTE Port Ethernet Device + * + * ethdev_reader: input port built on top of pre-initialized NIC RX queue + * ethdev_writer: output port built on top of pre-initialized NIC TX queue + * + ***/ + +#include <stdint.h> + +#include "rte_port.h" + +/** ethdev_reader port parameters */ +struct rte_port_ethdev_reader_params { + /** NIC RX port ID */ + uint8_t port_id; + + /** NIC RX queue ID */ + uint16_t queue_id; +}; + +/** ethdev_reader port operations */ +extern struct rte_port_in_ops rte_port_ethdev_reader_ops; + +/** ethdev_writer port parameters */ +struct rte_port_ethdev_writer_params { + /** NIC TX port ID */ + uint8_t port_id; + + /** NIC TX queue ID */ + uint16_t queue_id; + + /** Recommended burst size to NIC TX queue. The actual burst size can be + bigger or smaller than this value. 
*/ + uint32_t tx_burst_sz; +}; + +/** ethdev_writer port operations */ +extern struct rte_port_out_ops rte_port_ethdev_writer_ops; + +/** ethdev_writer_nodrop port parameters */ +struct rte_port_ethdev_writer_nodrop_params { + /** NIC TX port ID */ + uint8_t port_id; + + /** NIC TX queue ID */ + uint16_t queue_id; + + /** Recommended burst size to NIC TX queue. The actual burst size can be + bigger or smaller than this value. Must be a non-zero power of 2 no larger than RTE_PORT_IN_BURST_SIZE_MAX. */ + uint32_t tx_burst_sz; + + /** Maximum number of retries, 0 for no limit */ + uint32_t n_retries; +}; + +/** ethdev_writer_nodrop port operations */ +extern struct rte_port_out_ops rte_port_ethdev_writer_nodrop_ops; + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/seastar/dpdk/lib/librte_port/rte_port_fd.c b/src/seastar/dpdk/lib/librte_port/rte_port_fd.c new file mode 100644 index 00000000..b5b37291 --- /dev/null +++ b/src/seastar/dpdk/lib/librte_port/rte_port_fd.c @@ -0,0 +1,547 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include <string.h> +#include <stdint.h> +#include <unistd.h> + +#include <rte_mbuf.h> +#include <rte_malloc.h> + +#include "rte_port_fd.h" + +/* + * Port FD Reader + */ +#ifdef RTE_PORT_STATS_COLLECT + +#define RTE_PORT_FD_READER_STATS_PKTS_IN_ADD(port, val) \ + do { port->stats.n_pkts_in += val; } while (0) +#define RTE_PORT_FD_READER_STATS_PKTS_DROP_ADD(port, val) \ + do { port->stats.n_pkts_drop += val; } while (0) + +#else + +#define RTE_PORT_FD_READER_STATS_PKTS_IN_ADD(port, val) +#define RTE_PORT_FD_READER_STATS_PKTS_DROP_ADD(port, val) + +#endif + +struct rte_port_fd_reader { + struct rte_port_in_stats stats; + int fd; + uint32_t mtu; + struct rte_mempool *mempool; +}; + +static void * +rte_port_fd_reader_create(void *params, int socket_id) +{ + struct rte_port_fd_reader_params *conf = + params; + struct rte_port_fd_reader *port; + + /* Check input parameters */ + if (conf == NULL) { + RTE_LOG(ERR, PORT, "%s: params is NULL\n", __func__); + return NULL; + } + if (conf->fd < 0) { + RTE_LOG(ERR, PORT, "%s: Invalid file descriptor\n", __func__); + return NULL; + } + if (conf->mtu == 0) { + RTE_LOG(ERR, PORT, "%s: Invalid MTU\n", __func__); + return NULL; + } + if 
(conf->mempool == NULL) { + RTE_LOG(ERR, PORT, "%s: Invalid mempool\n", __func__); + return NULL; + } + + /* Memory allocation */ + port = rte_zmalloc_socket("PORT", sizeof(*port), + RTE_CACHE_LINE_SIZE, socket_id); + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: Failed to allocate port\n", __func__); + return NULL; + } + + /* Initialization */ + port->fd = conf->fd; + port->mtu = conf->mtu; + port->mempool = conf->mempool; + + return port; +} + +static int +rte_port_fd_reader_rx(void *port, struct rte_mbuf **pkts, uint32_t n_pkts) +{ + struct rte_port_fd_reader *p = port; + uint32_t i, j; + + if (rte_pktmbuf_alloc_bulk(p->mempool, pkts, n_pkts) != 0) /* allocate all n_pkts mbufs up front; on failure report zero packets */ + return 0; + + for (i = 0; i < n_pkts; i++) { + struct rte_mbuf *pkt = pkts[i]; + void *pkt_data = rte_pktmbuf_mtod(pkt, void *); + ssize_t n_bytes; + + n_bytes = read(p->fd, pkt_data, (size_t) p->mtu); /* one read() per packet, at most mtu bytes */ + if (n_bytes <= 0) /* read error or nothing read: stop, mbuf i stays unused */ + break; + + pkt->data_len = n_bytes; + pkt->pkt_len = n_bytes; + } + + for (j = i; j < n_pkts; j++) /* give back the mbufs that were allocated but not filled */ + rte_pktmbuf_free(pkts[j]); + + RTE_PORT_FD_READER_STATS_PKTS_IN_ADD(p, i); + + return i; /* number of packets actually read */ +} + +static int +rte_port_fd_reader_free(void *port) +{ + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: port is NULL\n", __func__); + return -EINVAL; + } + + rte_free(port); /* only the port object is released; fd and mempool are not touched here */ + + return 0; +} + +static int rte_port_fd_reader_stats_read(void *port, + struct rte_port_in_stats *stats, int clear) +{ + struct rte_port_fd_reader *p = + port; + + if (stats != NULL) /* a NULL stats pointer skips the copy; clear is still honoured */ + memcpy(stats, &p->stats, sizeof(p->stats)); + + if (clear) + memset(&p->stats, 0, sizeof(p->stats)); + + return 0; +} + +/* + * Port FD Writer: output port that buffers packets and write()s them to a file descriptor + */ +#ifdef RTE_PORT_STATS_COLLECT + +#define RTE_PORT_FD_WRITER_STATS_PKTS_IN_ADD(port, val) \ + do { port->stats.n_pkts_in += val; } while (0) +#define RTE_PORT_FD_WRITER_STATS_PKTS_DROP_ADD(port, val) \ + do { port->stats.n_pkts_drop += val; } while (0) + +#else /* RTE_PORT_STATS_COLLECT undefined: the stats macros expand to nothing */ + +#define RTE_PORT_FD_WRITER_STATS_PKTS_IN_ADD(port, val) +#define RTE_PORT_FD_WRITER_STATS_PKTS_DROP_ADD(port, val) + +#endif + +struct 
rte_port_fd_writer { + struct rte_port_out_stats stats; + + struct rte_mbuf *tx_buf[2 * RTE_PORT_IN_BURST_SIZE_MAX]; + uint32_t tx_burst_sz; + uint16_t tx_buf_count; + uint32_t fd; +}; + +static void * +rte_port_fd_writer_create(void *params, int socket_id) +{ + struct rte_port_fd_writer_params *conf = + params; + struct rte_port_fd_writer *port; + + /* Check input parameters */ + if ((conf == NULL) || + (conf->tx_burst_sz == 0) || + (conf->tx_burst_sz > RTE_PORT_IN_BURST_SIZE_MAX) || + (!rte_is_power_of_2(conf->tx_burst_sz))) { + RTE_LOG(ERR, PORT, "%s: Invalid input parameters\n", __func__); + return NULL; + } + + /* Memory allocation */ + port = rte_zmalloc_socket("PORT", sizeof(*port), + RTE_CACHE_LINE_SIZE, socket_id); + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: Failed to allocate port\n", __func__); + return NULL; + } + + /* Initialization */ + port->fd = conf->fd; + port->tx_burst_sz = conf->tx_burst_sz; + port->tx_buf_count = 0; + + return port; +} + +static inline void +send_burst(struct rte_port_fd_writer *p) +{ + uint32_t i; + + for (i = 0; i < p->tx_buf_count; i++) { + struct rte_mbuf *pkt = p->tx_buf[i]; + void *pkt_data = rte_pktmbuf_mtod(pkt, void*); + size_t n_bytes = rte_pktmbuf_data_len(pkt); + ssize_t ret; + + ret = write(p->fd, pkt_data, n_bytes); + if (ret < 0) + break; + } + + RTE_PORT_FD_WRITER_STATS_PKTS_DROP_ADD(p, p->tx_buf_count - i); + + for (i = 0; i < p->tx_buf_count; i++) + rte_pktmbuf_free(p->tx_buf[i]); + + p->tx_buf_count = 0; +} + +static int +rte_port_fd_writer_tx(void *port, struct rte_mbuf *pkt) +{ + struct rte_port_fd_writer *p = + port; + + p->tx_buf[p->tx_buf_count++] = pkt; + RTE_PORT_FD_WRITER_STATS_PKTS_IN_ADD(p, 1); + if (p->tx_buf_count >= p->tx_burst_sz) + send_burst(p); + + return 0; +} + +static int +rte_port_fd_writer_tx_bulk(void *port, + struct rte_mbuf **pkts, + uint64_t pkts_mask) +{ + struct rte_port_fd_writer *p = + port; + uint32_t tx_buf_count = p->tx_buf_count; + + if ((pkts_mask & (pkts_mask + 1)) == 
0) { + uint64_t n_pkts = __builtin_popcountll(pkts_mask); + uint32_t i; + + for (i = 0; i < n_pkts; i++) + p->tx_buf[tx_buf_count++] = pkts[i]; + RTE_PORT_FD_WRITER_STATS_PKTS_IN_ADD(p, n_pkts); + } else + for ( ; pkts_mask; ) { + uint32_t pkt_index = __builtin_ctzll(pkts_mask); + uint64_t pkt_mask = 1LLU << pkt_index; + struct rte_mbuf *pkt = pkts[pkt_index]; + + p->tx_buf[tx_buf_count++] = pkt; + RTE_PORT_FD_WRITER_STATS_PKTS_IN_ADD(p, 1); + pkts_mask &= ~pkt_mask; + } + + p->tx_buf_count = tx_buf_count; + if (tx_buf_count >= p->tx_burst_sz) + send_burst(p); + + return 0; +} + +static int +rte_port_fd_writer_flush(void *port) +{ + struct rte_port_fd_writer *p = + port; + + if (p->tx_buf_count > 0) + send_burst(p); + + return 0; +} + +static int +rte_port_fd_writer_free(void *port) +{ + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: Port is NULL\n", __func__); + return -EINVAL; + } + + rte_port_fd_writer_flush(port); + rte_free(port); + + return 0; +} + +static int rte_port_fd_writer_stats_read(void *port, + struct rte_port_out_stats *stats, int clear) +{ + struct rte_port_fd_writer *p = + port; + + if (stats != NULL) + memcpy(stats, &p->stats, sizeof(p->stats)); + + if (clear) + memset(&p->stats, 0, sizeof(p->stats)); + + return 0; +} + +/* + * Port FD Writer Nodrop + */ +#ifdef RTE_PORT_STATS_COLLECT + +#define RTE_PORT_FD_WRITER_NODROP_STATS_PKTS_IN_ADD(port, val) \ + do { port->stats.n_pkts_in += val; } while (0) +#define RTE_PORT_FD_WRITER_NODROP_STATS_PKTS_DROP_ADD(port, val) \ + do { port->stats.n_pkts_drop += val; } while (0) + +#else + +#define RTE_PORT_FD_WRITER_NODROP_STATS_PKTS_IN_ADD(port, val) +#define RTE_PORT_FD_WRITER_NODROP_STATS_PKTS_DROP_ADD(port, val) + +#endif + +struct rte_port_fd_writer_nodrop { + struct rte_port_out_stats stats; + + struct rte_mbuf *tx_buf[2 * RTE_PORT_IN_BURST_SIZE_MAX]; + uint32_t tx_burst_sz; + uint16_t tx_buf_count; + uint64_t n_retries; + uint32_t fd; +}; + +static void * +rte_port_fd_writer_nodrop_create(void *params, 
int socket_id) +{ + struct rte_port_fd_writer_nodrop_params *conf = + params; + struct rte_port_fd_writer_nodrop *port; + + /* Check input parameters */ + if ((conf == NULL) || + (conf->fd < 0) || + (conf->tx_burst_sz == 0) || + (conf->tx_burst_sz > RTE_PORT_IN_BURST_SIZE_MAX) || + (!rte_is_power_of_2(conf->tx_burst_sz))) { + RTE_LOG(ERR, PORT, "%s: Invalid input parameters\n", __func__); + return NULL; + } + + /* Memory allocation */ + port = rte_zmalloc_socket("PORT", sizeof(*port), + RTE_CACHE_LINE_SIZE, socket_id); + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: Failed to allocate port\n", __func__); + return NULL; + } + + /* Initialization */ + port->fd = conf->fd; + port->tx_burst_sz = conf->tx_burst_sz; + port->tx_buf_count = 0; + + /* + * When n_retries is 0 it means that we should wait for every packet to + * send no matter how many retries should it take. To limit number of + * branches in fast path, we use UINT64_MAX instead of branching. + */ + port->n_retries = (conf->n_retries == 0) ? 
UINT64_MAX : conf->n_retries; + + return port; +} + +static inline void +send_burst_nodrop(struct rte_port_fd_writer_nodrop *p) +{ + uint64_t n_retries; + uint32_t i; + + n_retries = 0; + for (i = 0; (i < p->tx_buf_count) && (n_retries < p->n_retries); i++) { + struct rte_mbuf *pkt = p->tx_buf[i]; + void *pkt_data = rte_pktmbuf_mtod(pkt, void*); + size_t n_bytes = rte_pktmbuf_data_len(pkt); + + for ( ; n_retries < p->n_retries; n_retries++) { + ssize_t ret; + + ret = write(p->fd, pkt_data, n_bytes); + if (ret) + break; + } + } + + RTE_PORT_FD_WRITER_NODROP_STATS_PKTS_DROP_ADD(p, p->tx_buf_count - i); + + for (i = 0; i < p->tx_buf_count; i++) + rte_pktmbuf_free(p->tx_buf[i]); + + p->tx_buf_count = 0; +} + +static int +rte_port_fd_writer_nodrop_tx(void *port, struct rte_mbuf *pkt) +{ + struct rte_port_fd_writer_nodrop *p = + port; + + p->tx_buf[p->tx_buf_count++] = pkt; + RTE_PORT_FD_WRITER_NODROP_STATS_PKTS_IN_ADD(p, 1); + if (p->tx_buf_count >= p->tx_burst_sz) + send_burst_nodrop(p); + + return 0; +} + +static int +rte_port_fd_writer_nodrop_tx_bulk(void *port, + struct rte_mbuf **pkts, + uint64_t pkts_mask) +{ + struct rte_port_fd_writer_nodrop *p = + port; + uint32_t tx_buf_count = p->tx_buf_count; + + if ((pkts_mask & (pkts_mask + 1)) == 0) { + uint64_t n_pkts = __builtin_popcountll(pkts_mask); + uint32_t i; + + for (i = 0; i < n_pkts; i++) + p->tx_buf[tx_buf_count++] = pkts[i]; + RTE_PORT_FD_WRITER_NODROP_STATS_PKTS_IN_ADD(p, n_pkts); + } else + for ( ; pkts_mask; ) { + uint32_t pkt_index = __builtin_ctzll(pkts_mask); + uint64_t pkt_mask = 1LLU << pkt_index; + struct rte_mbuf *pkt = pkts[pkt_index]; + + p->tx_buf[tx_buf_count++] = pkt; + RTE_PORT_FD_WRITER_NODROP_STATS_PKTS_IN_ADD(p, 1); + pkts_mask &= ~pkt_mask; + } + + p->tx_buf_count = tx_buf_count; + if (tx_buf_count >= p->tx_burst_sz) + send_burst_nodrop(p); + + return 0; +} + +static int +rte_port_fd_writer_nodrop_flush(void *port) +{ + struct rte_port_fd_writer_nodrop *p = + port; + + if (p->tx_buf_count 
> 0) + send_burst_nodrop(p); + + return 0; +} + +static int +rte_port_fd_writer_nodrop_free(void *port) +{ + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: Port is NULL\n", __func__); + return -EINVAL; + } + + rte_port_fd_writer_nodrop_flush(port); + rte_free(port); + +return 0; +} + +static int rte_port_fd_writer_nodrop_stats_read(void *port, + struct rte_port_out_stats *stats, int clear) +{ + struct rte_port_fd_writer_nodrop *p = + port; + + if (stats != NULL) + memcpy(stats, &p->stats, sizeof(p->stats)); + + if (clear) + memset(&p->stats, 0, sizeof(p->stats)); + + return 0; +} + +/* + * Summary of port operations + */ +struct rte_port_in_ops rte_port_fd_reader_ops = { + .f_create = rte_port_fd_reader_create, + .f_free = rte_port_fd_reader_free, + .f_rx = rte_port_fd_reader_rx, + .f_stats = rte_port_fd_reader_stats_read, +}; + +struct rte_port_out_ops rte_port_fd_writer_ops = { + .f_create = rte_port_fd_writer_create, + .f_free = rte_port_fd_writer_free, + .f_tx = rte_port_fd_writer_tx, + .f_tx_bulk = rte_port_fd_writer_tx_bulk, + .f_flush = rte_port_fd_writer_flush, + .f_stats = rte_port_fd_writer_stats_read, +}; + +struct rte_port_out_ops rte_port_fd_writer_nodrop_ops = { + .f_create = rte_port_fd_writer_nodrop_create, + .f_free = rte_port_fd_writer_nodrop_free, + .f_tx = rte_port_fd_writer_nodrop_tx, + .f_tx_bulk = rte_port_fd_writer_nodrop_tx_bulk, + .f_flush = rte_port_fd_writer_nodrop_flush, + .f_stats = rte_port_fd_writer_nodrop_stats_read, +}; diff --git a/src/seastar/dpdk/lib/librte_port/rte_port_fd.h b/src/seastar/dpdk/lib/librte_port/rte_port_fd.h new file mode 100644 index 00000000..77a2d31b --- /dev/null +++ b/src/seastar/dpdk/lib/librte_port/rte_port_fd.h @@ -0,0 +1,105 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2016 Intel Corporation. All rights reserved. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef __INCLUDE_RTE_PORT_FD_H__ +#define __INCLUDE_RTE_PORT_FD_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @file + * RTE Port FD Device + * + * fd_reader: input port built on top of valid non-blocking file descriptor + * fd_writer: output port built on top of valid non-blocking file descriptor + * fd_writer_nodrop: output port built on top of valid non-blocking file + * descriptor that retries writes, up to n_retries times (0 for no limit) + * + ***/ + +#include <stdint.h> + +#include <rte_mempool.h> +#include "rte_port.h" + +/** fd_reader port parameters */ +struct rte_port_fd_reader_params { + /** File descriptor; must not be negative */ + int fd; + + /** Maximum Transfer Unit (MTU): upper bound on the number of bytes read from the file descriptor per packet; must not be zero */ + uint32_t mtu; + + /** Pre-initialized buffer pool the packet buffers are allocated from; must not be NULL */ + struct rte_mempool *mempool; +}; + +/** fd_reader port operations */ +extern struct rte_port_in_ops rte_port_fd_reader_ops; + +/** fd_writer port parameters */ +struct rte_port_fd_writer_params { + /** File descriptor */ + int fd; + + /** Recommended write burst size. The actual burst size can be + * bigger or smaller than this value. Must be a non-zero power of 2 + * not greater than RTE_PORT_IN_BURST_SIZE_MAX. + */ + uint32_t tx_burst_sz; +}; + +/** fd_writer port operations */ +extern struct rte_port_out_ops rte_port_fd_writer_ops; + +/** fd_writer_nodrop port parameters */ +struct rte_port_fd_writer_nodrop_params { + /** File descriptor; must not be negative */ + int fd; + + /** Recommended write burst size. The actual burst size can be + * bigger or smaller than this value. Must be a non-zero power of 2 + * not greater than RTE_PORT_IN_BURST_SIZE_MAX. + */ + uint32_t tx_burst_sz; + + /** Maximum number of retries, 0 for no limit */ + uint32_t n_retries; +}; + +/** fd_writer_nodrop port operations */ +extern struct rte_port_out_ops rte_port_fd_writer_nodrop_ops; + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/seastar/dpdk/lib/librte_port/rte_port_frag.c b/src/seastar/dpdk/lib/librte_port/rte_port_frag.c new file mode 100644 index 00000000..a00c9ae1 --- /dev/null +++ b/src/seastar/dpdk/lib/librte_port/rte_port_frag.c @@ -0,0 +1,306 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#include <string.h> + +#include <rte_ether.h> +#include <rte_ip_frag.h> +#include <rte_memory.h> + +#include "rte_port_frag.h" + +/* Max number of fragments per packet allowed */ +#define RTE_PORT_FRAG_MAX_FRAGS_PER_PACKET 0x80 + +#ifdef RTE_PORT_STATS_COLLECT + +#define RTE_PORT_RING_READER_FRAG_STATS_PKTS_IN_ADD(port, val) \ + port->stats.n_pkts_in += val +#define RTE_PORT_RING_READER_FRAG_STATS_PKTS_DROP_ADD(port, val) \ + port->stats.n_pkts_drop += val + +#else + +#define RTE_PORT_RING_READER_FRAG_STATS_PKTS_IN_ADD(port, val) +#define RTE_PORT_RING_READER_FRAG_STATS_PKTS_DROP_ADD(port, val) + +#endif + +typedef int32_t + (*frag_op)(struct rte_mbuf *pkt_in, + struct rte_mbuf **pkts_out, + uint16_t nb_pkts_out, + uint16_t mtu_size, + struct rte_mempool *pool_direct, + struct rte_mempool *pool_indirect); + +struct rte_port_ring_reader_frag { + struct rte_port_in_stats stats; + + /* Input parameters */ + struct rte_ring *ring; + uint32_t mtu; + uint32_t metadata_size; + struct rte_mempool *pool_direct; + struct rte_mempool *pool_indirect; + + /* Internal buffers */ + struct rte_mbuf *pkts[RTE_PORT_IN_BURST_SIZE_MAX]; + struct rte_mbuf *frags[RTE_PORT_FRAG_MAX_FRAGS_PER_PACKET]; + uint32_t n_pkts; + uint32_t pos_pkts; + uint32_t n_frags; + uint32_t pos_frags; + + frag_op f_frag; +} __rte_cache_aligned; + +static void * +rte_port_ring_reader_frag_create(void *params, int socket_id, int is_ipv4) +{ + struct rte_port_ring_reader_frag_params *conf = + params; + struct rte_port_ring_reader_frag *port; + + /* Check input parameters */ + if (conf == NULL) { + RTE_LOG(ERR, PORT, "%s: Parameter conf is NULL\n", __func__); + return NULL; + } + if (conf->ring == NULL) { + RTE_LOG(ERR, PORT, "%s: Parameter ring is NULL\n", __func__); + return NULL; + } + if (conf->mtu == 0) { + RTE_LOG(ERR, PORT, "%s: Parameter mtu is invalid\n", __func__); + return NULL; + } + if (conf->pool_direct == NULL) { + RTE_LOG(ERR, PORT, "%s: Parameter pool_direct is NULL\n", + __func__); + 
return NULL; + } + if (conf->pool_indirect == NULL) { + RTE_LOG(ERR, PORT, "%s: Parameter pool_indirect is NULL\n", + __func__); + return NULL; + } + + /* Memory allocation */ + port = rte_zmalloc_socket("PORT", sizeof(*port), RTE_CACHE_LINE_SIZE, + socket_id); + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: port is NULL\n", __func__); + return NULL; + } + + /* Initialization */ + port->ring = conf->ring; + port->mtu = conf->mtu; + port->metadata_size = conf->metadata_size; + port->pool_direct = conf->pool_direct; + port->pool_indirect = conf->pool_indirect; + + port->n_pkts = 0; + port->pos_pkts = 0; + port->n_frags = 0; + port->pos_frags = 0; + + port->f_frag = (is_ipv4) ? + rte_ipv4_fragment_packet : rte_ipv6_fragment_packet; + + return port; +} + +static void * +rte_port_ring_reader_ipv4_frag_create(void *params, int socket_id) +{ + return rte_port_ring_reader_frag_create(params, socket_id, 1); +} + +static void * +rte_port_ring_reader_ipv6_frag_create(void *params, int socket_id) +{ + return rte_port_ring_reader_frag_create(params, socket_id, 0); +} + +static int +rte_port_ring_reader_frag_rx(void *port, + struct rte_mbuf **pkts, + uint32_t n_pkts) +{ + struct rte_port_ring_reader_frag *p = + port; + uint32_t n_pkts_out; + + n_pkts_out = 0; + + /* Get packets from the "frag" buffer */ + if (p->n_frags >= n_pkts) { + memcpy(pkts, &p->frags[p->pos_frags], n_pkts * sizeof(void *)); + p->pos_frags += n_pkts; + p->n_frags -= n_pkts; + + return n_pkts; + } + + memcpy(pkts, &p->frags[p->pos_frags], p->n_frags * sizeof(void *)); + n_pkts_out = p->n_frags; + p->n_frags = 0; + + /* Look to "pkts" buffer to get more packets */ + for ( ; ; ) { + struct rte_mbuf *pkt; + uint32_t n_pkts_to_provide, i; + int status; + + /* If "pkts" buffer is empty, read packet burst from ring */ + if (p->n_pkts == 0) { + p->n_pkts = rte_ring_sc_dequeue_burst(p->ring, + (void **) p->pkts, RTE_PORT_IN_BURST_SIZE_MAX, + NULL); + RTE_PORT_RING_READER_FRAG_STATS_PKTS_IN_ADD(p, p->n_pkts); + if 
(p->n_pkts == 0) + return n_pkts_out; + p->pos_pkts = 0; + } + + /* Read next packet from "pkts" buffer */ + pkt = p->pkts[p->pos_pkts++]; + p->n_pkts--; + + /* If not jumbo, pass current packet to output */ + if (pkt->pkt_len <= p->mtu) { + pkts[n_pkts_out++] = pkt; + + n_pkts_to_provide = n_pkts - n_pkts_out; + if (n_pkts_to_provide == 0) + return n_pkts; + + continue; + } + + /* Fragment current packet into the "frags" buffer */ + status = p->f_frag( + pkt, + p->frags, + RTE_PORT_FRAG_MAX_FRAGS_PER_PACKET, + p->mtu, + p->pool_direct, + p->pool_indirect + ); + + if (status < 0) { + rte_pktmbuf_free(pkt); + RTE_PORT_RING_READER_FRAG_STATS_PKTS_DROP_ADD(p, 1); + continue; + } + + p->n_frags = (uint32_t) status; + p->pos_frags = 0; + + /* Copy meta-data from input jumbo packet to its fragments */ + for (i = 0; i < p->n_frags; i++) { + uint8_t *src = + RTE_MBUF_METADATA_UINT8_PTR(pkt, sizeof(struct rte_mbuf)); + uint8_t *dst = + RTE_MBUF_METADATA_UINT8_PTR(p->frags[i], sizeof(struct rte_mbuf)); + + memcpy(dst, src, p->metadata_size); + } + + /* Free input jumbo packet */ + rte_pktmbuf_free(pkt); + + /* Get packets from "frag" buffer */ + n_pkts_to_provide = n_pkts - n_pkts_out; + if (p->n_frags >= n_pkts_to_provide) { + memcpy(&pkts[n_pkts_out], p->frags, + n_pkts_to_provide * sizeof(void *)); + p->n_frags -= n_pkts_to_provide; + p->pos_frags += n_pkts_to_provide; + + return n_pkts; + } + + memcpy(&pkts[n_pkts_out], p->frags, + p->n_frags * sizeof(void *)); + n_pkts_out += p->n_frags; + p->n_frags = 0; + } +} + +static int +rte_port_ring_reader_frag_free(void *port) +{ + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: Parameter port is NULL\n", __func__); + return -1; + } + + rte_free(port); + + return 0; +} + +static int +rte_port_frag_reader_stats_read(void *port, + struct rte_port_in_stats *stats, int clear) +{ + struct rte_port_ring_reader_frag *p = + port; + + if (stats != NULL) + memcpy(stats, &p->stats, sizeof(p->stats)); + + if (clear) + memset(&p->stats, 0, 
sizeof(p->stats)); + + return 0; +} + +/* + * Summary of port operations + */ +struct rte_port_in_ops rte_port_ring_reader_ipv4_frag_ops = { + .f_create = rte_port_ring_reader_ipv4_frag_create, + .f_free = rte_port_ring_reader_frag_free, + .f_rx = rte_port_ring_reader_frag_rx, + .f_stats = rte_port_frag_reader_stats_read, +}; + +struct rte_port_in_ops rte_port_ring_reader_ipv6_frag_ops = { + .f_create = rte_port_ring_reader_ipv6_frag_create, + .f_free = rte_port_ring_reader_frag_free, + .f_rx = rte_port_ring_reader_frag_rx, + .f_stats = rte_port_frag_reader_stats_read, +}; diff --git a/src/seastar/dpdk/lib/librte_port/rte_port_frag.h b/src/seastar/dpdk/lib/librte_port/rte_port_frag.h new file mode 100644 index 00000000..0085ff7c --- /dev/null +++ b/src/seastar/dpdk/lib/librte_port/rte_port_frag.h @@ -0,0 +1,101 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __INCLUDE_RTE_PORT_IP_FRAG_H__ +#define __INCLUDE_RTE_PORT_IP_FRAG_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @file + * RTE Port for IPv4 and IPv6 Fragmentation + * + * This port is built on top of pre-initialized single consumer rte_ring. In + * order to minimize the amount of packets stored in the ring at any given + * time, the IP fragmentation functionality is executed on ring read operation, + * hence this port is implemented as an input port. A regular ring_writer port + * can be created to write to the same ring. + * + * The packets written to the ring are either complete IP datagrams or jumbo + * frames (i.e. IP packets with length bigger than provided MTU value). The + * packets read from the ring are all non-jumbo frames. The complete IP + * datagrams written to the ring are not changed. The jumbo frames are + * fragmented into several IP packets with length less than or equal to MTU. + * + ***/ + +#include <stdint.h> + +#include <rte_ring.h> + +#include "rte_port.h" + +/** ring_reader_frag port parameters, shared by the ring_reader_ipv4_frag and ring_reader_ipv6_frag ports */ +struct rte_port_ring_reader_frag_params { + /** Underlying single consumer ring that has to be pre-initialized. */ + struct rte_ring *ring; + + /** Maximum Transfer Unit (MTU). Maximum IP packet size (in bytes). 
*/ + uint32_t mtu; + + /** Size of application dependent meta-data stored per each input packet + that has to be copied to each of the fragments originating from the + same input IP datagram. */ + uint32_t metadata_size; + + /** Pre-initialized buffer pool used for allocating direct buffers for + the output fragments. */ + struct rte_mempool *pool_direct; + + /** Pre-initialized buffer pool used for allocating indirect buffers for + the output fragments. */ + struct rte_mempool *pool_indirect; +}; + +#define rte_port_ring_reader_ipv4_frag_params rte_port_ring_reader_frag_params + +#define rte_port_ring_reader_ipv6_frag_params rte_port_ring_reader_frag_params + +/** ring_reader_ipv4_frag port operations */ +extern struct rte_port_in_ops rte_port_ring_reader_ipv4_frag_ops; + +/** ring_reader_ipv6_frag port operations */ +extern struct rte_port_in_ops rte_port_ring_reader_ipv6_frag_ops; + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/seastar/dpdk/lib/librte_port/rte_port_kni.c b/src/seastar/dpdk/lib/librte_port/rte_port_kni.c new file mode 100644 index 00000000..2515fb2a --- /dev/null +++ b/src/seastar/dpdk/lib/librte_port/rte_port_kni.c @@ -0,0 +1,545 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2016 Ethan Zhuang <zhuangwj@gmail.com>. + * Copyright(c) 2016 Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#include <string.h> + +#include <rte_common.h> +#include <rte_malloc.h> +#include <rte_kni.h> + +#include "rte_port_kni.h" + +/* + * Port KNI Reader + */ +#ifdef RTE_PORT_STATS_COLLECT + +#define RTE_PORT_KNI_READER_STATS_PKTS_IN_ADD(port, val) \ + port->stats.n_pkts_in += val +#define RTE_PORT_KNI_READER_STATS_PKTS_DROP_ADD(port, val) \ + port->stats.n_pkts_drop += val + +#else + +#define RTE_PORT_KNI_READER_STATS_PKTS_IN_ADD(port, val) +#define RTE_PORT_KNI_READER_STATS_PKTS_DROP_ADD(port, val) + +#endif + +struct rte_port_kni_reader { + struct rte_port_in_stats stats; + + struct rte_kni *kni; +}; + +static void * +rte_port_kni_reader_create(void *params, int socket_id) +{ + struct rte_port_kni_reader_params *conf = + params; + struct rte_port_kni_reader *port; + + /* Check input parameters */ + if (conf == NULL) { + RTE_LOG(ERR, PORT, "%s: params is NULL\n", __func__); + return NULL; + } + + /* Memory allocation */ + port = rte_zmalloc_socket("PORT", sizeof(*port), + RTE_CACHE_LINE_SIZE, socket_id); + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: Failed to allocate port\n", __func__); + return NULL; + } + + /* Initialization */ + port->kni = conf->kni; + + return port; +} + +static int +rte_port_kni_reader_rx(void *port, struct rte_mbuf **pkts, uint32_t n_pkts) +{ + struct rte_port_kni_reader *p = + port; + uint16_t rx_pkt_cnt; + + rx_pkt_cnt = rte_kni_rx_burst(p->kni, pkts, n_pkts); + RTE_PORT_KNI_READER_STATS_PKTS_IN_ADD(p, rx_pkt_cnt); + return rx_pkt_cnt; +} + +static int +rte_port_kni_reader_free(void *port) +{ + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: port is NULL\n", __func__); + return -EINVAL; + } + + rte_free(port); + + return 0; +} + +static int rte_port_kni_reader_stats_read(void *port, + struct rte_port_in_stats *stats, int clear) +{ + struct rte_port_kni_reader *p = + port; + + if (stats != NULL) + memcpy(stats, &p->stats, sizeof(p->stats)); + + if (clear) + memset(&p->stats, 0, sizeof(p->stats)); + + return 0; +} + +/* + * Port KNI 
Writer + */ +#ifdef RTE_PORT_STATS_COLLECT + +#define RTE_PORT_KNI_WRITER_STATS_PKTS_IN_ADD(port, val) \ + port->stats.n_pkts_in += val +#define RTE_PORT_KNI_WRITER_STATS_PKTS_DROP_ADD(port, val) \ + port->stats.n_pkts_drop += val + +#else + +#define RTE_PORT_KNI_WRITER_STATS_PKTS_IN_ADD(port, val) +#define RTE_PORT_KNI_WRITER_STATS_PKTS_DROP_ADD(port, val) + +#endif + +struct rte_port_kni_writer { + struct rte_port_out_stats stats; + + struct rte_mbuf *tx_buf[2 * RTE_PORT_IN_BURST_SIZE_MAX]; + uint32_t tx_burst_sz; + uint32_t tx_buf_count; + uint64_t bsz_mask; + struct rte_kni *kni; +}; + +static void * +rte_port_kni_writer_create(void *params, int socket_id) +{ + struct rte_port_kni_writer_params *conf = + params; + struct rte_port_kni_writer *port; + + /* Check input parameters */ + if ((conf == NULL) || + (conf->tx_burst_sz == 0) || + (conf->tx_burst_sz > RTE_PORT_IN_BURST_SIZE_MAX) || + (!rte_is_power_of_2(conf->tx_burst_sz))) { + RTE_LOG(ERR, PORT, "%s: Invalid input parameters\n", __func__); + return NULL; + } + + /* Memory allocation */ + port = rte_zmalloc_socket("PORT", sizeof(*port), + RTE_CACHE_LINE_SIZE, socket_id); + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: Failed to allocate port\n", __func__); + return NULL; + } + + /* Initialization */ + port->kni = conf->kni; + port->tx_burst_sz = conf->tx_burst_sz; + port->tx_buf_count = 0; + port->bsz_mask = 1LLU << (conf->tx_burst_sz - 1); + + return port; +} + +static inline void +send_burst(struct rte_port_kni_writer *p) +{ + uint32_t nb_tx; + + nb_tx = rte_kni_tx_burst(p->kni, p->tx_buf, p->tx_buf_count); + + RTE_PORT_KNI_WRITER_STATS_PKTS_DROP_ADD(p, p->tx_buf_count - nb_tx); + for (; nb_tx < p->tx_buf_count; nb_tx++) + rte_pktmbuf_free(p->tx_buf[nb_tx]); + + p->tx_buf_count = 0; +} + +static int +rte_port_kni_writer_tx(void *port, struct rte_mbuf *pkt) +{ + struct rte_port_kni_writer *p = + port; + + p->tx_buf[p->tx_buf_count++] = pkt; + RTE_PORT_KNI_WRITER_STATS_PKTS_IN_ADD(p, 1); + if 
(p->tx_buf_count >= p->tx_burst_sz) + send_burst(p); + + return 0; +} + +static int +rte_port_kni_writer_tx_bulk(void *port, + struct rte_mbuf **pkts, + uint64_t pkts_mask) +{ + struct rte_port_kni_writer *p = + port; + uint64_t bsz_mask = p->bsz_mask; + uint32_t tx_buf_count = p->tx_buf_count; + uint64_t expr = (pkts_mask & (pkts_mask + 1)) | + ((pkts_mask & bsz_mask) ^ bsz_mask); + + if (expr == 0) { + uint64_t n_pkts = __builtin_popcountll(pkts_mask); + uint32_t n_pkts_ok; + + if (tx_buf_count) + send_burst(p); + + RTE_PORT_KNI_WRITER_STATS_PKTS_IN_ADD(p, n_pkts); + n_pkts_ok = rte_kni_tx_burst(p->kni, pkts, n_pkts); + + RTE_PORT_KNI_WRITER_STATS_PKTS_DROP_ADD(p, n_pkts - n_pkts_ok); + for (; n_pkts_ok < n_pkts; n_pkts_ok++) { + struct rte_mbuf *pkt = pkts[n_pkts_ok]; + + rte_pktmbuf_free(pkt); + } + } else { + for (; pkts_mask;) { + uint32_t pkt_index = __builtin_ctzll(pkts_mask); + uint64_t pkt_mask = 1LLU << pkt_index; + struct rte_mbuf *pkt = pkts[pkt_index]; + + p->tx_buf[tx_buf_count++] = pkt; + RTE_PORT_KNI_WRITER_STATS_PKTS_IN_ADD(p, 1); + pkts_mask &= ~pkt_mask; + } + + p->tx_buf_count = tx_buf_count; + if (tx_buf_count >= p->tx_burst_sz) + send_burst(p); + } + + return 0; +} + +static int +rte_port_kni_writer_flush(void *port) +{ + struct rte_port_kni_writer *p = + port; + + if (p->tx_buf_count > 0) + send_burst(p); + + return 0; +} + +static int +rte_port_kni_writer_free(void *port) +{ + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: Port is NULL\n", __func__); + return -EINVAL; + } + + rte_port_kni_writer_flush(port); + rte_free(port); + + return 0; +} + +static int rte_port_kni_writer_stats_read(void *port, + struct rte_port_out_stats *stats, int clear) +{ + struct rte_port_kni_writer *p = + port; + + if (stats != NULL) + memcpy(stats, &p->stats, sizeof(p->stats)); + + if (clear) + memset(&p->stats, 0, sizeof(p->stats)); + + return 0; +} + +/* + * Port KNI Writer Nodrop + */ +#ifdef RTE_PORT_STATS_COLLECT + +#define 
RTE_PORT_KNI_WRITER_NODROP_STATS_PKTS_IN_ADD(port, val) \ + port->stats.n_pkts_in += val +#define RTE_PORT_KNI_WRITER_NODROP_STATS_PKTS_DROP_ADD(port, val) \ + port->stats.n_pkts_drop += val + +#else + +#define RTE_PORT_KNI_WRITER_NODROP_STATS_PKTS_IN_ADD(port, val) +#define RTE_PORT_KNI_WRITER_NODROP_STATS_PKTS_DROP_ADD(port, val) + +#endif + +struct rte_port_kni_writer_nodrop { + struct rte_port_out_stats stats; + + struct rte_mbuf *tx_buf[2 * RTE_PORT_IN_BURST_SIZE_MAX]; + uint32_t tx_burst_sz; + uint32_t tx_buf_count; + uint64_t bsz_mask; + uint64_t n_retries; + struct rte_kni *kni; +}; + +static void * +rte_port_kni_writer_nodrop_create(void *params, int socket_id) +{ + struct rte_port_kni_writer_nodrop_params *conf = + params; + struct rte_port_kni_writer_nodrop *port; + + /* Check input parameters */ + if ((conf == NULL) || + (conf->tx_burst_sz == 0) || + (conf->tx_burst_sz > RTE_PORT_IN_BURST_SIZE_MAX) || + (!rte_is_power_of_2(conf->tx_burst_sz))) { + RTE_LOG(ERR, PORT, "%s: Invalid input parameters\n", __func__); + return NULL; + } + + /* Memory allocation */ + port = rte_zmalloc_socket("PORT", sizeof(*port), + RTE_CACHE_LINE_SIZE, socket_id); + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: Failed to allocate port\n", __func__); + return NULL; + } + + /* Initialization */ + port->kni = conf->kni; + port->tx_burst_sz = conf->tx_burst_sz; + port->tx_buf_count = 0; + port->bsz_mask = 1LLU << (conf->tx_burst_sz - 1); + + /* + * When n_retries is 0 it means that we should wait for every packet to + * send no matter how many retries should it take. To limit number of + * branches in fast path, we use UINT64_MAX instead of branching. + */ + port->n_retries = (conf->n_retries == 0) ? 
UINT64_MAX : conf->n_retries; + + return port; +} + +static inline void +send_burst_nodrop(struct rte_port_kni_writer_nodrop *p) +{ + uint32_t nb_tx = 0, i; + + nb_tx = rte_kni_tx_burst(p->kni, p->tx_buf, p->tx_buf_count); + + /* We sent all the packets in a first try */ + if (nb_tx >= p->tx_buf_count) { + p->tx_buf_count = 0; + return; + } + + for (i = 0; i < p->n_retries; i++) { + nb_tx += rte_kni_tx_burst(p->kni, + p->tx_buf + nb_tx, + p->tx_buf_count - nb_tx); + + /* We sent all the packets in more than one try */ + if (nb_tx >= p->tx_buf_count) { + p->tx_buf_count = 0; + return; + } + } + + /* We didn't send the packets in maximum allowed attempts */ + RTE_PORT_KNI_WRITER_NODROP_STATS_PKTS_DROP_ADD(p, p->tx_buf_count - nb_tx); + for ( ; nb_tx < p->tx_buf_count; nb_tx++) + rte_pktmbuf_free(p->tx_buf[nb_tx]); + + p->tx_buf_count = 0; +} + +static int +rte_port_kni_writer_nodrop_tx(void *port, struct rte_mbuf *pkt) +{ + struct rte_port_kni_writer_nodrop *p = + port; + + p->tx_buf[p->tx_buf_count++] = pkt; + RTE_PORT_KNI_WRITER_STATS_PKTS_IN_ADD(p, 1); + if (p->tx_buf_count >= p->tx_burst_sz) + send_burst_nodrop(p); + + return 0; +} + +static int +rte_port_kni_writer_nodrop_tx_bulk(void *port, + struct rte_mbuf **pkts, + uint64_t pkts_mask) +{ + struct rte_port_kni_writer_nodrop *p = + port; + + uint64_t bsz_mask = p->bsz_mask; + uint32_t tx_buf_count = p->tx_buf_count; + uint64_t expr = (pkts_mask & (pkts_mask + 1)) | + ((pkts_mask & bsz_mask) ^ bsz_mask); + + if (expr == 0) { + uint64_t n_pkts = __builtin_popcountll(pkts_mask); + uint32_t n_pkts_ok; + + if (tx_buf_count) + send_burst_nodrop(p); + + RTE_PORT_KNI_WRITER_NODROP_STATS_PKTS_IN_ADD(p, n_pkts); + n_pkts_ok = rte_kni_tx_burst(p->kni, pkts, n_pkts); + + if (n_pkts_ok >= n_pkts) + return 0; + + /* + * If we didn't manage to send all packets in single burst, move + * remaining packets to the buffer and call send burst. 
+ */ + for (; n_pkts_ok < n_pkts; n_pkts_ok++) { + struct rte_mbuf *pkt = pkts[n_pkts_ok]; + p->tx_buf[p->tx_buf_count++] = pkt; + } + send_burst_nodrop(p); + } else { + for ( ; pkts_mask; ) { + uint32_t pkt_index = __builtin_ctzll(pkts_mask); + uint64_t pkt_mask = 1LLU << pkt_index; + struct rte_mbuf *pkt = pkts[pkt_index]; + + p->tx_buf[tx_buf_count++] = pkt; + RTE_PORT_KNI_WRITER_NODROP_STATS_PKTS_IN_ADD(p, 1); + pkts_mask &= ~pkt_mask; + } + + p->tx_buf_count = tx_buf_count; + if (tx_buf_count >= p->tx_burst_sz) + send_burst_nodrop(p); + } + + return 0; +} + +static int +rte_port_kni_writer_nodrop_flush(void *port) +{ + struct rte_port_kni_writer_nodrop *p = + port; + + if (p->tx_buf_count > 0) + send_burst_nodrop(p); + + return 0; +} + +static int +rte_port_kni_writer_nodrop_free(void *port) +{ + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: Port is NULL\n", __func__); + return -EINVAL; + } + + rte_port_kni_writer_nodrop_flush(port); + rte_free(port); + + return 0; +} + +static int rte_port_kni_writer_nodrop_stats_read(void *port, + struct rte_port_out_stats *stats, int clear) +{ + struct rte_port_kni_writer_nodrop *p = + port; + + if (stats != NULL) + memcpy(stats, &p->stats, sizeof(p->stats)); + + if (clear) + memset(&p->stats, 0, sizeof(p->stats)); + + return 0; +} + + +/* + * Summary of port operations + */ +struct rte_port_in_ops rte_port_kni_reader_ops = { + .f_create = rte_port_kni_reader_create, + .f_free = rte_port_kni_reader_free, + .f_rx = rte_port_kni_reader_rx, + .f_stats = rte_port_kni_reader_stats_read, +}; + +struct rte_port_out_ops rte_port_kni_writer_ops = { + .f_create = rte_port_kni_writer_create, + .f_free = rte_port_kni_writer_free, + .f_tx = rte_port_kni_writer_tx, + .f_tx_bulk = rte_port_kni_writer_tx_bulk, + .f_flush = rte_port_kni_writer_flush, + .f_stats = rte_port_kni_writer_stats_read, +}; + +struct rte_port_out_ops rte_port_kni_writer_nodrop_ops = { + .f_create = rte_port_kni_writer_nodrop_create, + .f_free = 
rte_port_kni_writer_nodrop_free, + .f_tx = rte_port_kni_writer_nodrop_tx, + .f_tx_bulk = rte_port_kni_writer_nodrop_tx_bulk, + .f_flush = rte_port_kni_writer_nodrop_flush, + .f_stats = rte_port_kni_writer_nodrop_stats_read, +}; diff --git a/src/seastar/dpdk/lib/librte_port/rte_port_kni.h b/src/seastar/dpdk/lib/librte_port/rte_port_kni.h new file mode 100644 index 00000000..4b60689c --- /dev/null +++ b/src/seastar/dpdk/lib/librte_port/rte_port_kni.h @@ -0,0 +1,95 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2016 Ethan Zhuang <zhuangwj@gmail.com>. + * Copyright(c) 2016 Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __INCLUDE_RTE_PORT_KNI_H__ +#define __INCLUDE_RTE_PORT_KNI_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @file + * RTE Port KNI Interface + * + * kni_reader: input port built on top of pre-initialized KNI interface + * kni_writer: output port built on top of pre-initialized KNI interface + * + ***/ + +#include <stdint.h> + +#include <rte_kni.h> + +#include "rte_port.h" + +/** kni_reader port parameters */ +struct rte_port_kni_reader_params { + /** KNI interface reference */ + struct rte_kni *kni; +}; + +/** kni_reader port operations */ +extern struct rte_port_in_ops rte_port_kni_reader_ops; + + +/** kni_writer port parameters */ +struct rte_port_kni_writer_params { + /** KNI interface reference */ + struct rte_kni *kni; + /** Burst size to KNI interface. */ + uint32_t tx_burst_sz; +}; + +/** kni_writer port operations */ +extern struct rte_port_out_ops rte_port_kni_writer_ops; + +/** kni_writer_nodrop port parameters */ +struct rte_port_kni_writer_nodrop_params { + /** KNI interface reference */ + struct rte_kni *kni; + /** Burst size to KNI interface. 
*/ + uint32_t tx_burst_sz; + /** Maximum number of retries, 0 for no limit */ + uint32_t n_retries; +}; + +/** kni_writer_nodrop port operations */ +extern struct rte_port_out_ops rte_port_kni_writer_nodrop_ops; + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/seastar/dpdk/lib/librte_port/rte_port_ras.c b/src/seastar/dpdk/lib/librte_port/rte_port_ras.c new file mode 100644 index 00000000..415fadd5 --- /dev/null +++ b/src/seastar/dpdk/lib/librte_port/rte_port_ras.c @@ -0,0 +1,359 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include <string.h> + +#include <rte_ether.h> +#include <rte_ip_frag.h> +#include <rte_cycles.h> +#include <rte_log.h> + +#include "rte_port_ras.h" + +#ifndef RTE_PORT_RAS_N_BUCKETS +#define RTE_PORT_RAS_N_BUCKETS 4094 +#endif + +#ifndef RTE_PORT_RAS_N_ENTRIES_PER_BUCKET +#define RTE_PORT_RAS_N_ENTRIES_PER_BUCKET 8 +#endif + +#ifndef RTE_PORT_RAS_N_ENTRIES +#define RTE_PORT_RAS_N_ENTRIES (RTE_PORT_RAS_N_BUCKETS * RTE_PORT_RAS_N_ENTRIES_PER_BUCKET) +#endif + +#ifdef RTE_PORT_STATS_COLLECT + +#define RTE_PORT_RING_WRITER_RAS_STATS_PKTS_IN_ADD(port, val) \ + port->stats.n_pkts_in += val +#define RTE_PORT_RING_WRITER_RAS_STATS_PKTS_DROP_ADD(port, val) \ + port->stats.n_pkts_drop += val + +#else + +#define RTE_PORT_RING_WRITER_RAS_STATS_PKTS_IN_ADD(port, val) +#define RTE_PORT_RING_WRITER_RAS_STATS_PKTS_DROP_ADD(port, val) + +#endif + +struct rte_port_ring_writer_ras; + +typedef void (*ras_op)( + struct rte_port_ring_writer_ras *p, + struct rte_mbuf *pkt); + +static void +process_ipv4(struct rte_port_ring_writer_ras *p, struct rte_mbuf *pkt); +static void +process_ipv6(struct rte_port_ring_writer_ras *p, struct rte_mbuf *pkt); + +struct rte_port_ring_writer_ras { + struct rte_port_out_stats stats; + + struct rte_mbuf *tx_buf[RTE_PORT_IN_BURST_SIZE_MAX]; + struct rte_ring *ring; + uint32_t tx_burst_sz; + uint32_t tx_buf_count; + struct rte_ip_frag_tbl *frag_tbl; + struct rte_ip_frag_death_row death_row; + + ras_op f_ras; +}; + 
+static void * +rte_port_ring_writer_ras_create(void *params, int socket_id, int is_ipv4) +{ + struct rte_port_ring_writer_ras_params *conf = + params; + struct rte_port_ring_writer_ras *port; + uint64_t frag_cycles; + + /* Check input parameters */ + if (conf == NULL) { + RTE_LOG(ERR, PORT, "%s: Parameter conf is NULL\n", __func__); + return NULL; + } + if (conf->ring == NULL) { + RTE_LOG(ERR, PORT, "%s: Parameter ring is NULL\n", __func__); + return NULL; + } + if ((conf->tx_burst_sz == 0) || + (conf->tx_burst_sz > RTE_PORT_IN_BURST_SIZE_MAX)) { + RTE_LOG(ERR, PORT, "%s: Parameter tx_burst_sz is invalid\n", + __func__); + return NULL; + } + + /* Memory allocation */ + port = rte_zmalloc_socket("PORT", sizeof(*port), + RTE_CACHE_LINE_SIZE, socket_id); + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: Failed to allocate socket\n", __func__); + return NULL; + } + + /* Create fragmentation table */ + frag_cycles = (rte_get_tsc_hz() + MS_PER_S - 1) / MS_PER_S * MS_PER_S; + frag_cycles *= 100; + + port->frag_tbl = rte_ip_frag_table_create( + RTE_PORT_RAS_N_BUCKETS, + RTE_PORT_RAS_N_ENTRIES_PER_BUCKET, + RTE_PORT_RAS_N_ENTRIES, + frag_cycles, + socket_id); + + if (port->frag_tbl == NULL) { + RTE_LOG(ERR, PORT, "%s: rte_ip_frag_table_create failed\n", + __func__); + rte_free(port); + return NULL; + } + + /* Initialization */ + port->ring = conf->ring; + port->tx_burst_sz = conf->tx_burst_sz; + port->tx_buf_count = 0; + + port->f_ras = (is_ipv4 == 1) ? 
process_ipv4 : process_ipv6; + + return port; +} + +static void * +rte_port_ring_writer_ipv4_ras_create(void *params, int socket_id) +{ + return rte_port_ring_writer_ras_create(params, socket_id, 1); +} + +static void * +rte_port_ring_writer_ipv6_ras_create(void *params, int socket_id) +{ + return rte_port_ring_writer_ras_create(params, socket_id, 0); +} + +static inline void +send_burst(struct rte_port_ring_writer_ras *p) +{ + uint32_t nb_tx; + + nb_tx = rte_ring_sp_enqueue_burst(p->ring, (void **)p->tx_buf, + p->tx_buf_count, NULL); + + RTE_PORT_RING_WRITER_RAS_STATS_PKTS_DROP_ADD(p, p->tx_buf_count - nb_tx); + for ( ; nb_tx < p->tx_buf_count; nb_tx++) + rte_pktmbuf_free(p->tx_buf[nb_tx]); + + p->tx_buf_count = 0; +} + +static void +process_ipv4(struct rte_port_ring_writer_ras *p, struct rte_mbuf *pkt) +{ + /* Assume there is no ethernet header */ + struct ipv4_hdr *pkt_hdr = rte_pktmbuf_mtod(pkt, struct ipv4_hdr *); + + /* Get "More fragments" flag and fragment offset */ + uint16_t frag_field = rte_be_to_cpu_16(pkt_hdr->fragment_offset); + uint16_t frag_offset = (uint16_t)(frag_field & IPV4_HDR_OFFSET_MASK); + uint16_t frag_flag = (uint16_t)(frag_field & IPV4_HDR_MF_FLAG); + + /* If it is a fragmented packet, then try to reassemble */ + if ((frag_flag == 0) && (frag_offset == 0)) + p->tx_buf[p->tx_buf_count++] = pkt; + else { + struct rte_mbuf *mo; + struct rte_ip_frag_tbl *tbl = p->frag_tbl; + struct rte_ip_frag_death_row *dr = &p->death_row; + + pkt->l3_len = sizeof(*pkt_hdr); + + /* Process this fragment */ + mo = rte_ipv4_frag_reassemble_packet(tbl, dr, pkt, rte_rdtsc(), + pkt_hdr); + if (mo != NULL) + p->tx_buf[p->tx_buf_count++] = mo; + + rte_ip_frag_free_death_row(&p->death_row, 3); + } +} + +static void +process_ipv6(struct rte_port_ring_writer_ras *p, struct rte_mbuf *pkt) +{ + /* Assume there is no ethernet header */ + struct ipv6_hdr *pkt_hdr = rte_pktmbuf_mtod(pkt, struct ipv6_hdr *); + + struct ipv6_extension_fragment *frag_hdr; + uint16_t frag_data 
= 0; + frag_hdr = rte_ipv6_frag_get_ipv6_fragment_header(pkt_hdr); + if (frag_hdr != NULL) + frag_data = rte_be_to_cpu_16(frag_hdr->frag_data); + + /* If it is a fragmented packet, then try to reassemble */ + if ((frag_data & RTE_IPV6_FRAG_USED_MASK) == 0) + p->tx_buf[p->tx_buf_count++] = pkt; + else { + struct rte_mbuf *mo; + struct rte_ip_frag_tbl *tbl = p->frag_tbl; + struct rte_ip_frag_death_row *dr = &p->death_row; + + pkt->l3_len = sizeof(*pkt_hdr) + sizeof(*frag_hdr); + + /* Process this fragment */ + mo = rte_ipv6_frag_reassemble_packet(tbl, dr, pkt, rte_rdtsc(), pkt_hdr, + frag_hdr); + if (mo != NULL) + p->tx_buf[p->tx_buf_count++] = mo; + + rte_ip_frag_free_death_row(&p->death_row, 3); + } +} + +static int +rte_port_ring_writer_ras_tx(void *port, struct rte_mbuf *pkt) +{ + struct rte_port_ring_writer_ras *p = + port; + + RTE_PORT_RING_WRITER_RAS_STATS_PKTS_IN_ADD(p, 1); + p->f_ras(p, pkt); + if (p->tx_buf_count >= p->tx_burst_sz) + send_burst(p); + + return 0; +} + +static int +rte_port_ring_writer_ras_tx_bulk(void *port, + struct rte_mbuf **pkts, + uint64_t pkts_mask) +{ + struct rte_port_ring_writer_ras *p = + port; + + if ((pkts_mask & (pkts_mask + 1)) == 0) { + uint64_t n_pkts = __builtin_popcountll(pkts_mask); + uint32_t i; + + for (i = 0; i < n_pkts; i++) { + struct rte_mbuf *pkt = pkts[i]; + + RTE_PORT_RING_WRITER_RAS_STATS_PKTS_IN_ADD(p, 1); + p->f_ras(p, pkt); + if (p->tx_buf_count >= p->tx_burst_sz) + send_burst(p); + } + } else { + for ( ; pkts_mask; ) { + uint32_t pkt_index = __builtin_ctzll(pkts_mask); + uint64_t pkt_mask = 1LLU << pkt_index; + struct rte_mbuf *pkt = pkts[pkt_index]; + + RTE_PORT_RING_WRITER_RAS_STATS_PKTS_IN_ADD(p, 1); + p->f_ras(p, pkt); + if (p->tx_buf_count >= p->tx_burst_sz) + send_burst(p); + + pkts_mask &= ~pkt_mask; + } + } + + return 0; +} + +static int +rte_port_ring_writer_ras_flush(void *port) +{ + struct rte_port_ring_writer_ras *p = + port; + + if (p->tx_buf_count > 0) + send_burst(p); + + return 0; +} + +static 
int +rte_port_ring_writer_ras_free(void *port) +{ + struct rte_port_ring_writer_ras *p = + port; + + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: Parameter port is NULL\n", __func__); + return -1; + } + + rte_port_ring_writer_ras_flush(port); + rte_ip_frag_table_destroy(p->frag_tbl); + rte_free(port); + + return 0; +} + +static int +rte_port_ras_writer_stats_read(void *port, + struct rte_port_out_stats *stats, int clear) +{ + struct rte_port_ring_writer_ras *p = + port; + + if (stats != NULL) + memcpy(stats, &p->stats, sizeof(p->stats)); + + if (clear) + memset(&p->stats, 0, sizeof(p->stats)); + + return 0; +} + +/* + * Summary of port operations + */ +struct rte_port_out_ops rte_port_ring_writer_ipv4_ras_ops = { + .f_create = rte_port_ring_writer_ipv4_ras_create, + .f_free = rte_port_ring_writer_ras_free, + .f_tx = rte_port_ring_writer_ras_tx, + .f_tx_bulk = rte_port_ring_writer_ras_tx_bulk, + .f_flush = rte_port_ring_writer_ras_flush, + .f_stats = rte_port_ras_writer_stats_read, +}; + +struct rte_port_out_ops rte_port_ring_writer_ipv6_ras_ops = { + .f_create = rte_port_ring_writer_ipv6_ras_create, + .f_free = rte_port_ring_writer_ras_free, + .f_tx = rte_port_ring_writer_ras_tx, + .f_tx_bulk = rte_port_ring_writer_ras_tx_bulk, + .f_flush = rte_port_ring_writer_ras_flush, + .f_stats = rte_port_ras_writer_stats_read, +}; diff --git a/src/seastar/dpdk/lib/librte_port/rte_port_ras.h b/src/seastar/dpdk/lib/librte_port/rte_port_ras.h new file mode 100644 index 00000000..5a16f831 --- /dev/null +++ b/src/seastar/dpdk/lib/librte_port/rte_port_ras.h @@ -0,0 +1,90 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __INCLUDE_RTE_PORT_RAS_H__ +#define __INCLUDE_RTE_PORT_RAS_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @file + * RTE Port for IPv4 Reassembly + * + * This port is built on top of pre-initialized single producer rte_ring. In + * order to minimize the amount of packets stored in the ring at any given + * time, the IP reassembly functionality is executed on ring write operation, + * hence this port is implemented as an output port. A regular ring_reader port + * can be created to read from the same ring. + * + * The packets written to the ring are either complete IP datagrams or IP + * fragments. The packets read from the ring are all complete IP datagrams, + * either jumbo frames (i.e. IP packets with length bigger than MTU) or not. 
+ * The complete IP datagrams written to the ring are not changed. The IP + * fragments written to the ring are first reassembled and into complete IP + * datagrams or dropped on error or IP reassembly time-out. + * + ***/ + +#include <stdint.h> + +#include <rte_ring.h> + +#include "rte_port.h" + +/** ring_writer_ipv4_ras port parameters */ +struct rte_port_ring_writer_ras_params { + /** Underlying single consumer ring that has to be pre-initialized. */ + struct rte_ring *ring; + + /** Recommended burst size to ring. The actual burst size can be bigger + or smaller than this value. */ + uint32_t tx_burst_sz; +}; + +#define rte_port_ring_writer_ipv4_ras_params rte_port_ring_writer_ras_params + +#define rte_port_ring_writer_ipv6_ras_params rte_port_ring_writer_ras_params + +/** ring_writer_ipv4_ras port operations */ +extern struct rte_port_out_ops rte_port_ring_writer_ipv4_ras_ops; + +/** ring_writer_ipv6_ras port operations */ +extern struct rte_port_out_ops rte_port_ring_writer_ipv6_ras_ops; + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/seastar/dpdk/lib/librte_port/rte_port_ring.c b/src/seastar/dpdk/lib/librte_port/rte_port_ring.c new file mode 100644 index 00000000..64bd965f --- /dev/null +++ b/src/seastar/dpdk/lib/librte_port/rte_port_ring.c @@ -0,0 +1,816 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#include <string.h> +#include <stdint.h> + +#include <rte_mbuf.h> +#include <rte_ring.h> +#include <rte_malloc.h> + +#include "rte_port_ring.h" + +/* + * Port RING Reader + */ +#ifdef RTE_PORT_STATS_COLLECT + +#define RTE_PORT_RING_READER_STATS_PKTS_IN_ADD(port, val) \ + port->stats.n_pkts_in += val +#define RTE_PORT_RING_READER_STATS_PKTS_DROP_ADD(port, val) \ + port->stats.n_pkts_drop += val + +#else + +#define RTE_PORT_RING_READER_STATS_PKTS_IN_ADD(port, val) +#define RTE_PORT_RING_READER_STATS_PKTS_DROP_ADD(port, val) + +#endif + +struct rte_port_ring_reader { + struct rte_port_in_stats stats; + + struct rte_ring *ring; +}; + +static void * +rte_port_ring_reader_create_internal(void *params, int socket_id, + uint32_t is_multi) +{ + struct rte_port_ring_reader_params *conf = + params; + struct rte_port_ring_reader *port; + + /* Check input parameters */ + if ((conf == NULL) || + (conf->ring == NULL) || + (conf->ring->cons.single && is_multi) || + (!(conf->ring->cons.single) && !is_multi)) { + RTE_LOG(ERR, PORT, "%s: Invalid Parameters\n", __func__); + return NULL; + } + + /* Memory allocation */ + port = rte_zmalloc_socket("PORT", sizeof(*port), + RTE_CACHE_LINE_SIZE, socket_id); + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: Failed to allocate port\n", __func__); + return NULL; + } + + /* Initialization */ + port->ring = conf->ring; + + return port; +} + +static void * +rte_port_ring_reader_create(void *params, int socket_id) +{ + return rte_port_ring_reader_create_internal(params, socket_id, 0); +} + +static void * +rte_port_ring_multi_reader_create(void *params, int socket_id) +{ + return rte_port_ring_reader_create_internal(params, socket_id, 1); +} + +static int +rte_port_ring_reader_rx(void *port, struct rte_mbuf **pkts, uint32_t n_pkts) +{ + struct rte_port_ring_reader *p = port; + uint32_t nb_rx; + + nb_rx = rte_ring_sc_dequeue_burst(p->ring, (void **) pkts, + n_pkts, NULL); + RTE_PORT_RING_READER_STATS_PKTS_IN_ADD(p, nb_rx); + + return nb_rx; +} 
+ +static int +rte_port_ring_multi_reader_rx(void *port, struct rte_mbuf **pkts, + uint32_t n_pkts) +{ + struct rte_port_ring_reader *p = port; + uint32_t nb_rx; + + nb_rx = rte_ring_mc_dequeue_burst(p->ring, (void **) pkts, + n_pkts, NULL); + RTE_PORT_RING_READER_STATS_PKTS_IN_ADD(p, nb_rx); + + return nb_rx; +} + +static int +rte_port_ring_reader_free(void *port) +{ + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: port is NULL\n", __func__); + return -EINVAL; + } + + rte_free(port); + + return 0; +} + +static int +rte_port_ring_reader_stats_read(void *port, + struct rte_port_in_stats *stats, int clear) +{ + struct rte_port_ring_reader *p = + port; + + if (stats != NULL) + memcpy(stats, &p->stats, sizeof(p->stats)); + + if (clear) + memset(&p->stats, 0, sizeof(p->stats)); + + return 0; +} + +/* + * Port RING Writer + */ +#ifdef RTE_PORT_STATS_COLLECT + +#define RTE_PORT_RING_WRITER_STATS_PKTS_IN_ADD(port, val) \ + port->stats.n_pkts_in += val +#define RTE_PORT_RING_WRITER_STATS_PKTS_DROP_ADD(port, val) \ + port->stats.n_pkts_drop += val + +#else + +#define RTE_PORT_RING_WRITER_STATS_PKTS_IN_ADD(port, val) +#define RTE_PORT_RING_WRITER_STATS_PKTS_DROP_ADD(port, val) + +#endif + +struct rte_port_ring_writer { + struct rte_port_out_stats stats; + + struct rte_mbuf *tx_buf[2 * RTE_PORT_IN_BURST_SIZE_MAX]; + struct rte_ring *ring; + uint32_t tx_burst_sz; + uint32_t tx_buf_count; + uint64_t bsz_mask; + uint32_t is_multi; +}; + +static void * +rte_port_ring_writer_create_internal(void *params, int socket_id, + uint32_t is_multi) +{ + struct rte_port_ring_writer_params *conf = + params; + struct rte_port_ring_writer *port; + + /* Check input parameters */ + if ((conf == NULL) || + (conf->ring == NULL) || + (conf->ring->prod.single && is_multi) || + (!(conf->ring->prod.single) && !is_multi) || + (conf->tx_burst_sz > RTE_PORT_IN_BURST_SIZE_MAX)) { + RTE_LOG(ERR, PORT, "%s: Invalid Parameters\n", __func__); + return NULL; + } + + /* Memory allocation */ + port = 
rte_zmalloc_socket("PORT", sizeof(*port), + RTE_CACHE_LINE_SIZE, socket_id); + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: Failed to allocate port\n", __func__); + return NULL; + } + + /* Initialization */ + port->ring = conf->ring; + port->tx_burst_sz = conf->tx_burst_sz; + port->tx_buf_count = 0; + port->bsz_mask = 1LLU << (conf->tx_burst_sz - 1); + port->is_multi = is_multi; + + return port; +} + +static void * +rte_port_ring_writer_create(void *params, int socket_id) +{ + return rte_port_ring_writer_create_internal(params, socket_id, 0); +} + +static void * +rte_port_ring_multi_writer_create(void *params, int socket_id) +{ + return rte_port_ring_writer_create_internal(params, socket_id, 1); +} + +static inline void +send_burst(struct rte_port_ring_writer *p) +{ + uint32_t nb_tx; + + nb_tx = rte_ring_sp_enqueue_burst(p->ring, (void **)p->tx_buf, + p->tx_buf_count, NULL); + + RTE_PORT_RING_WRITER_STATS_PKTS_DROP_ADD(p, p->tx_buf_count - nb_tx); + for ( ; nb_tx < p->tx_buf_count; nb_tx++) + rte_pktmbuf_free(p->tx_buf[nb_tx]); + + p->tx_buf_count = 0; +} + +static inline void +send_burst_mp(struct rte_port_ring_writer *p) +{ + uint32_t nb_tx; + + nb_tx = rte_ring_mp_enqueue_burst(p->ring, (void **)p->tx_buf, + p->tx_buf_count, NULL); + + RTE_PORT_RING_WRITER_STATS_PKTS_DROP_ADD(p, p->tx_buf_count - nb_tx); + for ( ; nb_tx < p->tx_buf_count; nb_tx++) + rte_pktmbuf_free(p->tx_buf[nb_tx]); + + p->tx_buf_count = 0; +} + +static int +rte_port_ring_writer_tx(void *port, struct rte_mbuf *pkt) +{ + struct rte_port_ring_writer *p = port; + + p->tx_buf[p->tx_buf_count++] = pkt; + RTE_PORT_RING_WRITER_STATS_PKTS_IN_ADD(p, 1); + if (p->tx_buf_count >= p->tx_burst_sz) + send_burst(p); + + return 0; +} + +static int +rte_port_ring_multi_writer_tx(void *port, struct rte_mbuf *pkt) +{ + struct rte_port_ring_writer *p = port; + + p->tx_buf[p->tx_buf_count++] = pkt; + RTE_PORT_RING_WRITER_STATS_PKTS_IN_ADD(p, 1); + if (p->tx_buf_count >= p->tx_burst_sz) + send_burst_mp(p); + + 
return 0; +} + +static inline int __attribute__((always_inline)) +rte_port_ring_writer_tx_bulk_internal(void *port, + struct rte_mbuf **pkts, + uint64_t pkts_mask, + uint32_t is_multi) +{ + struct rte_port_ring_writer *p = + port; + + uint64_t bsz_mask = p->bsz_mask; + uint32_t tx_buf_count = p->tx_buf_count; + uint64_t expr = (pkts_mask & (pkts_mask + 1)) | + ((pkts_mask & bsz_mask) ^ bsz_mask); + + if (expr == 0) { + uint64_t n_pkts = __builtin_popcountll(pkts_mask); + uint32_t n_pkts_ok; + + if (tx_buf_count) { + if (is_multi) + send_burst_mp(p); + else + send_burst(p); + } + + RTE_PORT_RING_WRITER_STATS_PKTS_IN_ADD(p, n_pkts); + if (is_multi) + n_pkts_ok = rte_ring_mp_enqueue_burst(p->ring, + (void **)pkts, n_pkts, NULL); + else + n_pkts_ok = rte_ring_sp_enqueue_burst(p->ring, + (void **)pkts, n_pkts, NULL); + + RTE_PORT_RING_WRITER_STATS_PKTS_DROP_ADD(p, n_pkts - n_pkts_ok); + for ( ; n_pkts_ok < n_pkts; n_pkts_ok++) { + struct rte_mbuf *pkt = pkts[n_pkts_ok]; + + rte_pktmbuf_free(pkt); + } + } else { + for ( ; pkts_mask; ) { + uint32_t pkt_index = __builtin_ctzll(pkts_mask); + uint64_t pkt_mask = 1LLU << pkt_index; + struct rte_mbuf *pkt = pkts[pkt_index]; + + p->tx_buf[tx_buf_count++] = pkt; + RTE_PORT_RING_WRITER_STATS_PKTS_IN_ADD(p, 1); + pkts_mask &= ~pkt_mask; + } + + p->tx_buf_count = tx_buf_count; + if (tx_buf_count >= p->tx_burst_sz) { + if (is_multi) + send_burst_mp(p); + else + send_burst(p); + } + } + + return 0; +} + +static int +rte_port_ring_writer_tx_bulk(void *port, + struct rte_mbuf **pkts, + uint64_t pkts_mask) +{ + return rte_port_ring_writer_tx_bulk_internal(port, pkts, pkts_mask, 0); +} + +static int +rte_port_ring_multi_writer_tx_bulk(void *port, + struct rte_mbuf **pkts, + uint64_t pkts_mask) +{ + return rte_port_ring_writer_tx_bulk_internal(port, pkts, pkts_mask, 1); +} + +static int +rte_port_ring_writer_flush(void *port) +{ + struct rte_port_ring_writer *p = port; + + if (p->tx_buf_count > 0) + send_burst(p); + + return 0; +} + 
+static int +rte_port_ring_multi_writer_flush(void *port) +{ + struct rte_port_ring_writer *p = port; + + if (p->tx_buf_count > 0) + send_burst_mp(p); + + return 0; +} + +static int +rte_port_ring_writer_free(void *port) +{ + struct rte_port_ring_writer *p = port; + + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: Port is NULL\n", __func__); + return -EINVAL; + } + + if (p->is_multi) + rte_port_ring_multi_writer_flush(port); + else + rte_port_ring_writer_flush(port); + + rte_free(port); + + return 0; +} + +static int +rte_port_ring_writer_stats_read(void *port, + struct rte_port_out_stats *stats, int clear) +{ + struct rte_port_ring_writer *p = + port; + + if (stats != NULL) + memcpy(stats, &p->stats, sizeof(p->stats)); + + if (clear) + memset(&p->stats, 0, sizeof(p->stats)); + + return 0; +} + +/* + * Port RING Writer Nodrop + */ +#ifdef RTE_PORT_STATS_COLLECT + +#define RTE_PORT_RING_WRITER_NODROP_STATS_PKTS_IN_ADD(port, val) \ + port->stats.n_pkts_in += val +#define RTE_PORT_RING_WRITER_NODROP_STATS_PKTS_DROP_ADD(port, val) \ + port->stats.n_pkts_drop += val + +#else + +#define RTE_PORT_RING_WRITER_NODROP_STATS_PKTS_IN_ADD(port, val) +#define RTE_PORT_RING_WRITER_NODROP_STATS_PKTS_DROP_ADD(port, val) + +#endif + +struct rte_port_ring_writer_nodrop { + struct rte_port_out_stats stats; + + struct rte_mbuf *tx_buf[2 * RTE_PORT_IN_BURST_SIZE_MAX]; + struct rte_ring *ring; + uint32_t tx_burst_sz; + uint32_t tx_buf_count; + uint64_t bsz_mask; + uint64_t n_retries; + uint32_t is_multi; +}; + +static void * +rte_port_ring_writer_nodrop_create_internal(void *params, int socket_id, + uint32_t is_multi) +{ + struct rte_port_ring_writer_nodrop_params *conf = + params; + struct rte_port_ring_writer_nodrop *port; + + /* Check input parameters */ + if ((conf == NULL) || + (conf->ring == NULL) || + (conf->ring->prod.single && is_multi) || + (!(conf->ring->prod.single) && !is_multi) || + (conf->tx_burst_sz > RTE_PORT_IN_BURST_SIZE_MAX)) { + RTE_LOG(ERR, PORT, "%s: Invalid 
Parameters\n", __func__); + return NULL; + } + + /* Memory allocation */ + port = rte_zmalloc_socket("PORT", sizeof(*port), + RTE_CACHE_LINE_SIZE, socket_id); + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: Failed to allocate port\n", __func__); + return NULL; + } + + /* Initialization */ + port->ring = conf->ring; + port->tx_burst_sz = conf->tx_burst_sz; + port->tx_buf_count = 0; + port->bsz_mask = 1LLU << (conf->tx_burst_sz - 1); + port->is_multi = is_multi; + + /* + * When n_retries is 0 it means that we should wait for every packet to + * send no matter how many retries should it take. To limit number of + * branches in fast path, we use UINT64_MAX instead of branching. + */ + port->n_retries = (conf->n_retries == 0) ? UINT64_MAX : conf->n_retries; + + return port; +} + +static void * +rte_port_ring_writer_nodrop_create(void *params, int socket_id) +{ + return rte_port_ring_writer_nodrop_create_internal(params, socket_id, 0); +} + +static void * +rte_port_ring_multi_writer_nodrop_create(void *params, int socket_id) +{ + return rte_port_ring_writer_nodrop_create_internal(params, socket_id, 1); +} + +static inline void +send_burst_nodrop(struct rte_port_ring_writer_nodrop *p) +{ + uint32_t nb_tx = 0, i; + + nb_tx = rte_ring_sp_enqueue_burst(p->ring, (void **)p->tx_buf, + p->tx_buf_count, NULL); + + /* We sent all the packets in a first try */ + if (nb_tx >= p->tx_buf_count) { + p->tx_buf_count = 0; + return; + } + + for (i = 0; i < p->n_retries; i++) { + nb_tx += rte_ring_sp_enqueue_burst(p->ring, + (void **) (p->tx_buf + nb_tx), + p->tx_buf_count - nb_tx, NULL); + + /* We sent all the packets in more than one try */ + if (nb_tx >= p->tx_buf_count) { + p->tx_buf_count = 0; + return; + } + } + + /* We didn't send the packets in maximum allowed attempts */ + RTE_PORT_RING_WRITER_NODROP_STATS_PKTS_DROP_ADD(p, p->tx_buf_count - nb_tx); + for ( ; nb_tx < p->tx_buf_count; nb_tx++) + rte_pktmbuf_free(p->tx_buf[nb_tx]); + + p->tx_buf_count = 0; +} + +static inline void 
+send_burst_mp_nodrop(struct rte_port_ring_writer_nodrop *p) +{ + uint32_t nb_tx = 0, i; + + nb_tx = rte_ring_mp_enqueue_burst(p->ring, (void **)p->tx_buf, + p->tx_buf_count, NULL); + + /* We sent all the packets in a first try */ + if (nb_tx >= p->tx_buf_count) { + p->tx_buf_count = 0; + return; + } + + for (i = 0; i < p->n_retries; i++) { + nb_tx += rte_ring_mp_enqueue_burst(p->ring, + (void **) (p->tx_buf + nb_tx), + p->tx_buf_count - nb_tx, NULL); + + /* We sent all the packets in more than one try */ + if (nb_tx >= p->tx_buf_count) { + p->tx_buf_count = 0; + return; + } + } + + /* We didn't send the packets in maximum allowed attempts */ + RTE_PORT_RING_WRITER_NODROP_STATS_PKTS_DROP_ADD(p, p->tx_buf_count - nb_tx); + for ( ; nb_tx < p->tx_buf_count; nb_tx++) + rte_pktmbuf_free(p->tx_buf[nb_tx]); + + p->tx_buf_count = 0; +} + +static int +rte_port_ring_writer_nodrop_tx(void *port, struct rte_mbuf *pkt) +{ + struct rte_port_ring_writer_nodrop *p = + port; + + p->tx_buf[p->tx_buf_count++] = pkt; + RTE_PORT_RING_WRITER_NODROP_STATS_PKTS_IN_ADD(p, 1); + if (p->tx_buf_count >= p->tx_burst_sz) + send_burst_nodrop(p); + + return 0; +} + +static int +rte_port_ring_multi_writer_nodrop_tx(void *port, struct rte_mbuf *pkt) +{ + struct rte_port_ring_writer_nodrop *p = + port; + + p->tx_buf[p->tx_buf_count++] = pkt; + RTE_PORT_RING_WRITER_NODROP_STATS_PKTS_IN_ADD(p, 1); + if (p->tx_buf_count >= p->tx_burst_sz) + send_burst_mp_nodrop(p); + + return 0; +} + +static inline int __attribute__((always_inline)) +rte_port_ring_writer_nodrop_tx_bulk_internal(void *port, + struct rte_mbuf **pkts, + uint64_t pkts_mask, + uint32_t is_multi) +{ + struct rte_port_ring_writer_nodrop *p = + port; + + uint64_t bsz_mask = p->bsz_mask; + uint32_t tx_buf_count = p->tx_buf_count; + uint64_t expr = (pkts_mask & (pkts_mask + 1)) | + ((pkts_mask & bsz_mask) ^ bsz_mask); + + if (expr == 0) { + uint64_t n_pkts = __builtin_popcountll(pkts_mask); + uint32_t n_pkts_ok; + + if (tx_buf_count) { + if 
(is_multi) + send_burst_mp_nodrop(p); + else + send_burst_nodrop(p); + } + + RTE_PORT_RING_WRITER_NODROP_STATS_PKTS_IN_ADD(p, n_pkts); + if (is_multi) + n_pkts_ok = + rte_ring_mp_enqueue_burst(p->ring, + (void **)pkts, n_pkts, NULL); + else + n_pkts_ok = + rte_ring_sp_enqueue_burst(p->ring, + (void **)pkts, n_pkts, NULL); + + if (n_pkts_ok >= n_pkts) + return 0; + + /* + * If we didn't manage to send all packets in single burst, move + * remaining packets to the buffer and call send burst. + */ + for (; n_pkts_ok < n_pkts; n_pkts_ok++) { + struct rte_mbuf *pkt = pkts[n_pkts_ok]; + + p->tx_buf[p->tx_buf_count++] = pkt; + } + if (is_multi) + send_burst_mp_nodrop(p); + else + send_burst_nodrop(p); + } else { + for ( ; pkts_mask; ) { + uint32_t pkt_index = __builtin_ctzll(pkts_mask); + uint64_t pkt_mask = 1LLU << pkt_index; + struct rte_mbuf *pkt = pkts[pkt_index]; + + p->tx_buf[tx_buf_count++] = pkt; + RTE_PORT_RING_WRITER_NODROP_STATS_PKTS_IN_ADD(p, 1); + pkts_mask &= ~pkt_mask; + } + + p->tx_buf_count = tx_buf_count; + if (tx_buf_count >= p->tx_burst_sz) { + if (is_multi) + send_burst_mp_nodrop(p); + else + send_burst_nodrop(p); + } + } + + return 0; +} + +static int +rte_port_ring_writer_nodrop_tx_bulk(void *port, + struct rte_mbuf **pkts, + uint64_t pkts_mask) +{ + return + rte_port_ring_writer_nodrop_tx_bulk_internal(port, pkts, pkts_mask, 0); +} + +static int +rte_port_ring_multi_writer_nodrop_tx_bulk(void *port, + struct rte_mbuf **pkts, + uint64_t pkts_mask) +{ + return + rte_port_ring_writer_nodrop_tx_bulk_internal(port, pkts, pkts_mask, 1); +} + +static int +rte_port_ring_writer_nodrop_flush(void *port) +{ + struct rte_port_ring_writer_nodrop *p = + port; + + if (p->tx_buf_count > 0) + send_burst_nodrop(p); + + return 0; +} + +static int +rte_port_ring_multi_writer_nodrop_flush(void *port) +{ + struct rte_port_ring_writer_nodrop *p = + port; + + if (p->tx_buf_count > 0) + send_burst_mp_nodrop(p); + + return 0; +} + +static int 
+rte_port_ring_writer_nodrop_free(void *port) +{ + struct rte_port_ring_writer_nodrop *p = + port; + + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: Port is NULL\n", __func__); + return -EINVAL; + } + + if (p->is_multi) + rte_port_ring_multi_writer_nodrop_flush(port); + else + rte_port_ring_writer_nodrop_flush(port); + + rte_free(port); + + return 0; +} + +static int +rte_port_ring_writer_nodrop_stats_read(void *port, + struct rte_port_out_stats *stats, int clear) +{ + struct rte_port_ring_writer_nodrop *p = + port; + + if (stats != NULL) + memcpy(stats, &p->stats, sizeof(p->stats)); + + if (clear) + memset(&p->stats, 0, sizeof(p->stats)); + + return 0; +} + +/* + * Summary of port operations + */ +struct rte_port_in_ops rte_port_ring_reader_ops = { + .f_create = rte_port_ring_reader_create, + .f_free = rte_port_ring_reader_free, + .f_rx = rte_port_ring_reader_rx, + .f_stats = rte_port_ring_reader_stats_read, +}; + +struct rte_port_out_ops rte_port_ring_writer_ops = { + .f_create = rte_port_ring_writer_create, + .f_free = rte_port_ring_writer_free, + .f_tx = rte_port_ring_writer_tx, + .f_tx_bulk = rte_port_ring_writer_tx_bulk, + .f_flush = rte_port_ring_writer_flush, + .f_stats = rte_port_ring_writer_stats_read, +}; + +struct rte_port_out_ops rte_port_ring_writer_nodrop_ops = { + .f_create = rte_port_ring_writer_nodrop_create, + .f_free = rte_port_ring_writer_nodrop_free, + .f_tx = rte_port_ring_writer_nodrop_tx, + .f_tx_bulk = rte_port_ring_writer_nodrop_tx_bulk, + .f_flush = rte_port_ring_writer_nodrop_flush, + .f_stats = rte_port_ring_writer_nodrop_stats_read, +}; + +struct rte_port_in_ops rte_port_ring_multi_reader_ops = { + .f_create = rte_port_ring_multi_reader_create, + .f_free = rte_port_ring_reader_free, + .f_rx = rte_port_ring_multi_reader_rx, + .f_stats = rte_port_ring_reader_stats_read, +}; + +struct rte_port_out_ops rte_port_ring_multi_writer_ops = { + .f_create = rte_port_ring_multi_writer_create, + .f_free = rte_port_ring_writer_free, + .f_tx = 
rte_port_ring_multi_writer_tx, + .f_tx_bulk = rte_port_ring_multi_writer_tx_bulk, + .f_flush = rte_port_ring_multi_writer_flush, + .f_stats = rte_port_ring_writer_stats_read, +}; + +struct rte_port_out_ops rte_port_ring_multi_writer_nodrop_ops = { + .f_create = rte_port_ring_multi_writer_nodrop_create, + .f_free = rte_port_ring_writer_nodrop_free, + .f_tx = rte_port_ring_multi_writer_nodrop_tx, + .f_tx_bulk = rte_port_ring_multi_writer_nodrop_tx_bulk, + .f_flush = rte_port_ring_multi_writer_nodrop_flush, + .f_stats = rte_port_ring_writer_nodrop_stats_read, +}; diff --git a/src/seastar/dpdk/lib/librte_port/rte_port_ring.h b/src/seastar/dpdk/lib/librte_port/rte_port_ring.h new file mode 100644 index 00000000..de377d28 --- /dev/null +++ b/src/seastar/dpdk/lib/librte_port/rte_port_ring.h @@ -0,0 +1,123 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __INCLUDE_RTE_PORT_RING_H__ +#define __INCLUDE_RTE_PORT_RING_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @file + * RTE Port Ring + * + * ring_reader: + * input port built on top of pre-initialized single consumer ring + * ring_writer: + * output port built on top of pre-initialized single producer ring + * ring_multi_reader: + * input port built on top of pre-initialized multi consumers ring + * ring_multi_writer: + * output port built on top of pre-initialized multi producers ring + * + ***/ + +#include <stdint.h> + +#include <rte_ring.h> + +#include "rte_port.h" + +/** ring_reader port parameters */ +struct rte_port_ring_reader_params { + /** Underlying consumer ring that has to be pre-initialized */ + struct rte_ring *ring; +}; + +/** ring_reader port operations */ +extern struct rte_port_in_ops rte_port_ring_reader_ops; + +/** ring_writer port parameters */ +struct rte_port_ring_writer_params { + /** Underlying producer ring that has to be pre-initialized */ + struct rte_ring *ring; + + /** Recommended burst size to ring. The actual burst size can be + bigger or smaller than this value. 
*/ + uint32_t tx_burst_sz; +}; + +/** ring_writer port operations */ +extern struct rte_port_out_ops rte_port_ring_writer_ops; + +/** ring_writer_nodrop port parameters */ +struct rte_port_ring_writer_nodrop_params { + /** Underlying producer ring that has to be pre-initialized */ + struct rte_ring *ring; + + /** Recommended burst size to ring. The actual burst size can be + bigger or smaller than this value. */ + uint32_t tx_burst_sz; + + /** Maximum number of retries, 0 for no limit */ + uint32_t n_retries; +}; + +/** ring_writer_nodrop port operations */ +extern struct rte_port_out_ops rte_port_ring_writer_nodrop_ops; + +/** ring_multi_reader port parameters */ +#define rte_port_ring_multi_reader_params rte_port_ring_reader_params + +/** ring_multi_reader port operations */ +extern struct rte_port_in_ops rte_port_ring_multi_reader_ops; + +/** ring_multi_writer port parameters */ +#define rte_port_ring_multi_writer_params rte_port_ring_writer_params + +/** ring_multi_writer port operations */ +extern struct rte_port_out_ops rte_port_ring_multi_writer_ops; + +/** ring_multi_writer_nodrop port parameters */ +#define rte_port_ring_multi_writer_nodrop_params \ + rte_port_ring_writer_nodrop_params + +/** ring_multi_writer_nodrop port operations */ +extern struct rte_port_out_ops rte_port_ring_multi_writer_nodrop_ops; + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/seastar/dpdk/lib/librte_port/rte_port_sched.c b/src/seastar/dpdk/lib/librte_port/rte_port_sched.c new file mode 100644 index 00000000..9100a197 --- /dev/null +++ b/src/seastar/dpdk/lib/librte_port/rte_port_sched.c @@ -0,0 +1,323 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#include <string.h> + +#include <rte_mbuf.h> +#include <rte_malloc.h> + +#include "rte_port_sched.h" + +/* + * Reader + */ +#ifdef RTE_PORT_STATS_COLLECT + +#define RTE_PORT_SCHED_READER_PKTS_IN_ADD(port, val) \ + port->stats.n_pkts_in += val +#define RTE_PORT_SCHED_READER_PKTS_DROP_ADD(port, val) \ + port->stats.n_pkts_drop += val + +#else + +#define RTE_PORT_SCHED_READER_PKTS_IN_ADD(port, val) +#define RTE_PORT_SCHED_READER_PKTS_DROP_ADD(port, val) + +#endif + +struct rte_port_sched_reader { + struct rte_port_in_stats stats; + + struct rte_sched_port *sched; +}; + +static void * +rte_port_sched_reader_create(void *params, int socket_id) +{ + struct rte_port_sched_reader_params *conf = + params; + struct rte_port_sched_reader *port; + + /* Check input parameters */ + if ((conf == NULL) || + (conf->sched == NULL)) { + RTE_LOG(ERR, PORT, "%s: Invalid params\n", __func__); + return NULL; + } + + /* Memory allocation */ + port = rte_zmalloc_socket("PORT", sizeof(*port), + RTE_CACHE_LINE_SIZE, socket_id); + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: Failed to allocate port\n", __func__); + return NULL; + } + + /* Initialization */ + port->sched = conf->sched; + + return port; +} + +static int +rte_port_sched_reader_rx(void *port, struct rte_mbuf **pkts, uint32_t n_pkts) +{ + struct rte_port_sched_reader *p = port; + uint32_t nb_rx; + + nb_rx = rte_sched_port_dequeue(p->sched, pkts, n_pkts); + RTE_PORT_SCHED_READER_PKTS_IN_ADD(p, nb_rx); + + return nb_rx; +} + +static int +rte_port_sched_reader_free(void *port) +{ + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: port is NULL\n", __func__); + return -EINVAL; + } + + rte_free(port); + + return 0; +} + +static int +rte_port_sched_reader_stats_read(void *port, + struct rte_port_in_stats *stats, int clear) +{ + struct rte_port_sched_reader *p = + port; + + if (stats != NULL) + memcpy(stats, &p->stats, sizeof(p->stats)); + + if (clear) + memset(&p->stats, 0, sizeof(p->stats)); + + return 0; +} + +/* + * Writer + */ 
+#ifdef RTE_PORT_STATS_COLLECT + +#define RTE_PORT_SCHED_WRITER_STATS_PKTS_IN_ADD(port, val) \ + port->stats.n_pkts_in += val +#define RTE_PORT_SCHED_WRITER_STATS_PKTS_DROP_ADD(port, val) \ + port->stats.n_pkts_drop += val + +#else + +#define RTE_PORT_SCHED_WRITER_STATS_PKTS_IN_ADD(port, val) +#define RTE_PORT_SCHED_WRITER_STATS_PKTS_DROP_ADD(port, val) + +#endif + +struct rte_port_sched_writer { + struct rte_port_out_stats stats; + + struct rte_mbuf *tx_buf[2 * RTE_PORT_IN_BURST_SIZE_MAX]; + struct rte_sched_port *sched; + uint32_t tx_burst_sz; + uint32_t tx_buf_count; + uint64_t bsz_mask; +}; + +static void * +rte_port_sched_writer_create(void *params, int socket_id) +{ + struct rte_port_sched_writer_params *conf = + params; + struct rte_port_sched_writer *port; + + /* Check input parameters */ + if ((conf == NULL) || + (conf->sched == NULL) || + (conf->tx_burst_sz == 0) || + (conf->tx_burst_sz > RTE_PORT_IN_BURST_SIZE_MAX) || + (!rte_is_power_of_2(conf->tx_burst_sz))) { + RTE_LOG(ERR, PORT, "%s: Invalid params\n", __func__); + return NULL; + } + + /* Memory allocation */ + port = rte_zmalloc_socket("PORT", sizeof(*port), + RTE_CACHE_LINE_SIZE, socket_id); + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: Failed to allocate port\n", __func__); + return NULL; + } + + /* Initialization */ + port->sched = conf->sched; + port->tx_burst_sz = conf->tx_burst_sz; + port->tx_buf_count = 0; + port->bsz_mask = 1LLU << (conf->tx_burst_sz - 1); + + return port; +} + +static int +rte_port_sched_writer_tx(void *port, struct rte_mbuf *pkt) +{ + struct rte_port_sched_writer *p = (struct rte_port_sched_writer *) port; + + p->tx_buf[p->tx_buf_count++] = pkt; + RTE_PORT_SCHED_WRITER_STATS_PKTS_IN_ADD(p, 1); + if (p->tx_buf_count >= p->tx_burst_sz) { + __rte_unused uint32_t nb_tx; + + nb_tx = rte_sched_port_enqueue(p->sched, p->tx_buf, p->tx_buf_count); + RTE_PORT_SCHED_WRITER_STATS_PKTS_DROP_ADD(p, p->tx_buf_count - nb_tx); + p->tx_buf_count = 0; + } + + return 0; +} + +static int 
+rte_port_sched_writer_tx_bulk(void *port, + struct rte_mbuf **pkts, + uint64_t pkts_mask) +{ + struct rte_port_sched_writer *p = (struct rte_port_sched_writer *) port; + uint64_t bsz_mask = p->bsz_mask; + uint32_t tx_buf_count = p->tx_buf_count; + uint64_t expr = (pkts_mask & (pkts_mask + 1)) | + ((pkts_mask & bsz_mask) ^ bsz_mask); + + if (expr == 0) { + __rte_unused uint32_t nb_tx; + uint64_t n_pkts = __builtin_popcountll(pkts_mask); + + if (tx_buf_count) { + nb_tx = rte_sched_port_enqueue(p->sched, p->tx_buf, + tx_buf_count); + RTE_PORT_SCHED_WRITER_STATS_PKTS_DROP_ADD(p, tx_buf_count - nb_tx); + p->tx_buf_count = 0; + } + + nb_tx = rte_sched_port_enqueue(p->sched, pkts, n_pkts); + RTE_PORT_SCHED_WRITER_STATS_PKTS_DROP_ADD(p, n_pkts - nb_tx); + } else { + for ( ; pkts_mask; ) { + uint32_t pkt_index = __builtin_ctzll(pkts_mask); + uint64_t pkt_mask = 1LLU << pkt_index; + struct rte_mbuf *pkt = pkts[pkt_index]; + + p->tx_buf[tx_buf_count++] = pkt; + RTE_PORT_SCHED_WRITER_STATS_PKTS_IN_ADD(p, 1); + pkts_mask &= ~pkt_mask; + } + p->tx_buf_count = tx_buf_count; + + if (tx_buf_count >= p->tx_burst_sz) { + __rte_unused uint32_t nb_tx; + + nb_tx = rte_sched_port_enqueue(p->sched, p->tx_buf, + tx_buf_count); + RTE_PORT_SCHED_WRITER_STATS_PKTS_DROP_ADD(p, tx_buf_count - nb_tx); + p->tx_buf_count = 0; + } + } + + return 0; +} + +static int +rte_port_sched_writer_flush(void *port) +{ + struct rte_port_sched_writer *p = (struct rte_port_sched_writer *) port; + + if (p->tx_buf_count) { + __rte_unused uint32_t nb_tx; + + nb_tx = rte_sched_port_enqueue(p->sched, p->tx_buf, p->tx_buf_count); + RTE_PORT_SCHED_WRITER_STATS_PKTS_DROP_ADD(p, p->tx_buf_count - nb_tx); + p->tx_buf_count = 0; + } + + return 0; +} + +static int +rte_port_sched_writer_free(void *port) +{ + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: port is NULL\n", __func__); + return -EINVAL; + } + + rte_port_sched_writer_flush(port); + rte_free(port); + + return 0; +} + +static int 
+rte_port_sched_writer_stats_read(void *port, + struct rte_port_out_stats *stats, int clear) +{ + struct rte_port_sched_writer *p = + port; + + if (stats != NULL) + memcpy(stats, &p->stats, sizeof(p->stats)); + + if (clear) + memset(&p->stats, 0, sizeof(p->stats)); + + return 0; +} + +/* + * Summary of port operations + */ +struct rte_port_in_ops rte_port_sched_reader_ops = { + .f_create = rte_port_sched_reader_create, + .f_free = rte_port_sched_reader_free, + .f_rx = rte_port_sched_reader_rx, + .f_stats = rte_port_sched_reader_stats_read, +}; + +struct rte_port_out_ops rte_port_sched_writer_ops = { + .f_create = rte_port_sched_writer_create, + .f_free = rte_port_sched_writer_free, + .f_tx = rte_port_sched_writer_tx, + .f_tx_bulk = rte_port_sched_writer_tx_bulk, + .f_flush = rte_port_sched_writer_flush, + .f_stats = rte_port_sched_writer_stats_read, +}; diff --git a/src/seastar/dpdk/lib/librte_port/rte_port_sched.h b/src/seastar/dpdk/lib/librte_port/rte_port_sched.h new file mode 100644 index 00000000..555415ab --- /dev/null +++ b/src/seastar/dpdk/lib/librte_port/rte_port_sched.h @@ -0,0 +1,82 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __INCLUDE_RTE_PORT_SCHED_H__ +#define __INCLUDE_RTE_PORT_SCHED_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @file + * RTE Port Hierarchical Scheduler + * + * sched_reader: input port built on top of pre-initialized rte_sched_port + * sched_writer: output port built on top of pre-initialized rte_sched_port + * + ***/ + +#include <stdint.h> + +#include <rte_sched.h> + +#include "rte_port.h" + +/** sched_reader port parameters */ +struct rte_port_sched_reader_params { + /** Underlying pre-initialized rte_sched_port */ + struct rte_sched_port *sched; +}; + +/** sched_reader port operations */ +extern struct rte_port_in_ops rte_port_sched_reader_ops; + +/** sched_writer port parameters */ +struct rte_port_sched_writer_params { + /** Underlying pre-initialized rte_sched_port */ + struct rte_sched_port *sched; + + /** Recommended burst size. The actual burst size can be bigger or + smaller than this value. 
*/ + uint32_t tx_burst_sz; +}; + +/** sched_writer port operations */ +extern struct rte_port_out_ops rte_port_sched_writer_ops; + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/seastar/dpdk/lib/librte_port/rte_port_source_sink.c b/src/seastar/dpdk/lib/librte_port/rte_port_source_sink.c new file mode 100644 index 00000000..a79f2f64 --- /dev/null +++ b/src/seastar/dpdk/lib/librte_port/rte_port_source_sink.c @@ -0,0 +1,648 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include <stdint.h> +#include <string.h> + +#include <rte_mbuf.h> +#include <rte_mempool.h> +#include <rte_malloc.h> +#include <rte_memcpy.h> + +#ifdef RTE_PORT_PCAP +#include <rte_ether.h> +#include <pcap.h> +#endif + +#include "rte_port_source_sink.h" + +/* + * Port SOURCE + */ +#ifdef RTE_PORT_STATS_COLLECT + +#define RTE_PORT_SOURCE_STATS_PKTS_IN_ADD(port, val) \ + port->stats.n_pkts_in += val +#define RTE_PORT_SOURCE_STATS_PKTS_DROP_ADD(port, val) \ + port->stats.n_pkts_drop += val + +#else + +#define RTE_PORT_SOURCE_STATS_PKTS_IN_ADD(port, val) +#define RTE_PORT_SOURCE_STATS_PKTS_DROP_ADD(port, val) + +#endif + +struct rte_port_source { + struct rte_port_in_stats stats; + + struct rte_mempool *mempool; + + /* PCAP buffers and indices */ + uint8_t **pkts; + uint8_t *pkt_buff; + uint32_t *pkt_len; + uint32_t n_pkts; + uint32_t pkt_index; +}; + +#ifdef RTE_PORT_PCAP + +static int +pcap_source_load(struct rte_port_source *port, + const char *file_name, + uint32_t n_bytes_per_pkt, + int socket_id) +{ + uint32_t n_pkts = 0; + uint32_t i; + uint32_t *pkt_len_aligns = NULL; + size_t total_buff_len = 0; + pcap_t *pcap_handle; + char pcap_errbuf[PCAP_ERRBUF_SIZE]; + uint32_t max_len; + struct pcap_pkthdr pcap_hdr; + const uint8_t *pkt; + uint8_t *buff = NULL; + uint32_t pktmbuf_maxlen = (uint32_t) + (rte_pktmbuf_data_room_size(port->mempool) - + RTE_PKTMBUF_HEADROOM); + + if (n_bytes_per_pkt == 0) + max_len = pktmbuf_maxlen; + else 
+ max_len = RTE_MIN(n_bytes_per_pkt, pktmbuf_maxlen); + + /* first time open, get packet number */ + pcap_handle = pcap_open_offline(file_name, pcap_errbuf); + if (pcap_handle == NULL) { + RTE_LOG(ERR, PORT, "Failed to open pcap file " + "'%s' for reading\n", file_name); + goto error_exit; + } + + while ((pkt = pcap_next(pcap_handle, &pcap_hdr)) != NULL) + n_pkts++; + + pcap_close(pcap_handle); + + port->pkt_len = rte_zmalloc_socket("PCAP", + (sizeof(*port->pkt_len) * n_pkts), 0, socket_id); + if (port->pkt_len == NULL) { + RTE_LOG(ERR, PORT, "No enough memory\n"); + goto error_exit; + } + + pkt_len_aligns = rte_malloc("PCAP", + (sizeof(*pkt_len_aligns) * n_pkts), 0); + if (pkt_len_aligns == NULL) { + RTE_LOG(ERR, PORT, "No enough memory\n"); + goto error_exit; + } + + port->pkts = rte_zmalloc_socket("PCAP", + (sizeof(*port->pkts) * n_pkts), 0, socket_id); + if (port->pkts == NULL) { + RTE_LOG(ERR, PORT, "No enough memory\n"); + goto error_exit; + } + + /* open 2nd time, get pkt_len */ + pcap_handle = pcap_open_offline(file_name, pcap_errbuf); + if (pcap_handle == NULL) { + RTE_LOG(ERR, PORT, "Failed to open pcap file " + "'%s' for reading\n", file_name); + goto error_exit; + } + + for (i = 0; i < n_pkts; i++) { + pkt = pcap_next(pcap_handle, &pcap_hdr); + port->pkt_len[i] = RTE_MIN(max_len, pcap_hdr.len); + pkt_len_aligns[i] = RTE_CACHE_LINE_ROUNDUP( + port->pkt_len[i]); + total_buff_len += pkt_len_aligns[i]; + } + + pcap_close(pcap_handle); + + /* allocate a big trunk of data for pcap file load */ + buff = rte_zmalloc_socket("PCAP", + total_buff_len, 0, socket_id); + if (buff == NULL) { + RTE_LOG(ERR, PORT, "No enough memory\n"); + goto error_exit; + } + + port->pkt_buff = buff; + + /* open file one last time to copy the pkt content */ + pcap_handle = pcap_open_offline(file_name, pcap_errbuf); + if (pcap_handle == NULL) { + RTE_LOG(ERR, PORT, "Failed to open pcap file " + "'%s' for reading\n", file_name); + goto error_exit; + } + + for (i = 0; i < n_pkts; i++) { 
+		pkt = pcap_next(pcap_handle, &pcap_hdr);
+		rte_memcpy(buff, pkt, port->pkt_len[i]);
+		port->pkts[i] = buff;
+		buff += pkt_len_aligns[i];
+	}
+
+	pcap_close(pcap_handle);
+
+	port->n_pkts = n_pkts;
+
+	rte_free(pkt_len_aligns);
+
+	RTE_LOG(INFO, PORT, "Successfully load pcap file "
+		"'%s' with %u pkts\n",
+		file_name, port->n_pkts);
+
+	return 0;
+
+error_exit:
+	if (pkt_len_aligns)
+		rte_free(pkt_len_aligns);
+	if (port->pkt_len)
+		rte_free(port->pkt_len);
+	if (port->pkts)
+		rte_free(port->pkts);
+	if (port->pkt_buff)
+		rte_free(port->pkt_buff);
+
+	return -1;
+}
+/*
+ * Branch used when RTE_PORT_PCAP is defined (see the #else/#endif
+ * annotations below): loading is delegated to pcap_source_load().
+ */
+#define PCAP_SOURCE_LOAD(port, file_name, n_bytes, socket_id)	\
+	pcap_source_load(port, file_name, n_bytes, socket_id)
+
+#else /* RTE_PORT_PCAP */
+/*
+ * Fallback branch: nothing is loaded. The expression evaluates to -1,
+ * after logging an error, when file_name is non-NULL, and to 0
+ * otherwise.
+ */
+#define PCAP_SOURCE_LOAD(port, file_name, n_bytes, socket_id)	\
+({								\
+	int _ret = 0;						\
+								\
+	if (file_name) {					\
+		RTE_LOG(ERR, PORT, "Source port field "		\
+			"\"file_name\" is not NULL.\n");	\
+		_ret = -1;					\
+	}							\
+								\
+	_ret;							\
+})
+
+#endif /* RTE_PORT_PCAP */
+/*
+ * Create a source port.
+ *
+ * params:    pointer to a struct rte_port_source_params; both the
+ *            pointer and its mempool member must be non-NULL.
+ * socket_id: handed to rte_zmalloc_socket() for the port allocation
+ *            and forwarded to PCAP_SOURCE_LOAD().
+ *
+ * Returns the new port, or NULL when the parameters are invalid, the
+ * allocation fails, or PCAP_SOURCE_LOAD() reports an error for a
+ * non-NULL file_name (the port is freed before returning in that case).
+ */
+static void *
+rte_port_source_create(void *params, int socket_id)
+{
+	struct rte_port_source_params *p =
+			params;
+	struct rte_port_source *port;
+
+	/* Check input arguments: the mempool is mandatory */
+	if ((p == NULL) || (p->mempool == NULL)) {
+		RTE_LOG(ERR, PORT, "%s: Invalid params\n", __func__);
+		return NULL;
+	}
+
+	/* Memory allocation: zeroed, cache-line aligned, on socket_id */
+	port = rte_zmalloc_socket("PORT", sizeof(*port),
+			RTE_CACHE_LINE_SIZE, socket_id);
+	if (port == NULL) {
+		RTE_LOG(ERR, PORT, "%s: Failed to allocate port\n", __func__);
+		return NULL;
+	}
+
+	/* Initialization */
+	port->mempool = (struct rte_mempool *) p->mempool;
+
+	if (p->file_name) {	/* packet contents are only loaded on request */
+		int status = PCAP_SOURCE_LOAD(port, p->file_name,
+			p->n_bytes_per_pkt, socket_id);
+
+		if (status < 0) {
+			rte_free(port);
+			port = NULL;
+		}
+	}
+
+	return port;
+}
+/*
+ * Free a source port: releases the pkt_len, pkts and pkt_buff arrays
+ * when they are set, then the port structure itself. A NULL port is
+ * accepted. Always returns 0.
+ */
+static int
+rte_port_source_free(void *port)
+{
+	struct rte_port_source *p =
+			port;
+
+	/* Check input parameters */
+	if (p == NULL)
+		return 0;
+
+	if (p->pkt_len)
+		rte_free(p->pkt_len);
+	if (p->pkts)
+
rte_free(p->pkts);
+	if (p->pkt_buff)
+		rte_free(p->pkt_buff);
+
+	rte_free(p);
+
+	return 0;
+}
+/*
+ * Generate n_pkts packets into pkts[].
+ *
+ * The mbufs are bulk-allocated from the port's mempool; if that fails
+ * nothing is produced and 0 is returned. When a packet buffer has been
+ * loaded (pkt_buff != NULL) each mbuf is filled with the next stored
+ * packet and its data_len/pkt_len are set to that packet's length; the
+ * read position wraps back to the first stored packet after the last
+ * one. Otherwise the mbufs are handed out exactly as allocated.
+ *
+ * Returns n_pkts on success.
+ */
+static int
+rte_port_source_rx(void *port, struct rte_mbuf **pkts, uint32_t n_pkts)
+{
+	struct rte_port_source *p = port;
+	uint32_t i;
+
+	if (rte_pktmbuf_alloc_bulk(p->mempool, pkts, n_pkts) != 0)
+		return 0;
+
+	if (p->pkt_buff != NULL) {
+		for (i = 0; i < n_pkts; i++) {
+			uint8_t *pkt_data = rte_pktmbuf_mtod(pkts[i],
+				uint8_t *);
+
+			rte_memcpy(pkt_data, p->pkts[p->pkt_index],
+					p->pkt_len[p->pkt_index]);
+			pkts[i]->data_len = p->pkt_len[p->pkt_index];
+			pkts[i]->pkt_len = pkts[i]->data_len;
+
+			p->pkt_index++;
+			if (p->pkt_index >= p->n_pkts)	/* replay from the start */
+				p->pkt_index = 0;
+		}
+	}
+
+	RTE_PORT_SOURCE_STATS_PKTS_IN_ADD(p, n_pkts);
+
+	return n_pkts;
+}
+/*
+ * Copy the port statistics into *stats when stats is non-NULL, then
+ * zero the port's counters when clear is non-zero. The port pointer is
+ * dereferenced without a NULL check. Always returns 0.
+ */
+static int
+rte_port_source_stats_read(void *port,
+		struct rte_port_in_stats *stats, int clear)
+{
+	struct rte_port_source *p =
+		port;
+
+	if (stats != NULL)
+		memcpy(stats, &p->stats, sizeof(p->stats));
+
+	if (clear)
+		memset(&p->stats, 0, sizeof(p->stats));
+
+	return 0;
+}
+
+/*
+ * Port SINK
+ */
+#ifdef RTE_PORT_STATS_COLLECT
+/* Statistics enabled: add val to the port's in/drop counters. */
+#define RTE_PORT_SINK_STATS_PKTS_IN_ADD(port, val) \
+	(port->stats.n_pkts_in += val)
+#define RTE_PORT_SINK_STATS_PKTS_DROP_ADD(port, val) \
+	(port->stats.n_pkts_drop += val)
+
+#else
+/* Statistics disabled: both macros expand to nothing. */
+#define RTE_PORT_SINK_STATS_PKTS_IN_ADD(port, val)
+#define RTE_PORT_SINK_STATS_PKTS_DROP_ADD(port, val)
+
+#endif
+/* Run-time state of a sink port. */
+struct rte_port_sink {
+	struct rte_port_out_stats stats;
+
+	/* PCAP dumper handle and packet counters */
+	void *dumper;		/* dumper set by pcap_sink_open(); NULL means nothing is dumped */
+	uint32_t max_pkts;	/* dump limit taken from max_n_pkts; 0 means no limit */
+	uint32_t pkt_index;	/* number of packets dumped so far */
+	uint32_t dump_finish;	/* set to 1 once pkt_index has reached max_pkts */
+};
+
+#ifdef RTE_PORT_PCAP
+/*
+ * Open file_name for writing as a pcap dump and attach it to the port.
+ *
+ * A "dead" pcap handle (link type DLT_EN10MB, snapshot length 65535) is
+ * opened first and used to create the dumper. On success the dumper is
+ * stored in the port, the dump limit is set to max_n_pkts and the
+ * packet counter and finish flag are reset.
+ *
+ * Returns 0 on success, -1 when either handle cannot be opened.
+ */
+static int
+pcap_sink_open(struct rte_port_sink *port,
+	const char *file_name,
+	uint32_t max_n_pkts)
+{
+	pcap_t *tx_pcap;
+	pcap_dumper_t *pcap_dumper;
+
+	/** Open a dead pcap handle that only serves to create the dumper file */
+	tx_pcap = pcap_open_dead(DLT_EN10MB, 65535);
+	if (tx_pcap == NULL) {
+		RTE_LOG(ERR, PORT, "Cannot open pcap dead handler\n");
+		return -1;
+	}
+
+	/* The
dumper is created using the previous pcap_t reference */ + pcap_dumper = pcap_dump_open(tx_pcap, file_name); + if (pcap_dumper == NULL) { + RTE_LOG(ERR, PORT, "Failed to open pcap file " + "\"%s\" for writing\n", file_name); + return -1; + } + + port->dumper = pcap_dumper; + port->max_pkts = max_n_pkts; + port->pkt_index = 0; + port->dump_finish = 0; + + RTE_LOG(INFO, PORT, "Ready to dump packets to file \"%s\"\n", + file_name); + + return 0; +} + +static void +pcap_sink_write_pkt(struct rte_port_sink *port, struct rte_mbuf *mbuf) +{ + uint8_t *pcap_dumper = (port->dumper); + struct pcap_pkthdr pcap_hdr; + uint8_t jumbo_pkt_buf[ETHER_MAX_JUMBO_FRAME_LEN]; + uint8_t *pkt; + + /* Maximum num packets already reached */ + if (port->dump_finish) + return; + + pkt = rte_pktmbuf_mtod(mbuf, uint8_t *); + + pcap_hdr.len = mbuf->pkt_len; + pcap_hdr.caplen = pcap_hdr.len; + gettimeofday(&(pcap_hdr.ts), NULL); + + if (mbuf->nb_segs > 1) { + struct rte_mbuf *jumbo_mbuf; + uint32_t pkt_index = 0; + + /* if packet size longer than ETHER_MAX_JUMBO_FRAME_LEN, + * ignore it. 
+ */ + if (mbuf->pkt_len > ETHER_MAX_JUMBO_FRAME_LEN) + return; + + for (jumbo_mbuf = mbuf; jumbo_mbuf != NULL; + jumbo_mbuf = jumbo_mbuf->next) { + rte_memcpy(&jumbo_pkt_buf[pkt_index], + rte_pktmbuf_mtod(jumbo_mbuf, uint8_t *), + jumbo_mbuf->data_len); + pkt_index += jumbo_mbuf->data_len; + } + + jumbo_pkt_buf[pkt_index] = '\0'; + + pkt = jumbo_pkt_buf; + } + + pcap_dump(pcap_dumper, &pcap_hdr, pkt); + + port->pkt_index++; + + if ((port->max_pkts != 0) && (port->pkt_index >= port->max_pkts)) { + port->dump_finish = 1; + RTE_LOG(INFO, PORT, "Dumped %u packets to file\n", + port->pkt_index); + } + +} + +#define PCAP_SINK_OPEN(port, file_name, max_n_pkts) \ + pcap_sink_open(port, file_name, max_n_pkts) + +#define PCAP_SINK_WRITE_PKT(port, mbuf) \ + pcap_sink_write_pkt(port, mbuf) + +#define PCAP_SINK_FLUSH_PKT(dumper) \ +do { \ + if (dumper) \ + pcap_dump_flush((pcap_dumper_t *)dumper); \ +} while (0) + +#define PCAP_SINK_CLOSE(dumper) \ +do { \ + if (dumper) \ + pcap_dump_close((pcap_dumper_t *)dumper); \ +} while (0) + +#else + +#define PCAP_SINK_OPEN(port, file_name, max_n_pkts) \ +({ \ + int _ret = 0; \ + \ + if (file_name) { \ + RTE_LOG(ERR, PORT, "Sink port field " \ + "\"file_name\" is not NULL.\n"); \ + _ret = -1; \ + } \ + \ + _ret; \ +}) + +#define PCAP_SINK_WRITE_PKT(port, mbuf) {} + +#define PCAP_SINK_FLUSH_PKT(dumper) + +#define PCAP_SINK_CLOSE(dumper) + +#endif + +static void * +rte_port_sink_create(void *params, int socket_id) +{ + struct rte_port_sink *port; + struct rte_port_sink_params *p = params; + + /* Memory allocation */ + port = rte_zmalloc_socket("PORT", sizeof(*port), + RTE_CACHE_LINE_SIZE, socket_id); + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: Failed to allocate port\n", __func__); + return NULL; + } + + if (!p) + return port; + + if (p->file_name) { + int status = PCAP_SINK_OPEN(port, p->file_name, + p->max_n_pkts); + + if (status < 0) { + rte_free(port); + port = NULL; + } + } + + return port; +} + +static int +rte_port_sink_tx(void 
*port, struct rte_mbuf *pkt)
+{
+	struct rte_port_sink *p = port;
+	/*
+	 * Single-packet TX: the packet is counted in, dumped when a dumper is
+	 * attached, freed, and counted as dropped. Always returns 0.
+	 */
+	RTE_PORT_SINK_STATS_PKTS_IN_ADD(p, 1);
+	if (p->dumper != NULL)
+		PCAP_SINK_WRITE_PKT(p, pkt);
+	rte_pktmbuf_free(pkt);
+	RTE_PORT_SINK_STATS_PKTS_DROP_ADD(p, 1);
+
+	return 0;
+}
+/*
+ * Bulk TX: handles every packet whose bit is set in pkts_mask, where
+ * bit i selects pkts[i]. Each selected packet is counted in and
+ * dropped, dumped when a dumper is attached, and freed.
+ * Always returns 0.
+ */
+static int
+rte_port_sink_tx_bulk(void *port, struct rte_mbuf **pkts,
+	uint64_t pkts_mask)
+{
+	struct rte_port_sink *p = port;
+	/*
+	 * (mask & (mask + 1)) == 0 holds when the set bits form one run
+	 * starting at bit 0 (or the mask is empty), so the packets are exactly
+	 * pkts[0] .. pkts[n_pkts - 1] and can be walked with a plain index.
+	 */
+	if ((pkts_mask & (pkts_mask + 1)) == 0) {
+		uint64_t n_pkts = __builtin_popcountll(pkts_mask);
+		uint32_t i;
+
+		RTE_PORT_SINK_STATS_PKTS_IN_ADD(p, n_pkts);
+		RTE_PORT_SINK_STATS_PKTS_DROP_ADD(p, n_pkts);
+
+		if (p->dumper) {
+			for (i = 0; i < n_pkts; i++)
+				PCAP_SINK_WRITE_PKT(p, pkts[i]);
+		}
+
+		for (i = 0; i < n_pkts; i++) {
+			struct rte_mbuf *pkt = pkts[i];
+
+			rte_pktmbuf_free(pkt);
+		}
+
+	} else {	/* sparse mask: visit the set bits one at a time, lowest first */
+		if (p->dumper) {
+			uint64_t dump_pkts_mask = pkts_mask;
+			uint32_t pkt_index;
+
+			for ( ; dump_pkts_mask; ) {
+				pkt_index = __builtin_ctzll(
+					dump_pkts_mask);
+				PCAP_SINK_WRITE_PKT(p, pkts[pkt_index]);
+				dump_pkts_mask &= ~(1LLU << pkt_index);
+			}
+		}
+
+		for ( ; pkts_mask; ) {
+			uint32_t pkt_index = __builtin_ctzll(pkts_mask);
+			uint64_t pkt_mask = 1LLU << pkt_index;
+			struct rte_mbuf *pkt = pkts[pkt_index];
+
+			RTE_PORT_SINK_STATS_PKTS_IN_ADD(p, 1);
+			RTE_PORT_SINK_STATS_PKTS_DROP_ADD(p, 1);
+			rte_pktmbuf_free(pkt);
+			pkts_mask &= ~pkt_mask;	/* clear the bit just handled */
+		}
+	}
+
+	return 0;
+}
+/*
+ * Flush the port's dumper through PCAP_SINK_FLUSH_PKT(). A NULL port is
+ * accepted. Always returns 0.
+ */
+static int
+rte_port_sink_flush(void *port)
+{
+	struct rte_port_sink *p =
+		port;
+
+	if (p == NULL)
+		return 0;
+
+	PCAP_SINK_FLUSH_PKT(p->dumper);
+
+	return 0;
+}
+/*
+ * Free a sink port: closes the dumper through PCAP_SINK_CLOSE() and
+ * releases the port structure. A NULL port is accepted.
+ * Always returns 0.
+ */
+static int
+rte_port_sink_free(void *port)
+{
+	struct rte_port_sink *p =
+		port;
+
+	if (p == NULL)
+		return 0;
+
+	PCAP_SINK_CLOSE(p->dumper);
+
+	rte_free(p);
+
+	return 0;
+}
+/*
+ * Copy the port statistics into *stats when stats is non-NULL, then
+ * zero the port's counters when clear is non-zero. The port pointer is
+ * dereferenced without a NULL check.
+ */
+static int
+rte_port_sink_stats_read(void *port, struct rte_port_out_stats *stats,
+		int clear)
+{
+	struct rte_port_sink *p =
+		port;
+
+	if (stats != NULL)
+		memcpy(stats, &p->stats, sizeof(p->stats));
+
+	if (clear)
+		memset(&p->stats, 0, sizeof(p->stats));
+
+	return
0;
+}
+
+/*
+ * Summary of port operations
+ *
+ * These two tables are the only non-static symbols defined here; they
+ * bind the static source/sink functions above to the input-port and
+ * output-port operation slots.
+ */
+struct rte_port_in_ops rte_port_source_ops = {
+	.f_create = rte_port_source_create,
+	.f_free = rte_port_source_free,
+	.f_rx = rte_port_source_rx,
+	.f_stats = rte_port_source_stats_read,
+};
+/* Output-port operations implemented by the sink port. */
+struct rte_port_out_ops rte_port_sink_ops = {
+	.f_create = rte_port_sink_create,
+	.f_free = rte_port_sink_free,
+	.f_tx = rte_port_sink_tx,
+	.f_tx_bulk = rte_port_sink_tx_bulk,
+	.f_flush = rte_port_sink_flush,
+	.f_stats = rte_port_sink_stats_read,
+};
diff --git a/src/seastar/dpdk/lib/librte_port/rte_port_source_sink.h b/src/seastar/dpdk/lib/librte_port/rte_port_source_sink.h
new file mode 100644
index 00000000..be585a77
--- /dev/null
+++ b/src/seastar/dpdk/lib/librte_port/rte_port_source_sink.h
@@ -0,0 +1,87 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __INCLUDE_RTE_PORT_SOURCE_SINK_H__
+#define __INCLUDE_RTE_PORT_SOURCE_SINK_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ * RTE Port Source/Sink
+ *
+ * source: input port that can be used to generate packets, optionally
+ *         taking their contents from a pcap file
+ * sink: output port that drops all packets written to it, optionally
+ *       writing them to a pcap file first
+ *
+ ***/
+
+#include "rte_port.h"
+
+/** Source port parameters */
+struct rte_port_source_params {
+	/** Pre-initialized buffer pool */
+	struct rte_mempool *mempool;
+
+	/** The full path of the pcap file to read packets from */
+	const char *file_name;
+	/** The number of bytes to be read from each packet in the
+	 *  pcap file. If this value is 0, the whole packet is read;
+	 *  if it is bigger than the packet size, the generated packets
+	 *  will contain the whole packet. */
+	uint32_t n_bytes_per_pkt;
+};
+
+/** Source port operations */
+extern struct rte_port_in_ops rte_port_source_ops;
+
+/** Sink port parameters */
+struct rte_port_sink_params {
+	/** The full path of the pcap file to write the packets to */
+	const char *file_name;
+	/** The maximum number of packets to write to the pcap file.
+	 *  If this value is 0, there is no limit: every packet sent to
+	 *  the port is written.
+	 */
+	uint32_t max_n_pkts;
+};
+
+/** Sink port operations */
+extern struct rte_port_out_ops rte_port_sink_ops;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/seastar/dpdk/lib/librte_port/rte_port_version.map b/src/seastar/dpdk/lib/librte_port/rte_port_version.map
new file mode 100644
index 00000000..6470629b
--- /dev/null
+++ b/src/seastar/dpdk/lib/librte_port/rte_port_version.map
@@ -0,0 +1,53 @@
+DPDK_2.0 {
+	global:
+
+	rte_port_ethdev_reader_ops;
+	rte_port_ethdev_writer_ops;
+	rte_port_ring_reader_ipv4_frag_ops;
+	rte_port_ring_reader_ops;
+	rte_port_ring_writer_ipv4_ras_ops;
+	rte_port_ring_writer_ops;
+	rte_port_sched_reader_ops;
+	rte_port_sched_writer_ops;
+	rte_port_sink_ops;
+	rte_port_source_ops;
+
+	local: *;
+};
+
+DPDK_2.1 {
+	global:
+
+	rte_port_ethdev_writer_nodrop_ops;
+	rte_port_ring_reader_ipv6_frag_ops;
+	rte_port_ring_writer_ipv6_ras_ops;
+	rte_port_ring_writer_nodrop_ops;
+
+} DPDK_2.0;
+
+DPDK_2.2 {
+	global:
+
+	rte_port_ring_multi_reader_ops;
+	rte_port_ring_multi_writer_ops;
+	rte_port_ring_multi_writer_nodrop_ops;
+
+} DPDK_2.1;
+
+DPDK_16.07 {
+	global:
+
+	rte_port_kni_reader_ops;
+	rte_port_kni_writer_ops;
+	rte_port_kni_writer_nodrop_ops;
+
+} DPDK_2.2;
+
+DPDK_16.11 {
+	global:
+
+	rte_port_fd_reader_ops;
+	rte_port_fd_writer_ops;
+	rte_port_fd_writer_nodrop_ops;
+
+} DPDK_16.07; |