author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000
commit | e6918187568dbd01842d8d1d2c808ce16a894239 (patch)
tree | 64f88b554b444a49f656b6c656111a145cbbaa28 /src/spdk/dpdk/drivers/net/bonding
parent | Initial commit. (diff)
download | ceph-e6918187568dbd01842d8d1d2c808ce16a894239.tar.xz ceph-e6918187568dbd01842d8d1d2c808ce16a894239.zip
Adding upstream version 18.2.2. (tag: upstream/18.2.2)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/spdk/dpdk/drivers/net/bonding')
-rw-r--r-- | src/spdk/dpdk/drivers/net/bonding/Makefile | 36
-rw-r--r-- | src/spdk/dpdk/drivers/net/bonding/eth_bond_8023ad_private.h | 308
-rw-r--r-- | src/spdk/dpdk/drivers/net/bonding/eth_bond_private.h | 324
-rw-r--r-- | src/spdk/dpdk/drivers/net/bonding/meson.build | 11
-rw-r--r-- | src/spdk/dpdk/drivers/net/bonding/rte_eth_bond.h | 351
-rw-r--r-- | src/spdk/dpdk/drivers/net/bonding/rte_eth_bond_8023ad.c | 1719
-rw-r--r-- | src/spdk/dpdk/drivers/net/bonding/rte_eth_bond_8023ad.h | 334
-rw-r--r-- | src/spdk/dpdk/drivers/net/bonding/rte_eth_bond_alb.c | 271
-rw-r--r-- | src/spdk/dpdk/drivers/net/bonding/rte_eth_bond_alb.h | 113
-rw-r--r-- | src/spdk/dpdk/drivers/net/bonding/rte_eth_bond_api.c | 1052
-rw-r--r-- | src/spdk/dpdk/drivers/net/bonding/rte_eth_bond_args.c | 301
-rw-r--r-- | src/spdk/dpdk/drivers/net/bonding/rte_eth_bond_flow.c | 245
-rw-r--r-- | src/spdk/dpdk/drivers/net/bonding/rte_eth_bond_pmd.c | 3760
-rw-r--r-- | src/spdk/dpdk/drivers/net/bonding/rte_pmd_bond_version.map | 33 |
14 files changed, 8858 insertions, 0 deletions
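The files added below provide the public link-bonding API (rte_eth_bond.h) plus the 802.3ad/LACP state-machine internals and PMD implementation. As orientation before the full diff, the following is a minimal, hedged sketch of how an application might drive that API; the device name, port ids, and socket id are illustrative assumptions, and device/queue configuration is omitted for brevity. It is not taken from this commit.

```c
/* Minimal usage sketch (not part of this commit): create a bonded
 * device in 802.3AD (LACP) mode and attach two already-probed slave
 * ports.  Port ids 0/1, the socket id, and the name are assumptions. */
#include <rte_ethdev.h>
#include <rte_eth_bond.h>

static int setup_bond_example(void)
{
	/* Returns the new bonded port id, or a negative value on error. */
	int bond_port = rte_eth_bond_create("net_bonding0",
					    BONDING_MODE_8023AD,
					    0 /* socket id (assumed) */);
	if (bond_port < 0)
		return bond_port;

	/* Attach two slave ports (ids are hypothetical). */
	if (rte_eth_bond_slave_add(bond_port, 0) != 0 ||
	    rte_eth_bond_slave_add(bond_port, 1) != 0)
		return -1;

	/* rte_eth_dev_configure() and queue setup are omitted here.
	 * Note from the header below: in mode 4 the application must
	 * keep invoking rx/tx burst on the bonded port at least every
	 * 100 ms so LACP frames are handled in time. */
	return rte_eth_dev_start((uint16_t)bond_port);
}
```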
diff --git a/src/spdk/dpdk/drivers/net/bonding/Makefile b/src/spdk/dpdk/drivers/net/bonding/Makefile new file mode 100644 index 000000000..728551a84 --- /dev/null +++ b/src/spdk/dpdk/drivers/net/bonding/Makefile @@ -0,0 +1,36 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2010-2014 Intel Corporation + +include $(RTE_SDK)/mk/rte.vars.mk + +# +# library name +# +LIB = librte_pmd_bond.a + +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) +LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring +LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs +LDLIBS += -lrte_pci -lrte_bus_pci +LDLIBS += -lrte_bus_vdev + +EXPORT_MAP := rte_pmd_bond_version.map + +# +# all source are stored in SRCS-y +# +SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_api.c +SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_pmd.c +SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_args.c +SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_8023ad.c +SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_alb.c +SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_flow.c + +# +# Export include files +# +SYMLINK-y-include += rte_eth_bond.h +SYMLINK-y-include += rte_eth_bond_8023ad.h + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/src/spdk/dpdk/drivers/net/bonding/eth_bond_8023ad_private.h b/src/spdk/dpdk/drivers/net/bonding/eth_bond_8023ad_private.h new file mode 100644 index 000000000..6e44ffdb1 --- /dev/null +++ b/src/spdk/dpdk/drivers/net/bonding/eth_bond_8023ad_private.h @@ -0,0 +1,308 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2014 Intel Corporation + */ + +#ifndef _ETH_BOND_8023AD_PRIVATE_H_ +#define _ETH_BOND_8023AD_PRIVATE_H_ + +#include <stdint.h> + +#include <rte_ether.h> +#include <rte_byteorder.h> +#include <rte_atomic.h> +#include <rte_flow.h> + +#include "rte_eth_bond_8023ad.h" + +#define BOND_MODE_8023AX_UPDATE_TIMEOUT_MS 100 +/** Maximum number of packets to one slave queued in TX ring. */ +#define BOND_MODE_8023AX_SLAVE_RX_PKTS 3 +/** Maximum number of LACP packets from one slave queued in TX ring. */ +#define BOND_MODE_8023AX_SLAVE_TX_PKTS 1 +/** + * Timeouts deffinitions (5.4.4 in 802.1AX documentation). + */ +#define BOND_8023AD_FAST_PERIODIC_MS 900 +#define BOND_8023AD_SLOW_PERIODIC_MS 29000 +#define BOND_8023AD_SHORT_TIMEOUT_MS 3000 +#define BOND_8023AD_LONG_TIMEOUT_MS 90000 +#define BOND_8023AD_CHURN_DETECTION_TIMEOUT_MS 60000 +#define BOND_8023AD_AGGREGATE_WAIT_TIMEOUT_MS 2000 +#define BOND_8023AD_TX_MACHINE_PERIOD_MS 500 +#define BOND_8023AD_RX_MARKER_PERIOD_MS 2000 + +/** + * Interval of showing warning message from state machines. All messages will + * be held (and gathered together) to prevent flooding. + * This is no parto of 802.1AX standard. 
+ */ +#define BOND_8023AD_WARNINGS_PERIOD_MS 1000 + + + +/** + * State machine flags + */ +#define SM_FLAGS_BEGIN 0x0001 +#define SM_FLAGS_LACP_ENABLED 0x0002 +#define SM_FLAGS_ACTOR_CHURN 0x0004 +#define SM_FLAGS_PARTNER_CHURN 0x0008 +#define SM_FLAGS_MOVED 0x0100 +#define SM_FLAGS_PARTNER_SHORT_TIMEOUT 0x0200 +#define SM_FLAGS_NTT 0x0400 + +#define BOND_LINK_FULL_DUPLEX_KEY 0x01 +#define BOND_LINK_SPEED_KEY_10M 0x02 +#define BOND_LINK_SPEED_KEY_100M 0x04 +#define BOND_LINK_SPEED_KEY_1000M 0x08 +#define BOND_LINK_SPEED_KEY_10G 0x10 +#define BOND_LINK_SPEED_KEY_20G 0x11 +#define BOND_LINK_SPEED_KEY_40G 0x12 + +#define WRN_RX_MARKER_TO_FAST 0x01 +#define WRN_UNKNOWN_SLOW_TYPE 0x02 +#define WRN_UNKNOWN_MARKER_TYPE 0x04 +#define WRN_NOT_LACP_CAPABLE 0x08 +#define WRN_RX_QUEUE_FULL 0x10 +#define WRN_TX_QUEUE_FULL 0x20 + +#define CHECK_FLAGS(_variable, _f) ((_variable) & (_f)) +#define SET_FLAGS(_variable, _f) ((_variable) |= (_f)) +#define CLEAR_FLAGS(_variable, _f) ((_variable) &= ~(_f)) + +#define SM_FLAG(_p, _f) (!!CHECK_FLAGS((_p)->sm_flags, SM_FLAGS_ ## _f)) +#define SM_FLAG_SET(_p, _f) SET_FLAGS((_p)->sm_flags, SM_FLAGS_ ## _f) +#define SM_FLAG_CLR(_p, _f) CLEAR_FLAGS((_p)->sm_flags, SM_FLAGS_ ## _f) + +#define ACTOR_STATE(_p, _f) (!!CHECK_FLAGS((_p)->actor_state, STATE_ ## _f)) +#define ACTOR_STATE_SET(_p, _f) SET_FLAGS((_p)->actor_state, STATE_ ## _f) +#define ACTOR_STATE_CLR(_p, _f) CLEAR_FLAGS((_p)->actor_state, STATE_ ## _f) + +#define PARTNER_STATE(_p, _f) (!!CHECK_FLAGS((_p)->partner_state, STATE_ ## _f)) +#define PARTNER_STATE_SET(_p, _f) SET_FLAGS((_p)->partner_state, STATE_ ## _f) +#define PARTNER_STATE_CLR(_p, _f) CLEAR_FLAGS((_p)->partner_state, STATE_ ## _f) + +/** Variables associated with each port (5.4.7 in 802.1AX documentation). */ +struct port { + /** + * The operational values of the Actor's state parameters. Bitmask + * of port states. + */ + uint8_t actor_state; + + /** The operational Actor's port parameters */ + struct port_params actor; + + /** + * The operational value of the Actor's view of the current values of + * the Partner's state parameters. The Actor sets this variable either + * to the value received from the Partner in an LACPDU, or to the value + * of Partner_Admin_Port_State. Bitmask of port states. + */ + uint8_t partner_state; + + /** The operational Partner's port parameters */ + struct port_params partner; + + /* Additional port parameters not listed in documentation */ + /** State machine flags */ + uint16_t sm_flags; + enum rte_bond_8023ad_selection selected; + + /** Indicates if either allmulti or promisc has been enforced on the + * slave so that we can receive lacp packets + */ +#define BOND_8023AD_FORCED_ALLMULTI (1 << 0) +#define BOND_8023AD_FORCED_PROMISC (1 << 1) + uint8_t forced_rx_flags; + + uint64_t current_while_timer; + uint64_t periodic_timer; + uint64_t wait_while_timer; + uint64_t tx_machine_timer; + uint64_t tx_marker_timer; + /* Agregator parameters */ + /** Used aggregator port ID */ + uint16_t aggregator_port_id; + + /** Memory pool used to allocate rings */ + struct rte_mempool *mbuf_pool; + + /** Ring of LACP packets from RX burst function */ + struct rte_ring *rx_ring; + + /** Ring of slow protocol packets (LACP and MARKERS) to TX burst function */ + struct rte_ring *tx_ring; + + /** Timer which is also used as mutex. If is 0 (not running) RX marker + * packet might be responded. Otherwise shall be dropped. It is zeroed in + * mode 4 callback function after expire. 
*/ + volatile uint64_t rx_marker_timer; + + uint64_t warning_timer; + volatile uint16_t warnings_to_show; + + /** Memory pool used to allocate slow queues */ + struct rte_mempool *slow_pool; +}; + +struct mode8023ad_private { + uint64_t fast_periodic_timeout; + uint64_t slow_periodic_timeout; + uint64_t short_timeout; + uint64_t long_timeout; + uint64_t aggregate_wait_timeout; + uint64_t tx_period_timeout; + uint64_t rx_marker_timeout; + uint64_t update_timeout_us; + rte_eth_bond_8023ad_ext_slowrx_fn slowrx_cb; + uint8_t external_sm; + struct rte_ether_addr mac_addr; + + struct rte_eth_link slave_link; + /***< slave link properties */ + + /** + * Configuration of dedicated hardware queues for control plane + * traffic + */ + struct { + uint8_t enabled; + + struct rte_flow *flow[RTE_MAX_ETHPORTS]; + + uint16_t rx_qid; + uint16_t tx_qid; + } dedicated_queues; + enum rte_bond_8023ad_agg_selection agg_selection; +}; + +/** + * @internal + * The pool of *port* structures. The size of the pool + * is configured at compile-time in the <rte_eth_bond_8023ad.c> file. + */ +extern struct port bond_mode_8023ad_ports[]; + +/* Forward declaration */ +struct bond_dev_private; + + +/** + * @internal + * + * Set mode 4 configuration of bonded interface. + * + * @pre Bonded interface must be stopped. + * + * @param dev Bonded interface + * @param conf new configuration. If NULL set default configuration. + */ +void +bond_mode_8023ad_setup(struct rte_eth_dev *dev, + struct rte_eth_bond_8023ad_conf *conf); + +/** + * @internal + * + * Enables 802.1AX mode and all active slaves on bonded interface. + * + * @param dev Bonded interface + * @return + * 0 on success, negative value otherwise. + */ +int +bond_mode_8023ad_enable(struct rte_eth_dev *dev); + +/** + * @internal + * + * Disables 802.1AX mode of the bonded interface and slaves. + * + * @param dev Bonded interface + * @return + * 0 on success, negative value otherwise. + */ +int bond_mode_8023ad_disable(struct rte_eth_dev *dev); + +/** + * @internal + * + * Starts 802.3AX state machines management logic. + * @param dev Bonded interface + * @return + * 0 if machines was started, 1 if machines was already running, + * negative value otherwise. + */ +int +bond_mode_8023ad_start(struct rte_eth_dev *dev); + +/** + * @internal + * + * Stops 802.3AX state machines management logic. + * @param dev Bonded interface + * @return + * 0 if this call stopped state machines, -ENOENT if alarm was not set. + */ +void +bond_mode_8023ad_stop(struct rte_eth_dev *dev); + +/** + * @internal + * + * Passes given slow packet to state machines management logic. + * @param internals Bonded device private data. + * @param slave_id Slave port id. + * @param slot_pkt Slow packet. + */ +void +bond_mode_8023ad_handle_slow_pkt(struct bond_dev_private *internals, + uint16_t slave_id, struct rte_mbuf *pkt); + +/** + * @internal + * + * Appends given slave used slave + * + * @param dev Bonded interface. + * @param port_id Slave port ID to be added + * + * @return + * 0 on success, negative value otherwise. + */ +void +bond_mode_8023ad_activate_slave(struct rte_eth_dev *dev, uint16_t port_id); + +/** + * @internal + * + * Denitializes and removes given slave from 802.1AX mode. + * + * @param dev Bonded interface. + * @param slave_num Position of slave in active_slaves array + * + * @return + * 0 on success, negative value otherwise. 
+ */ +int +bond_mode_8023ad_deactivate_slave(struct rte_eth_dev *dev, uint16_t slave_pos); + +/** + * Updates state when MAC was changed on bonded device or one of its slaves. + * @param bond_dev Bonded device + */ +void +bond_mode_8023ad_mac_address_update(struct rte_eth_dev *bond_dev); + +int +bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev, + uint16_t slave_port); + +int +bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint16_t slave_port); + +int +bond_8023ad_slow_pkt_hw_filter_supported(uint16_t port_id); + +#endif /* _ETH_BOND_8023AD_H_ */ diff --git a/src/spdk/dpdk/drivers/net/bonding/eth_bond_private.h b/src/spdk/dpdk/drivers/net/bonding/eth_bond_private.h new file mode 100644 index 000000000..c9b2d0fe4 --- /dev/null +++ b/src/spdk/dpdk/drivers/net/bonding/eth_bond_private.h @@ -0,0 +1,324 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2017 Intel Corporation + */ + +#ifndef _ETH_BOND_PRIVATE_H_ +#define _ETH_BOND_PRIVATE_H_ + +#include <stdint.h> +#include <sys/queue.h> + +#include <rte_ethdev_driver.h> +#include <rte_flow.h> +#include <rte_spinlock.h> +#include <rte_bitmap.h> +#include <rte_flow_driver.h> + +#include "rte_eth_bond.h" +#include "eth_bond_8023ad_private.h" +#include "rte_eth_bond_alb.h" + +#define PMD_BOND_SLAVE_PORT_KVARG ("slave") +#define PMD_BOND_PRIMARY_SLAVE_KVARG ("primary") +#define PMD_BOND_MODE_KVARG ("mode") +#define PMD_BOND_AGG_MODE_KVARG ("agg_mode") +#define PMD_BOND_XMIT_POLICY_KVARG ("xmit_policy") +#define PMD_BOND_SOCKET_ID_KVARG ("socket_id") +#define PMD_BOND_MAC_ADDR_KVARG ("mac") +#define PMD_BOND_LSC_POLL_PERIOD_KVARG ("lsc_poll_period_ms") +#define PMD_BOND_LINK_UP_PROP_DELAY_KVARG ("up_delay") +#define PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG ("down_delay") + +#define PMD_BOND_XMIT_POLICY_LAYER2_KVARG ("l2") +#define PMD_BOND_XMIT_POLICY_LAYER23_KVARG ("l23") +#define PMD_BOND_XMIT_POLICY_LAYER34_KVARG ("l34") + +extern int bond_logtype; + +#define RTE_BOND_LOG(lvl, msg, ...) 
\ + rte_log(RTE_LOG_ ## lvl, bond_logtype, \ + "%s(%d) - " msg "\n", __func__, __LINE__, ##__VA_ARGS__) + +#define BONDING_MODE_INVALID 0xFF + +extern const char *pmd_bond_init_valid_arguments[]; + +extern struct rte_vdev_driver pmd_bond_drv; + +extern const struct rte_flow_ops bond_flow_ops; + +/** Port Queue Mapping Structure */ +struct bond_rx_queue { + uint16_t queue_id; + /**< Queue Id */ + struct bond_dev_private *dev_private; + /**< Reference to eth_dev private structure */ + uint16_t nb_rx_desc; + /**< Number of RX descriptors available for the queue */ + struct rte_eth_rxconf rx_conf; + /**< Copy of RX configuration structure for queue */ + struct rte_mempool *mb_pool; + /**< Reference to mbuf pool to use for RX queue */ +}; + +struct bond_tx_queue { + uint16_t queue_id; + /**< Queue Id */ + struct bond_dev_private *dev_private; + /**< Reference to dev private structure */ + uint16_t nb_tx_desc; + /**< Number of TX descriptors available for the queue */ + struct rte_eth_txconf tx_conf; + /**< Copy of TX configuration structure for queue */ +}; + +/** Bonded slave devices structure */ +struct bond_ethdev_slave_ports { + uint16_t slaves[RTE_MAX_ETHPORTS]; /**< Slave port id array */ + uint16_t slave_count; /**< Number of slaves */ +}; + +struct bond_slave_details { + uint16_t port_id; + + uint8_t link_status_poll_enabled; + uint8_t link_status_wait_to_complete; + uint8_t last_link_status; + /**< Port Id of slave eth_dev */ + struct rte_ether_addr persisted_mac_addr; + + uint16_t reta_size; +}; + +struct rte_flow { + TAILQ_ENTRY(rte_flow) next; + /* Slaves flows */ + struct rte_flow *flows[RTE_MAX_ETHPORTS]; + /* Flow description for synchronization */ + struct rte_flow_conv_rule rule; + uint8_t rule_data[]; +}; + +typedef void (*burst_xmit_hash_t)(struct rte_mbuf **buf, uint16_t nb_pkts, + uint16_t slave_count, uint16_t *slaves); + +/** Link Bonding PMD device private configuration Structure */ +struct bond_dev_private { + uint16_t port_id; /**< Port Id of Bonded Port */ + uint8_t mode; /**< Link Bonding Mode */ + + rte_spinlock_t lock; + rte_spinlock_t lsc_lock; + + uint16_t primary_port; /**< Primary Slave Port */ + uint16_t current_primary_port; /**< Primary Slave Port */ + uint16_t user_defined_primary_port; + /**< Flag for whether primary port is user defined or not */ + + uint8_t balance_xmit_policy; + /**< Transmit policy - l2 / l23 / l34 for operation in balance mode */ + burst_xmit_hash_t burst_xmit_hash; + /**< Transmit policy hash function */ + + uint8_t user_defined_mac; + /**< Flag for whether MAC address is user defined or not */ + + uint8_t link_status_polling_enabled; + uint32_t link_status_polling_interval_ms; + + uint32_t link_down_delay_ms; + uint32_t link_up_delay_ms; + + uint16_t nb_rx_queues; /**< Total number of rx queues */ + uint16_t nb_tx_queues; /**< Total number of tx queues*/ + + uint16_t active_slave; /**< Next active_slave to poll */ + uint16_t active_slave_count; /**< Number of active slaves */ + uint16_t active_slaves[RTE_MAX_ETHPORTS]; /**< Active slave list */ + + uint16_t slave_count; /**< Number of bonded slaves */ + struct bond_slave_details slaves[RTE_MAX_ETHPORTS]; + /**< Arary of bonded slaves details */ + + struct mode8023ad_private mode4; + uint16_t tlb_slaves_order[RTE_MAX_ETHPORTS]; + /**< TLB active slaves send order */ + struct mode_alb_private mode6; + + uint64_t rx_offload_capa; /** Rx offload capability */ + uint64_t tx_offload_capa; /** Tx offload capability */ + uint64_t rx_queue_offload_capa; /** per queue Rx offload capability */ 
+ uint64_t tx_queue_offload_capa; /** per queue Tx offload capability */ + + /**< List of the configured flows */ + TAILQ_HEAD(sub_flows, rte_flow) flow_list; + + /**< Flow isolation state */ + int flow_isolated; + int flow_isolated_valid; + + /** Bit mask of RSS offloads, the bit offset also means flow type */ + uint64_t flow_type_rss_offloads; + + struct rte_eth_rxconf default_rxconf; /**< Default RxQ conf. */ + struct rte_eth_txconf default_txconf; /**< Default TxQ conf. */ + struct rte_eth_desc_lim rx_desc_lim; /**< Rx descriptor limits */ + struct rte_eth_desc_lim tx_desc_lim; /**< Tx descriptor limits */ + + uint16_t reta_size; + struct rte_eth_rss_reta_entry64 reta_conf[ETH_RSS_RETA_SIZE_512 / + RTE_RETA_GROUP_SIZE]; + + uint8_t rss_key[52]; /**< 52-byte hash key buffer. */ + uint8_t rss_key_len; /**< hash key length in bytes. */ + + struct rte_kvargs *kvlist; + uint8_t slave_update_idx; + + uint32_t candidate_max_rx_pktlen; + uint32_t max_rx_pktlen; + + void *vlan_filter_bmpmem; /* enabled vlan filter bitmap */ + struct rte_bitmap *vlan_filter_bmp; +}; + +extern const struct eth_dev_ops default_dev_ops; + +int +check_for_master_bonded_ethdev(const struct rte_eth_dev *eth_dev); + +int +check_for_bonded_ethdev(const struct rte_eth_dev *eth_dev); + +/* Search given slave array to find position of given id. + * Return slave pos or slaves_count if not found. */ +static inline uint16_t +find_slave_by_id(uint16_t *slaves, uint16_t slaves_count, uint16_t slave_id) { + + uint16_t pos; + for (pos = 0; pos < slaves_count; pos++) { + if (slave_id == slaves[pos]) + break; + } + + return pos; +} + +int +valid_port_id(uint16_t port_id); + +int +valid_bonded_port_id(uint16_t port_id); + +int +valid_slave_port_id(uint16_t port_id, uint8_t mode); + +void +deactivate_slave(struct rte_eth_dev *eth_dev, uint16_t port_id); + +void +activate_slave(struct rte_eth_dev *eth_dev, uint16_t port_id); + +int +mac_address_set(struct rte_eth_dev *eth_dev, + struct rte_ether_addr *new_mac_addr); + +int +mac_address_get(struct rte_eth_dev *eth_dev, + struct rte_ether_addr *dst_mac_addr); + +int +mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev); + +int +slave_add_mac_addresses(struct rte_eth_dev *bonded_eth_dev, + uint16_t slave_port_id); + +int +slave_remove_mac_addresses(struct rte_eth_dev *bonded_eth_dev, + uint16_t slave_port_id); + +int +bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode); + +int +slave_configure(struct rte_eth_dev *bonded_eth_dev, + struct rte_eth_dev *slave_eth_dev); + +void +slave_remove(struct bond_dev_private *internals, + struct rte_eth_dev *slave_eth_dev); + +void +slave_add(struct bond_dev_private *internals, + struct rte_eth_dev *slave_eth_dev); + +void +burst_xmit_l2_hash(struct rte_mbuf **buf, uint16_t nb_pkts, + uint16_t slave_count, uint16_t *slaves); + +void +burst_xmit_l23_hash(struct rte_mbuf **buf, uint16_t nb_pkts, + uint16_t slave_count, uint16_t *slaves); + +void +burst_xmit_l34_hash(struct rte_mbuf **buf, uint16_t nb_pkts, + uint16_t slave_count, uint16_t *slaves); + + +void +bond_ethdev_primary_set(struct bond_dev_private *internals, + uint16_t slave_port_id); + +int +bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type, + void *param, void *ret_param); + +int +bond_ethdev_parse_slave_port_kvarg(const char *key, + const char *value, void *extra_args); + +int +bond_ethdev_parse_slave_mode_kvarg(const char *key, + const char *value, void *extra_args); + +int +bond_ethdev_parse_slave_agg_mode_kvarg(const char *key __rte_unused, + 
const char *value, void *extra_args); + +int +bond_ethdev_parse_socket_id_kvarg(const char *key, + const char *value, void *extra_args); + +int +bond_ethdev_parse_primary_slave_port_id_kvarg(const char *key, + const char *value, void *extra_args); + +int +bond_ethdev_parse_balance_xmit_policy_kvarg(const char *key, + const char *value, void *extra_args); + +int +bond_ethdev_parse_bond_mac_addr_kvarg(const char *key, + const char *value, void *extra_args); + +int +bond_ethdev_parse_time_ms_kvarg(const char *key, + const char *value, void *extra_args); + +void +bond_tlb_disable(struct bond_dev_private *internals); + +void +bond_tlb_enable(struct bond_dev_private *internals); + +void +bond_tlb_activate_slave(struct bond_dev_private *internals); + +void +bond_ethdev_stop(struct rte_eth_dev *eth_dev); + +void +bond_ethdev_close(struct rte_eth_dev *dev); + +#endif diff --git a/src/spdk/dpdk/drivers/net/bonding/meson.build b/src/spdk/dpdk/drivers/net/bonding/meson.build new file mode 100644 index 000000000..a3eff3b31 --- /dev/null +++ b/src/spdk/dpdk/drivers/net/bonding/meson.build @@ -0,0 +1,11 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2017 Intel Corporation + +name = 'bond' #, james bond :-) +sources = files('rte_eth_bond_api.c', 'rte_eth_bond_pmd.c', 'rte_eth_bond_flow.c', + 'rte_eth_bond_args.c', 'rte_eth_bond_8023ad.c', 'rte_eth_bond_alb.c') + +deps += 'sched' # needed for rte_bitmap.h +deps += ['ip_frag'] + +install_headers('rte_eth_bond.h', 'rte_eth_bond_8023ad.h') diff --git a/src/spdk/dpdk/drivers/net/bonding/rte_eth_bond.h b/src/spdk/dpdk/drivers/net/bonding/rte_eth_bond.h new file mode 100644 index 000000000..874aa91a5 --- /dev/null +++ b/src/spdk/dpdk/drivers/net/bonding/rte_eth_bond.h @@ -0,0 +1,351 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2015 Intel Corporation + */ + +#ifndef _RTE_ETH_BOND_H_ +#define _RTE_ETH_BOND_H_ + +/** + * @file rte_eth_bond.h + * + * RTE Link Bonding Ethernet Device + * Link Bonding for 1GbE and 10GbE ports to allow the aggregation of multiple + * (slave) NICs into a single logical interface. The bonded device processes + * these interfaces based on the mode of operation specified and supported. + * This implementation supports 4 modes of operation round robin, active backup + * balance and broadcast. Providing redundant links, fault tolerance and/or + * load balancing of network ports + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include <rte_ether.h> + +/* Supported modes of operation of link bonding library */ + +#define BONDING_MODE_ROUND_ROBIN (0) +/**< Round Robin (Mode 0). + * In this mode all transmitted packets will be balanced equally across all + * active slaves of the bonded in a round robin fashion. */ +#define BONDING_MODE_ACTIVE_BACKUP (1) +/**< Active Backup (Mode 1). + * In this mode all packets transmitted will be transmitted on the primary + * slave until such point as the primary slave is no longer available and then + * transmitted packets will be sent on the next available slaves. The primary + * slave can be defined by the user but defaults to the first active slave + * available if not specified. */ +#define BONDING_MODE_BALANCE (2) +/**< Balance (Mode 2). + * In this mode all packets transmitted will be balanced across the available + * slaves using one of three available transmit policies - l2, l2+3 or l3+4. + * See BALANCE_XMIT_POLICY macros definitions for further details on transmit + * policies. */ +#define BONDING_MODE_BROADCAST (3) +/**< Broadcast (Mode 3). 
+ * In this mode all transmitted packets will be transmitted on all available + * active slaves of the bonded. */ +#define BONDING_MODE_8023AD (4) +/**< 802.3AD (Mode 4). + * + * This mode provides auto negotiation/configuration + * of peers and well as link status changes monitoring using out of band + * LACP (link aggregation control protocol) messages. For further details of + * LACP specification see the IEEE 802.3ad/802.1AX standards. It is also + * described here + * https://www.kernel.org/doc/Documentation/networking/bonding.txt. + * + * Important Usage Notes: + * - for LACP mode to work the rx/tx burst functions must be invoked + * at least once every 100ms, otherwise the out-of-band LACP messages will not + * be handled with the expected latency and this may cause the link status to be + * incorrectly marked as down or failure to correctly negotiate with peers. + * - For optimal performance during initial handshaking the array of mbufs provided + * to rx_burst should be at least 2 times the slave count size. + * + */ +#define BONDING_MODE_TLB (5) +/**< Adaptive TLB (Mode 5) + * This mode provides an adaptive transmit load balancing. It dynamically + * changes the transmitting slave, according to the computed load. Statistics + * are collected in 100ms intervals and scheduled every 10ms */ +#define BONDING_MODE_ALB (6) +/**< Adaptive Load Balancing (Mode 6) + * This mode includes adaptive TLB and receive load balancing (RLB). In RLB the + * bonding driver intercepts ARP replies send by local system and overwrites its + * source MAC address, so that different peers send data to the server on + * different slave interfaces. When local system sends ARP request, it saves IP + * information from it. When ARP reply from that peer is received, its MAC is + * stored, one of slave MACs assigned and ARP reply send to that peer. + */ + +/* Balance Mode Transmit Policies */ +#define BALANCE_XMIT_POLICY_LAYER2 (0) +/**< Layer 2 (Ethernet MAC) */ +#define BALANCE_XMIT_POLICY_LAYER23 (1) +/**< Layer 2+3 (Ethernet MAC + IP Addresses) transmit load balancing */ +#define BALANCE_XMIT_POLICY_LAYER34 (2) +/**< Layer 3+4 (IP Addresses + UDP Ports) transmit load balancing */ + +/** + * Create a bonded rte_eth_dev device + * + * @param name Name of new link bonding device. + * @param mode Mode to initialize bonding device in. + * @param socket_id Socket Id on which to allocate eth_dev resources. + * + * @return + * Port Id of created rte_eth_dev on success, negative value otherwise + */ +int +rte_eth_bond_create(const char *name, uint8_t mode, uint8_t socket_id); + +/** + * Free a bonded rte_eth_dev device + * + * @param name Name of the link bonding device. + * + * @return + * 0 on success, negative value otherwise + */ +int +rte_eth_bond_free(const char *name); + +/** + * Add a rte_eth_dev device as a slave to the bonded device + * + * @param bonded_port_id Port ID of bonded device. + * @param slave_port_id Port ID of slave device. + * + * @return + * 0 on success, negative value otherwise + */ +int +rte_eth_bond_slave_add(uint16_t bonded_port_id, uint16_t slave_port_id); + +/** + * Remove a slave rte_eth_dev device from the bonded device + * + * @param bonded_port_id Port ID of bonded device. + * @param slave_port_id Port ID of slave device. + * + * @return + * 0 on success, negative value otherwise + */ +int +rte_eth_bond_slave_remove(uint16_t bonded_port_id, uint16_t slave_port_id); + +/** + * Set link bonding mode of bonded device + * + * @param bonded_port_id Port ID of bonded device. 
+ * @param mode Bonding mode to set + * + * @return + * 0 on success, negative value otherwise + */ +int +rte_eth_bond_mode_set(uint16_t bonded_port_id, uint8_t mode); + +/** + * Get link bonding mode of bonded device + * + * @param bonded_port_id Port ID of bonded device. + * + * @return + * link bonding mode on success, negative value otherwise + */ +int +rte_eth_bond_mode_get(uint16_t bonded_port_id); + +/** + * Set slave rte_eth_dev as primary slave of bonded device + * + * @param bonded_port_id Port ID of bonded device. + * @param slave_port_id Port ID of slave device. + * + * @return + * 0 on success, negative value otherwise + */ +int +rte_eth_bond_primary_set(uint16_t bonded_port_id, uint16_t slave_port_id); + +/** + * Get primary slave of bonded device + * + * @param bonded_port_id Port ID of bonded device. + * + * @return + * Port Id of primary slave on success, -1 on failure + */ +int +rte_eth_bond_primary_get(uint16_t bonded_port_id); + +/** + * Populate an array with list of the slaves port id's of the bonded device + * + * @param bonded_port_id Port ID of bonded eth_dev to interrogate + * @param slaves Array to be populated with the current active slaves + * @param len Length of slaves array + * + * @return + * Number of slaves associated with bonded device on success, + * negative value otherwise + */ +int +rte_eth_bond_slaves_get(uint16_t bonded_port_id, uint16_t slaves[], + uint16_t len); + +/** + * Populate an array with list of the active slaves port id's of the bonded + * device. + * + * @param bonded_port_id Port ID of bonded eth_dev to interrogate + * @param slaves Array to be populated with the current active slaves + * @param len Length of slaves array + * + * @return + * Number of active slaves associated with bonded device on success, + * negative value otherwise + */ +int +rte_eth_bond_active_slaves_get(uint16_t bonded_port_id, uint16_t slaves[], + uint16_t len); + +/** + * Set explicit MAC address to use on bonded device and it's slaves. + * + * @param bonded_port_id Port ID of bonded device. + * @param mac_addr MAC Address to use on bonded device overriding + * slaves MAC addresses + * + * @return + * 0 on success, negative value otherwise + */ +int +rte_eth_bond_mac_address_set(uint16_t bonded_port_id, + struct rte_ether_addr *mac_addr); + +/** + * Reset bonded device to use MAC from primary slave on bonded device and it's + * slaves. + * + * @param bonded_port_id Port ID of bonded device. + * + * @return + * 0 on success, negative value otherwise + */ +int +rte_eth_bond_mac_address_reset(uint16_t bonded_port_id); + +/** + * Set the transmit policy for bonded device to use when it is operating in + * balance mode, this parameter is otherwise ignored in other modes of + * operation. + * + * @param bonded_port_id Port ID of bonded device. + * @param policy Balance mode transmission policy. + * + * @return + * 0 on success, negative value otherwise. + */ +int +rte_eth_bond_xmit_policy_set(uint16_t bonded_port_id, uint8_t policy); + +/** + * Get the transmit policy set on bonded device for balance mode operation + * + * @param bonded_port_id Port ID of bonded device. + * + * @return + * Balance transmit policy on success, negative value otherwise. + */ +int +rte_eth_bond_xmit_policy_get(uint16_t bonded_port_id); + +/** + * Set the link monitoring frequency (in ms) for monitoring the link status of + * slave devices + * + * @param bonded_port_id Port ID of bonded device. 
+ * @param internal_ms Monitoring interval in milliseconds + * + * @return + * 0 on success, negative value otherwise. + */ + +int +rte_eth_bond_link_monitoring_set(uint16_t bonded_port_id, uint32_t internal_ms); + +/** + * Get the current link monitoring frequency (in ms) for monitoring of the link + * status of slave devices + * + * @param bonded_port_id Port ID of bonded device. + * + * @return + * Monitoring interval on success, negative value otherwise. + */ +int +rte_eth_bond_link_monitoring_get(uint16_t bonded_port_id); + + +/** + * Set the period in milliseconds for delaying the disabling of a bonded link + * when the link down status has been detected + * + * @param bonded_port_id Port ID of bonded device. + * @param delay_ms Delay period in milliseconds. + * + * @return + * 0 on success, negative value otherwise. + */ +int +rte_eth_bond_link_down_prop_delay_set(uint16_t bonded_port_id, + uint32_t delay_ms); + +/** + * Get the period in milliseconds set for delaying the disabling of a bonded + * link when the link down status has been detected + * + * @param bonded_port_id Port ID of bonded device. + * + * @return + * Delay period on success, negative value otherwise. + */ +int +rte_eth_bond_link_down_prop_delay_get(uint16_t bonded_port_id); + +/** + * Set the period in milliseconds for delaying the enabling of a bonded link + * when the link up status has been detected + * + * @param bonded_port_id Port ID of bonded device. + * @param delay_ms Delay period in milliseconds. + * + * @return + * 0 on success, negative value otherwise. + */ +int +rte_eth_bond_link_up_prop_delay_set(uint16_t bonded_port_id, + uint32_t delay_ms); + +/** + * Get the period in milliseconds set for delaying the enabling of a bonded + * link when the link up status has been detected + * + * @param bonded_port_id Port ID of bonded device. + * + * @return + * Delay period on success, negative value otherwise. + */ +int +rte_eth_bond_link_up_prop_delay_get(uint16_t bonded_port_id); + + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/spdk/dpdk/drivers/net/bonding/rte_eth_bond_8023ad.c b/src/spdk/dpdk/drivers/net/bonding/rte_eth_bond_8023ad.c new file mode 100644 index 000000000..b77a37ddb --- /dev/null +++ b/src/spdk/dpdk/drivers/net/bonding/rte_eth_bond_8023ad.c @@ -0,0 +1,1719 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2015 Intel Corporation + */ + +#include <stddef.h> +#include <string.h> +#include <stdbool.h> + +#include <rte_alarm.h> +#include <rte_malloc.h> +#include <rte_errno.h> +#include <rte_cycles.h> +#include <rte_compat.h> + +#include "eth_bond_private.h" + +static void bond_mode_8023ad_ext_periodic_cb(void *arg); +#ifdef RTE_LIBRTE_BOND_DEBUG_8023AD + +#define MODE4_DEBUG(fmt, ...) 
\ + rte_log(RTE_LOG_DEBUG, bond_logtype, \ + "%6u [Port %u: %s] " fmt, \ + bond_dbg_get_time_diff_ms(), slave_id, \ + __func__, ##__VA_ARGS__) + +static uint64_t start_time; + +static unsigned +bond_dbg_get_time_diff_ms(void) +{ + uint64_t now; + + now = rte_rdtsc(); + if (start_time == 0) + start_time = now; + + return ((now - start_time) * 1000) / rte_get_tsc_hz(); +} + +static void +bond_print_lacp(struct lacpdu *l) +{ + char a_address[18]; + char p_address[18]; + char a_state[256] = { 0 }; + char p_state[256] = { 0 }; + + static const char * const state_labels[] = { + "ACT", "TIMEOUT", "AGG", "SYNC", "COL", "DIST", "DEF", "EXP" + }; + + int a_len = 0; + int p_len = 0; + uint8_t i; + uint8_t *addr; + + addr = l->actor.port_params.system.addr_bytes; + snprintf(a_address, sizeof(a_address), "%02X:%02X:%02X:%02X:%02X:%02X", + addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]); + + addr = l->partner.port_params.system.addr_bytes; + snprintf(p_address, sizeof(p_address), "%02X:%02X:%02X:%02X:%02X:%02X", + addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]); + + for (i = 0; i < 8; i++) { + if ((l->actor.state >> i) & 1) { + a_len += snprintf(&a_state[a_len], RTE_DIM(a_state) - a_len, "%s ", + state_labels[i]); + } + + if ((l->partner.state >> i) & 1) { + p_len += snprintf(&p_state[p_len], RTE_DIM(p_state) - p_len, "%s ", + state_labels[i]); + } + } + + if (a_len && a_state[a_len-1] == ' ') + a_state[a_len-1] = '\0'; + + if (p_len && p_state[p_len-1] == ' ') + p_state[p_len-1] = '\0'; + + RTE_BOND_LOG(DEBUG, + "LACP: {\n" + " subtype= %02X\n" + " ver_num=%02X\n" + " actor={ tlv=%02X, len=%02X\n" + " pri=%04X, system=%s, key=%04X, p_pri=%04X p_num=%04X\n" + " state={ %s }\n" + " }\n" + " partner={ tlv=%02X, len=%02X\n" + " pri=%04X, system=%s, key=%04X, p_pri=%04X p_num=%04X\n" + " state={ %s }\n" + " }\n" + " collector={info=%02X, length=%02X, max_delay=%04X\n, " + "type_term=%02X, terminator_length = %02X }", + l->subtype, + l->version_number, + l->actor.tlv_type_info, + l->actor.info_length, + l->actor.port_params.system_priority, + a_address, + l->actor.port_params.key, + l->actor.port_params.port_priority, + l->actor.port_params.port_number, + a_state, + l->partner.tlv_type_info, + l->partner.info_length, + l->partner.port_params.system_priority, + p_address, + l->partner.port_params.key, + l->partner.port_params.port_priority, + l->partner.port_params.port_number, + p_state, + l->tlv_type_collector_info, + l->collector_info_length, + l->collector_max_delay, + l->tlv_type_terminator, + l->terminator_length); + +} + +#define BOND_PRINT_LACP(lacpdu) bond_print_lacp(lacpdu) +#else +#define BOND_PRINT_LACP(lacpdu) do { } while (0) +#define MODE4_DEBUG(fmt, ...) do { } while (0) +#endif + +static const struct rte_ether_addr lacp_mac_addr = { + .addr_bytes = { 0x01, 0x80, 0xC2, 0x00, 0x00, 0x02 } +}; + +struct port bond_mode_8023ad_ports[RTE_MAX_ETHPORTS]; + +static void +timer_cancel(uint64_t *timer) +{ + *timer = 0; +} + +static void +timer_set(uint64_t *timer, uint64_t timeout) +{ + *timer = rte_rdtsc() + timeout; +} + +/* Forces given timer to be in expired state. 
*/ +static void +timer_force_expired(uint64_t *timer) +{ + *timer = rte_rdtsc(); +} + +static bool +timer_is_stopped(uint64_t *timer) +{ + return *timer == 0; +} + +static bool +timer_is_expired(uint64_t *timer) +{ + return *timer < rte_rdtsc(); +} + +/* Timer is in running state if it is not stopped nor expired */ +static bool +timer_is_running(uint64_t *timer) +{ + return !timer_is_stopped(timer) && !timer_is_expired(timer); +} + +static void +set_warning_flags(struct port *port, uint16_t flags) +{ + int retval; + uint16_t old; + uint16_t new_flag = 0; + + do { + old = port->warnings_to_show; + new_flag = old | flags; + retval = rte_atomic16_cmpset(&port->warnings_to_show, old, new_flag); + } while (unlikely(retval == 0)); +} + +static void +show_warnings(uint16_t slave_id) +{ + struct port *port = &bond_mode_8023ad_ports[slave_id]; + uint8_t warnings; + + do { + warnings = port->warnings_to_show; + } while (rte_atomic16_cmpset(&port->warnings_to_show, warnings, 0) == 0); + + if (!warnings) + return; + + if (!timer_is_expired(&port->warning_timer)) + return; + + + timer_set(&port->warning_timer, BOND_8023AD_WARNINGS_PERIOD_MS * + rte_get_tsc_hz() / 1000); + + if (warnings & WRN_RX_QUEUE_FULL) { + RTE_BOND_LOG(DEBUG, + "Slave %u: failed to enqueue LACP packet into RX ring.\n" + "Receive and transmit functions must be invoked on bonded" + "interface at least 10 times per second or LACP will notwork correctly", + slave_id); + } + + if (warnings & WRN_TX_QUEUE_FULL) { + RTE_BOND_LOG(DEBUG, + "Slave %u: failed to enqueue LACP packet into TX ring.\n" + "Receive and transmit functions must be invoked on bonded" + "interface at least 10 times per second or LACP will not work correctly", + slave_id); + } + + if (warnings & WRN_RX_MARKER_TO_FAST) + RTE_BOND_LOG(INFO, "Slave %u: marker to early - ignoring.", + slave_id); + + if (warnings & WRN_UNKNOWN_SLOW_TYPE) { + RTE_BOND_LOG(INFO, + "Slave %u: ignoring unknown slow protocol frame type", + slave_id); + } + + if (warnings & WRN_UNKNOWN_MARKER_TYPE) + RTE_BOND_LOG(INFO, "Slave %u: ignoring unknown marker type", + slave_id); + + if (warnings & WRN_NOT_LACP_CAPABLE) + MODE4_DEBUG("Port %u is not LACP capable!\n", slave_id); +} + +static void +record_default(struct port *port) +{ + /* Record default parameters for partner. Partner admin parameters + * are not implemented so set them to arbitrary default (last known) and + * mark actor that parner is in defaulted state. */ + port->partner_state = STATE_LACP_ACTIVE; + ACTOR_STATE_SET(port, DEFAULTED); +} + +/** Function handles rx state machine. + * + * This function implements Receive State Machine from point 5.4.12 in + * 802.1AX documentation. It should be called periodically. + * + * @param lacpdu LACPDU received. + * @param port Port on which LACPDU was received. 
+ */ +static void +rx_machine(struct bond_dev_private *internals, uint16_t slave_id, + struct lacpdu *lacp) +{ + struct port *agg, *port = &bond_mode_8023ad_ports[slave_id]; + uint64_t timeout; + + if (SM_FLAG(port, BEGIN)) { + /* Initialize stuff */ + MODE4_DEBUG("-> INITIALIZE\n"); + SM_FLAG_CLR(port, MOVED); + port->selected = UNSELECTED; + + record_default(port); + + ACTOR_STATE_CLR(port, EXPIRED); + timer_cancel(&port->current_while_timer); + + /* DISABLED: On initialization partner is out of sync */ + PARTNER_STATE_CLR(port, SYNCHRONIZATION); + + /* LACP DISABLED stuff if LACP not enabled on this port */ + if (!SM_FLAG(port, LACP_ENABLED)) + PARTNER_STATE_CLR(port, AGGREGATION); + else + PARTNER_STATE_SET(port, AGGREGATION); + } + + if (!SM_FLAG(port, LACP_ENABLED)) { + /* Update parameters only if state changed */ + if (!timer_is_stopped(&port->current_while_timer)) { + port->selected = UNSELECTED; + record_default(port); + PARTNER_STATE_CLR(port, AGGREGATION); + ACTOR_STATE_CLR(port, EXPIRED); + timer_cancel(&port->current_while_timer); + } + return; + } + + if (lacp) { + MODE4_DEBUG("LACP -> CURRENT\n"); + BOND_PRINT_LACP(lacp); + /* Update selected flag. If partner parameters are defaulted assume they + * are match. If not defaulted compare LACP actor with ports parner + * params. */ + if (!ACTOR_STATE(port, DEFAULTED) && + (ACTOR_STATE(port, AGGREGATION) != PARTNER_STATE(port, AGGREGATION) + || memcmp(&port->partner, &lacp->actor.port_params, + sizeof(port->partner)) != 0)) { + MODE4_DEBUG("selected <- UNSELECTED\n"); + port->selected = UNSELECTED; + } + + /* Record this PDU actor params as partner params */ + memcpy(&port->partner, &lacp->actor.port_params, + sizeof(struct port_params)); + port->partner_state = lacp->actor.state; + + /* Partner parameters are not defaulted any more */ + ACTOR_STATE_CLR(port, DEFAULTED); + + /* If LACP partner params match this port actor params */ + agg = &bond_mode_8023ad_ports[port->aggregator_port_id]; + bool match = port->actor.system_priority == + lacp->partner.port_params.system_priority && + rte_is_same_ether_addr(&agg->actor.system, + &lacp->partner.port_params.system) && + port->actor.port_priority == + lacp->partner.port_params.port_priority && + port->actor.port_number == + lacp->partner.port_params.port_number; + + /* Update NTT if partners information are outdated (xored and masked + * bits are set)*/ + uint8_t state_mask = STATE_LACP_ACTIVE | STATE_LACP_SHORT_TIMEOUT | + STATE_SYNCHRONIZATION | STATE_AGGREGATION; + + if (((port->actor_state ^ lacp->partner.state) & state_mask) || + match == false) { + SM_FLAG_SET(port, NTT); + } + + /* If LACP partner params match this port actor params */ + if (match == true && ACTOR_STATE(port, AGGREGATION) == + PARTNER_STATE(port, AGGREGATION)) + PARTNER_STATE_SET(port, SYNCHRONIZATION); + else if (!PARTNER_STATE(port, AGGREGATION) && ACTOR_STATE(port, + AGGREGATION)) + PARTNER_STATE_SET(port, SYNCHRONIZATION); + else + PARTNER_STATE_CLR(port, SYNCHRONIZATION); + + if (ACTOR_STATE(port, LACP_SHORT_TIMEOUT)) + timeout = internals->mode4.short_timeout; + else + timeout = internals->mode4.long_timeout; + + timer_set(&port->current_while_timer, timeout); + ACTOR_STATE_CLR(port, EXPIRED); + return; /* No state change */ + } + + /* If CURRENT state timer is not running (stopped or expired) + * transit to EXPIRED state from DISABLED or CURRENT */ + if (!timer_is_running(&port->current_while_timer)) { + ACTOR_STATE_SET(port, EXPIRED); + PARTNER_STATE_CLR(port, SYNCHRONIZATION); + 
PARTNER_STATE_SET(port, LACP_SHORT_TIMEOUT); + timer_set(&port->current_while_timer, internals->mode4.short_timeout); + } +} + +/** + * Function handles periodic tx state machine. + * + * Function implements Periodic Transmission state machine from point 5.4.13 + * in 802.1AX documentation. It should be called periodically. + * + * @param port Port to handle state machine. + */ +static void +periodic_machine(struct bond_dev_private *internals, uint16_t slave_id) +{ + struct port *port = &bond_mode_8023ad_ports[slave_id]; + /* Calculate if either site is LACP enabled */ + uint64_t timeout; + uint8_t active = ACTOR_STATE(port, LACP_ACTIVE) || + PARTNER_STATE(port, LACP_ACTIVE); + + uint8_t is_partner_fast, was_partner_fast; + /* No periodic is on BEGIN, LACP DISABLE or when both sides are pasive */ + if (SM_FLAG(port, BEGIN) || !SM_FLAG(port, LACP_ENABLED) || !active) { + timer_cancel(&port->periodic_timer); + timer_force_expired(&port->tx_machine_timer); + SM_FLAG_CLR(port, PARTNER_SHORT_TIMEOUT); + + MODE4_DEBUG("-> NO_PERIODIC ( %s%s%s)\n", + SM_FLAG(port, BEGIN) ? "begind " : "", + SM_FLAG(port, LACP_ENABLED) ? "" : "LACP disabled ", + active ? "LACP active " : "LACP pasive "); + return; + } + + is_partner_fast = PARTNER_STATE(port, LACP_SHORT_TIMEOUT); + was_partner_fast = SM_FLAG(port, PARTNER_SHORT_TIMEOUT); + + /* If periodic timer is not started, transit from NO PERIODIC to FAST/SLOW. + * Other case: check if timer expire or partners settings changed. */ + if (!timer_is_stopped(&port->periodic_timer)) { + if (timer_is_expired(&port->periodic_timer)) { + SM_FLAG_SET(port, NTT); + } else if (is_partner_fast != was_partner_fast) { + /* Partners timeout was slow and now it is fast -> send LACP. + * In other case (was fast and now it is slow) just switch + * timeout to slow without forcing send of LACP (because standard + * say so)*/ + if (is_partner_fast) + SM_FLAG_SET(port, NTT); + } else + return; /* Nothing changed */ + } + + /* Handle state transition to FAST/SLOW LACP timeout */ + if (is_partner_fast) { + timeout = internals->mode4.fast_periodic_timeout; + SM_FLAG_SET(port, PARTNER_SHORT_TIMEOUT); + } else { + timeout = internals->mode4.slow_periodic_timeout; + SM_FLAG_CLR(port, PARTNER_SHORT_TIMEOUT); + } + + timer_set(&port->periodic_timer, timeout); +} + +/** + * Function handles mux state machine. + * + * Function implements Mux Machine from point 5.4.15 in 802.1AX documentation. + * It should be called periodically. + * + * @param port Port to handle state machine. 
+ */ +static void +mux_machine(struct bond_dev_private *internals, uint16_t slave_id) +{ + struct port *port = &bond_mode_8023ad_ports[slave_id]; + + /* Save current state for later use */ + const uint8_t state_mask = STATE_SYNCHRONIZATION | STATE_DISTRIBUTING | + STATE_COLLECTING; + + /* Enter DETACHED state on BEGIN condition or from any other state if + * port was unselected */ + if (SM_FLAG(port, BEGIN) || + port->selected == UNSELECTED || (port->selected == STANDBY && + (port->actor_state & state_mask) != 0)) { + /* detach mux from aggregator */ + port->actor_state &= ~state_mask; + /* Set ntt to true if BEGIN condition or transition from any other state + * which is indicated that wait_while_timer was started */ + if (SM_FLAG(port, BEGIN) || + !timer_is_stopped(&port->wait_while_timer)) { + SM_FLAG_SET(port, NTT); + MODE4_DEBUG("-> DETACHED\n"); + } + timer_cancel(&port->wait_while_timer); + } + + if (timer_is_stopped(&port->wait_while_timer)) { + if (port->selected == SELECTED || port->selected == STANDBY) { + timer_set(&port->wait_while_timer, + internals->mode4.aggregate_wait_timeout); + + MODE4_DEBUG("DETACHED -> WAITING\n"); + } + /* Waiting state entered */ + return; + } + + /* Transit next state if port is ready */ + if (!timer_is_expired(&port->wait_while_timer)) + return; + + if ((ACTOR_STATE(port, DISTRIBUTING) || ACTOR_STATE(port, COLLECTING)) && + !PARTNER_STATE(port, SYNCHRONIZATION)) { + /* If in COLLECTING or DISTRIBUTING state and partner becomes out of + * sync transit to ATACHED state. */ + ACTOR_STATE_CLR(port, DISTRIBUTING); + ACTOR_STATE_CLR(port, COLLECTING); + /* Clear actor sync to activate transit ATACHED in condition bellow */ + ACTOR_STATE_CLR(port, SYNCHRONIZATION); + MODE4_DEBUG("Out of sync -> ATTACHED\n"); + } + + if (!ACTOR_STATE(port, SYNCHRONIZATION)) { + /* attach mux to aggregator */ + RTE_ASSERT((port->actor_state & (STATE_COLLECTING | + STATE_DISTRIBUTING)) == 0); + + ACTOR_STATE_SET(port, SYNCHRONIZATION); + SM_FLAG_SET(port, NTT); + MODE4_DEBUG("ATTACHED Entered\n"); + } else if (!ACTOR_STATE(port, COLLECTING)) { + /* Start collecting if in sync */ + if (PARTNER_STATE(port, SYNCHRONIZATION)) { + MODE4_DEBUG("ATTACHED -> COLLECTING\n"); + ACTOR_STATE_SET(port, COLLECTING); + SM_FLAG_SET(port, NTT); + } + } else if (ACTOR_STATE(port, COLLECTING)) { + /* Check if partner is in COLLECTING state. If so this port can + * distribute frames to it */ + if (!ACTOR_STATE(port, DISTRIBUTING)) { + if (PARTNER_STATE(port, COLLECTING)) { + /* Enable DISTRIBUTING if partner is collecting */ + ACTOR_STATE_SET(port, DISTRIBUTING); + SM_FLAG_SET(port, NTT); + MODE4_DEBUG("COLLECTING -> DISTRIBUTING\n"); + RTE_BOND_LOG(INFO, + "Bond %u: slave id %u distributing started.", + internals->port_id, slave_id); + } + } else { + if (!PARTNER_STATE(port, COLLECTING)) { + /* Disable DISTRIBUTING (enter COLLECTING state) if partner + * is not collecting */ + ACTOR_STATE_CLR(port, DISTRIBUTING); + SM_FLAG_SET(port, NTT); + MODE4_DEBUG("DISTRIBUTING -> COLLECTING\n"); + RTE_BOND_LOG(INFO, + "Bond %u: slave id %u distributing stopped.", + internals->port_id, slave_id); + } + } + } +} + +/** + * Function handles transmit state machine. + * + * Function implements Transmit Machine from point 5.4.16 in 802.1AX + * documentation. 
+ * + * @param port + */ +static void +tx_machine(struct bond_dev_private *internals, uint16_t slave_id) +{ + struct port *agg, *port = &bond_mode_8023ad_ports[slave_id]; + + struct rte_mbuf *lacp_pkt = NULL; + struct lacpdu_header *hdr; + struct lacpdu *lacpdu; + + /* If periodic timer is not running periodic machine is in NO PERIODIC and + * according to 802.3ax standard tx machine should not transmit any frames + * and set ntt to false. */ + if (timer_is_stopped(&port->periodic_timer)) + SM_FLAG_CLR(port, NTT); + + if (!SM_FLAG(port, NTT)) + return; + + if (!timer_is_expired(&port->tx_machine_timer)) + return; + + lacp_pkt = rte_pktmbuf_alloc(port->mbuf_pool); + if (lacp_pkt == NULL) { + RTE_BOND_LOG(ERR, "Failed to allocate LACP packet from pool"); + return; + } + + lacp_pkt->data_len = sizeof(*hdr); + lacp_pkt->pkt_len = sizeof(*hdr); + + hdr = rte_pktmbuf_mtod(lacp_pkt, struct lacpdu_header *); + + /* Source and destination MAC */ + rte_ether_addr_copy(&lacp_mac_addr, &hdr->eth_hdr.d_addr); + rte_eth_macaddr_get(slave_id, &hdr->eth_hdr.s_addr); + hdr->eth_hdr.ether_type = rte_cpu_to_be_16(RTE_ETHER_TYPE_SLOW); + + lacpdu = &hdr->lacpdu; + memset(lacpdu, 0, sizeof(*lacpdu)); + + /* Initialize LACP part */ + lacpdu->subtype = SLOW_SUBTYPE_LACP; + lacpdu->version_number = 1; + + /* ACTOR */ + lacpdu->actor.tlv_type_info = TLV_TYPE_ACTOR_INFORMATION; + lacpdu->actor.info_length = sizeof(struct lacpdu_actor_partner_params); + memcpy(&hdr->lacpdu.actor.port_params, &port->actor, + sizeof(port->actor)); + agg = &bond_mode_8023ad_ports[port->aggregator_port_id]; + rte_ether_addr_copy(&agg->actor.system, + &hdr->lacpdu.actor.port_params.system); + lacpdu->actor.state = port->actor_state; + + /* PARTNER */ + lacpdu->partner.tlv_type_info = TLV_TYPE_PARTNER_INFORMATION; + lacpdu->partner.info_length = sizeof(struct lacpdu_actor_partner_params); + memcpy(&lacpdu->partner.port_params, &port->partner, + sizeof(struct port_params)); + lacpdu->partner.state = port->partner_state; + + /* Other fields */ + lacpdu->tlv_type_collector_info = TLV_TYPE_COLLECTOR_INFORMATION; + lacpdu->collector_info_length = 0x10; + lacpdu->collector_max_delay = 0; + + lacpdu->tlv_type_terminator = TLV_TYPE_TERMINATOR_INFORMATION; + lacpdu->terminator_length = 0; + + MODE4_DEBUG("Sending LACP frame\n"); + BOND_PRINT_LACP(lacpdu); + + if (internals->mode4.dedicated_queues.enabled == 0) { + int retval = rte_ring_enqueue(port->tx_ring, lacp_pkt); + if (retval != 0) { + /* If TX ring full, drop packet and free message. + Retransmission will happen in next function call. */ + rte_pktmbuf_free(lacp_pkt); + set_warning_flags(port, WRN_TX_QUEUE_FULL); + return; + } + } else { + uint16_t pkts_sent = rte_eth_tx_burst(slave_id, + internals->mode4.dedicated_queues.tx_qid, + &lacp_pkt, 1); + if (pkts_sent != 1) { + rte_pktmbuf_free(lacp_pkt); + set_warning_flags(port, WRN_TX_QUEUE_FULL); + return; + } + } + + + timer_set(&port->tx_machine_timer, internals->mode4.tx_period_timeout); + SM_FLAG_CLR(port, NTT); +} + +static uint16_t +max_index(uint64_t *a, int n) +{ + if (n <= 0) + return -1; + + int i, max_i = 0; + uint64_t max = a[0]; + + for (i = 1; i < n; ++i) { + if (a[i] > max) { + max = a[i]; + max_i = i; + } + } + + return max_i; +} + +/** + * Function assigns port to aggregator. + * + * @param bond_dev_private Pointer to bond_dev_private structure. + * @param port_pos Port to assign. 
+ */ +static void +selection_logic(struct bond_dev_private *internals, uint16_t slave_id) +{ + struct port *agg, *port; + uint16_t slaves_count, new_agg_id, i, j = 0; + uint16_t *slaves; + uint64_t agg_bandwidth[RTE_MAX_ETHPORTS] = {0}; + uint64_t agg_count[RTE_MAX_ETHPORTS] = {0}; + uint16_t default_slave = 0; + struct rte_eth_link link_info; + uint16_t agg_new_idx = 0; + int ret; + + slaves = internals->active_slaves; + slaves_count = internals->active_slave_count; + port = &bond_mode_8023ad_ports[slave_id]; + + /* Search for aggregator suitable for this port */ + for (i = 0; i < slaves_count; ++i) { + agg = &bond_mode_8023ad_ports[slaves[i]]; + /* Skip ports that are not aggreagators */ + if (agg->aggregator_port_id != slaves[i]) + continue; + + ret = rte_eth_link_get_nowait(slaves[i], &link_info); + if (ret < 0) { + RTE_BOND_LOG(ERR, + "Slave (port %u) link get failed: %s\n", + slaves[i], rte_strerror(-ret)); + continue; + } + agg_count[i] += 1; + agg_bandwidth[i] += link_info.link_speed; + + /* Actors system ID is not checked since all slave device have the same + * ID (MAC address). */ + if ((agg->actor.key == port->actor.key && + agg->partner.system_priority == port->partner.system_priority && + rte_is_same_ether_addr(&agg->partner.system, + &port->partner.system) == 1 + && (agg->partner.key == port->partner.key)) && + rte_is_zero_ether_addr(&port->partner.system) != 1 && + (agg->actor.key & + rte_cpu_to_be_16(BOND_LINK_FULL_DUPLEX_KEY)) != 0) { + + if (j == 0) + default_slave = i; + j++; + } + } + + switch (internals->mode4.agg_selection) { + case AGG_COUNT: + agg_new_idx = max_index(agg_count, slaves_count); + new_agg_id = slaves[agg_new_idx]; + break; + case AGG_BANDWIDTH: + agg_new_idx = max_index(agg_bandwidth, slaves_count); + new_agg_id = slaves[agg_new_idx]; + break; + case AGG_STABLE: + if (default_slave == slaves_count) + new_agg_id = slaves[slave_id]; + else + new_agg_id = slaves[default_slave]; + break; + default: + if (default_slave == slaves_count) + new_agg_id = slaves[slave_id]; + else + new_agg_id = slaves[default_slave]; + break; + } + + if (new_agg_id != port->aggregator_port_id) { + port->aggregator_port_id = new_agg_id; + + MODE4_DEBUG("-> SELECTED: ID=%3u\n" + "\t%s aggregator ID=%3u\n", + port->aggregator_port_id, + port->aggregator_port_id == slave_id ? 
+ "aggregator not found, using default" : "aggregator found", + port->aggregator_port_id); + } + + port->selected = SELECTED; +} + +/* Function maps DPDK speed to bonding speed stored in key field */ +static uint16_t +link_speed_key(uint16_t speed) { + uint16_t key_speed; + + switch (speed) { + case ETH_SPEED_NUM_NONE: + key_speed = 0x00; + break; + case ETH_SPEED_NUM_10M: + key_speed = BOND_LINK_SPEED_KEY_10M; + break; + case ETH_SPEED_NUM_100M: + key_speed = BOND_LINK_SPEED_KEY_100M; + break; + case ETH_SPEED_NUM_1G: + key_speed = BOND_LINK_SPEED_KEY_1000M; + break; + case ETH_SPEED_NUM_10G: + key_speed = BOND_LINK_SPEED_KEY_10G; + break; + case ETH_SPEED_NUM_20G: + key_speed = BOND_LINK_SPEED_KEY_20G; + break; + case ETH_SPEED_NUM_40G: + key_speed = BOND_LINK_SPEED_KEY_40G; + break; + default: + /* Unknown speed*/ + key_speed = 0xFFFF; + } + + return key_speed; +} + +static void +rx_machine_update(struct bond_dev_private *internals, uint16_t slave_id, + struct rte_mbuf *lacp_pkt) { + struct lacpdu_header *lacp; + struct lacpdu_actor_partner_params *partner; + + if (lacp_pkt != NULL) { + lacp = rte_pktmbuf_mtod(lacp_pkt, struct lacpdu_header *); + RTE_ASSERT(lacp->lacpdu.subtype == SLOW_SUBTYPE_LACP); + + partner = &lacp->lacpdu.partner; + if (rte_is_same_ether_addr(&partner->port_params.system, + &internals->mode4.mac_addr)) { + /* This LACP frame is sending to the bonding port + * so pass it to rx_machine. + */ + rx_machine(internals, slave_id, &lacp->lacpdu); + } + rte_pktmbuf_free(lacp_pkt); + } else + rx_machine(internals, slave_id, NULL); +} + +static void +bond_mode_8023ad_periodic_cb(void *arg) +{ + struct rte_eth_dev *bond_dev = arg; + struct bond_dev_private *internals = bond_dev->data->dev_private; + struct port *port; + struct rte_eth_link link_info; + struct rte_ether_addr slave_addr; + struct rte_mbuf *lacp_pkt = NULL; + uint16_t slave_id; + uint16_t i; + + + /* Update link status on each port */ + for (i = 0; i < internals->active_slave_count; i++) { + uint16_t key; + int ret; + + slave_id = internals->active_slaves[i]; + ret = rte_eth_link_get_nowait(slave_id, &link_info); + if (ret < 0) { + RTE_BOND_LOG(ERR, + "Slave (port %u) link get failed: %s\n", + slave_id, rte_strerror(-ret)); + } + + if (ret >= 0 && link_info.link_status != 0) { + key = link_speed_key(link_info.link_speed) << 1; + if (link_info.link_duplex == ETH_LINK_FULL_DUPLEX) + key |= BOND_LINK_FULL_DUPLEX_KEY; + } else { + key = 0; + } + + rte_eth_macaddr_get(slave_id, &slave_addr); + port = &bond_mode_8023ad_ports[slave_id]; + + key = rte_cpu_to_be_16(key); + if (key != port->actor.key) { + if (!(key & rte_cpu_to_be_16(BOND_LINK_FULL_DUPLEX_KEY))) + set_warning_flags(port, WRN_NOT_LACP_CAPABLE); + + port->actor.key = key; + SM_FLAG_SET(port, NTT); + } + + if (!rte_is_same_ether_addr(&port->actor.system, &slave_addr)) { + rte_ether_addr_copy(&slave_addr, &port->actor.system); + if (port->aggregator_port_id == slave_id) + SM_FLAG_SET(port, NTT); + } + } + + for (i = 0; i < internals->active_slave_count; i++) { + slave_id = internals->active_slaves[i]; + port = &bond_mode_8023ad_ports[slave_id]; + + if ((port->actor.key & + rte_cpu_to_be_16(BOND_LINK_FULL_DUPLEX_KEY)) == 0) { + + SM_FLAG_SET(port, BEGIN); + + /* LACP is disabled on half duples or link is down */ + if (SM_FLAG(port, LACP_ENABLED)) { + /* If port was enabled set it to BEGIN state */ + SM_FLAG_CLR(port, LACP_ENABLED); + ACTOR_STATE_CLR(port, DISTRIBUTING); + ACTOR_STATE_CLR(port, COLLECTING); + } + + /* Skip this port processing */ + continue; + 
} + + SM_FLAG_SET(port, LACP_ENABLED); + + if (internals->mode4.dedicated_queues.enabled == 0) { + /* Find LACP packet to this port. Do not check subtype, + * it is done in function that queued packet + */ + int retval = rte_ring_dequeue(port->rx_ring, + (void **)&lacp_pkt); + + if (retval != 0) + lacp_pkt = NULL; + + rx_machine_update(internals, slave_id, lacp_pkt); + } else { + uint16_t rx_count = rte_eth_rx_burst(slave_id, + internals->mode4.dedicated_queues.rx_qid, + &lacp_pkt, 1); + + if (rx_count == 1) + bond_mode_8023ad_handle_slow_pkt(internals, + slave_id, lacp_pkt); + else + rx_machine_update(internals, slave_id, NULL); + } + + periodic_machine(internals, slave_id); + mux_machine(internals, slave_id); + tx_machine(internals, slave_id); + selection_logic(internals, slave_id); + + SM_FLAG_CLR(port, BEGIN); + show_warnings(slave_id); + } + + rte_eal_alarm_set(internals->mode4.update_timeout_us, + bond_mode_8023ad_periodic_cb, arg); +} + +static int +bond_mode_8023ad_register_lacp_mac(uint16_t slave_id) +{ + int ret; + + ret = rte_eth_allmulticast_enable(slave_id); + if (ret != 0) { + RTE_BOND_LOG(ERR, + "failed to enable allmulti mode for port %u: %s", + slave_id, rte_strerror(-ret)); + } + if (rte_eth_allmulticast_get(slave_id)) { + RTE_BOND_LOG(DEBUG, "forced allmulti for port %u", + slave_id); + bond_mode_8023ad_ports[slave_id].forced_rx_flags = + BOND_8023AD_FORCED_ALLMULTI; + return 0; + } + + ret = rte_eth_promiscuous_enable(slave_id); + if (ret != 0) { + RTE_BOND_LOG(ERR, + "failed to enable promiscuous mode for port %u: %s", + slave_id, rte_strerror(-ret)); + } + if (rte_eth_promiscuous_get(slave_id)) { + RTE_BOND_LOG(DEBUG, "forced promiscuous for port %u", + slave_id); + bond_mode_8023ad_ports[slave_id].forced_rx_flags = + BOND_8023AD_FORCED_PROMISC; + return 0; + } + + return -1; +} + +static void +bond_mode_8023ad_unregister_lacp_mac(uint16_t slave_id) +{ + int ret; + + switch (bond_mode_8023ad_ports[slave_id].forced_rx_flags) { + case BOND_8023AD_FORCED_ALLMULTI: + RTE_BOND_LOG(DEBUG, "unset allmulti for port %u", slave_id); + ret = rte_eth_allmulticast_disable(slave_id); + if (ret != 0) + RTE_BOND_LOG(ERR, + "failed to disable allmulti mode for port %u: %s", + slave_id, rte_strerror(-ret)); + break; + + case BOND_8023AD_FORCED_PROMISC: + RTE_BOND_LOG(DEBUG, "unset promisc for port %u", slave_id); + ret = rte_eth_promiscuous_disable(slave_id); + if (ret != 0) + RTE_BOND_LOG(ERR, + "failed to disable promiscuous mode for port %u: %s", + slave_id, rte_strerror(-ret)); + break; + + default: + break; + } +} + +void +bond_mode_8023ad_activate_slave(struct rte_eth_dev *bond_dev, + uint16_t slave_id) +{ + struct bond_dev_private *internals = bond_dev->data->dev_private; + + struct port *port = &bond_mode_8023ad_ports[slave_id]; + struct port_params initial = { + .system = { { 0 } }, + .system_priority = rte_cpu_to_be_16(0xFFFF), + .key = rte_cpu_to_be_16(BOND_LINK_FULL_DUPLEX_KEY), + .port_priority = rte_cpu_to_be_16(0x00FF), + .port_number = 0, + }; + + char mem_name[RTE_ETH_NAME_MAX_LEN]; + int socket_id; + unsigned element_size; + uint32_t total_tx_desc; + struct bond_tx_queue *bd_tx_q; + uint16_t q_id; + + /* Given slave mus not be in active list */ + RTE_ASSERT(find_slave_by_id(internals->active_slaves, + internals->active_slave_count, slave_id) == internals->active_slave_count); + RTE_SET_USED(internals); /* used only for assert when enabled */ + + memcpy(&port->actor, &initial, sizeof(struct port_params)); + /* Standard requires that port ID must be grater than 0. 
+ * Add 1 do get corresponding port_number */ + port->actor.port_number = rte_cpu_to_be_16(slave_id + 1); + + memcpy(&port->partner, &initial, sizeof(struct port_params)); + + /* default states */ + port->actor_state = STATE_AGGREGATION | STATE_LACP_ACTIVE | STATE_DEFAULTED; + port->partner_state = STATE_LACP_ACTIVE | STATE_AGGREGATION; + port->sm_flags = SM_FLAGS_BEGIN; + + /* use this port as agregator */ + port->aggregator_port_id = slave_id; + + if (bond_mode_8023ad_register_lacp_mac(slave_id) < 0) { + RTE_BOND_LOG(WARNING, "slave %u is most likely broken and won't receive LACP packets", + slave_id); + } + + timer_cancel(&port->warning_timer); + + if (port->mbuf_pool != NULL) + return; + + RTE_ASSERT(port->rx_ring == NULL); + RTE_ASSERT(port->tx_ring == NULL); + + socket_id = rte_eth_dev_socket_id(slave_id); + if (socket_id == (int)LCORE_ID_ANY) + socket_id = rte_socket_id(); + + element_size = sizeof(struct slow_protocol_frame) + + RTE_PKTMBUF_HEADROOM; + + /* The size of the mempool should be at least: + * the sum of the TX descriptors + BOND_MODE_8023AX_SLAVE_TX_PKTS */ + total_tx_desc = BOND_MODE_8023AX_SLAVE_TX_PKTS; + for (q_id = 0; q_id < bond_dev->data->nb_tx_queues; q_id++) { + bd_tx_q = (struct bond_tx_queue*)bond_dev->data->tx_queues[q_id]; + total_tx_desc += bd_tx_q->nb_tx_desc; + } + + snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_pool", slave_id); + port->mbuf_pool = rte_pktmbuf_pool_create(mem_name, total_tx_desc, + RTE_MEMPOOL_CACHE_MAX_SIZE >= 32 ? + 32 : RTE_MEMPOOL_CACHE_MAX_SIZE, + 0, element_size, socket_id); + + /* Any memory allocation failure in initialization is critical because + * resources can't be free, so reinitialization is impossible. */ + if (port->mbuf_pool == NULL) { + rte_panic("Slave %u: Failed to create memory pool '%s': %s\n", + slave_id, mem_name, rte_strerror(rte_errno)); + } + + snprintf(mem_name, RTE_DIM(mem_name), "slave_%u_rx", slave_id); + port->rx_ring = rte_ring_create(mem_name, + rte_align32pow2(BOND_MODE_8023AX_SLAVE_RX_PKTS), socket_id, 0); + + if (port->rx_ring == NULL) { + rte_panic("Slave %u: Failed to create rx ring '%s': %s\n", slave_id, + mem_name, rte_strerror(rte_errno)); + } + + /* TX ring is at least one pkt longer to make room for marker packet. 
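+ * The requested count (BOND_MODE_8023AX_SLAVE_TX_PKTS + 1) is rounded up
+ * to the next power of two below, as rte_ring_create() requires.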
*/ + snprintf(mem_name, RTE_DIM(mem_name), "slave_%u_tx", slave_id); + port->tx_ring = rte_ring_create(mem_name, + rte_align32pow2(BOND_MODE_8023AX_SLAVE_TX_PKTS + 1), socket_id, 0); + + if (port->tx_ring == NULL) { + rte_panic("Slave %u: Failed to create tx ring '%s': %s\n", slave_id, + mem_name, rte_strerror(rte_errno)); + } +} + +int +bond_mode_8023ad_deactivate_slave(struct rte_eth_dev *bond_dev __rte_unused, + uint16_t slave_id) +{ + void *pkt = NULL; + struct port *port = NULL; + uint8_t old_partner_state; + + port = &bond_mode_8023ad_ports[slave_id]; + + ACTOR_STATE_CLR(port, AGGREGATION); + port->selected = UNSELECTED; + + old_partner_state = port->partner_state; + record_default(port); + + bond_mode_8023ad_unregister_lacp_mac(slave_id); + + /* If partner timeout state changes then disable timer */ + if (!((old_partner_state ^ port->partner_state) & + STATE_LACP_SHORT_TIMEOUT)) + timer_cancel(&port->current_while_timer); + + PARTNER_STATE_CLR(port, AGGREGATION); + ACTOR_STATE_CLR(port, EXPIRED); + + /* flush rx/tx rings */ + while (rte_ring_dequeue(port->rx_ring, &pkt) == 0) + rte_pktmbuf_free((struct rte_mbuf *)pkt); + + while (rte_ring_dequeue(port->tx_ring, &pkt) == 0) + rte_pktmbuf_free((struct rte_mbuf *)pkt); + return 0; +} + +void +bond_mode_8023ad_mac_address_update(struct rte_eth_dev *bond_dev) +{ + struct bond_dev_private *internals = bond_dev->data->dev_private; + struct rte_ether_addr slave_addr; + struct port *slave, *agg_slave; + uint16_t slave_id, i, j; + + bond_mode_8023ad_stop(bond_dev); + + for (i = 0; i < internals->active_slave_count; i++) { + slave_id = internals->active_slaves[i]; + slave = &bond_mode_8023ad_ports[slave_id]; + rte_eth_macaddr_get(slave_id, &slave_addr); + + if (rte_is_same_ether_addr(&slave_addr, &slave->actor.system)) + continue; + + rte_ether_addr_copy(&slave_addr, &slave->actor.system); + /* Do nothing if this port is not an aggregator. In other case + * Set NTT flag on every port that use this aggregator. 
*/ + if (slave->aggregator_port_id != slave_id) + continue; + + for (j = 0; j < internals->active_slave_count; j++) { + agg_slave = &bond_mode_8023ad_ports[internals->active_slaves[j]]; + if (agg_slave->aggregator_port_id == slave_id) + SM_FLAG_SET(agg_slave, NTT); + } + } + + if (bond_dev->data->dev_started) + bond_mode_8023ad_start(bond_dev); +} + +static void +bond_mode_8023ad_conf_get(struct rte_eth_dev *dev, + struct rte_eth_bond_8023ad_conf *conf) +{ + struct bond_dev_private *internals = dev->data->dev_private; + struct mode8023ad_private *mode4 = &internals->mode4; + uint64_t ms_ticks = rte_get_tsc_hz() / 1000; + + conf->fast_periodic_ms = mode4->fast_periodic_timeout / ms_ticks; + conf->slow_periodic_ms = mode4->slow_periodic_timeout / ms_ticks; + conf->short_timeout_ms = mode4->short_timeout / ms_ticks; + conf->long_timeout_ms = mode4->long_timeout / ms_ticks; + conf->aggregate_wait_timeout_ms = mode4->aggregate_wait_timeout / ms_ticks; + conf->tx_period_ms = mode4->tx_period_timeout / ms_ticks; + conf->update_timeout_ms = mode4->update_timeout_us / 1000; + conf->rx_marker_period_ms = mode4->rx_marker_timeout / ms_ticks; + conf->slowrx_cb = mode4->slowrx_cb; + conf->agg_selection = mode4->agg_selection; +} + +static void +bond_mode_8023ad_conf_get_default(struct rte_eth_bond_8023ad_conf *conf) +{ + conf->fast_periodic_ms = BOND_8023AD_FAST_PERIODIC_MS; + conf->slow_periodic_ms = BOND_8023AD_SLOW_PERIODIC_MS; + conf->short_timeout_ms = BOND_8023AD_SHORT_TIMEOUT_MS; + conf->long_timeout_ms = BOND_8023AD_LONG_TIMEOUT_MS; + conf->aggregate_wait_timeout_ms = BOND_8023AD_AGGREGATE_WAIT_TIMEOUT_MS; + conf->tx_period_ms = BOND_8023AD_TX_MACHINE_PERIOD_MS; + conf->rx_marker_period_ms = BOND_8023AD_RX_MARKER_PERIOD_MS; + conf->update_timeout_ms = BOND_MODE_8023AX_UPDATE_TIMEOUT_MS; + conf->slowrx_cb = NULL; + conf->agg_selection = AGG_STABLE; +} + +static void +bond_mode_8023ad_conf_assign(struct mode8023ad_private *mode4, + struct rte_eth_bond_8023ad_conf *conf) +{ + uint64_t ms_ticks = rte_get_tsc_hz() / 1000; + + mode4->fast_periodic_timeout = conf->fast_periodic_ms * ms_ticks; + mode4->slow_periodic_timeout = conf->slow_periodic_ms * ms_ticks; + mode4->short_timeout = conf->short_timeout_ms * ms_ticks; + mode4->long_timeout = conf->long_timeout_ms * ms_ticks; + mode4->aggregate_wait_timeout = conf->aggregate_wait_timeout_ms * ms_ticks; + mode4->tx_period_timeout = conf->tx_period_ms * ms_ticks; + mode4->rx_marker_timeout = conf->rx_marker_period_ms * ms_ticks; + mode4->update_timeout_us = conf->update_timeout_ms * 1000; + + mode4->dedicated_queues.enabled = 0; + mode4->dedicated_queues.rx_qid = UINT16_MAX; + mode4->dedicated_queues.tx_qid = UINT16_MAX; +} + +void +bond_mode_8023ad_setup(struct rte_eth_dev *dev, + struct rte_eth_bond_8023ad_conf *conf) +{ + struct rte_eth_bond_8023ad_conf def_conf; + struct bond_dev_private *internals = dev->data->dev_private; + struct mode8023ad_private *mode4 = &internals->mode4; + + if (conf == NULL) { + conf = &def_conf; + bond_mode_8023ad_conf_get_default(conf); + } + + bond_mode_8023ad_stop(dev); + bond_mode_8023ad_conf_assign(mode4, conf); + mode4->slowrx_cb = conf->slowrx_cb; + mode4->agg_selection = AGG_STABLE; + + if (dev->data->dev_started) + bond_mode_8023ad_start(dev); +} + +int +bond_mode_8023ad_enable(struct rte_eth_dev *bond_dev) +{ + struct bond_dev_private *internals = bond_dev->data->dev_private; + uint16_t i; + + for (i = 0; i < internals->active_slave_count; i++) + bond_mode_8023ad_activate_slave(bond_dev, + 
internals->active_slaves[i]); + + return 0; +} + +int +bond_mode_8023ad_start(struct rte_eth_dev *bond_dev) +{ + struct bond_dev_private *internals = bond_dev->data->dev_private; + struct mode8023ad_private *mode4 = &internals->mode4; + static const uint64_t us = BOND_MODE_8023AX_UPDATE_TIMEOUT_MS * 1000; + + rte_eth_macaddr_get(internals->port_id, &mode4->mac_addr); + if (mode4->slowrx_cb) + return rte_eal_alarm_set(us, &bond_mode_8023ad_ext_periodic_cb, + bond_dev); + + return rte_eal_alarm_set(us, &bond_mode_8023ad_periodic_cb, bond_dev); +} + +void +bond_mode_8023ad_stop(struct rte_eth_dev *bond_dev) +{ + struct bond_dev_private *internals = bond_dev->data->dev_private; + struct mode8023ad_private *mode4 = &internals->mode4; + + if (mode4->slowrx_cb) { + rte_eal_alarm_cancel(&bond_mode_8023ad_ext_periodic_cb, + bond_dev); + return; + } + rte_eal_alarm_cancel(&bond_mode_8023ad_periodic_cb, bond_dev); +} + +void +bond_mode_8023ad_handle_slow_pkt(struct bond_dev_private *internals, + uint16_t slave_id, struct rte_mbuf *pkt) +{ + struct mode8023ad_private *mode4 = &internals->mode4; + struct port *port = &bond_mode_8023ad_ports[slave_id]; + struct marker_header *m_hdr; + uint64_t marker_timer, old_marker_timer; + int retval; + uint8_t wrn, subtype; + /* If packet is a marker, we send response now by reusing given packet + * and update only source MAC, destination MAC is multicast so don't + * update it. Other frames will be handled later by state machines */ + subtype = rte_pktmbuf_mtod(pkt, + struct slow_protocol_frame *)->slow_protocol.subtype; + + if (subtype == SLOW_SUBTYPE_MARKER) { + m_hdr = rte_pktmbuf_mtod(pkt, struct marker_header *); + + if (likely(m_hdr->marker.tlv_type_marker != MARKER_TLV_TYPE_INFO)) { + wrn = WRN_UNKNOWN_MARKER_TYPE; + goto free_out; + } + + /* Setup marker timer. Do it in loop in case concurrent access. 
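+ * rte_atomic64_cmpset() replaces rx_marker_timer only if it still holds
+ * the value sampled at the top of the loop, so the update is retried
+ * until no concurrent writer interferes.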
*/ + do { + old_marker_timer = port->rx_marker_timer; + if (!timer_is_expired(&old_marker_timer)) { + wrn = WRN_RX_MARKER_TO_FAST; + goto free_out; + } + + timer_set(&marker_timer, mode4->rx_marker_timeout); + retval = rte_atomic64_cmpset(&port->rx_marker_timer, + old_marker_timer, marker_timer); + } while (unlikely(retval == 0)); + + m_hdr->marker.tlv_type_marker = MARKER_TLV_TYPE_RESP; + rte_eth_macaddr_get(slave_id, &m_hdr->eth_hdr.s_addr); + + if (internals->mode4.dedicated_queues.enabled == 0) { + int retval = rte_ring_enqueue(port->tx_ring, pkt); + if (retval != 0) { + /* reset timer */ + port->rx_marker_timer = 0; + wrn = WRN_TX_QUEUE_FULL; + goto free_out; + } + } else { + /* Send packet directly to the slow queue */ + uint16_t tx_count = rte_eth_tx_burst(slave_id, + internals->mode4.dedicated_queues.tx_qid, + &pkt, 1); + if (tx_count != 1) { + /* reset timer */ + port->rx_marker_timer = 0; + wrn = WRN_TX_QUEUE_FULL; + goto free_out; + } + } + } else if (likely(subtype == SLOW_SUBTYPE_LACP)) { + if (internals->mode4.dedicated_queues.enabled == 0) { + int retval = rte_ring_enqueue(port->rx_ring, pkt); + if (retval != 0) { + /* If RX fing full free lacpdu message and drop packet */ + wrn = WRN_RX_QUEUE_FULL; + goto free_out; + } + } else + rx_machine_update(internals, slave_id, pkt); + } else { + wrn = WRN_UNKNOWN_SLOW_TYPE; + goto free_out; + } + + return; + +free_out: + set_warning_flags(port, wrn); + rte_pktmbuf_free(pkt); +} + +int +rte_eth_bond_8023ad_conf_get(uint16_t port_id, + struct rte_eth_bond_8023ad_conf *conf) +{ + struct rte_eth_dev *bond_dev; + + if (valid_bonded_port_id(port_id) != 0) + return -EINVAL; + + if (conf == NULL) + return -EINVAL; + + bond_dev = &rte_eth_devices[port_id]; + bond_mode_8023ad_conf_get(bond_dev, conf); + return 0; +} + +int +rte_eth_bond_8023ad_agg_selection_set(uint16_t port_id, + enum rte_bond_8023ad_agg_selection agg_selection) +{ + struct rte_eth_dev *bond_dev; + struct bond_dev_private *internals; + struct mode8023ad_private *mode4; + + if (valid_bonded_port_id(port_id) != 0) + return -EINVAL; + + bond_dev = &rte_eth_devices[port_id]; + internals = bond_dev->data->dev_private; + + if (internals->mode != 4) + return -EINVAL; + + mode4 = &internals->mode4; + if (agg_selection == AGG_COUNT || agg_selection == AGG_BANDWIDTH + || agg_selection == AGG_STABLE) + mode4->agg_selection = agg_selection; + return 0; +} + +int rte_eth_bond_8023ad_agg_selection_get(uint16_t port_id) +{ + struct rte_eth_dev *bond_dev; + struct bond_dev_private *internals; + struct mode8023ad_private *mode4; + + if (valid_bonded_port_id(port_id) != 0) + return -EINVAL; + + bond_dev = &rte_eth_devices[port_id]; + internals = bond_dev->data->dev_private; + + if (internals->mode != 4) + return -EINVAL; + mode4 = &internals->mode4; + + return mode4->agg_selection; +} + + + +static int +bond_8023ad_setup_validate(uint16_t port_id, + struct rte_eth_bond_8023ad_conf *conf) +{ + if (valid_bonded_port_id(port_id) != 0) + return -EINVAL; + + if (conf != NULL) { + /* Basic sanity check */ + if (conf->slow_periodic_ms == 0 || + conf->fast_periodic_ms >= conf->slow_periodic_ms || + conf->long_timeout_ms == 0 || + conf->short_timeout_ms >= conf->long_timeout_ms || + conf->aggregate_wait_timeout_ms == 0 || + conf->tx_period_ms == 0 || + conf->rx_marker_period_ms == 0 || + conf->update_timeout_ms == 0) { + RTE_BOND_LOG(ERR, "given mode 4 configuration is invalid"); + return -EINVAL; + } + } + + return 0; +} + + +int +rte_eth_bond_8023ad_setup(uint16_t port_id, + struct 
rte_eth_bond_8023ad_conf *conf) +{ + struct rte_eth_dev *bond_dev; + int err; + + err = bond_8023ad_setup_validate(port_id, conf); + if (err != 0) + return err; + + bond_dev = &rte_eth_devices[port_id]; + bond_mode_8023ad_setup(bond_dev, conf); + + return 0; +} + + + + + +int +rte_eth_bond_8023ad_slave_info(uint16_t port_id, uint16_t slave_id, + struct rte_eth_bond_8023ad_slave_info *info) +{ + struct rte_eth_dev *bond_dev; + struct bond_dev_private *internals; + struct port *port; + + if (info == NULL || valid_bonded_port_id(port_id) != 0 || + rte_eth_bond_mode_get(port_id) != BONDING_MODE_8023AD) + return -EINVAL; + + bond_dev = &rte_eth_devices[port_id]; + + internals = bond_dev->data->dev_private; + if (find_slave_by_id(internals->active_slaves, + internals->active_slave_count, slave_id) == + internals->active_slave_count) + return -EINVAL; + + port = &bond_mode_8023ad_ports[slave_id]; + info->selected = port->selected; + + info->actor_state = port->actor_state; + rte_memcpy(&info->actor, &port->actor, sizeof(port->actor)); + + info->partner_state = port->partner_state; + rte_memcpy(&info->partner, &port->partner, sizeof(port->partner)); + + info->agg_port_id = port->aggregator_port_id; + return 0; +} + +static int +bond_8023ad_ext_validate(uint16_t port_id, uint16_t slave_id) +{ + struct rte_eth_dev *bond_dev; + struct bond_dev_private *internals; + struct mode8023ad_private *mode4; + + if (rte_eth_bond_mode_get(port_id) != BONDING_MODE_8023AD) + return -EINVAL; + + bond_dev = &rte_eth_devices[port_id]; + + if (!bond_dev->data->dev_started) + return -EINVAL; + + internals = bond_dev->data->dev_private; + if (find_slave_by_id(internals->active_slaves, + internals->active_slave_count, slave_id) == + internals->active_slave_count) + return -EINVAL; + + mode4 = &internals->mode4; + if (mode4->slowrx_cb == NULL) + return -EINVAL; + + return 0; +} + +int +rte_eth_bond_8023ad_ext_collect(uint16_t port_id, uint16_t slave_id, + int enabled) +{ + struct port *port; + int res; + + res = bond_8023ad_ext_validate(port_id, slave_id); + if (res != 0) + return res; + + port = &bond_mode_8023ad_ports[slave_id]; + + if (enabled) + ACTOR_STATE_SET(port, COLLECTING); + else + ACTOR_STATE_CLR(port, COLLECTING); + + return 0; +} + +int +rte_eth_bond_8023ad_ext_distrib(uint16_t port_id, uint16_t slave_id, + int enabled) +{ + struct port *port; + int res; + + res = bond_8023ad_ext_validate(port_id, slave_id); + if (res != 0) + return res; + + port = &bond_mode_8023ad_ports[slave_id]; + + if (enabled) + ACTOR_STATE_SET(port, DISTRIBUTING); + else + ACTOR_STATE_CLR(port, DISTRIBUTING); + + return 0; +} + +int +rte_eth_bond_8023ad_ext_distrib_get(uint16_t port_id, uint16_t slave_id) +{ + struct port *port; + int err; + + err = bond_8023ad_ext_validate(port_id, slave_id); + if (err != 0) + return err; + + port = &bond_mode_8023ad_ports[slave_id]; + return ACTOR_STATE(port, DISTRIBUTING); +} + +int +rte_eth_bond_8023ad_ext_collect_get(uint16_t port_id, uint16_t slave_id) +{ + struct port *port; + int err; + + err = bond_8023ad_ext_validate(port_id, slave_id); + if (err != 0) + return err; + + port = &bond_mode_8023ad_ports[slave_id]; + return ACTOR_STATE(port, COLLECTING); +} + +int +rte_eth_bond_8023ad_ext_slowtx(uint16_t port_id, uint16_t slave_id, + struct rte_mbuf *lacp_pkt) +{ + struct port *port; + int res; + + res = bond_8023ad_ext_validate(port_id, slave_id); + if (res != 0) + return res; + + port = &bond_mode_8023ad_ports[slave_id]; + + if (rte_pktmbuf_pkt_len(lacp_pkt) < sizeof(struct lacpdu_header)) + 
return -EINVAL; + + struct lacpdu_header *lacp; + + /* only enqueue LACPDUs */ + lacp = rte_pktmbuf_mtod(lacp_pkt, struct lacpdu_header *); + if (lacp->lacpdu.subtype != SLOW_SUBTYPE_LACP) + return -EINVAL; + + MODE4_DEBUG("sending LACP frame\n"); + + return rte_ring_enqueue(port->tx_ring, lacp_pkt); +} + +static void +bond_mode_8023ad_ext_periodic_cb(void *arg) +{ + struct rte_eth_dev *bond_dev = arg; + struct bond_dev_private *internals = bond_dev->data->dev_private; + struct mode8023ad_private *mode4 = &internals->mode4; + struct port *port; + void *pkt = NULL; + uint16_t i, slave_id; + + for (i = 0; i < internals->active_slave_count; i++) { + slave_id = internals->active_slaves[i]; + port = &bond_mode_8023ad_ports[slave_id]; + + if (rte_ring_dequeue(port->rx_ring, &pkt) == 0) { + struct rte_mbuf *lacp_pkt = pkt; + struct lacpdu_header *lacp; + + lacp = rte_pktmbuf_mtod(lacp_pkt, + struct lacpdu_header *); + RTE_VERIFY(lacp->lacpdu.subtype == SLOW_SUBTYPE_LACP); + + /* This is LACP frame so pass it to rx callback. + * Callback is responsible for freeing mbuf. + */ + mode4->slowrx_cb(slave_id, lacp_pkt); + } + } + + rte_eal_alarm_set(internals->mode4.update_timeout_us, + bond_mode_8023ad_ext_periodic_cb, arg); +} + +int +rte_eth_bond_8023ad_dedicated_queues_enable(uint16_t port) +{ + int retval = 0; + struct rte_eth_dev *dev; + struct bond_dev_private *internals; + + if (valid_bonded_port_id(port) != 0) + return -EINVAL; + + dev = &rte_eth_devices[port]; + internals = dev->data->dev_private; + + if (check_for_bonded_ethdev(dev) != 0) + return -1; + + if (bond_8023ad_slow_pkt_hw_filter_supported(port) != 0) + return -1; + + /* Device must be stopped to set up slow queue */ + if (dev->data->dev_started) + return -1; + + internals->mode4.dedicated_queues.enabled = 1; + + bond_ethdev_mode_set(dev, internals->mode); + return retval; +} + +int +rte_eth_bond_8023ad_dedicated_queues_disable(uint16_t port) +{ + int retval = 0; + struct rte_eth_dev *dev; + struct bond_dev_private *internals; + + if (valid_bonded_port_id(port) != 0) + return -EINVAL; + + dev = &rte_eth_devices[port]; + internals = dev->data->dev_private; + + if (check_for_bonded_ethdev(dev) != 0) + return -1; + + /* Device must be stopped to set up slow queue */ + if (dev->data->dev_started) + return -1; + + internals->mode4.dedicated_queues.enabled = 0; + + bond_ethdev_mode_set(dev, internals->mode); + + return retval; +} diff --git a/src/spdk/dpdk/drivers/net/bonding/rte_eth_bond_8023ad.h b/src/spdk/dpdk/drivers/net/bonding/rte_eth_bond_8023ad.h new file mode 100644 index 000000000..11a71a55e --- /dev/null +++ b/src/spdk/dpdk/drivers/net/bonding/rte_eth_bond_8023ad.h @@ -0,0 +1,334 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2014 Intel Corporation + */ + +#ifndef RTE_ETH_BOND_8023AD_H_ +#define RTE_ETH_BOND_8023AD_H_ + +#include <rte_ether.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Actor/partner states + */ +#define STATE_LACP_ACTIVE 0x01 +#define STATE_LACP_SHORT_TIMEOUT 0x02 +#define STATE_AGGREGATION 0x04 +#define STATE_SYNCHRONIZATION 0x08 +#define STATE_COLLECTING 0x10 +#define STATE_DISTRIBUTING 0x20 +/** Partners parameters are defaulted */ +#define STATE_DEFAULTED 0x40 +#define STATE_EXPIRED 0x80 + +#define TLV_TYPE_ACTOR_INFORMATION 0x01 +#define TLV_TYPE_PARTNER_INFORMATION 0x02 +#define TLV_TYPE_COLLECTOR_INFORMATION 0x03 +#define TLV_TYPE_TERMINATOR_INFORMATION 0x00 + +#define SLOW_SUBTYPE_LACP 0x01 +#define SLOW_SUBTYPE_MARKER 0x02 + +#define MARKER_TLV_TYPE_INFO 0x01 +#define 
MARKER_TLV_TYPE_RESP 0x02 + +typedef void (*rte_eth_bond_8023ad_ext_slowrx_fn)(uint16_t slave_id, + struct rte_mbuf *lacp_pkt); + +enum rte_bond_8023ad_selection { + UNSELECTED, + STANDBY, + SELECTED +}; + +enum rte_bond_8023ad_agg_selection { + AGG_BANDWIDTH, + AGG_COUNT, + AGG_STABLE +}; + +/** Generic slow protocol structure */ +struct slow_protocol { + uint8_t subtype; + uint8_t reserved_119[119]; +} __rte_packed; + +/** Generic slow protocol frame type structure */ +struct slow_protocol_frame { + struct rte_ether_hdr eth_hdr; + struct slow_protocol slow_protocol; +} __rte_packed __rte_aligned(2); + +struct port_params { + uint16_t system_priority; + /**< System priority (unused in current implementation) */ + struct rte_ether_addr system; + /**< System ID - Slave MAC address, same as bonding MAC address */ + uint16_t key; + /**< Speed information (implementation dependednt) and duplex. */ + uint16_t port_priority; + /**< Priority of this (unused in current implementation) */ + uint16_t port_number; + /**< Port number. It corresponds to slave port id. */ +} __rte_packed __rte_aligned(2); + +struct lacpdu_actor_partner_params { + uint8_t tlv_type_info; + uint8_t info_length; + struct port_params port_params; + uint8_t state; + uint8_t reserved_3[3]; +} __rte_packed __rte_aligned(2); + +/** LACPDU structure (5.4.2 in 802.1AX documentation). */ +struct lacpdu { + uint8_t subtype; + uint8_t version_number; + + struct lacpdu_actor_partner_params actor; + struct lacpdu_actor_partner_params partner; + + uint8_t tlv_type_collector_info; + uint8_t collector_info_length; + uint16_t collector_max_delay; + uint8_t reserved_12[12]; + + uint8_t tlv_type_terminator; + uint8_t terminator_length; + uint8_t reserved_50[50]; +} __rte_packed __rte_aligned(2); + +/** LACPDU frame: Contains ethernet header and LACPDU. */ +struct lacpdu_header { + struct rte_ether_hdr eth_hdr; + struct lacpdu lacpdu; +} __rte_packed __rte_aligned(2); + +struct marker { + uint8_t subtype; + uint8_t version_number; + + uint8_t tlv_type_marker; + uint8_t info_length; + uint16_t requester_port; + struct rte_ether_addr requester_system; + uint32_t requester_transaction_id; + uint8_t reserved_2[2]; + + uint8_t tlv_type_terminator; + uint8_t terminator_length; + uint8_t reserved_90[90]; +} __rte_packed __rte_aligned(2); + +struct marker_header { + struct rte_ether_hdr eth_hdr; + struct marker marker; +} __rte_packed __rte_aligned(2); + +struct rte_eth_bond_8023ad_conf { + uint32_t fast_periodic_ms; + uint32_t slow_periodic_ms; + uint32_t short_timeout_ms; + uint32_t long_timeout_ms; + uint32_t aggregate_wait_timeout_ms; + uint32_t tx_period_ms; + uint32_t rx_marker_period_ms; + uint32_t update_timeout_ms; + rte_eth_bond_8023ad_ext_slowrx_fn slowrx_cb; + enum rte_bond_8023ad_agg_selection agg_selection; +}; + +struct rte_eth_bond_8023ad_slave_info { + enum rte_bond_8023ad_selection selected; + uint8_t actor_state; + struct port_params actor; + uint8_t partner_state; + struct port_params partner; + uint16_t agg_port_id; +}; + +/** + * @internal + * + * Function returns current configuration of 802.3AX mode. + * + * @param port_id Bonding device id + * @param conf Pointer to timeout structure. + * + * @return + * 0 - if ok + * -EINVAL if conf is NULL + */ +int +rte_eth_bond_8023ad_conf_get(uint16_t port_id, + struct rte_eth_bond_8023ad_conf *conf); + +/** + * @internal + * + * Function set new configuration of 802.3AX mode. + * + * @param port_id Bonding device id + * @param conf Configuration, if NULL set default configuration. 
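+ *
+ * A minimal usage sketch (bond_port_id is assumed to be a valid mode 4
+ * bonding port id; the timeout value is only illustrative):
+ * @code
+ * struct rte_eth_bond_8023ad_conf conf;
+ *
+ * rte_eth_bond_8023ad_conf_get(bond_port_id, &conf);
+ * conf.update_timeout_ms = 200;
+ * rte_eth_bond_8023ad_setup(bond_port_id, &conf);
+ * @endcode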
+ * @return + * 0 - if ok + * -EINVAL if configuration is invalid. + */ +int +rte_eth_bond_8023ad_setup(uint16_t port_id, + struct rte_eth_bond_8023ad_conf *conf); + +/** + * @internal + * + * Function returns current state of given slave device. + * + * @param slave_id Port id of valid slave. + * @param conf buffer for configuration + * @return + * 0 - if ok + * -EINVAL if conf is NULL or slave id is invalid (not a slave of given + * bonded device or is not inactive). + */ +int +rte_eth_bond_8023ad_slave_info(uint16_t port_id, uint16_t slave_id, + struct rte_eth_bond_8023ad_slave_info *conf); + +#ifdef __cplusplus +} +#endif + +/** + * Configure a slave port to start collecting. + * + * @param port_id Bonding device id + * @param slave_id Port id of valid slave. + * @param enabled Non-zero when collection enabled. + * @return + * 0 - if ok + * -EINVAL if slave is not valid. + */ +int +rte_eth_bond_8023ad_ext_collect(uint16_t port_id, uint16_t slave_id, + int enabled); + +/** + * Get COLLECTING flag from slave port actor state. + * + * @param port_id Bonding device id + * @param slave_id Port id of valid slave. + * @return + * 0 - if not set + * 1 - if set + * -EINVAL if slave is not valid. + */ +int +rte_eth_bond_8023ad_ext_collect_get(uint16_t port_id, uint16_t slave_id); + +/** + * Configure a slave port to start distributing. + * + * @param port_id Bonding device id + * @param slave_id Port id of valid slave. + * @param enabled Non-zero when distribution enabled. + * @return + * 0 - if ok + * -EINVAL if slave is not valid. + */ +int +rte_eth_bond_8023ad_ext_distrib(uint16_t port_id, uint16_t slave_id, + int enabled); + +/** + * Get DISTRIBUTING flag from slave port actor state. + * + * @param port_id Bonding device id + * @param slave_id Port id of valid slave. + * @return + * 0 - if not set + * 1 - if set + * -EINVAL if slave is not valid. + */ +int +rte_eth_bond_8023ad_ext_distrib_get(uint16_t port_id, uint16_t slave_id); + +/** + * LACPDU transmit path for external 802.3ad state machine. Caller retains + * ownership of the packet on failure. + * + * @param port_id Bonding device id + * @param slave_id Port ID of valid slave device. + * @param lacp_pkt mbuf containing LACPDU. + * + * @return + * 0 on success, negative value otherwise. + */ +int +rte_eth_bond_8023ad_ext_slowtx(uint16_t port_id, uint16_t slave_id, + struct rte_mbuf *lacp_pkt); + +/** + * Enable dedicated hw queues for 802.3ad control plane traffic on on slaves + * + * This function creates an additional tx and rx queue on each slave for + * dedicated 802.3ad control plane traffic . A flow filtering rule is + * programmed on each slave to redirect all LACP slow packets to that rx queue + * for processing in the LACP state machine, this removes the need to filter + * these packets in the bonded devices data path. The additional tx queue is + * used to enable the LACP state machine to enqueue LACP packets directly to + * slave hw independently of the bonded devices data path. + * + * To use this feature all slaves must support the programming of the flow + * filter rule required for rx and have enough queues that one rx and tx queue + * can be reserved for the LACP state machines control packets. + * + * Bonding port must be stopped to change this configuration. + * + * @param port_id Bonding device id + * + * @return + * 0 on success, negative value otherwise. 
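+ *
+ * A minimal usage sketch (bond_port_id is assumed to be a mode 4 bonding
+ * port; error handling is omitted):
+ * @code
+ * rte_eth_dev_stop(bond_port_id);
+ * rte_eth_bond_8023ad_dedicated_queues_enable(bond_port_id);
+ * rte_eth_dev_start(bond_port_id);
+ * @endcode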
+ */ +int +rte_eth_bond_8023ad_dedicated_queues_enable(uint16_t port_id); + +/** + * Disable slow queue on slaves + * + * This function disables hardware slow packet filter. + * + * Bonding port must be stopped to change this configuration. + * + * @see rte_eth_bond_8023ad_slow_pkt_hw_filter_enable + * + * @param port_id Bonding device id + * @return + * 0 on success, negative value otherwise. + * + */ +int +rte_eth_bond_8023ad_dedicated_queues_disable(uint16_t port_id); + +/* + * Get aggregator mode for 8023ad + * @param port_id Bonding device id + * + * @return + * agregator mode on success, negative value otherwise + */ +int +rte_eth_bond_8023ad_agg_selection_get(uint16_t port_id); + +/** + * Set aggregator mode for 8023ad + * @param port_id Bonding device id + * @return + * 0 on success, negative value otherwise + */ +int +rte_eth_bond_8023ad_agg_selection_set(uint16_t port_id, + enum rte_bond_8023ad_agg_selection agg_selection); +#endif /* RTE_ETH_BOND_8023AD_H_ */ diff --git a/src/spdk/dpdk/drivers/net/bonding/rte_eth_bond_alb.c b/src/spdk/dpdk/drivers/net/bonding/rte_eth_bond_alb.c new file mode 100644 index 000000000..1d36a4a4a --- /dev/null +++ b/src/spdk/dpdk/drivers/net/bonding/rte_eth_bond_alb.c @@ -0,0 +1,271 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2015 Intel Corporation + */ + +#include "eth_bond_private.h" +#include "rte_eth_bond_alb.h" + +static inline uint8_t +simple_hash(uint8_t *hash_start, int hash_size) +{ + int i; + uint8_t hash; + + hash = 0; + for (i = 0; i < hash_size; ++i) + hash ^= hash_start[i]; + + return hash; +} + +static uint16_t +calculate_slave(struct bond_dev_private *internals) +{ + uint16_t idx; + + idx = (internals->mode6.last_slave + 1) % internals->active_slave_count; + internals->mode6.last_slave = idx; + return internals->active_slaves[idx]; +} + +int +bond_mode_alb_enable(struct rte_eth_dev *bond_dev) +{ + struct bond_dev_private *internals = bond_dev->data->dev_private; + struct client_data *hash_table = internals->mode6.client_table; + + uint16_t data_size; + char mem_name[RTE_ETH_NAME_MAX_LEN]; + int socket_id = bond_dev->data->numa_node; + + /* Fill hash table with initial values */ + memset(hash_table, 0, sizeof(struct client_data) * ALB_HASH_TABLE_SIZE); + rte_spinlock_init(&internals->mode6.lock); + internals->mode6.last_slave = ALB_NULL_INDEX; + internals->mode6.ntt = 0; + + /* Initialize memory pool for ARP packets to send */ + if (internals->mode6.mempool == NULL) { + /* + * 256 is size of ETH header, ARP header and nested VLAN headers. + * The value is chosen to be cache aligned. + */ + data_size = 256 + RTE_PKTMBUF_HEADROOM; + snprintf(mem_name, sizeof(mem_name), "%s_ALB", + bond_dev->device->name); + internals->mode6.mempool = rte_pktmbuf_pool_create(mem_name, + 512 * RTE_MAX_ETHPORTS, + RTE_MEMPOOL_CACHE_MAX_SIZE >= 32 ? 
+ 32 : RTE_MEMPOOL_CACHE_MAX_SIZE, + 0, data_size, socket_id); + + if (internals->mode6.mempool == NULL) { + RTE_BOND_LOG(ERR, "%s: Failed to initialize ALB mempool.\n", + bond_dev->device->name); + goto mempool_alloc_error; + } + } + + return 0; + +mempool_alloc_error: + return -ENOMEM; +} + +void bond_mode_alb_arp_recv(struct rte_ether_hdr *eth_h, uint16_t offset, + struct bond_dev_private *internals) +{ + struct rte_arp_hdr *arp; + + struct client_data *hash_table = internals->mode6.client_table; + struct client_data *client_info; + + uint8_t hash_index; + + arp = (struct rte_arp_hdr *)((char *)(eth_h + 1) + offset); + + /* ARP Requests are forwarded to the application with no changes */ + if (arp->arp_opcode != rte_cpu_to_be_16(RTE_ARP_OP_REPLY)) + return; + + /* From now on, we analyze only ARP Reply packets */ + hash_index = simple_hash((uint8_t *) &arp->arp_data.arp_sip, + sizeof(arp->arp_data.arp_sip)); + client_info = &hash_table[hash_index]; + + /* + * We got reply for ARP Request send by the application. We need to + * update client table when received data differ from what is stored + * in ALB table and issue sending update packet to that slave. + */ + rte_spinlock_lock(&internals->mode6.lock); + if (client_info->in_use == 0 || + client_info->app_ip != arp->arp_data.arp_tip || + client_info->cli_ip != arp->arp_data.arp_sip || + !rte_is_same_ether_addr(&client_info->cli_mac, + &arp->arp_data.arp_sha) || + client_info->vlan_count != offset / sizeof(struct rte_vlan_hdr) || + memcmp(client_info->vlan, eth_h + 1, offset) != 0 + ) { + client_info->in_use = 1; + client_info->app_ip = arp->arp_data.arp_tip; + client_info->cli_ip = arp->arp_data.arp_sip; + rte_ether_addr_copy(&arp->arp_data.arp_sha, + &client_info->cli_mac); + client_info->slave_idx = calculate_slave(internals); + rte_eth_macaddr_get(client_info->slave_idx, + &client_info->app_mac); + rte_ether_addr_copy(&client_info->app_mac, + &arp->arp_data.arp_tha); + memcpy(client_info->vlan, eth_h + 1, offset); + client_info->vlan_count = offset / sizeof(struct rte_vlan_hdr); + } + internals->mode6.ntt = 1; + rte_spinlock_unlock(&internals->mode6.lock); +} + +uint16_t +bond_mode_alb_arp_xmit(struct rte_ether_hdr *eth_h, uint16_t offset, + struct bond_dev_private *internals) +{ + struct rte_arp_hdr *arp; + + struct client_data *hash_table = internals->mode6.client_table; + struct client_data *client_info; + + uint8_t hash_index; + + struct rte_ether_addr bonding_mac; + + arp = (struct rte_arp_hdr *)((char *)(eth_h + 1) + offset); + + /* + * Traffic with src MAC other than bonding should be sent on + * current primary port. 
+ */ + rte_eth_macaddr_get(internals->port_id, &bonding_mac); + if (!rte_is_same_ether_addr(&bonding_mac, &arp->arp_data.arp_sha)) { + rte_eth_macaddr_get(internals->current_primary_port, + &arp->arp_data.arp_sha); + return internals->current_primary_port; + } + + hash_index = simple_hash((uint8_t *)&arp->arp_data.arp_tip, + sizeof(uint32_t)); + client_info = &hash_table[hash_index]; + + rte_spinlock_lock(&internals->mode6.lock); + if (arp->arp_opcode == rte_cpu_to_be_16(RTE_ARP_OP_REPLY)) { + if (client_info->in_use) { + if (client_info->app_ip == arp->arp_data.arp_sip && + client_info->cli_ip == arp->arp_data.arp_tip) { + /* Entry is already assigned to this client */ + if (!rte_is_broadcast_ether_addr( + &arp->arp_data.arp_tha)) { + rte_ether_addr_copy( + &arp->arp_data.arp_tha, + &client_info->cli_mac); + } + rte_eth_macaddr_get(client_info->slave_idx, + &client_info->app_mac); + rte_ether_addr_copy(&client_info->app_mac, + &arp->arp_data.arp_sha); + memcpy(client_info->vlan, eth_h + 1, offset); + client_info->vlan_count = offset / sizeof(struct rte_vlan_hdr); + rte_spinlock_unlock(&internals->mode6.lock); + return client_info->slave_idx; + } + } + + /* Assign new slave to this client and update src mac in ARP */ + client_info->in_use = 1; + client_info->ntt = 0; + client_info->app_ip = arp->arp_data.arp_sip; + rte_ether_addr_copy(&arp->arp_data.arp_tha, + &client_info->cli_mac); + client_info->cli_ip = arp->arp_data.arp_tip; + client_info->slave_idx = calculate_slave(internals); + rte_eth_macaddr_get(client_info->slave_idx, + &client_info->app_mac); + rte_ether_addr_copy(&client_info->app_mac, + &arp->arp_data.arp_sha); + memcpy(client_info->vlan, eth_h + 1, offset); + client_info->vlan_count = offset / sizeof(struct rte_vlan_hdr); + rte_spinlock_unlock(&internals->mode6.lock); + return client_info->slave_idx; + } + + /* If packet is not ARP Reply, send it on current primary port. 
*/ + rte_spinlock_unlock(&internals->mode6.lock); + rte_eth_macaddr_get(internals->current_primary_port, + &arp->arp_data.arp_sha); + return internals->current_primary_port; +} + +uint16_t +bond_mode_alb_arp_upd(struct client_data *client_info, + struct rte_mbuf *pkt, struct bond_dev_private *internals) +{ + struct rte_ether_hdr *eth_h; + struct rte_arp_hdr *arp_h; + uint16_t slave_idx; + + rte_spinlock_lock(&internals->mode6.lock); + eth_h = rte_pktmbuf_mtod(pkt, struct rte_ether_hdr *); + + rte_ether_addr_copy(&client_info->app_mac, ð_h->s_addr); + rte_ether_addr_copy(&client_info->cli_mac, ð_h->d_addr); + if (client_info->vlan_count > 0) + eth_h->ether_type = rte_cpu_to_be_16(RTE_ETHER_TYPE_VLAN); + else + eth_h->ether_type = rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP); + + arp_h = (struct rte_arp_hdr *)( + (char *)eth_h + sizeof(struct rte_ether_hdr) + + client_info->vlan_count * sizeof(struct rte_vlan_hdr)); + + memcpy(eth_h + 1, client_info->vlan, + client_info->vlan_count * sizeof(struct rte_vlan_hdr)); + + rte_ether_addr_copy(&client_info->app_mac, &arp_h->arp_data.arp_sha); + arp_h->arp_data.arp_sip = client_info->app_ip; + rte_ether_addr_copy(&client_info->cli_mac, &arp_h->arp_data.arp_tha); + arp_h->arp_data.arp_tip = client_info->cli_ip; + + arp_h->arp_hardware = rte_cpu_to_be_16(RTE_ARP_HRD_ETHER); + arp_h->arp_protocol = rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4); + arp_h->arp_hlen = RTE_ETHER_ADDR_LEN; + arp_h->arp_plen = sizeof(uint32_t); + arp_h->arp_opcode = rte_cpu_to_be_16(RTE_ARP_OP_REPLY); + + slave_idx = client_info->slave_idx; + rte_spinlock_unlock(&internals->mode6.lock); + + return slave_idx; +} + +void +bond_mode_alb_client_list_upd(struct rte_eth_dev *bond_dev) +{ + struct bond_dev_private *internals = bond_dev->data->dev_private; + struct client_data *client_info; + + int i; + + /* If active slave count is 0, it's pointless to refresh alb table */ + if (internals->active_slave_count <= 0) + return; + + rte_spinlock_lock(&internals->mode6.lock); + internals->mode6.last_slave = ALB_NULL_INDEX; + + for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) { + client_info = &internals->mode6.client_table[i]; + if (client_info->in_use) { + client_info->slave_idx = calculate_slave(internals); + rte_eth_macaddr_get(client_info->slave_idx, &client_info->app_mac); + internals->mode6.ntt = 1; + } + } + rte_spinlock_unlock(&internals->mode6.lock); +} diff --git a/src/spdk/dpdk/drivers/net/bonding/rte_eth_bond_alb.h b/src/spdk/dpdk/drivers/net/bonding/rte_eth_bond_alb.h new file mode 100644 index 000000000..386e70c59 --- /dev/null +++ b/src/spdk/dpdk/drivers/net/bonding/rte_eth_bond_alb.h @@ -0,0 +1,113 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2015 Intel Corporation + */ + +#ifndef RTE_ETH_BOND_ALB_H_ +#define RTE_ETH_BOND_ALB_H_ + +#include <rte_ether.h> +#include <rte_arp.h> + +#define ALB_HASH_TABLE_SIZE 256 +#define ALB_NULL_INDEX 0xFFFFFFFF + +struct client_data { + /** ARP data of single client */ + struct rte_ether_addr app_mac; + /**< MAC address of application running DPDK */ + uint32_t app_ip; + /**< IP address of application running DPDK */ + struct rte_ether_addr cli_mac; + /**< Client MAC address */ + uint32_t cli_ip; + /**< Client IP address */ + + uint16_t slave_idx; + /**< Index of slave on which we connect with that client */ + uint8_t in_use; + /**< Flag indicating if entry in client table is currently used */ + uint8_t ntt; + /**< Flag indicating if we need to send update to this client on next tx */ + + struct rte_vlan_hdr vlan[2]; + /**< Content of vlan 
headers */ + uint8_t vlan_count; + /**< Number of nested vlan headers */ +}; + +struct mode_alb_private { + struct client_data client_table[ALB_HASH_TABLE_SIZE]; + /**< Hash table storing ARP data of every client connected */ + struct rte_mempool *mempool; + /**< Mempool for creating ARP update packets */ + uint8_t ntt; + /**< Flag indicating if we need to send update to any client on next tx */ + uint32_t last_slave; + /**< Index of last used slave in client table */ + rte_spinlock_t lock; +}; + +/** + * ALB mode initialization. + * + * @param bond_dev Pointer to bonding device. + * + * @return + * Error code - 0 on success. + */ +int +bond_mode_alb_enable(struct rte_eth_dev *bond_dev); + +/** + * Function handles ARP packet reception. If received ARP request, it is + * forwarded to application without changes. If it is ARP reply, client table + * is updated. + * + * @param eth_h ETH header of received packet. + * @param offset Vlan header offset. + * @param internals Bonding data. + */ +void +bond_mode_alb_arp_recv(struct rte_ether_hdr *eth_h, uint16_t offset, + struct bond_dev_private *internals); + +/** + * Function handles ARP packet transmission. It also decides on which slave + * send that packet. If packet is ARP Request, it is send on primary slave. + * If it is ARP Reply, it is send on slave stored in client table for that + * connection. On Reply function also updates data in client table. + * + * @param eth_h ETH header of transmitted packet. + * @param offset Vlan header offset. + * @param internals Bonding data. + * + * @return + * Index of slave on which packet should be sent. + */ +uint16_t +bond_mode_alb_arp_xmit(struct rte_ether_hdr *eth_h, uint16_t offset, + struct bond_dev_private *internals); + +/** + * Function fills packet with ARP data from client_info. + * + * @param client_info Data of client to which packet is sent. + * @param pkt Pointer to packet which is sent. + * @param internals Bonding data. + * + * @return + * Index of slawe on which packet should be sent. + */ +uint16_t +bond_mode_alb_arp_upd(struct client_data *client_info, + struct rte_mbuf *pkt, struct bond_dev_private *internals); + +/** + * Function updates slave indexes of active connections. + * + * @param bond_dev Pointer to bonded device struct. 
+ */ +void +bond_mode_alb_client_list_upd(struct rte_eth_dev *bond_dev); + +#endif /* RTE_ETH_BOND_ALB_H_ */ diff --git a/src/spdk/dpdk/drivers/net/bonding/rte_eth_bond_api.c b/src/spdk/dpdk/drivers/net/bonding/rte_eth_bond_api.c new file mode 100644 index 000000000..f38eb3b47 --- /dev/null +++ b/src/spdk/dpdk/drivers/net/bonding/rte_eth_bond_api.c @@ -0,0 +1,1052 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2017 Intel Corporation + */ + +#include <string.h> + +#include <rte_mbuf.h> +#include <rte_malloc.h> +#include <rte_ethdev_driver.h> +#include <rte_tcp.h> +#include <rte_bus_vdev.h> +#include <rte_kvargs.h> + +#include "rte_eth_bond.h" +#include "eth_bond_private.h" +#include "eth_bond_8023ad_private.h" + +int +check_for_bonded_ethdev(const struct rte_eth_dev *eth_dev) +{ + /* Check valid pointer */ + if (eth_dev == NULL || + eth_dev->device == NULL || + eth_dev->device->driver == NULL || + eth_dev->device->driver->name == NULL) + return -1; + + /* return 0 if driver name matches */ + return eth_dev->device->driver->name != pmd_bond_drv.driver.name; +} + +int +valid_bonded_port_id(uint16_t port_id) +{ + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -1); + return check_for_bonded_ethdev(&rte_eth_devices[port_id]); +} + +int +check_for_master_bonded_ethdev(const struct rte_eth_dev *eth_dev) +{ + int i; + struct bond_dev_private *internals; + + if (check_for_bonded_ethdev(eth_dev) != 0) + return 0; + + internals = eth_dev->data->dev_private; + + /* Check if any of slave devices is a bonded device */ + for (i = 0; i < internals->slave_count; i++) + if (valid_bonded_port_id(internals->slaves[i].port_id) == 0) + return 1; + + return 0; +} + +int +valid_slave_port_id(uint16_t port_id, uint8_t mode) +{ + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -1); + + /* Verify that port_id refers to a non bonded port */ + if (check_for_bonded_ethdev(&rte_eth_devices[port_id]) == 0 && + mode == BONDING_MODE_8023AD) { + RTE_BOND_LOG(ERR, "Cannot add slave to bonded device in 802.3ad" + " mode as slave is also a bonded device, only " + "physical devices can be support in this mode."); + return -1; + } + + return 0; +} + +void +activate_slave(struct rte_eth_dev *eth_dev, uint16_t port_id) +{ + struct bond_dev_private *internals = eth_dev->data->dev_private; + uint16_t active_count = internals->active_slave_count; + + if (internals->mode == BONDING_MODE_8023AD) + bond_mode_8023ad_activate_slave(eth_dev, port_id); + + if (internals->mode == BONDING_MODE_TLB + || internals->mode == BONDING_MODE_ALB) { + + internals->tlb_slaves_order[active_count] = port_id; + } + + RTE_ASSERT(internals->active_slave_count < + (RTE_DIM(internals->active_slaves) - 1)); + + internals->active_slaves[internals->active_slave_count] = port_id; + internals->active_slave_count++; + + if (internals->mode == BONDING_MODE_TLB) + bond_tlb_activate_slave(internals); + if (internals->mode == BONDING_MODE_ALB) + bond_mode_alb_client_list_upd(eth_dev); +} + +void +deactivate_slave(struct rte_eth_dev *eth_dev, uint16_t port_id) +{ + uint16_t slave_pos; + struct bond_dev_private *internals = eth_dev->data->dev_private; + uint16_t active_count = internals->active_slave_count; + + if (internals->mode == BONDING_MODE_8023AD) { + bond_mode_8023ad_stop(eth_dev); + bond_mode_8023ad_deactivate_slave(eth_dev, port_id); + } else if (internals->mode == BONDING_MODE_TLB + || internals->mode == BONDING_MODE_ALB) + bond_tlb_disable(internals); + + slave_pos = find_slave_by_id(internals->active_slaves, active_count, + port_id); + + /* If slave 
was not at the end of the list + * shift active slaves up active array list */ + if (slave_pos < active_count) { + active_count--; + memmove(internals->active_slaves + slave_pos, + internals->active_slaves + slave_pos + 1, + (active_count - slave_pos) * + sizeof(internals->active_slaves[0])); + } + + RTE_ASSERT(active_count < RTE_DIM(internals->active_slaves)); + internals->active_slave_count = active_count; + + /* Resetting active_slave when reaches to max + * no of slaves in active list + */ + if (internals->active_slave >= active_count) + internals->active_slave = 0; + + if (eth_dev->data->dev_started) { + if (internals->mode == BONDING_MODE_8023AD) { + bond_mode_8023ad_start(eth_dev); + } else if (internals->mode == BONDING_MODE_TLB) { + bond_tlb_enable(internals); + } else if (internals->mode == BONDING_MODE_ALB) { + bond_tlb_enable(internals); + bond_mode_alb_client_list_upd(eth_dev); + } + } +} + +int +rte_eth_bond_create(const char *name, uint8_t mode, uint8_t socket_id) +{ + struct bond_dev_private *internals; + char devargs[52]; + uint16_t port_id; + int ret; + + if (name == NULL) { + RTE_BOND_LOG(ERR, "Invalid name specified"); + return -EINVAL; + } + + ret = snprintf(devargs, sizeof(devargs), + "driver=net_bonding,mode=%d,socket_id=%d", mode, socket_id); + if (ret < 0 || ret >= (int)sizeof(devargs)) + return -ENOMEM; + + ret = rte_vdev_init(name, devargs); + if (ret) + return -ENOMEM; + + ret = rte_eth_dev_get_port_by_name(name, &port_id); + RTE_ASSERT(!ret); + + /* + * To make bond_ethdev_configure() happy we need to free the + * internals->kvlist here. + * + * Also see comment in bond_ethdev_configure(). + */ + internals = rte_eth_devices[port_id].data->dev_private; + rte_kvargs_free(internals->kvlist); + internals->kvlist = NULL; + + return port_id; +} + +int +rte_eth_bond_free(const char *name) +{ + return rte_vdev_uninit(name); +} + +static int +slave_vlan_filter_set(uint16_t bonded_port_id, uint16_t slave_port_id) +{ + struct rte_eth_dev *bonded_eth_dev; + struct bond_dev_private *internals; + int found; + int res = 0; + uint64_t slab = 0; + uint32_t pos = 0; + uint16_t first; + + bonded_eth_dev = &rte_eth_devices[bonded_port_id]; + if ((bonded_eth_dev->data->dev_conf.rxmode.offloads & + DEV_RX_OFFLOAD_VLAN_FILTER) == 0) + return 0; + + internals = bonded_eth_dev->data->dev_private; + found = rte_bitmap_scan(internals->vlan_filter_bmp, &pos, &slab); + first = pos; + + if (!found) + return 0; + + do { + uint32_t i; + uint64_t mask; + + for (i = 0, mask = 1; + i < RTE_BITMAP_SLAB_BIT_SIZE; + i ++, mask <<= 1) { + if (unlikely(slab & mask)) { + uint16_t vlan_id = pos + i; + + res = rte_eth_dev_vlan_filter(slave_port_id, + vlan_id, 1); + } + } + found = rte_bitmap_scan(internals->vlan_filter_bmp, + &pos, &slab); + } while (found && first != pos && res == 0); + + return res; +} + +static int +slave_rte_flow_prepare(uint16_t slave_id, struct bond_dev_private *internals) +{ + struct rte_flow *flow; + struct rte_flow_error ferror; + uint16_t slave_port_id = internals->slaves[slave_id].port_id; + + if (internals->flow_isolated_valid != 0) { + rte_eth_dev_stop(slave_port_id); + if (rte_flow_isolate(slave_port_id, internals->flow_isolated, + &ferror)) { + RTE_BOND_LOG(ERR, "rte_flow_isolate failed for slave" + " %d: %s", slave_id, ferror.message ? 
+ ferror.message : "(no stated reason)"); + return -1; + } + } + TAILQ_FOREACH(flow, &internals->flow_list, next) { + flow->flows[slave_id] = rte_flow_create(slave_port_id, + flow->rule.attr, + flow->rule.pattern, + flow->rule.actions, + &ferror); + if (flow->flows[slave_id] == NULL) { + RTE_BOND_LOG(ERR, "Cannot create flow for slave" + " %d: %s", slave_id, + ferror.message ? ferror.message : + "(no stated reason)"); + /* Destroy successful bond flows from the slave */ + TAILQ_FOREACH(flow, &internals->flow_list, next) { + if (flow->flows[slave_id] != NULL) { + rte_flow_destroy(slave_port_id, + flow->flows[slave_id], + &ferror); + flow->flows[slave_id] = NULL; + } + } + return -1; + } + } + return 0; +} + +static void +eth_bond_slave_inherit_dev_info_rx_first(struct bond_dev_private *internals, + const struct rte_eth_dev_info *di) +{ + struct rte_eth_rxconf *rxconf_i = &internals->default_rxconf; + + internals->reta_size = di->reta_size; + + /* Inherit Rx offload capabilities from the first slave device */ + internals->rx_offload_capa = di->rx_offload_capa; + internals->rx_queue_offload_capa = di->rx_queue_offload_capa; + internals->flow_type_rss_offloads = di->flow_type_rss_offloads; + + /* Inherit maximum Rx packet size from the first slave device */ + internals->candidate_max_rx_pktlen = di->max_rx_pktlen; + + /* Inherit default Rx queue settings from the first slave device */ + memcpy(rxconf_i, &di->default_rxconf, sizeof(*rxconf_i)); + + /* + * Turn off descriptor prefetch and writeback by default for all + * slave devices. Applications may tweak this setting if need be. + */ + rxconf_i->rx_thresh.pthresh = 0; + rxconf_i->rx_thresh.hthresh = 0; + rxconf_i->rx_thresh.wthresh = 0; + + /* Setting this to zero should effectively enable default values */ + rxconf_i->rx_free_thresh = 0; + + /* Disable deferred start by default for all slave devices */ + rxconf_i->rx_deferred_start = 0; +} + +static void +eth_bond_slave_inherit_dev_info_tx_first(struct bond_dev_private *internals, + const struct rte_eth_dev_info *di) +{ + struct rte_eth_txconf *txconf_i = &internals->default_txconf; + + /* Inherit Tx offload capabilities from the first slave device */ + internals->tx_offload_capa = di->tx_offload_capa; + internals->tx_queue_offload_capa = di->tx_queue_offload_capa; + + /* Inherit default Tx queue settings from the first slave device */ + memcpy(txconf_i, &di->default_txconf, sizeof(*txconf_i)); + + /* + * Turn off descriptor prefetch and writeback by default for all + * slave devices. Applications may tweak this setting if need be. + */ + txconf_i->tx_thresh.pthresh = 0; + txconf_i->tx_thresh.hthresh = 0; + txconf_i->tx_thresh.wthresh = 0; + + /* + * Setting these parameters to zero assumes that default + * values will be configured implicitly by slave devices. 
+ */ + txconf_i->tx_free_thresh = 0; + txconf_i->tx_rs_thresh = 0; + + /* Disable deferred start by default for all slave devices */ + txconf_i->tx_deferred_start = 0; +} + +static void +eth_bond_slave_inherit_dev_info_rx_next(struct bond_dev_private *internals, + const struct rte_eth_dev_info *di) +{ + struct rte_eth_rxconf *rxconf_i = &internals->default_rxconf; + const struct rte_eth_rxconf *rxconf = &di->default_rxconf; + + internals->rx_offload_capa &= di->rx_offload_capa; + internals->rx_queue_offload_capa &= di->rx_queue_offload_capa; + internals->flow_type_rss_offloads &= di->flow_type_rss_offloads; + + /* + * If at least one slave device suggests enabling this + * setting by default, enable it for all slave devices + * since disabling it may not be necessarily supported. + */ + if (rxconf->rx_drop_en == 1) + rxconf_i->rx_drop_en = 1; + + /* + * Adding a new slave device may cause some of previously inherited + * offloads to be withdrawn from the internal rx_queue_offload_capa + * value. Thus, the new internal value of default Rx queue offloads + * has to be masked by rx_queue_offload_capa to make sure that only + * commonly supported offloads are preserved from both the previous + * value and the value being inhereted from the new slave device. + */ + rxconf_i->offloads = (rxconf_i->offloads | rxconf->offloads) & + internals->rx_queue_offload_capa; + + /* + * RETA size is GCD of all slaves RETA sizes, so, if all sizes will be + * the power of 2, the lower one is GCD + */ + if (internals->reta_size > di->reta_size) + internals->reta_size = di->reta_size; + + if (!internals->max_rx_pktlen && + di->max_rx_pktlen < internals->candidate_max_rx_pktlen) + internals->candidate_max_rx_pktlen = di->max_rx_pktlen; +} + +static void +eth_bond_slave_inherit_dev_info_tx_next(struct bond_dev_private *internals, + const struct rte_eth_dev_info *di) +{ + struct rte_eth_txconf *txconf_i = &internals->default_txconf; + const struct rte_eth_txconf *txconf = &di->default_txconf; + + internals->tx_offload_capa &= di->tx_offload_capa; + internals->tx_queue_offload_capa &= di->tx_queue_offload_capa; + + /* + * Adding a new slave device may cause some of previously inherited + * offloads to be withdrawn from the internal tx_queue_offload_capa + * value. Thus, the new internal value of default Tx queue offloads + * has to be masked by tx_queue_offload_capa to make sure that only + * commonly supported offloads are preserved from both the previous + * value and the value being inhereted from the new slave device. 
+ */ + txconf_i->offloads = (txconf_i->offloads | txconf->offloads) & + internals->tx_queue_offload_capa; +} + +static void +eth_bond_slave_inherit_desc_lim_first(struct rte_eth_desc_lim *bond_desc_lim, + const struct rte_eth_desc_lim *slave_desc_lim) +{ + memcpy(bond_desc_lim, slave_desc_lim, sizeof(*bond_desc_lim)); +} + +static int +eth_bond_slave_inherit_desc_lim_next(struct rte_eth_desc_lim *bond_desc_lim, + const struct rte_eth_desc_lim *slave_desc_lim) +{ + bond_desc_lim->nb_max = RTE_MIN(bond_desc_lim->nb_max, + slave_desc_lim->nb_max); + bond_desc_lim->nb_min = RTE_MAX(bond_desc_lim->nb_min, + slave_desc_lim->nb_min); + bond_desc_lim->nb_align = RTE_MAX(bond_desc_lim->nb_align, + slave_desc_lim->nb_align); + + if (bond_desc_lim->nb_min > bond_desc_lim->nb_max || + bond_desc_lim->nb_align > bond_desc_lim->nb_max) { + RTE_BOND_LOG(ERR, "Failed to inherit descriptor limits"); + return -EINVAL; + } + + /* Treat maximum number of segments equal to 0 as unspecified */ + if (slave_desc_lim->nb_seg_max != 0 && + (bond_desc_lim->nb_seg_max == 0 || + slave_desc_lim->nb_seg_max < bond_desc_lim->nb_seg_max)) + bond_desc_lim->nb_seg_max = slave_desc_lim->nb_seg_max; + if (slave_desc_lim->nb_mtu_seg_max != 0 && + (bond_desc_lim->nb_mtu_seg_max == 0 || + slave_desc_lim->nb_mtu_seg_max < bond_desc_lim->nb_mtu_seg_max)) + bond_desc_lim->nb_mtu_seg_max = slave_desc_lim->nb_mtu_seg_max; + + return 0; +} + +static int +__eth_bond_slave_add_lock_free(uint16_t bonded_port_id, uint16_t slave_port_id) +{ + struct rte_eth_dev *bonded_eth_dev, *slave_eth_dev; + struct bond_dev_private *internals; + struct rte_eth_link link_props; + struct rte_eth_dev_info dev_info; + int ret; + + bonded_eth_dev = &rte_eth_devices[bonded_port_id]; + internals = bonded_eth_dev->data->dev_private; + + if (valid_slave_port_id(slave_port_id, internals->mode) != 0) + return -1; + + slave_eth_dev = &rte_eth_devices[slave_port_id]; + if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_BONDED_SLAVE) { + RTE_BOND_LOG(ERR, "Slave device is already a slave of a bonded device"); + return -1; + } + + ret = rte_eth_dev_info_get(slave_port_id, &dev_info); + if (ret != 0) { + RTE_BOND_LOG(ERR, + "%s: Error during getting device (port %u) info: %s\n", + __func__, slave_port_id, strerror(-ret)); + + return ret; + } + if (dev_info.max_rx_pktlen < internals->max_rx_pktlen) { + RTE_BOND_LOG(ERR, "Slave (port %u) max_rx_pktlen too small", + slave_port_id); + return -1; + } + + slave_add(internals, slave_eth_dev); + + /* We need to store slaves reta_size to be able to synchronize RETA for all + * slave devices even if its sizes are different. 
+ */ + internals->slaves[internals->slave_count].reta_size = dev_info.reta_size; + + if (internals->slave_count < 1) { + /* if MAC is not user defined then use MAC of first slave add to + * bonded device */ + if (!internals->user_defined_mac) { + if (mac_address_set(bonded_eth_dev, + slave_eth_dev->data->mac_addrs)) { + RTE_BOND_LOG(ERR, "Failed to set MAC address"); + return -1; + } + } + + /* Make primary slave */ + internals->primary_port = slave_port_id; + internals->current_primary_port = slave_port_id; + + /* Inherit queues settings from first slave */ + internals->nb_rx_queues = slave_eth_dev->data->nb_rx_queues; + internals->nb_tx_queues = slave_eth_dev->data->nb_tx_queues; + + eth_bond_slave_inherit_dev_info_rx_first(internals, &dev_info); + eth_bond_slave_inherit_dev_info_tx_first(internals, &dev_info); + + eth_bond_slave_inherit_desc_lim_first(&internals->rx_desc_lim, + &dev_info.rx_desc_lim); + eth_bond_slave_inherit_desc_lim_first(&internals->tx_desc_lim, + &dev_info.tx_desc_lim); + } else { + int ret; + + eth_bond_slave_inherit_dev_info_rx_next(internals, &dev_info); + eth_bond_slave_inherit_dev_info_tx_next(internals, &dev_info); + + ret = eth_bond_slave_inherit_desc_lim_next( + &internals->rx_desc_lim, &dev_info.rx_desc_lim); + if (ret != 0) + return ret; + + ret = eth_bond_slave_inherit_desc_lim_next( + &internals->tx_desc_lim, &dev_info.tx_desc_lim); + if (ret != 0) + return ret; + } + + bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf &= + internals->flow_type_rss_offloads; + + if (slave_rte_flow_prepare(internals->slave_count, internals) != 0) { + RTE_BOND_LOG(ERR, "Failed to prepare new slave flows: port=%d", + slave_port_id); + return -1; + } + + /* Add additional MAC addresses to the slave */ + if (slave_add_mac_addresses(bonded_eth_dev, slave_port_id) != 0) { + RTE_BOND_LOG(ERR, "Failed to add mac address(es) to slave %hu", + slave_port_id); + return -1; + } + + internals->slave_count++; + + if (bonded_eth_dev->data->dev_started) { + if (slave_configure(bonded_eth_dev, slave_eth_dev) != 0) { + internals->slave_count--; + RTE_BOND_LOG(ERR, "rte_bond_slaves_configure: port=%d", + slave_port_id); + return -1; + } + } + + /* Update all slave devices MACs */ + mac_address_slaves_update(bonded_eth_dev); + + /* Register link status change callback with bonded device pointer as + * argument*/ + rte_eth_dev_callback_register(slave_port_id, RTE_ETH_EVENT_INTR_LSC, + bond_ethdev_lsc_event_callback, &bonded_eth_dev->data->port_id); + + /* If bonded device is started then we can add the slave to our active + * slave array */ + if (bonded_eth_dev->data->dev_started) { + ret = rte_eth_link_get_nowait(slave_port_id, &link_props); + if (ret < 0) { + rte_eth_dev_callback_unregister(slave_port_id, + RTE_ETH_EVENT_INTR_LSC, + bond_ethdev_lsc_event_callback, + &bonded_eth_dev->data->port_id); + internals->slave_count--; + RTE_BOND_LOG(ERR, + "Slave (port %u) link get failed: %s\n", + slave_port_id, rte_strerror(-ret)); + return -1; + } + + if (link_props.link_status == ETH_LINK_UP) { + if (internals->active_slave_count == 0 && + !internals->user_defined_primary_port) + bond_ethdev_primary_set(internals, + slave_port_id); + } + } + + /* Add slave details to bonded device */ + slave_eth_dev->data->dev_flags |= RTE_ETH_DEV_BONDED_SLAVE; + + slave_vlan_filter_set(bonded_port_id, slave_port_id); + + return 0; + +} + +int +rte_eth_bond_slave_add(uint16_t bonded_port_id, uint16_t slave_port_id) +{ + struct rte_eth_dev *bonded_eth_dev; + struct bond_dev_private *internals; + + int 
retval; + + /* Verify that port id's are valid bonded and slave ports */ + if (valid_bonded_port_id(bonded_port_id) != 0) + return -1; + + bonded_eth_dev = &rte_eth_devices[bonded_port_id]; + internals = bonded_eth_dev->data->dev_private; + + rte_spinlock_lock(&internals->lock); + + retval = __eth_bond_slave_add_lock_free(bonded_port_id, slave_port_id); + + rte_spinlock_unlock(&internals->lock); + + return retval; +} + +static int +__eth_bond_slave_remove_lock_free(uint16_t bonded_port_id, + uint16_t slave_port_id) +{ + struct rte_eth_dev *bonded_eth_dev; + struct bond_dev_private *internals; + struct rte_eth_dev *slave_eth_dev; + struct rte_flow_error flow_error; + struct rte_flow *flow; + int i, slave_idx; + + bonded_eth_dev = &rte_eth_devices[bonded_port_id]; + internals = bonded_eth_dev->data->dev_private; + + if (valid_slave_port_id(slave_port_id, internals->mode) < 0) + return -1; + + /* first remove from active slave list */ + slave_idx = find_slave_by_id(internals->active_slaves, + internals->active_slave_count, slave_port_id); + + if (slave_idx < internals->active_slave_count) + deactivate_slave(bonded_eth_dev, slave_port_id); + + slave_idx = -1; + /* now find in slave list */ + for (i = 0; i < internals->slave_count; i++) + if (internals->slaves[i].port_id == slave_port_id) { + slave_idx = i; + break; + } + + if (slave_idx < 0) { + RTE_BOND_LOG(ERR, "Couldn't find slave in port list, slave count %d", + internals->slave_count); + return -1; + } + + /* Un-register link status change callback with bonded device pointer as + * argument*/ + rte_eth_dev_callback_unregister(slave_port_id, RTE_ETH_EVENT_INTR_LSC, + bond_ethdev_lsc_event_callback, + &rte_eth_devices[bonded_port_id].data->port_id); + + /* Restore original MAC address of slave device */ + rte_eth_dev_default_mac_addr_set(slave_port_id, + &(internals->slaves[slave_idx].persisted_mac_addr)); + + /* remove additional MAC addresses from the slave */ + slave_remove_mac_addresses(bonded_eth_dev, slave_port_id); + + /* + * Remove bond device flows from slave device. + * Note: don't restore flow isolate mode. 
+ */ + TAILQ_FOREACH(flow, &internals->flow_list, next) { + if (flow->flows[slave_idx] != NULL) { + rte_flow_destroy(slave_port_id, flow->flows[slave_idx], + &flow_error); + flow->flows[slave_idx] = NULL; + } + } + + slave_eth_dev = &rte_eth_devices[slave_port_id]; + slave_remove(internals, slave_eth_dev); + slave_eth_dev->data->dev_flags &= (~RTE_ETH_DEV_BONDED_SLAVE); + + /* first slave in the active list will be the primary by default, + * otherwise use first device in list */ + if (internals->current_primary_port == slave_port_id) { + if (internals->active_slave_count > 0) + internals->current_primary_port = internals->active_slaves[0]; + else if (internals->slave_count > 0) + internals->current_primary_port = internals->slaves[0].port_id; + else + internals->primary_port = 0; + } + + if (internals->active_slave_count < 1) { + /* if no slaves are any longer attached to bonded device and MAC is not + * user defined then clear MAC of bonded device as it will be reset + * when a new slave is added */ + if (internals->slave_count < 1 && !internals->user_defined_mac) + memset(rte_eth_devices[bonded_port_id].data->mac_addrs, 0, + sizeof(*(rte_eth_devices[bonded_port_id].data->mac_addrs))); + } + if (internals->slave_count == 0) { + internals->rx_offload_capa = 0; + internals->tx_offload_capa = 0; + internals->rx_queue_offload_capa = 0; + internals->tx_queue_offload_capa = 0; + internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK; + internals->reta_size = 0; + internals->candidate_max_rx_pktlen = 0; + internals->max_rx_pktlen = 0; + } + return 0; +} + +int +rte_eth_bond_slave_remove(uint16_t bonded_port_id, uint16_t slave_port_id) +{ + struct rte_eth_dev *bonded_eth_dev; + struct bond_dev_private *internals; + int retval; + + if (valid_bonded_port_id(bonded_port_id) != 0) + return -1; + + bonded_eth_dev = &rte_eth_devices[bonded_port_id]; + internals = bonded_eth_dev->data->dev_private; + + rte_spinlock_lock(&internals->lock); + + retval = __eth_bond_slave_remove_lock_free(bonded_port_id, slave_port_id); + + rte_spinlock_unlock(&internals->lock); + + return retval; +} + +int +rte_eth_bond_mode_set(uint16_t bonded_port_id, uint8_t mode) +{ + struct rte_eth_dev *bonded_eth_dev; + + if (valid_bonded_port_id(bonded_port_id) != 0) + return -1; + + bonded_eth_dev = &rte_eth_devices[bonded_port_id]; + + if (check_for_master_bonded_ethdev(bonded_eth_dev) != 0 && + mode == BONDING_MODE_8023AD) + return -1; + + return bond_ethdev_mode_set(bonded_eth_dev, mode); +} + +int +rte_eth_bond_mode_get(uint16_t bonded_port_id) +{ + struct bond_dev_private *internals; + + if (valid_bonded_port_id(bonded_port_id) != 0) + return -1; + + internals = rte_eth_devices[bonded_port_id].data->dev_private; + + return internals->mode; +} + +int +rte_eth_bond_primary_set(uint16_t bonded_port_id, uint16_t slave_port_id) +{ + struct bond_dev_private *internals; + + if (valid_bonded_port_id(bonded_port_id) != 0) + return -1; + + internals = rte_eth_devices[bonded_port_id].data->dev_private; + + if (valid_slave_port_id(slave_port_id, internals->mode) != 0) + return -1; + + internals->user_defined_primary_port = 1; + internals->primary_port = slave_port_id; + + bond_ethdev_primary_set(internals, slave_port_id); + + return 0; +} + +int +rte_eth_bond_primary_get(uint16_t bonded_port_id) +{ + struct bond_dev_private *internals; + + if (valid_bonded_port_id(bonded_port_id) != 0) + return -1; + + internals = rte_eth_devices[bonded_port_id].data->dev_private; + + if (internals->slave_count < 1) + return -1; + + return 
internals->current_primary_port; +} + +int +rte_eth_bond_slaves_get(uint16_t bonded_port_id, uint16_t slaves[], + uint16_t len) +{ + struct bond_dev_private *internals; + uint16_t i; + + if (valid_bonded_port_id(bonded_port_id) != 0) + return -1; + + if (slaves == NULL) + return -1; + + internals = rte_eth_devices[bonded_port_id].data->dev_private; + + if (internals->slave_count > len) + return -1; + + for (i = 0; i < internals->slave_count; i++) + slaves[i] = internals->slaves[i].port_id; + + return internals->slave_count; +} + +int +rte_eth_bond_active_slaves_get(uint16_t bonded_port_id, uint16_t slaves[], + uint16_t len) +{ + struct bond_dev_private *internals; + + if (valid_bonded_port_id(bonded_port_id) != 0) + return -1; + + if (slaves == NULL) + return -1; + + internals = rte_eth_devices[bonded_port_id].data->dev_private; + + if (internals->active_slave_count > len) + return -1; + + memcpy(slaves, internals->active_slaves, + internals->active_slave_count * sizeof(internals->active_slaves[0])); + + return internals->active_slave_count; +} + +int +rte_eth_bond_mac_address_set(uint16_t bonded_port_id, + struct rte_ether_addr *mac_addr) +{ + struct rte_eth_dev *bonded_eth_dev; + struct bond_dev_private *internals; + + if (valid_bonded_port_id(bonded_port_id) != 0) + return -1; + + bonded_eth_dev = &rte_eth_devices[bonded_port_id]; + internals = bonded_eth_dev->data->dev_private; + + /* Set MAC Address of Bonded Device */ + if (mac_address_set(bonded_eth_dev, mac_addr)) + return -1; + + internals->user_defined_mac = 1; + + /* Update all slave devices MACs*/ + if (internals->slave_count > 0) + return mac_address_slaves_update(bonded_eth_dev); + + return 0; +} + +int +rte_eth_bond_mac_address_reset(uint16_t bonded_port_id) +{ + struct rte_eth_dev *bonded_eth_dev; + struct bond_dev_private *internals; + + if (valid_bonded_port_id(bonded_port_id) != 0) + return -1; + + bonded_eth_dev = &rte_eth_devices[bonded_port_id]; + internals = bonded_eth_dev->data->dev_private; + + internals->user_defined_mac = 0; + + if (internals->slave_count > 0) { + int slave_port; + /* Get the primary slave location based on the primary port + * number as, while slave_add(), we will keep the primary + * slave based on slave_count,but not based on the primary port. 
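Taken together, the slave add/remove calls and the setters in this file form the bond's control-plane API. A minimal application-side sketch (port ids and the "net_bonding0" name are placeholders, error handling trimmed) of creating and tuning a balance-mode bond:

	#include <rte_eth_bond.h>
	#include <rte_lcore.h>

	/* Create a balance-mode bond, attach two existing ports, tune it. */
	static int setup_bond(uint16_t slave_a, uint16_t slave_b)
	{
		int bond_port = rte_eth_bond_create("net_bonding0",
				BONDING_MODE_BALANCE, rte_socket_id());
		if (bond_port < 0)
			return -1;

		if (rte_eth_bond_slave_add(bond_port, slave_a) != 0 ||
		    rte_eth_bond_slave_add(bond_port, slave_b) != 0)
			return -1;

		/* Spread flows by an L3+L4 header hash. */
		rte_eth_bond_xmit_policy_set(bond_port,
				BALANCE_XMIT_POLICY_LAYER34);
		/* Prefer slave_a while it is up. */
		rte_eth_bond_primary_set(bond_port, slave_a);
		/* Poll slave link status every 100 ms. */
		rte_eth_bond_link_monitoring_set(bond_port, 100);

		return bond_port;
	}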
+ */ + for (slave_port = 0; slave_port < internals->slave_count; + slave_port++) { + if (internals->slaves[slave_port].port_id == + internals->primary_port) + break; + } + + /* Set MAC Address of Bonded Device */ + if (mac_address_set(bonded_eth_dev, + &internals->slaves[slave_port].persisted_mac_addr) + != 0) { + RTE_BOND_LOG(ERR, "Failed to set MAC address on bonded device"); + return -1; + } + /* Update all slave devices MAC addresses */ + return mac_address_slaves_update(bonded_eth_dev); + } + /* No need to update anything as no slaves present */ + return 0; +} + +int +rte_eth_bond_xmit_policy_set(uint16_t bonded_port_id, uint8_t policy) +{ + struct bond_dev_private *internals; + + if (valid_bonded_port_id(bonded_port_id) != 0) + return -1; + + internals = rte_eth_devices[bonded_port_id].data->dev_private; + + switch (policy) { + case BALANCE_XMIT_POLICY_LAYER2: + internals->balance_xmit_policy = policy; + internals->burst_xmit_hash = burst_xmit_l2_hash; + break; + case BALANCE_XMIT_POLICY_LAYER23: + internals->balance_xmit_policy = policy; + internals->burst_xmit_hash = burst_xmit_l23_hash; + break; + case BALANCE_XMIT_POLICY_LAYER34: + internals->balance_xmit_policy = policy; + internals->burst_xmit_hash = burst_xmit_l34_hash; + break; + + default: + return -1; + } + return 0; +} + +int +rte_eth_bond_xmit_policy_get(uint16_t bonded_port_id) +{ + struct bond_dev_private *internals; + + if (valid_bonded_port_id(bonded_port_id) != 0) + return -1; + + internals = rte_eth_devices[bonded_port_id].data->dev_private; + + return internals->balance_xmit_policy; +} + +int +rte_eth_bond_link_monitoring_set(uint16_t bonded_port_id, uint32_t internal_ms) +{ + struct bond_dev_private *internals; + + if (valid_bonded_port_id(bonded_port_id) != 0) + return -1; + + internals = rte_eth_devices[bonded_port_id].data->dev_private; + internals->link_status_polling_interval_ms = internal_ms; + + return 0; +} + +int +rte_eth_bond_link_monitoring_get(uint16_t bonded_port_id) +{ + struct bond_dev_private *internals; + + if (valid_bonded_port_id(bonded_port_id) != 0) + return -1; + + internals = rte_eth_devices[bonded_port_id].data->dev_private; + + return internals->link_status_polling_interval_ms; +} + +int +rte_eth_bond_link_down_prop_delay_set(uint16_t bonded_port_id, + uint32_t delay_ms) + +{ + struct bond_dev_private *internals; + + if (valid_bonded_port_id(bonded_port_id) != 0) + return -1; + + internals = rte_eth_devices[bonded_port_id].data->dev_private; + internals->link_down_delay_ms = delay_ms; + + return 0; +} + +int +rte_eth_bond_link_down_prop_delay_get(uint16_t bonded_port_id) +{ + struct bond_dev_private *internals; + + if (valid_bonded_port_id(bonded_port_id) != 0) + return -1; + + internals = rte_eth_devices[bonded_port_id].data->dev_private; + + return internals->link_down_delay_ms; +} + +int +rte_eth_bond_link_up_prop_delay_set(uint16_t bonded_port_id, uint32_t delay_ms) + +{ + struct bond_dev_private *internals; + + if (valid_bonded_port_id(bonded_port_id) != 0) + return -1; + + internals = rte_eth_devices[bonded_port_id].data->dev_private; + internals->link_up_delay_ms = delay_ms; + + return 0; +} + +int +rte_eth_bond_link_up_prop_delay_get(uint16_t bonded_port_id) +{ + struct bond_dev_private *internals; + + if (valid_bonded_port_id(bonded_port_id) != 0) + return -1; + + internals = rte_eth_devices[bonded_port_id].data->dev_private; + + return internals->link_up_delay_ms; +} diff --git a/src/spdk/dpdk/drivers/net/bonding/rte_eth_bond_args.c 
b/src/spdk/dpdk/drivers/net/bonding/rte_eth_bond_args.c new file mode 100644 index 000000000..abdf55261 --- /dev/null +++ b/src/spdk/dpdk/drivers/net/bonding/rte_eth_bond_args.c @@ -0,0 +1,301 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2014 Intel Corporation + */ + +#include <rte_devargs.h> +#include <rte_pci.h> +#include <rte_bus_pci.h> +#include <rte_kvargs.h> + +#include "rte_eth_bond.h" +#include "eth_bond_private.h" + +const char *pmd_bond_init_valid_arguments[] = { + PMD_BOND_SLAVE_PORT_KVARG, + PMD_BOND_PRIMARY_SLAVE_KVARG, + PMD_BOND_MODE_KVARG, + PMD_BOND_XMIT_POLICY_KVARG, + PMD_BOND_SOCKET_ID_KVARG, + PMD_BOND_MAC_ADDR_KVARG, + PMD_BOND_AGG_MODE_KVARG, + "driver", + NULL +}; + +static inline int +find_port_id_by_pci_addr(const struct rte_pci_addr *pci_addr) +{ + struct rte_pci_device *pci_dev; + struct rte_pci_addr *eth_pci_addr; + unsigned i; + + RTE_ETH_FOREACH_DEV(i) { + pci_dev = RTE_ETH_DEV_TO_PCI(&rte_eth_devices[i]); + eth_pci_addr = &pci_dev->addr; + + if (pci_addr->bus == eth_pci_addr->bus && + pci_addr->devid == eth_pci_addr->devid && + pci_addr->domain == eth_pci_addr->domain && + pci_addr->function == eth_pci_addr->function) + return i; + } + return -1; +} + +static inline int +find_port_id_by_dev_name(const char *name) +{ + unsigned i; + + RTE_ETH_FOREACH_DEV(i) { + if (rte_eth_devices[i].data == NULL) + continue; + + if (strcmp(rte_eth_devices[i].device->name, name) == 0) + return i; + } + return -1; +} + +static inline int +bond_pci_addr_cmp(const struct rte_device *dev, const void *_pci_addr) +{ + const struct rte_pci_device *pdev = RTE_DEV_TO_PCI_CONST(dev); + const struct rte_pci_addr *paddr = _pci_addr; + + return rte_pci_addr_cmp(&pdev->addr, paddr); +} + +/** + * Parses a port identifier string to a port id by pci address, then by name, + * and finally port id. 
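The kvarg parsers below are normally fed from an EAL --vdev string rather than called directly. An illustrative invocation (PCI address and device names are placeholders for whatever exists on the host) that exercises the slave, primary, mode and xmit_policy handlers:

	/* Illustrative EAL argument only; mode=2 is BONDING_MODE_BALANCE.
	 * Each slave value may be a PCI address, a device name or a numeric
	 * port id, which is exactly the order parse_port_id() tries below. */
	static const char *bond_vdev_arg =
		"--vdev=net_bonding0,mode=2,"
		"slave=0000:02:00.0,slave=net_tap0,"
		"primary=0000:02:00.0,xmit_policy=l34,socket_id=0";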
+ */ +static inline int +parse_port_id(const char *port_str) +{ + struct rte_pci_addr dev_addr; + struct rte_bus *pci_bus; + struct rte_device *dev; + int port_id; + + pci_bus = rte_bus_find_by_name("pci"); + if (pci_bus == NULL) { + RTE_BOND_LOG(ERR, "unable to find PCI bus\n"); + return -1; + } + + /* try parsing as pci address, physical devices */ + if (pci_bus->parse(port_str, &dev_addr) == 0) { + dev = pci_bus->find_device(NULL, bond_pci_addr_cmp, &dev_addr); + if (dev == NULL) { + RTE_BOND_LOG(ERR, "unable to find PCI device"); + return -1; + } + port_id = find_port_id_by_pci_addr(&dev_addr); + if (port_id < 0) + return -1; + } else { + /* try parsing as device name, virtual devices */ + port_id = find_port_id_by_dev_name(port_str); + if (port_id < 0) { + char *end; + errno = 0; + + /* try parsing as port id */ + port_id = strtol(port_str, &end, 10); + if (*end != 0 || errno != 0) + return -1; + } + } + + if (port_id < 0 || port_id > RTE_MAX_ETHPORTS) { + RTE_BOND_LOG(ERR, "Slave port specified (%s) outside expected range", + port_str); + return -1; + } + return port_id; +} + +int +bond_ethdev_parse_slave_port_kvarg(const char *key, + const char *value, void *extra_args) +{ + struct bond_ethdev_slave_ports *slave_ports; + + if (value == NULL || extra_args == NULL) + return -1; + + slave_ports = extra_args; + + if (strcmp(key, PMD_BOND_SLAVE_PORT_KVARG) == 0) { + int port_id = parse_port_id(value); + if (port_id < 0) { + RTE_BOND_LOG(ERR, "Invalid slave port value (%s) specified", + value); + return -1; + } else + slave_ports->slaves[slave_ports->slave_count++] = + port_id; + } + return 0; +} + +int +bond_ethdev_parse_slave_mode_kvarg(const char *key __rte_unused, + const char *value, void *extra_args) +{ + uint8_t *mode; + char *endptr; + + if (value == NULL || extra_args == NULL) + return -1; + + mode = extra_args; + + errno = 0; + *mode = strtol(value, &endptr, 10); + if (*endptr != 0 || errno != 0) + return -1; + + /* validate mode value */ + switch (*mode) { + case BONDING_MODE_ROUND_ROBIN: + case BONDING_MODE_ACTIVE_BACKUP: + case BONDING_MODE_BALANCE: + case BONDING_MODE_BROADCAST: + case BONDING_MODE_8023AD: + case BONDING_MODE_TLB: + case BONDING_MODE_ALB: + return 0; + default: + RTE_BOND_LOG(ERR, "Invalid slave mode value (%s) specified", value); + return -1; + } +} + +int +bond_ethdev_parse_slave_agg_mode_kvarg(const char *key __rte_unused, + const char *value, void *extra_args) +{ + uint8_t *agg_mode; + + if (value == NULL || extra_args == NULL) + return -1; + + agg_mode = extra_args; + + errno = 0; + if (strncmp(value, "stable", 6) == 0) + *agg_mode = AGG_STABLE; + + if (strncmp(value, "bandwidth", 9) == 0) + *agg_mode = AGG_BANDWIDTH; + + if (strncmp(value, "count", 5) == 0) + *agg_mode = AGG_COUNT; + + switch (*agg_mode) { + case AGG_STABLE: + case AGG_BANDWIDTH: + case AGG_COUNT: + return 0; + default: + RTE_BOND_LOG(ERR, "Invalid agg mode value stable/bandwidth/count"); + return -1; + } +} + +int +bond_ethdev_parse_socket_id_kvarg(const char *key __rte_unused, + const char *value, void *extra_args) +{ + int socket_id; + char *endptr; + + if (value == NULL || extra_args == NULL) + return -1; + + errno = 0; + socket_id = (uint8_t)strtol(value, &endptr, 10); + if (*endptr != 0 || errno != 0) + return -1; + + /* validate socket id value */ + if (socket_id >= 0) { + *(uint8_t *)extra_args = (uint8_t)socket_id; + return 0; + } + return -1; +} + +int +bond_ethdev_parse_primary_slave_port_id_kvarg(const char *key __rte_unused, + const char *value, void *extra_args) +{ + int 
primary_slave_port_id; + + if (value == NULL || extra_args == NULL) + return -1; + + primary_slave_port_id = parse_port_id(value); + if (primary_slave_port_id < 0) + return -1; + + *(uint16_t *)extra_args = (uint16_t)primary_slave_port_id; + + return 0; +} + +int +bond_ethdev_parse_balance_xmit_policy_kvarg(const char *key __rte_unused, + const char *value, void *extra_args) +{ + uint8_t *xmit_policy; + + if (value == NULL || extra_args == NULL) + return -1; + + xmit_policy = extra_args; + + if (strcmp(PMD_BOND_XMIT_POLICY_LAYER2_KVARG, value) == 0) + *xmit_policy = BALANCE_XMIT_POLICY_LAYER2; + else if (strcmp(PMD_BOND_XMIT_POLICY_LAYER23_KVARG, value) == 0) + *xmit_policy = BALANCE_XMIT_POLICY_LAYER23; + else if (strcmp(PMD_BOND_XMIT_POLICY_LAYER34_KVARG, value) == 0) + *xmit_policy = BALANCE_XMIT_POLICY_LAYER34; + else + return -1; + + return 0; +} + +int +bond_ethdev_parse_bond_mac_addr_kvarg(const char *key __rte_unused, + const char *value, void *extra_args) +{ + if (value == NULL || extra_args == NULL) + return -1; + + /* Parse MAC */ + return rte_ether_unformat_addr(value, extra_args); +} + +int +bond_ethdev_parse_time_ms_kvarg(const char *key __rte_unused, + const char *value, void *extra_args) +{ + uint32_t time_ms; + char *endptr; + + if (value == NULL || extra_args == NULL) + return -1; + + errno = 0; + time_ms = (uint32_t)strtol(value, &endptr, 10); + if (*endptr != 0 || errno != 0) + return -1; + + *(uint32_t *)extra_args = time_ms; + + return 0; +} diff --git a/src/spdk/dpdk/drivers/net/bonding/rte_eth_bond_flow.c b/src/spdk/dpdk/drivers/net/bonding/rte_eth_bond_flow.c new file mode 100644 index 000000000..417f76bf6 --- /dev/null +++ b/src/spdk/dpdk/drivers/net/bonding/rte_eth_bond_flow.c @@ -0,0 +1,245 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2018 Mellanox Technologies, Ltd + */ + +#include <stddef.h> +#include <string.h> +#include <sys/queue.h> + +#include <rte_errno.h> +#include <rte_malloc.h> +#include <rte_tailq.h> +#include <rte_flow.h> + +#include "eth_bond_private.h" + +static struct rte_flow * +bond_flow_alloc(int numa_node, const struct rte_flow_attr *attr, + const struct rte_flow_item *items, + const struct rte_flow_action *actions) +{ + struct rte_flow *flow; + const struct rte_flow_conv_rule rule = { + .attr_ro = attr, + .pattern_ro = items, + .actions_ro = actions, + }; + struct rte_flow_error error; + int ret; + + ret = rte_flow_conv(RTE_FLOW_CONV_OP_RULE, NULL, 0, &rule, &error); + if (ret < 0) { + RTE_BOND_LOG(ERR, "Unable to process flow rule (%s): %s", + error.message ? error.message : "unspecified", + strerror(rte_errno)); + return NULL; + } + flow = rte_zmalloc_socket(NULL, offsetof(struct rte_flow, rule) + ret, + RTE_CACHE_LINE_SIZE, numa_node); + if (unlikely(flow == NULL)) { + RTE_BOND_LOG(ERR, "Could not allocate new flow"); + return NULL; + } + ret = rte_flow_conv(RTE_FLOW_CONV_OP_RULE, &flow->rule, ret, &rule, + &error); + if (ret < 0) { + RTE_BOND_LOG(ERR, "Failed to copy flow rule (%s): %s", + error.message ? 
error.message : "unspecified", + strerror(rte_errno)); + rte_free(flow); + return NULL; + } + return flow; +} + +static void +bond_flow_release(struct rte_flow **flow) +{ + rte_free(*flow); + *flow = NULL; +} + +static int +bond_flow_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, + const struct rte_flow_item patterns[], + const struct rte_flow_action actions[], + struct rte_flow_error *err) +{ + struct bond_dev_private *internals = dev->data->dev_private; + int i; + int ret; + + for (i = 0; i < internals->slave_count; i++) { + ret = rte_flow_validate(internals->slaves[i].port_id, attr, + patterns, actions, err); + if (ret) { + RTE_BOND_LOG(ERR, "Operation rte_flow_validate failed" + " for slave %d with error %d", i, ret); + return ret; + } + } + return 0; +} + +static struct rte_flow * +bond_flow_create(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, + const struct rte_flow_item patterns[], + const struct rte_flow_action actions[], + struct rte_flow_error *err) +{ + struct bond_dev_private *internals = dev->data->dev_private; + struct rte_flow *flow; + int i; + + flow = bond_flow_alloc(dev->data->numa_node, attr, patterns, actions); + if (unlikely(flow == NULL)) { + rte_flow_error_set(err, ENOMEM, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + NULL, rte_strerror(ENOMEM)); + return NULL; + } + for (i = 0; i < internals->slave_count; i++) { + flow->flows[i] = rte_flow_create(internals->slaves[i].port_id, + attr, patterns, actions, err); + if (unlikely(flow->flows[i] == NULL)) { + RTE_BOND_LOG(ERR, "Failed to create flow on slave %d", + i); + goto err; + } + } + TAILQ_INSERT_TAIL(&internals->flow_list, flow, next); + return flow; +err: + /* Destroy all slaves flows. */ + for (i = 0; i < internals->slave_count; i++) { + if (flow->flows[i] != NULL) + rte_flow_destroy(internals->slaves[i].port_id, + flow->flows[i], err); + } + bond_flow_release(&flow); + return NULL; +} + +static int +bond_flow_destroy(struct rte_eth_dev *dev, struct rte_flow *flow, + struct rte_flow_error *err) +{ + struct bond_dev_private *internals = dev->data->dev_private; + int i; + int ret = 0; + + for (i = 0; i < internals->slave_count; i++) { + int lret; + + if (unlikely(flow->flows[i] == NULL)) + continue; + lret = rte_flow_destroy(internals->slaves[i].port_id, + flow->flows[i], err); + if (unlikely(lret != 0)) { + RTE_BOND_LOG(ERR, "Failed to destroy flow on slave %d:" + " %d", i, lret); + ret = lret; + } + } + TAILQ_REMOVE(&internals->flow_list, flow, next); + bond_flow_release(&flow); + return ret; +} + +static int +bond_flow_flush(struct rte_eth_dev *dev, struct rte_flow_error *err) +{ + struct bond_dev_private *internals = dev->data->dev_private; + struct rte_flow *flow; + void *tmp; + int ret = 0; + int lret; + + /* Destroy all bond flows from its slaves instead of flushing them to + * keep the LACP flow or any other external flows. 
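For context, a hedged application-side sketch of what bond_flow_create() above fans out (the queue index and the ARP-steering rule are purely illustrative): a single rte_flow_create() on the bonded port results in one rule programmed on every current slave.

	#include <rte_byteorder.h>
	#include <rte_ether.h>
	#include <rte_flow.h>

	static struct rte_flow *
	steer_arp_to_queue(uint16_t bond_port, uint16_t queue)
	{
		struct rte_flow_attr attr = { .ingress = 1 };
		struct rte_flow_item_eth eth_spec = {
			.type = RTE_BE16(RTE_ETHER_TYPE_ARP),
		};
		struct rte_flow_item_eth eth_mask = { .type = RTE_BE16(0xffff) };
		struct rte_flow_item pattern[] = {
			{ .type = RTE_FLOW_ITEM_TYPE_ETH,
			  .spec = &eth_spec, .mask = &eth_mask },
			{ .type = RTE_FLOW_ITEM_TYPE_END },
		};
		struct rte_flow_action_queue q = { .index = queue };
		struct rte_flow_action actions[] = {
			{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &q },
			{ .type = RTE_FLOW_ACTION_TYPE_END },
		};
		struct rte_flow_error err;

		/* The bonding PMD replicates this rule to each slave port. */
		return rte_flow_create(bond_port, &attr, pattern, actions, &err);
	}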
+ */ + TAILQ_FOREACH_SAFE(flow, &internals->flow_list, next, tmp) { + lret = bond_flow_destroy(dev, flow, err); + if (unlikely(lret != 0)) + ret = lret; + } + if (unlikely(ret != 0)) + RTE_BOND_LOG(ERR, "Failed to flush flow in all slaves"); + return ret; +} + +static int +bond_flow_query_count(struct rte_eth_dev *dev, struct rte_flow *flow, + const struct rte_flow_action *action, + struct rte_flow_query_count *count, + struct rte_flow_error *err) +{ + struct bond_dev_private *internals = dev->data->dev_private; + struct rte_flow_query_count slave_count; + int i; + int ret; + + count->bytes = 0; + count->hits = 0; + rte_memcpy(&slave_count, count, sizeof(slave_count)); + for (i = 0; i < internals->slave_count; i++) { + ret = rte_flow_query(internals->slaves[i].port_id, + flow->flows[i], action, + &slave_count, err); + if (unlikely(ret != 0)) { + RTE_BOND_LOG(ERR, "Failed to query flow on" + " slave %d: %d", i, ret); + return ret; + } + count->bytes += slave_count.bytes; + count->hits += slave_count.hits; + slave_count.bytes = 0; + slave_count.hits = 0; + } + return 0; +} + +static int +bond_flow_query(struct rte_eth_dev *dev, struct rte_flow *flow, + const struct rte_flow_action *action, void *arg, + struct rte_flow_error *err) +{ + switch (action->type) { + case RTE_FLOW_ACTION_TYPE_COUNT: + return bond_flow_query_count(dev, flow, action, arg, err); + default: + return rte_flow_error_set(err, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ACTION, arg, + rte_strerror(ENOTSUP)); + } +} + +static int +bond_flow_isolate(struct rte_eth_dev *dev, int set, + struct rte_flow_error *err) +{ + struct bond_dev_private *internals = dev->data->dev_private; + int i; + int ret; + + for (i = 0; i < internals->slave_count; i++) { + ret = rte_flow_isolate(internals->slaves[i].port_id, set, err); + if (unlikely(ret != 0)) { + RTE_BOND_LOG(ERR, "Operation rte_flow_isolate failed" + " for slave %d with error %d", i, ret); + internals->flow_isolated_valid = 0; + return ret; + } + } + internals->flow_isolated = set; + internals->flow_isolated_valid = 1; + return 0; +} + +const struct rte_flow_ops bond_flow_ops = { + .validate = bond_flow_validate, + .create = bond_flow_create, + .destroy = bond_flow_destroy, + .flush = bond_flow_flush, + .query = bond_flow_query, + .isolate = bond_flow_isolate, +}; diff --git a/src/spdk/dpdk/drivers/net/bonding/rte_eth_bond_pmd.c b/src/spdk/dpdk/drivers/net/bonding/rte_eth_bond_pmd.c new file mode 100644 index 000000000..612a64599 --- /dev/null +++ b/src/spdk/dpdk/drivers/net/bonding/rte_eth_bond_pmd.c @@ -0,0 +1,3760 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2017 Intel Corporation + */ +#include <stdlib.h> +#include <stdbool.h> +#include <netinet/in.h> + +#include <rte_mbuf.h> +#include <rte_malloc.h> +#include <rte_ethdev_driver.h> +#include <rte_ethdev_vdev.h> +#include <rte_tcp.h> +#include <rte_udp.h> +#include <rte_ip.h> +#include <rte_ip_frag.h> +#include <rte_devargs.h> +#include <rte_kvargs.h> +#include <rte_bus_vdev.h> +#include <rte_alarm.h> +#include <rte_cycles.h> +#include <rte_string_fns.h> + +#include "rte_eth_bond.h" +#include "eth_bond_private.h" +#include "eth_bond_8023ad_private.h" + +#define REORDER_PERIOD_MS 10 +#define DEFAULT_POLLING_INTERVAL_10_MS (10) +#define BOND_MAX_MAC_ADDRS 16 + +#define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port) + +/* Table for statistics in mode 5 TLB */ +static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS]; + +static inline size_t +get_vlan_offset(struct rte_ether_hdr *eth_hdr, uint16_t *proto) +{ + size_t 
vlan_offset = 0; + + if (rte_cpu_to_be_16(RTE_ETHER_TYPE_VLAN) == *proto || + rte_cpu_to_be_16(RTE_ETHER_TYPE_QINQ) == *proto) { + struct rte_vlan_hdr *vlan_hdr = + (struct rte_vlan_hdr *)(eth_hdr + 1); + + vlan_offset = sizeof(struct rte_vlan_hdr); + *proto = vlan_hdr->eth_proto; + + if (rte_cpu_to_be_16(RTE_ETHER_TYPE_VLAN) == *proto) { + vlan_hdr = vlan_hdr + 1; + *proto = vlan_hdr->eth_proto; + vlan_offset += sizeof(struct rte_vlan_hdr); + } + } + return vlan_offset; +} + +static uint16_t +bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) +{ + struct bond_dev_private *internals; + + uint16_t num_rx_total = 0; + uint16_t slave_count; + uint16_t active_slave; + int i; + + /* Cast to structure, containing bonded device's port id and queue id */ + struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue; + internals = bd_rx_q->dev_private; + slave_count = internals->active_slave_count; + active_slave = internals->active_slave; + + for (i = 0; i < slave_count && nb_pkts; i++) { + uint16_t num_rx_slave; + + /* Offset of pointer to *bufs increases as packets are received + * from other slaves */ + num_rx_slave = + rte_eth_rx_burst(internals->active_slaves[active_slave], + bd_rx_q->queue_id, + bufs + num_rx_total, nb_pkts); + num_rx_total += num_rx_slave; + nb_pkts -= num_rx_slave; + if (++active_slave == slave_count) + active_slave = 0; + } + + if (++internals->active_slave >= slave_count) + internals->active_slave = 0; + return num_rx_total; +} + +static uint16_t +bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs, + uint16_t nb_pkts) +{ + struct bond_dev_private *internals; + + /* Cast to structure, containing bonded device's port id and queue id */ + struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue; + + internals = bd_rx_q->dev_private; + + return rte_eth_rx_burst(internals->current_primary_port, + bd_rx_q->queue_id, bufs, nb_pkts); +} + +static inline uint8_t +is_lacp_packets(uint16_t ethertype, uint8_t subtype, struct rte_mbuf *mbuf) +{ + const uint16_t ether_type_slow_be = + rte_be_to_cpu_16(RTE_ETHER_TYPE_SLOW); + + return !((mbuf->ol_flags & PKT_RX_VLAN) ? 
mbuf->vlan_tci : 0) && + (ethertype == ether_type_slow_be && + (subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP)); +} + +/***************************************************************************** + * Flow director's setup for mode 4 optimization + */ + +static struct rte_flow_item_eth flow_item_eth_type_8023ad = { + .dst.addr_bytes = { 0 }, + .src.addr_bytes = { 0 }, + .type = RTE_BE16(RTE_ETHER_TYPE_SLOW), +}; + +static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = { + .dst.addr_bytes = { 0 }, + .src.addr_bytes = { 0 }, + .type = 0xFFFF, +}; + +static struct rte_flow_item flow_item_8023ad[] = { + { + .type = RTE_FLOW_ITEM_TYPE_ETH, + .spec = &flow_item_eth_type_8023ad, + .last = NULL, + .mask = &flow_item_eth_mask_type_8023ad, + }, + { + .type = RTE_FLOW_ITEM_TYPE_END, + .spec = NULL, + .last = NULL, + .mask = NULL, + } +}; + +const struct rte_flow_attr flow_attr_8023ad = { + .group = 0, + .priority = 0, + .ingress = 1, + .egress = 0, + .reserved = 0, +}; + +int +bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev, + uint16_t slave_port) { + struct rte_eth_dev_info slave_info; + struct rte_flow_error error; + struct bond_dev_private *internals = bond_dev->data->dev_private; + + const struct rte_flow_action_queue lacp_queue_conf = { + .index = 0, + }; + + const struct rte_flow_action actions[] = { + { + .type = RTE_FLOW_ACTION_TYPE_QUEUE, + .conf = &lacp_queue_conf + }, + { + .type = RTE_FLOW_ACTION_TYPE_END, + } + }; + + int ret = rte_flow_validate(slave_port, &flow_attr_8023ad, + flow_item_8023ad, actions, &error); + if (ret < 0) { + RTE_BOND_LOG(ERR, "%s: %s (slave_port=%d queue_id=%d)", + __func__, error.message, slave_port, + internals->mode4.dedicated_queues.rx_qid); + return -1; + } + + ret = rte_eth_dev_info_get(slave_port, &slave_info); + if (ret != 0) { + RTE_BOND_LOG(ERR, + "%s: Error during getting device (port %u) info: %s\n", + __func__, slave_port, strerror(-ret)); + + return ret; + } + + if (slave_info.max_rx_queues < bond_dev->data->nb_rx_queues || + slave_info.max_tx_queues < bond_dev->data->nb_tx_queues) { + RTE_BOND_LOG(ERR, + "%s: Slave %d capabilities doesn't allow to allocate additional queues", + __func__, slave_port); + return -1; + } + + return 0; +} + +int +bond_8023ad_slow_pkt_hw_filter_supported(uint16_t port_id) { + struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id]; + struct bond_dev_private *internals = bond_dev->data->dev_private; + struct rte_eth_dev_info bond_info; + uint16_t idx; + int ret; + + /* Verify if all slaves in bonding supports flow director and */ + if (internals->slave_count > 0) { + ret = rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info); + if (ret != 0) { + RTE_BOND_LOG(ERR, + "%s: Error during getting device (port %u) info: %s\n", + __func__, bond_dev->data->port_id, + strerror(-ret)); + + return ret; + } + + internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues; + internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues; + + for (idx = 0; idx < internals->slave_count; idx++) { + if (bond_ethdev_8023ad_flow_verify(bond_dev, + internals->slaves[idx].port_id) != 0) + return -1; + } + } + + return 0; +} + +int +bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint16_t slave_port) { + + struct rte_flow_error error; + struct bond_dev_private *internals = bond_dev->data->dev_private; + struct rte_flow_action_queue lacp_queue_conf = { + .index = internals->mode4.dedicated_queues.rx_qid, + }; + + const struct rte_flow_action actions[] = { + { + .type = 
RTE_FLOW_ACTION_TYPE_QUEUE, + .conf = &lacp_queue_conf + }, + { + .type = RTE_FLOW_ACTION_TYPE_END, + } + }; + + internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port, + &flow_attr_8023ad, flow_item_8023ad, actions, &error); + if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) { + RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s " + "(slave_port=%d queue_id=%d)", + error.message, slave_port, + internals->mode4.dedicated_queues.rx_qid); + return -1; + } + + return 0; +} + +static inline uint16_t +rx_burst_8023ad(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts, + bool dedicated_rxq) +{ + /* Cast to structure, containing bonded device's port id and queue id */ + struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue; + struct bond_dev_private *internals = bd_rx_q->dev_private; + struct rte_eth_dev *bonded_eth_dev = + &rte_eth_devices[internals->port_id]; + struct rte_ether_addr *bond_mac = bonded_eth_dev->data->mac_addrs; + struct rte_ether_hdr *hdr; + + const uint16_t ether_type_slow_be = + rte_be_to_cpu_16(RTE_ETHER_TYPE_SLOW); + uint16_t num_rx_total = 0; /* Total number of received packets */ + uint16_t slaves[RTE_MAX_ETHPORTS]; + uint16_t slave_count, idx; + + uint8_t collecting; /* current slave collecting status */ + const uint8_t promisc = rte_eth_promiscuous_get(internals->port_id); + const uint8_t allmulti = rte_eth_allmulticast_get(internals->port_id); + uint8_t subtype; + uint16_t i; + uint16_t j; + uint16_t k; + + /* Copy slave list to protect against slave up/down changes during tx + * bursting */ + slave_count = internals->active_slave_count; + memcpy(slaves, internals->active_slaves, + sizeof(internals->active_slaves[0]) * slave_count); + + idx = internals->active_slave; + if (idx >= slave_count) { + internals->active_slave = 0; + idx = 0; + } + for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) { + j = num_rx_total; + collecting = ACTOR_STATE(&bond_mode_8023ad_ports[slaves[idx]], + COLLECTING); + + /* Read packets from this slave */ + num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id, + &bufs[num_rx_total], nb_pkts - num_rx_total); + + for (k = j; k < 2 && k < num_rx_total; k++) + rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *)); + + /* Handle slow protocol packets. 
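When the application opts in to the dedicated control queues whose flow rules are set up above, LACP frames are steered in hardware and the software slow-packet handling in the receive path is skipped. A minimal opt-in sketch (bond_port is assumed valid; this is typically called after slaves are added and before the bonded device is started):

	#include <rte_eth_bond_8023ad.h>

	/* Request hardware steering of LACP frames to a dedicated queue
	 * pair; on failure the PMD keeps the software slow-packet path. */
	static int enable_hw_lacp_filter(uint16_t bond_port)
	{
		return rte_eth_bond_8023ad_dedicated_queues_enable(bond_port);
	}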
*/ + while (j < num_rx_total) { + if (j + 3 < num_rx_total) + rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *)); + + hdr = rte_pktmbuf_mtod(bufs[j], struct rte_ether_hdr *); + subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype; + + /* Remove packet from array if: + * - it is slow packet but no dedicated rxq is present, + * - slave is not in collecting state, + * - bonding interface is not in promiscuous mode: + * - packet is unicast and address does not match, + * - packet is multicast and bonding interface + * is not in allmulti, + */ + if (unlikely( + (!dedicated_rxq && + is_lacp_packets(hdr->ether_type, subtype, + bufs[j])) || + !collecting || + (!promisc && + ((rte_is_unicast_ether_addr(&hdr->d_addr) && + !rte_is_same_ether_addr(bond_mac, + &hdr->d_addr)) || + (!allmulti && + rte_is_multicast_ether_addr(&hdr->d_addr)))))) { + + if (hdr->ether_type == ether_type_slow_be) { + bond_mode_8023ad_handle_slow_pkt( + internals, slaves[idx], bufs[j]); + } else + rte_pktmbuf_free(bufs[j]); + + /* Packet is managed by mode 4 or dropped, shift the array */ + num_rx_total--; + if (j < num_rx_total) { + memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) * + (num_rx_total - j)); + } + } else + j++; + } + if (unlikely(++idx == slave_count)) + idx = 0; + } + + if (++internals->active_slave >= slave_count) + internals->active_slave = 0; + + return num_rx_total; +} + +static uint16_t +bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs, + uint16_t nb_pkts) +{ + return rx_burst_8023ad(queue, bufs, nb_pkts, false); +} + +static uint16_t +bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs, + uint16_t nb_pkts) +{ + return rx_burst_8023ad(queue, bufs, nb_pkts, true); +} + +#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1) +uint32_t burstnumberRX; +uint32_t burstnumberTX; + +#ifdef RTE_LIBRTE_BOND_DEBUG_ALB + +static void +arp_op_name(uint16_t arp_op, char *buf, size_t buf_len) +{ + switch (arp_op) { + case RTE_ARP_OP_REQUEST: + strlcpy(buf, "ARP Request", buf_len); + return; + case RTE_ARP_OP_REPLY: + strlcpy(buf, "ARP Reply", buf_len); + return; + case RTE_ARP_OP_REVREQUEST: + strlcpy(buf, "Reverse ARP Request", buf_len); + return; + case RTE_ARP_OP_REVREPLY: + strlcpy(buf, "Reverse ARP Reply", buf_len); + return; + case RTE_ARP_OP_INVREQUEST: + strlcpy(buf, "Peer Identify Request", buf_len); + return; + case RTE_ARP_OP_INVREPLY: + strlcpy(buf, "Peer Identify Reply", buf_len); + return; + default: + break; + } + strlcpy(buf, "Unknown", buf_len); + return; +} +#endif +#define MaxIPv4String 16 +static void +ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size) +{ + uint32_t ipv4_addr; + + ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr); + snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF, + (ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF, + ipv4_addr & 0xFF); +} + +#define MAX_CLIENTS_NUMBER 128 +uint8_t active_clients; +struct client_stats_t { + uint16_t port; + uint32_t ipv4_addr; + uint32_t ipv4_rx_packets; + uint32_t ipv4_tx_packets; +}; +struct client_stats_t client_stats[MAX_CLIENTS_NUMBER]; + +static void +update_client_stats(uint32_t addr, uint16_t port, uint32_t *TXorRXindicator) +{ + int i = 0; + + for (; i < MAX_CLIENTS_NUMBER; i++) { + if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port)) { + /* Just update RX packets number for this client */ + if (TXorRXindicator == &burstnumberRX) + client_stats[i].ipv4_rx_packets++; + else + client_stats[i].ipv4_tx_packets++; + 
return; + } + } + /* We have a new client. Insert him to the table, and increment stats */ + if (TXorRXindicator == &burstnumberRX) + client_stats[active_clients].ipv4_rx_packets++; + else + client_stats[active_clients].ipv4_tx_packets++; + client_stats[active_clients].ipv4_addr = addr; + client_stats[active_clients].port = port; + active_clients++; + +} + +#ifdef RTE_LIBRTE_BOND_DEBUG_ALB +#define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber) \ + rte_log(RTE_LOG_DEBUG, bond_logtype, \ + "%s port:%d SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X SrcIP:%s " \ + "DstMAC:%02X:%02X:%02X:%02X:%02X:%02X DstIP:%s %s %d\n", \ + info, \ + port, \ + eth_h->s_addr.addr_bytes[0], eth_h->s_addr.addr_bytes[1], \ + eth_h->s_addr.addr_bytes[2], eth_h->s_addr.addr_bytes[3], \ + eth_h->s_addr.addr_bytes[4], eth_h->s_addr.addr_bytes[5], \ + src_ip, \ + eth_h->d_addr.addr_bytes[0], eth_h->d_addr.addr_bytes[1], \ + eth_h->d_addr.addr_bytes[2], eth_h->d_addr.addr_bytes[3], \ + eth_h->d_addr.addr_bytes[4], eth_h->d_addr.addr_bytes[5], \ + dst_ip, \ + arp_op, ++burstnumber) +#endif + +static void +mode6_debug(const char __rte_unused *info, + struct rte_ether_hdr *eth_h, uint16_t port, + uint32_t __rte_unused *burstnumber) +{ + struct rte_ipv4_hdr *ipv4_h; +#ifdef RTE_LIBRTE_BOND_DEBUG_ALB + struct rte_arp_hdr *arp_h; + char dst_ip[16]; + char ArpOp[24]; + char buf[16]; +#endif + char src_ip[16]; + + uint16_t ether_type = eth_h->ether_type; + uint16_t offset = get_vlan_offset(eth_h, ðer_type); + +#ifdef RTE_LIBRTE_BOND_DEBUG_ALB + strlcpy(buf, info, 16); +#endif + + if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4)) { + ipv4_h = (struct rte_ipv4_hdr *)((char *)(eth_h + 1) + offset); + ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String); +#ifdef RTE_LIBRTE_BOND_DEBUG_ALB + ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String); + MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber); +#endif + update_client_stats(ipv4_h->src_addr, port, burstnumber); + } +#ifdef RTE_LIBRTE_BOND_DEBUG_ALB + else if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) { + arp_h = (struct rte_arp_hdr *)((char *)(eth_h + 1) + offset); + ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String); + ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String); + arp_op_name(rte_be_to_cpu_16(arp_h->arp_opcode), + ArpOp, sizeof(ArpOp)); + MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber); + } +#endif +} +#endif + +static uint16_t +bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) +{ + struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue; + struct bond_dev_private *internals = bd_tx_q->dev_private; + struct rte_ether_hdr *eth_h; + uint16_t ether_type, offset; + uint16_t nb_recv_pkts; + int i; + + nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts); + + for (i = 0; i < nb_recv_pkts; i++) { + eth_h = rte_pktmbuf_mtod(bufs[i], struct rte_ether_hdr *); + ether_type = eth_h->ether_type; + offset = get_vlan_offset(eth_h, ðer_type); + + if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) { +#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1) + mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX); +#endif + bond_mode_alb_arp_recv(eth_h, offset, internals); + } +#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1) + else if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4)) + mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX); +#endif + } + + return nb_recv_pkts; 
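Whichever of these mode-specific burst functions is active underneath, the application polls the bonded port like any other ethdev. A small sketch (queue 0, burst of 32, bond_port assumed configured and started):

	#include <rte_ethdev.h>
	#include <rte_mbuf.h>

	static void poll_bond_once(uint16_t bond_port)
	{
		struct rte_mbuf *pkts[32];
		uint16_t nb = rte_eth_rx_burst(bond_port, 0, pkts, 32);
		uint16_t sent = rte_eth_tx_burst(bond_port, 0, pkts, nb);

		/* The bonding PMD may not forward everything; free the rest. */
		while (sent < nb)
			rte_pktmbuf_free(pkts[sent++]);
	}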
+} + +static uint16_t +bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs, + uint16_t nb_pkts) +{ + struct bond_dev_private *internals; + struct bond_tx_queue *bd_tx_q; + + struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts]; + uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 }; + + uint16_t num_of_slaves; + uint16_t slaves[RTE_MAX_ETHPORTS]; + + uint16_t num_tx_total = 0, num_tx_slave; + + static int slave_idx = 0; + int i, cslave_idx = 0, tx_fail_total = 0; + + bd_tx_q = (struct bond_tx_queue *)queue; + internals = bd_tx_q->dev_private; + + /* Copy slave list to protect against slave up/down changes during tx + * bursting */ + num_of_slaves = internals->active_slave_count; + memcpy(slaves, internals->active_slaves, + sizeof(internals->active_slaves[0]) * num_of_slaves); + + if (num_of_slaves < 1) + return num_tx_total; + + /* Populate slaves mbuf with which packets are to be sent on it */ + for (i = 0; i < nb_pkts; i++) { + cslave_idx = (slave_idx + i) % num_of_slaves; + slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i]; + } + + /* increment current slave index so the next call to tx burst starts on the + * next slave */ + slave_idx = ++cslave_idx; + + /* Send packet burst on each slave device */ + for (i = 0; i < num_of_slaves; i++) { + if (slave_nb_pkts[i] > 0) { + num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id, + slave_bufs[i], slave_nb_pkts[i]); + + /* if tx burst fails move packets to end of bufs */ + if (unlikely(num_tx_slave < slave_nb_pkts[i])) { + int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave; + + tx_fail_total += tx_fail_slave; + + memcpy(&bufs[nb_pkts - tx_fail_total], + &slave_bufs[i][num_tx_slave], + tx_fail_slave * sizeof(bufs[0])); + } + num_tx_total += num_tx_slave; + } + } + + return num_tx_total; +} + +static uint16_t +bond_ethdev_tx_burst_active_backup(void *queue, + struct rte_mbuf **bufs, uint16_t nb_pkts) +{ + struct bond_dev_private *internals; + struct bond_tx_queue *bd_tx_q; + + bd_tx_q = (struct bond_tx_queue *)queue; + internals = bd_tx_q->dev_private; + + if (internals->active_slave_count < 1) + return 0; + + return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id, + bufs, nb_pkts); +} + +static inline uint16_t +ether_hash(struct rte_ether_hdr *eth_hdr) +{ + unaligned_uint16_t *word_src_addr = + (unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes; + unaligned_uint16_t *word_dst_addr = + (unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes; + + return (word_src_addr[0] ^ word_dst_addr[0]) ^ + (word_src_addr[1] ^ word_dst_addr[1]) ^ + (word_src_addr[2] ^ word_dst_addr[2]); +} + +static inline uint32_t +ipv4_hash(struct rte_ipv4_hdr *ipv4_hdr) +{ + return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr; +} + +static inline uint32_t +ipv6_hash(struct rte_ipv6_hdr *ipv6_hdr) +{ + unaligned_uint32_t *word_src_addr = + (unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]); + unaligned_uint32_t *word_dst_addr = + (unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]); + + return (word_src_addr[0] ^ word_dst_addr[0]) ^ + (word_src_addr[1] ^ word_dst_addr[1]) ^ + (word_src_addr[2] ^ word_dst_addr[2]) ^ + (word_src_addr[3] ^ word_dst_addr[3]); +} + + +void +burst_xmit_l2_hash(struct rte_mbuf **buf, uint16_t nb_pkts, + uint16_t slave_count, uint16_t *slaves) +{ + struct rte_ether_hdr *eth_hdr; + uint32_t hash; + int i; + + for (i = 0; i < nb_pkts; i++) { + eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *); + + hash = ether_hash(eth_hdr); + + slaves[i] = (hash ^= hash >> 8) % slave_count; + } +} + +void 
+burst_xmit_l23_hash(struct rte_mbuf **buf, uint16_t nb_pkts, + uint16_t slave_count, uint16_t *slaves) +{ + uint16_t i; + struct rte_ether_hdr *eth_hdr; + uint16_t proto; + size_t vlan_offset; + uint32_t hash, l3hash; + + for (i = 0; i < nb_pkts; i++) { + eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *); + l3hash = 0; + + proto = eth_hdr->ether_type; + hash = ether_hash(eth_hdr); + + vlan_offset = get_vlan_offset(eth_hdr, &proto); + + if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4) == proto) { + struct rte_ipv4_hdr *ipv4_hdr = (struct rte_ipv4_hdr *) + ((char *)(eth_hdr + 1) + vlan_offset); + l3hash = ipv4_hash(ipv4_hdr); + + } else if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6) == proto) { + struct rte_ipv6_hdr *ipv6_hdr = (struct rte_ipv6_hdr *) + ((char *)(eth_hdr + 1) + vlan_offset); + l3hash = ipv6_hash(ipv6_hdr); + } + + hash = hash ^ l3hash; + hash ^= hash >> 16; + hash ^= hash >> 8; + + slaves[i] = hash % slave_count; + } +} + +void +burst_xmit_l34_hash(struct rte_mbuf **buf, uint16_t nb_pkts, + uint16_t slave_count, uint16_t *slaves) +{ + struct rte_ether_hdr *eth_hdr; + uint16_t proto; + size_t vlan_offset; + int i; + + struct rte_udp_hdr *udp_hdr; + struct rte_tcp_hdr *tcp_hdr; + uint32_t hash, l3hash, l4hash; + + for (i = 0; i < nb_pkts; i++) { + eth_hdr = rte_pktmbuf_mtod(buf[i], struct rte_ether_hdr *); + size_t pkt_end = (size_t)eth_hdr + rte_pktmbuf_data_len(buf[i]); + proto = eth_hdr->ether_type; + vlan_offset = get_vlan_offset(eth_hdr, &proto); + l3hash = 0; + l4hash = 0; + + if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4) == proto) { + struct rte_ipv4_hdr *ipv4_hdr = (struct rte_ipv4_hdr *) + ((char *)(eth_hdr + 1) + vlan_offset); + size_t ip_hdr_offset; + + l3hash = ipv4_hash(ipv4_hdr); + + /* there is no L4 header in fragmented packet */ + if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr) + == 0)) { + ip_hdr_offset = (ipv4_hdr->version_ihl + & RTE_IPV4_HDR_IHL_MASK) * + RTE_IPV4_IHL_MULTIPLIER; + + if (ipv4_hdr->next_proto_id == IPPROTO_TCP) { + tcp_hdr = (struct rte_tcp_hdr *) + ((char *)ipv4_hdr + + ip_hdr_offset); + if ((size_t)tcp_hdr + sizeof(*tcp_hdr) + < pkt_end) + l4hash = HASH_L4_PORTS(tcp_hdr); + } else if (ipv4_hdr->next_proto_id == + IPPROTO_UDP) { + udp_hdr = (struct rte_udp_hdr *) + ((char *)ipv4_hdr + + ip_hdr_offset); + if ((size_t)udp_hdr + sizeof(*udp_hdr) + < pkt_end) + l4hash = HASH_L4_PORTS(udp_hdr); + } + } + } else if (rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6) == proto) { + struct rte_ipv6_hdr *ipv6_hdr = (struct rte_ipv6_hdr *) + ((char *)(eth_hdr + 1) + vlan_offset); + l3hash = ipv6_hash(ipv6_hdr); + + if (ipv6_hdr->proto == IPPROTO_TCP) { + tcp_hdr = (struct rte_tcp_hdr *)(ipv6_hdr + 1); + l4hash = HASH_L4_PORTS(tcp_hdr); + } else if (ipv6_hdr->proto == IPPROTO_UDP) { + udp_hdr = (struct rte_udp_hdr *)(ipv6_hdr + 1); + l4hash = HASH_L4_PORTS(udp_hdr); + } + } + + hash = l3hash ^ l4hash; + hash ^= hash >> 16; + hash ^= hash >> 8; + + slaves[i] = hash % slave_count; + } +} + +struct bwg_slave { + uint64_t bwg_left_int; + uint64_t bwg_left_remainder; + uint16_t slave; +}; + +void +bond_tlb_activate_slave(struct bond_dev_private *internals) { + int i; + + for (i = 0; i < internals->active_slave_count; i++) { + tlb_last_obytets[internals->active_slaves[i]] = 0; + } +} + +static int +bandwidth_cmp(const void *a, const void *b) +{ + const struct bwg_slave *bwg_a = a; + const struct bwg_slave *bwg_b = b; + int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int; + int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder - + 
(int64_t)bwg_a->bwg_left_remainder; + if (diff > 0) + return 1; + else if (diff < 0) + return -1; + else if (diff2 > 0) + return 1; + else if (diff2 < 0) + return -1; + else + return 0; +} + +static void +bandwidth_left(uint16_t port_id, uint64_t load, uint8_t update_idx, + struct bwg_slave *bwg_slave) +{ + struct rte_eth_link link_status; + int ret; + + ret = rte_eth_link_get_nowait(port_id, &link_status); + if (ret < 0) { + RTE_BOND_LOG(ERR, "Slave (port %u) link get failed: %s", + port_id, rte_strerror(-ret)); + return; + } + uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8; + if (link_bwg == 0) + return; + link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS; + bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg; + bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg; +} + +static void +bond_ethdev_update_tlb_slave_cb(void *arg) +{ + struct bond_dev_private *internals = arg; + struct rte_eth_stats slave_stats; + struct bwg_slave bwg_array[RTE_MAX_ETHPORTS]; + uint16_t slave_count; + uint64_t tx_bytes; + + uint8_t update_stats = 0; + uint16_t slave_id; + uint16_t i; + + internals->slave_update_idx++; + + + if (internals->slave_update_idx >= REORDER_PERIOD_MS) + update_stats = 1; + + for (i = 0; i < internals->active_slave_count; i++) { + slave_id = internals->active_slaves[i]; + rte_eth_stats_get(slave_id, &slave_stats); + tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id]; + bandwidth_left(slave_id, tx_bytes, + internals->slave_update_idx, &bwg_array[i]); + bwg_array[i].slave = slave_id; + + if (update_stats) { + tlb_last_obytets[slave_id] = slave_stats.obytes; + } + } + + if (update_stats == 1) + internals->slave_update_idx = 0; + + slave_count = i; + qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp); + for (i = 0; i < slave_count; i++) + internals->tlb_slaves_order[i] = bwg_array[i].slave; + + rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb, + (struct bond_dev_private *)internals); +} + +static uint16_t +bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) +{ + struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue; + struct bond_dev_private *internals = bd_tx_q->dev_private; + + struct rte_eth_dev *primary_port = + &rte_eth_devices[internals->primary_port]; + uint16_t num_tx_total = 0; + uint16_t i, j; + + uint16_t num_of_slaves = internals->active_slave_count; + uint16_t slaves[RTE_MAX_ETHPORTS]; + + struct rte_ether_hdr *ether_hdr; + struct rte_ether_addr primary_slave_addr; + struct rte_ether_addr active_slave_addr; + + if (num_of_slaves < 1) + return num_tx_total; + + memcpy(slaves, internals->tlb_slaves_order, + sizeof(internals->tlb_slaves_order[0]) * num_of_slaves); + + + rte_ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr); + + if (nb_pkts > 3) { + for (i = 0; i < 3; i++) + rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*)); + } + + for (i = 0; i < num_of_slaves; i++) { + rte_eth_macaddr_get(slaves[i], &active_slave_addr); + for (j = num_tx_total; j < nb_pkts; j++) { + if (j + 3 < nb_pkts) + rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*)); + + ether_hdr = rte_pktmbuf_mtod(bufs[j], + struct rte_ether_hdr *); + if (rte_is_same_ether_addr(ðer_hdr->s_addr, + &primary_slave_addr)) + rte_ether_addr_copy(&active_slave_addr, + ðer_hdr->s_addr); +#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1) + mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX); +#endif + } + + num_tx_total += 
rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id, + bufs + num_tx_total, nb_pkts - num_tx_total); + + if (num_tx_total == nb_pkts) + break; + } + + return num_tx_total; +} + +void +bond_tlb_disable(struct bond_dev_private *internals) +{ + rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals); +} + +void +bond_tlb_enable(struct bond_dev_private *internals) +{ + bond_ethdev_update_tlb_slave_cb(internals); +} + +static uint16_t +bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) +{ + struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue; + struct bond_dev_private *internals = bd_tx_q->dev_private; + + struct rte_ether_hdr *eth_h; + uint16_t ether_type, offset; + + struct client_data *client_info; + + /* + * We create transmit buffers for every slave and one additional to send + * through tlb. In worst case every packet will be send on one port. + */ + struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts]; + uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 }; + + /* + * We create separate transmit buffers for update packets as they won't + * be counted in num_tx_total. + */ + struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE]; + uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 }; + + struct rte_mbuf *upd_pkt; + size_t pkt_size; + + uint16_t num_send, num_not_send = 0; + uint16_t num_tx_total = 0; + uint16_t slave_idx; + + int i, j; + + /* Search tx buffer for ARP packets and forward them to alb */ + for (i = 0; i < nb_pkts; i++) { + eth_h = rte_pktmbuf_mtod(bufs[i], struct rte_ether_hdr *); + ether_type = eth_h->ether_type; + offset = get_vlan_offset(eth_h, ðer_type); + + if (ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) { + slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals); + + /* Change src mac in eth header */ + rte_eth_macaddr_get(slave_idx, ð_h->s_addr); + + /* Add packet to slave tx buffer */ + slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i]; + slave_bufs_pkts[slave_idx]++; + } else { + /* If packet is not ARP, send it with TLB policy */ + slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] = + bufs[i]; + slave_bufs_pkts[RTE_MAX_ETHPORTS]++; + } + } + + /* Update connected client ARP tables */ + if (internals->mode6.ntt) { + for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) { + client_info = &internals->mode6.client_table[i]; + + if (client_info->in_use) { + /* Allocate new packet to send ARP update on current slave */ + upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool); + if (upd_pkt == NULL) { + RTE_BOND_LOG(ERR, + "Failed to allocate ARP packet from pool"); + continue; + } + pkt_size = sizeof(struct rte_ether_hdr) + + sizeof(struct rte_arp_hdr) + + client_info->vlan_count * + sizeof(struct rte_vlan_hdr); + upd_pkt->data_len = pkt_size; + upd_pkt->pkt_len = pkt_size; + + slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt, + internals); + + /* Add packet to update tx buffer */ + update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt; + update_bufs_pkts[slave_idx]++; + } + } + internals->mode6.ntt = 0; + } + + /* Send ARP packets on proper slaves */ + for (i = 0; i < RTE_MAX_ETHPORTS; i++) { + if (slave_bufs_pkts[i] > 0) { + num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, + slave_bufs[i], slave_bufs_pkts[i]); + for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) { + bufs[nb_pkts - 1 - num_not_send - j] = + slave_bufs[i][nb_pkts - 1 - j]; + } + + num_tx_total += num_send; + num_not_send += slave_bufs_pkts[i] - num_send; + +#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || 
defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1) + /* Print TX stats including update packets */ + for (j = 0; j < slave_bufs_pkts[i]; j++) { + eth_h = rte_pktmbuf_mtod(slave_bufs[i][j], + struct rte_ether_hdr *); + mode6_debug("TX ARP:", eth_h, i, &burstnumberTX); + } +#endif + } + } + + /* Send update packets on proper slaves */ + for (i = 0; i < RTE_MAX_ETHPORTS; i++) { + if (update_bufs_pkts[i] > 0) { + num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i], + update_bufs_pkts[i]); + for (j = num_send; j < update_bufs_pkts[i]; j++) { + rte_pktmbuf_free(update_bufs[i][j]); + } +#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1) + for (j = 0; j < update_bufs_pkts[i]; j++) { + eth_h = rte_pktmbuf_mtod(update_bufs[i][j], + struct rte_ether_hdr *); + mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX); + } +#endif + } + } + + /* Send non-ARP packets using tlb policy */ + if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) { + num_send = bond_ethdev_tx_burst_tlb(queue, + slave_bufs[RTE_MAX_ETHPORTS], + slave_bufs_pkts[RTE_MAX_ETHPORTS]); + + for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS]; j++) { + bufs[nb_pkts - 1 - num_not_send - j] = + slave_bufs[RTE_MAX_ETHPORTS][nb_pkts - 1 - j]; + } + + num_tx_total += num_send; + } + + return num_tx_total; +} + +static inline uint16_t +tx_burst_balance(void *queue, struct rte_mbuf **bufs, uint16_t nb_bufs, + uint16_t *slave_port_ids, uint16_t slave_count) +{ + struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue; + struct bond_dev_private *internals = bd_tx_q->dev_private; + + /* Array to sort mbufs for transmission on each slave into */ + struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs]; + /* Number of mbufs for transmission on each slave */ + uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 }; + /* Mapping array generated by hash function to map mbufs to slaves */ + uint16_t bufs_slave_port_idxs[nb_bufs]; + + uint16_t slave_tx_count; + uint16_t total_tx_count = 0, total_tx_fail_count = 0; + + uint16_t i; + + /* + * Populate slaves mbuf with the packets which are to be sent on it + * selecting output slave using hash based on xmit policy + */ + internals->burst_xmit_hash(bufs, nb_bufs, slave_count, + bufs_slave_port_idxs); + + for (i = 0; i < nb_bufs; i++) { + /* Populate slave mbuf arrays with mbufs for that slave. 
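+ * The destination slave index for each mbuf was already chosen above
+ * by the burst_xmit_hash callback; this loop only groups packets per slave.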
*/ + uint16_t slave_idx = bufs_slave_port_idxs[i]; + + slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i]; + } + + /* Send packet burst on each slave device */ + for (i = 0; i < slave_count; i++) { + if (slave_nb_bufs[i] == 0) + continue; + + slave_tx_count = rte_eth_tx_burst(slave_port_ids[i], + bd_tx_q->queue_id, slave_bufs[i], + slave_nb_bufs[i]); + + total_tx_count += slave_tx_count; + + /* If tx burst fails move packets to end of bufs */ + if (unlikely(slave_tx_count < slave_nb_bufs[i])) { + int slave_tx_fail_count = slave_nb_bufs[i] - + slave_tx_count; + total_tx_fail_count += slave_tx_fail_count; + memcpy(&bufs[nb_bufs - total_tx_fail_count], + &slave_bufs[i][slave_tx_count], + slave_tx_fail_count * sizeof(bufs[0])); + } + } + + return total_tx_count; +} + +static uint16_t +bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs, + uint16_t nb_bufs) +{ + struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue; + struct bond_dev_private *internals = bd_tx_q->dev_private; + + uint16_t slave_port_ids[RTE_MAX_ETHPORTS]; + uint16_t slave_count; + + if (unlikely(nb_bufs == 0)) + return 0; + + /* Copy slave list to protect against slave up/down changes during tx + * bursting + */ + slave_count = internals->active_slave_count; + if (unlikely(slave_count < 1)) + return 0; + + memcpy(slave_port_ids, internals->active_slaves, + sizeof(slave_port_ids[0]) * slave_count); + return tx_burst_balance(queue, bufs, nb_bufs, slave_port_ids, + slave_count); +} + +static inline uint16_t +tx_burst_8023ad(void *queue, struct rte_mbuf **bufs, uint16_t nb_bufs, + bool dedicated_txq) +{ + struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue; + struct bond_dev_private *internals = bd_tx_q->dev_private; + + uint16_t slave_port_ids[RTE_MAX_ETHPORTS]; + uint16_t slave_count; + + uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS]; + uint16_t dist_slave_count; + + uint16_t slave_tx_count; + + uint16_t i; + + /* Copy slave list to protect against slave up/down changes during tx + * bursting */ + slave_count = internals->active_slave_count; + if (unlikely(slave_count < 1)) + return 0; + + memcpy(slave_port_ids, internals->active_slaves, + sizeof(slave_port_ids[0]) * slave_count); + + if (dedicated_txq) + goto skip_tx_ring; + + /* Check for LACP control packets and send if available */ + for (i = 0; i < slave_count; i++) { + struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]]; + struct rte_mbuf *ctrl_pkt = NULL; + + if (likely(rte_ring_empty(port->tx_ring))) + continue; + + if (rte_ring_dequeue(port->tx_ring, + (void **)&ctrl_pkt) != -ENOENT) { + slave_tx_count = rte_eth_tx_burst(slave_port_ids[i], + bd_tx_q->queue_id, &ctrl_pkt, 1); + /* + * re-enqueue LAG control plane packets to buffering + * ring if transmission fails so the packet isn't lost. 
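+ * Only one control packet per slave is attempted on each call; anything
+ * still queued is retried on the next TX burst.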
+ */ + if (slave_tx_count != 1) + rte_ring_enqueue(port->tx_ring, ctrl_pkt); + } + } + +skip_tx_ring: + if (unlikely(nb_bufs == 0)) + return 0; + + dist_slave_count = 0; + for (i = 0; i < slave_count; i++) { + struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]]; + + if (ACTOR_STATE(port, DISTRIBUTING)) + dist_slave_port_ids[dist_slave_count++] = + slave_port_ids[i]; + } + + if (unlikely(dist_slave_count < 1)) + return 0; + + return tx_burst_balance(queue, bufs, nb_bufs, dist_slave_port_ids, + dist_slave_count); +} + +static uint16_t +bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs, + uint16_t nb_bufs) +{ + return tx_burst_8023ad(queue, bufs, nb_bufs, false); +} + +static uint16_t +bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs, + uint16_t nb_bufs) +{ + return tx_burst_8023ad(queue, bufs, nb_bufs, true); +} + +static uint16_t +bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs, + uint16_t nb_pkts) +{ + struct bond_dev_private *internals; + struct bond_tx_queue *bd_tx_q; + + uint16_t slaves[RTE_MAX_ETHPORTS]; + uint8_t tx_failed_flag = 0; + uint16_t num_of_slaves; + + uint16_t max_nb_of_tx_pkts = 0; + + int slave_tx_total[RTE_MAX_ETHPORTS]; + int i, most_successful_tx_slave = -1; + + bd_tx_q = (struct bond_tx_queue *)queue; + internals = bd_tx_q->dev_private; + + /* Copy slave list to protect against slave up/down changes during tx + * bursting */ + num_of_slaves = internals->active_slave_count; + memcpy(slaves, internals->active_slaves, + sizeof(internals->active_slaves[0]) * num_of_slaves); + + if (num_of_slaves < 1) + return 0; + + /* Increment reference count on mbufs */ + for (i = 0; i < nb_pkts; i++) + rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1); + + /* Transmit burst on each active slave */ + for (i = 0; i < num_of_slaves; i++) { + slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id, + bufs, nb_pkts); + + if (unlikely(slave_tx_total[i] < nb_pkts)) + tx_failed_flag = 1; + + /* record the value and slave index for the slave which transmits the + * maximum number of packets */ + if (slave_tx_total[i] > max_nb_of_tx_pkts) { + max_nb_of_tx_pkts = slave_tx_total[i]; + most_successful_tx_slave = i; + } + } + + /* if slaves fail to transmit packets from burst, the calling application + * is not expected to know about multiple references to packets so we must + * handle failures of all packets except those of the most successful slave + */ + if (unlikely(tx_failed_flag)) + for (i = 0; i < num_of_slaves; i++) + if (i != most_successful_tx_slave) + while (slave_tx_total[i] < nb_pkts) + rte_pktmbuf_free(bufs[slave_tx_total[i]++]); + + return max_nb_of_tx_pkts; +} + +static void +link_properties_set(struct rte_eth_dev *ethdev, struct rte_eth_link *slave_link) +{ + struct bond_dev_private *bond_ctx = ethdev->data->dev_private; + + if (bond_ctx->mode == BONDING_MODE_8023AD) { + /** + * If in mode 4 then save the link properties of the first + * slave, all subsequent slaves must match these properties + */ + struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link; + + bond_link->link_autoneg = slave_link->link_autoneg; + bond_link->link_duplex = slave_link->link_duplex; + bond_link->link_speed = slave_link->link_speed; + } else { + /** + * In any other mode the link properties are set to default + * values of AUTONEG/DUPLEX + */ + ethdev->data->dev_link.link_autoneg = ETH_LINK_AUTONEG; + ethdev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX; + } +} + +static int +link_properties_valid(struct 
rte_eth_dev *ethdev, + struct rte_eth_link *slave_link) +{ + struct bond_dev_private *bond_ctx = ethdev->data->dev_private; + + if (bond_ctx->mode == BONDING_MODE_8023AD) { + struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link; + + if (bond_link->link_duplex != slave_link->link_duplex || + bond_link->link_autoneg != slave_link->link_autoneg || + bond_link->link_speed != slave_link->link_speed) + return -1; + } + + return 0; +} + +int +mac_address_get(struct rte_eth_dev *eth_dev, + struct rte_ether_addr *dst_mac_addr) +{ + struct rte_ether_addr *mac_addr; + + if (eth_dev == NULL) { + RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified"); + return -1; + } + + if (dst_mac_addr == NULL) { + RTE_BOND_LOG(ERR, "NULL pointer MAC specified"); + return -1; + } + + mac_addr = eth_dev->data->mac_addrs; + + rte_ether_addr_copy(mac_addr, dst_mac_addr); + return 0; +} + +int +mac_address_set(struct rte_eth_dev *eth_dev, + struct rte_ether_addr *new_mac_addr) +{ + struct rte_ether_addr *mac_addr; + + if (eth_dev == NULL) { + RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified"); + return -1; + } + + if (new_mac_addr == NULL) { + RTE_BOND_LOG(ERR, "NULL pointer MAC specified"); + return -1; + } + + mac_addr = eth_dev->data->mac_addrs; + + /* If new MAC is different to current MAC then update */ + if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0) + memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr)); + + return 0; +} + +static const struct rte_ether_addr null_mac_addr; + +/* + * Add additional MAC addresses to the slave + */ +int +slave_add_mac_addresses(struct rte_eth_dev *bonded_eth_dev, + uint16_t slave_port_id) +{ + int i, ret; + struct rte_ether_addr *mac_addr; + + for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) { + mac_addr = &bonded_eth_dev->data->mac_addrs[i]; + if (rte_is_same_ether_addr(mac_addr, &null_mac_addr)) + break; + + ret = rte_eth_dev_mac_addr_add(slave_port_id, mac_addr, 0); + if (ret < 0) { + /* rollback */ + for (i--; i > 0; i--) + rte_eth_dev_mac_addr_remove(slave_port_id, + &bonded_eth_dev->data->mac_addrs[i]); + return ret; + } + } + + return 0; +} + +/* + * Remove additional MAC addresses from the slave + */ +int +slave_remove_mac_addresses(struct rte_eth_dev *bonded_eth_dev, + uint16_t slave_port_id) +{ + int i, rc, ret; + struct rte_ether_addr *mac_addr; + + rc = 0; + for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) { + mac_addr = &bonded_eth_dev->data->mac_addrs[i]; + if (rte_is_same_ether_addr(mac_addr, &null_mac_addr)) + break; + + ret = rte_eth_dev_mac_addr_remove(slave_port_id, mac_addr); + /* save only the first error */ + if (ret < 0 && rc == 0) + rc = ret; + } + + return rc; +} + +int +mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev) +{ + struct bond_dev_private *internals = bonded_eth_dev->data->dev_private; + int i; + + /* Update slave devices MAC addresses */ + if (internals->slave_count < 1) + return -1; + + switch (internals->mode) { + case BONDING_MODE_ROUND_ROBIN: + case BONDING_MODE_BALANCE: + case BONDING_MODE_BROADCAST: + for (i = 0; i < internals->slave_count; i++) { + if (rte_eth_dev_default_mac_addr_set( + internals->slaves[i].port_id, + bonded_eth_dev->data->mac_addrs)) { + RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address", + internals->slaves[i].port_id); + return -1; + } + } + break; + case BONDING_MODE_8023AD: + bond_mode_8023ad_mac_address_update(bonded_eth_dev); + break; + case BONDING_MODE_ACTIVE_BACKUP: + case BONDING_MODE_TLB: + case BONDING_MODE_ALB: + default: + for (i = 0; i < internals->slave_count; i++) { + if 
(internals->slaves[i].port_id == + internals->current_primary_port) { + if (rte_eth_dev_default_mac_addr_set( + internals->primary_port, + bonded_eth_dev->data->mac_addrs)) { + RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address", + internals->current_primary_port); + return -1; + } + } else { + if (rte_eth_dev_default_mac_addr_set( + internals->slaves[i].port_id, + &internals->slaves[i].persisted_mac_addr)) { + RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address", + internals->slaves[i].port_id); + return -1; + } + } + } + } + + return 0; +} + +int +bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode) +{ + struct bond_dev_private *internals; + + internals = eth_dev->data->dev_private; + + switch (mode) { + case BONDING_MODE_ROUND_ROBIN: + eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin; + eth_dev->rx_pkt_burst = bond_ethdev_rx_burst; + break; + case BONDING_MODE_ACTIVE_BACKUP: + eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup; + eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup; + break; + case BONDING_MODE_BALANCE: + eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance; + eth_dev->rx_pkt_burst = bond_ethdev_rx_burst; + break; + case BONDING_MODE_BROADCAST: + eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast; + eth_dev->rx_pkt_burst = bond_ethdev_rx_burst; + break; + case BONDING_MODE_8023AD: + if (bond_mode_8023ad_enable(eth_dev) != 0) + return -1; + + if (internals->mode4.dedicated_queues.enabled == 0) { + eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad; + eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad; + RTE_BOND_LOG(WARNING, + "Using mode 4, it is necessary to do TX burst " + "and RX burst at least every 100ms."); + } else { + /* Use flow director's optimization */ + eth_dev->rx_pkt_burst = + bond_ethdev_rx_burst_8023ad_fast_queue; + eth_dev->tx_pkt_burst = + bond_ethdev_tx_burst_8023ad_fast_queue; + } + break; + case BONDING_MODE_TLB: + eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb; + eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup; + break; + case BONDING_MODE_ALB: + if (bond_mode_alb_enable(eth_dev) != 0) + return -1; + + eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb; + eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb; + break; + default: + return -1; + } + + internals->mode = mode; + + return 0; +} + + +static int +slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev, + struct rte_eth_dev *slave_eth_dev) +{ + int errval = 0; + struct bond_dev_private *internals = bonded_eth_dev->data->dev_private; + struct port *port = &bond_mode_8023ad_ports[slave_eth_dev->data->port_id]; + + if (port->slow_pool == NULL) { + char mem_name[256]; + int slave_id = slave_eth_dev->data->port_id; + + snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool", + slave_id); + port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191, + 250, 0, RTE_MBUF_DEFAULT_BUF_SIZE, + slave_eth_dev->data->numa_node); + + /* Any memory allocation failure in initialization is critical because + * resources can't be free, so reinitialization is impossible. 
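+ * rte_panic() below therefore aborts the application rather than leaving
+ * the slave port half-initialised.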
*/ + if (port->slow_pool == NULL) { + rte_panic("Slave %u: Failed to create memory pool '%s': %s\n", + slave_id, mem_name, rte_strerror(rte_errno)); + } + } + + if (internals->mode4.dedicated_queues.enabled == 1) { + /* Configure slow Rx queue */ + + errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, + internals->mode4.dedicated_queues.rx_qid, 128, + rte_eth_dev_socket_id(slave_eth_dev->data->port_id), + NULL, port->slow_pool); + if (errval != 0) { + RTE_BOND_LOG(ERR, + "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)", + slave_eth_dev->data->port_id, + internals->mode4.dedicated_queues.rx_qid, + errval); + return errval; + } + + errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, + internals->mode4.dedicated_queues.tx_qid, 512, + rte_eth_dev_socket_id(slave_eth_dev->data->port_id), + NULL); + if (errval != 0) { + RTE_BOND_LOG(ERR, + "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)", + slave_eth_dev->data->port_id, + internals->mode4.dedicated_queues.tx_qid, + errval); + return errval; + } + } + return 0; +} + +int +slave_configure(struct rte_eth_dev *bonded_eth_dev, + struct rte_eth_dev *slave_eth_dev) +{ + struct bond_rx_queue *bd_rx_q; + struct bond_tx_queue *bd_tx_q; + uint16_t nb_rx_queues; + uint16_t nb_tx_queues; + + int errval; + uint16_t q_id; + struct rte_flow_error flow_error; + + struct bond_dev_private *internals = bonded_eth_dev->data->dev_private; + + /* Stop slave */ + rte_eth_dev_stop(slave_eth_dev->data->port_id); + + /* Enable interrupts on slave device if supported */ + if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) + slave_eth_dev->data->dev_conf.intr_conf.lsc = 1; + + /* If RSS is enabled for bonding, try to enable it for slaves */ + if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) { + if (internals->rss_key_len != 0) { + slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len = + internals->rss_key_len; + slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = + internals->rss_key; + } else { + slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL; + } + + slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = + bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf; + slave_eth_dev->data->dev_conf.rxmode.mq_mode = + bonded_eth_dev->data->dev_conf.rxmode.mq_mode; + } + + if (bonded_eth_dev->data->dev_conf.rxmode.offloads & + DEV_RX_OFFLOAD_VLAN_FILTER) + slave_eth_dev->data->dev_conf.rxmode.offloads |= + DEV_RX_OFFLOAD_VLAN_FILTER; + else + slave_eth_dev->data->dev_conf.rxmode.offloads &= + ~DEV_RX_OFFLOAD_VLAN_FILTER; + + nb_rx_queues = bonded_eth_dev->data->nb_rx_queues; + nb_tx_queues = bonded_eth_dev->data->nb_tx_queues; + + if (internals->mode == BONDING_MODE_8023AD) { + if (internals->mode4.dedicated_queues.enabled == 1) { + nb_rx_queues++; + nb_tx_queues++; + } + } + + errval = rte_eth_dev_set_mtu(slave_eth_dev->data->port_id, + bonded_eth_dev->data->mtu); + if (errval != 0 && errval != -ENOTSUP) { + RTE_BOND_LOG(ERR, "rte_eth_dev_set_mtu: port %u, err (%d)", + slave_eth_dev->data->port_id, errval); + return errval; + } + + /* Configure device */ + errval = rte_eth_dev_configure(slave_eth_dev->data->port_id, + nb_rx_queues, nb_tx_queues, + &(slave_eth_dev->data->dev_conf)); + if (errval != 0) { + RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u, err (%d)", + slave_eth_dev->data->port_id, errval); + return errval; + } + + /* Setup Rx Queues */ + for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) { + bd_rx_q = (struct bond_rx_queue 
*)bonded_eth_dev->data->rx_queues[q_id]; + + errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id, + bd_rx_q->nb_rx_desc, + rte_eth_dev_socket_id(slave_eth_dev->data->port_id), + &(bd_rx_q->rx_conf), bd_rx_q->mb_pool); + if (errval != 0) { + RTE_BOND_LOG(ERR, + "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)", + slave_eth_dev->data->port_id, q_id, errval); + return errval; + } + } + + /* Setup Tx Queues */ + for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) { + bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id]; + + errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id, + bd_tx_q->nb_tx_desc, + rte_eth_dev_socket_id(slave_eth_dev->data->port_id), + &bd_tx_q->tx_conf); + if (errval != 0) { + RTE_BOND_LOG(ERR, + "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)", + slave_eth_dev->data->port_id, q_id, errval); + return errval; + } + } + + if (internals->mode == BONDING_MODE_8023AD && + internals->mode4.dedicated_queues.enabled == 1) { + if (slave_configure_slow_queue(bonded_eth_dev, slave_eth_dev) + != 0) + return errval; + + if (bond_ethdev_8023ad_flow_verify(bonded_eth_dev, + slave_eth_dev->data->port_id) != 0) { + RTE_BOND_LOG(ERR, + "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)", + slave_eth_dev->data->port_id, q_id, errval); + return -1; + } + + if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL) + rte_flow_destroy(slave_eth_dev->data->port_id, + internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id], + &flow_error); + + bond_ethdev_8023ad_flow_set(bonded_eth_dev, + slave_eth_dev->data->port_id); + } + + /* Start device */ + errval = rte_eth_dev_start(slave_eth_dev->data->port_id); + if (errval != 0) { + RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)", + slave_eth_dev->data->port_id, errval); + return -1; + } + + /* If RSS is enabled for bonding, synchronize RETA */ + if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) { + int i; + struct bond_dev_private *internals; + + internals = bonded_eth_dev->data->dev_private; + + for (i = 0; i < internals->slave_count; i++) { + if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) { + errval = rte_eth_dev_rss_reta_update( + slave_eth_dev->data->port_id, + &internals->reta_conf[0], + internals->slaves[i].reta_size); + if (errval != 0) { + RTE_BOND_LOG(WARNING, + "rte_eth_dev_rss_reta_update on slave port %d fails (err %d)." 
+ " RSS Configuration for bonding may be inconsistent.", + slave_eth_dev->data->port_id, errval); + } + break; + } + } + } + + /* If lsc interrupt is set, check initial slave's link status */ + if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) { + slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0); + bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id, + RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id, + NULL); + } + + return 0; +} + +void +slave_remove(struct bond_dev_private *internals, + struct rte_eth_dev *slave_eth_dev) +{ + uint16_t i; + + for (i = 0; i < internals->slave_count; i++) + if (internals->slaves[i].port_id == + slave_eth_dev->data->port_id) + break; + + if (i < (internals->slave_count - 1)) { + struct rte_flow *flow; + + memmove(&internals->slaves[i], &internals->slaves[i + 1], + sizeof(internals->slaves[0]) * + (internals->slave_count - i - 1)); + TAILQ_FOREACH(flow, &internals->flow_list, next) { + memmove(&flow->flows[i], &flow->flows[i + 1], + sizeof(flow->flows[0]) * + (internals->slave_count - i - 1)); + flow->flows[internals->slave_count - 1] = NULL; + } + } + + internals->slave_count--; + + /* force reconfiguration of slave interfaces */ + _rte_eth_dev_reset(slave_eth_dev); +} + +static void +bond_ethdev_slave_link_status_change_monitor(void *cb_arg); + +void +slave_add(struct bond_dev_private *internals, + struct rte_eth_dev *slave_eth_dev) +{ + struct bond_slave_details *slave_details = + &internals->slaves[internals->slave_count]; + + slave_details->port_id = slave_eth_dev->data->port_id; + slave_details->last_link_status = 0; + + /* Mark slave devices that don't support interrupts so we can + * compensate when we start the bond + */ + if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) { + slave_details->link_status_poll_enabled = 1; + } + + slave_details->link_status_wait_to_complete = 0; + /* clean tlb_last_obytes when adding port for bonding device */ + memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs, + sizeof(struct rte_ether_addr)); +} + +void +bond_ethdev_primary_set(struct bond_dev_private *internals, + uint16_t slave_port_id) +{ + int i; + + if (internals->active_slave_count < 1) + internals->current_primary_port = slave_port_id; + else + /* Search bonded device slave ports for new proposed primary port */ + for (i = 0; i < internals->active_slave_count; i++) { + if (internals->active_slaves[i] == slave_port_id) + internals->current_primary_port = slave_port_id; + } +} + +static int +bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev); + +static int +bond_ethdev_start(struct rte_eth_dev *eth_dev) +{ + struct bond_dev_private *internals; + int i; + + /* slave eth dev will be started by bonded device */ + if (check_for_bonded_ethdev(eth_dev)) { + RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)", + eth_dev->data->port_id); + return -1; + } + + eth_dev->data->dev_link.link_status = ETH_LINK_DOWN; + eth_dev->data->dev_started = 1; + + internals = eth_dev->data->dev_private; + + if (internals->slave_count == 0) { + RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices"); + goto out_err; + } + + if (internals->user_defined_mac == 0) { + struct rte_ether_addr *new_mac_addr = NULL; + + for (i = 0; i < internals->slave_count; i++) + if (internals->slaves[i].port_id == internals->primary_port) + new_mac_addr = &internals->slaves[i].persisted_mac_addr; + + if (new_mac_addr == NULL) + goto out_err; + + if (mac_address_set(eth_dev, new_mac_addr) != 0) { + 
RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address", + eth_dev->data->port_id); + goto out_err; + } + } + + if (internals->mode == BONDING_MODE_8023AD) { + if (internals->mode4.dedicated_queues.enabled == 1) { + internals->mode4.dedicated_queues.rx_qid = + eth_dev->data->nb_rx_queues; + internals->mode4.dedicated_queues.tx_qid = + eth_dev->data->nb_tx_queues; + } + } + + + /* Reconfigure each slave device if starting bonded device */ + for (i = 0; i < internals->slave_count; i++) { + struct rte_eth_dev *slave_ethdev = + &(rte_eth_devices[internals->slaves[i].port_id]); + if (slave_configure(eth_dev, slave_ethdev) != 0) { + RTE_BOND_LOG(ERR, + "bonded port (%d) failed to reconfigure slave device (%d)", + eth_dev->data->port_id, + internals->slaves[i].port_id); + goto out_err; + } + /* We will need to poll for link status if any slave doesn't + * support interrupts + */ + if (internals->slaves[i].link_status_poll_enabled) + internals->link_status_polling_enabled = 1; + } + + /* start polling if needed */ + if (internals->link_status_polling_enabled) { + rte_eal_alarm_set( + internals->link_status_polling_interval_ms * 1000, + bond_ethdev_slave_link_status_change_monitor, + (void *)&rte_eth_devices[internals->port_id]); + } + + /* Update all slave devices MACs*/ + if (mac_address_slaves_update(eth_dev) != 0) + goto out_err; + + if (internals->user_defined_primary_port) + bond_ethdev_primary_set(internals, internals->primary_port); + + if (internals->mode == BONDING_MODE_8023AD) + bond_mode_8023ad_start(eth_dev); + + if (internals->mode == BONDING_MODE_TLB || + internals->mode == BONDING_MODE_ALB) + bond_tlb_enable(internals); + + return 0; + +out_err: + eth_dev->data->dev_started = 0; + return -1; +} + +static void +bond_ethdev_free_queues(struct rte_eth_dev *dev) +{ + uint16_t i; + + if (dev->data->rx_queues != NULL) { + for (i = 0; i < dev->data->nb_rx_queues; i++) { + rte_free(dev->data->rx_queues[i]); + dev->data->rx_queues[i] = NULL; + } + dev->data->nb_rx_queues = 0; + } + + if (dev->data->tx_queues != NULL) { + for (i = 0; i < dev->data->nb_tx_queues; i++) { + rte_free(dev->data->tx_queues[i]); + dev->data->tx_queues[i] = NULL; + } + dev->data->nb_tx_queues = 0; + } +} + +void +bond_ethdev_stop(struct rte_eth_dev *eth_dev) +{ + struct bond_dev_private *internals = eth_dev->data->dev_private; + uint16_t i; + + if (internals->mode == BONDING_MODE_8023AD) { + struct port *port; + void *pkt = NULL; + + bond_mode_8023ad_stop(eth_dev); + + /* Discard all messages to/from mode 4 state machines */ + for (i = 0; i < internals->active_slave_count; i++) { + port = &bond_mode_8023ad_ports[internals->active_slaves[i]]; + + RTE_ASSERT(port->rx_ring != NULL); + while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT) + rte_pktmbuf_free(pkt); + + RTE_ASSERT(port->tx_ring != NULL); + while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT) + rte_pktmbuf_free(pkt); + } + } + + if (internals->mode == BONDING_MODE_TLB || + internals->mode == BONDING_MODE_ALB) { + bond_tlb_disable(internals); + for (i = 0; i < internals->active_slave_count; i++) + tlb_last_obytets[internals->active_slaves[i]] = 0; + } + + eth_dev->data->dev_link.link_status = ETH_LINK_DOWN; + eth_dev->data->dev_started = 0; + + internals->link_status_polling_enabled = 0; + for (i = 0; i < internals->slave_count; i++) { + uint16_t slave_id = internals->slaves[i].port_id; + if (find_slave_by_id(internals->active_slaves, + internals->active_slave_count, slave_id) != + internals->active_slave_count) { + 
internals->slaves[i].last_link_status = 0; + rte_eth_dev_stop(slave_id); + deactivate_slave(eth_dev, slave_id); + } + } +} + +void +bond_ethdev_close(struct rte_eth_dev *dev) +{ + struct bond_dev_private *internals = dev->data->dev_private; + uint16_t bond_port_id = internals->port_id; + int skipped = 0; + struct rte_flow_error ferror; + + RTE_BOND_LOG(INFO, "Closing bonded device %s", dev->device->name); + while (internals->slave_count != skipped) { + uint16_t port_id = internals->slaves[skipped].port_id; + + rte_eth_dev_stop(port_id); + + if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) { + RTE_BOND_LOG(ERR, + "Failed to remove port %d from bonded device %s", + port_id, dev->device->name); + skipped++; + } + } + bond_flow_ops.flush(dev, &ferror); + bond_ethdev_free_queues(dev); + rte_bitmap_reset(internals->vlan_filter_bmp); +} + +/* forward declaration */ +static int bond_ethdev_configure(struct rte_eth_dev *dev); + +static int +bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) +{ + struct bond_dev_private *internals = dev->data->dev_private; + struct bond_slave_details slave; + int ret; + + uint16_t max_nb_rx_queues = UINT16_MAX; + uint16_t max_nb_tx_queues = UINT16_MAX; + uint16_t max_rx_desc_lim = UINT16_MAX; + uint16_t max_tx_desc_lim = UINT16_MAX; + + dev_info->max_mac_addrs = BOND_MAX_MAC_ADDRS; + + dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ? + internals->candidate_max_rx_pktlen : + RTE_ETHER_MAX_JUMBO_FRAME_LEN; + + /* Max number of tx/rx queues that the bonded device can support is the + * minimum values of the bonded slaves, as all slaves must be capable + * of supporting the same number of tx/rx queues. + */ + if (internals->slave_count > 0) { + struct rte_eth_dev_info slave_info; + uint16_t idx; + + for (idx = 0; idx < internals->slave_count; idx++) { + slave = internals->slaves[idx]; + ret = rte_eth_dev_info_get(slave.port_id, &slave_info); + if (ret != 0) { + RTE_BOND_LOG(ERR, + "%s: Error during getting device (port %u) info: %s\n", + __func__, + slave.port_id, + strerror(-ret)); + + return ret; + } + + if (slave_info.max_rx_queues < max_nb_rx_queues) + max_nb_rx_queues = slave_info.max_rx_queues; + + if (slave_info.max_tx_queues < max_nb_tx_queues) + max_nb_tx_queues = slave_info.max_tx_queues; + + if (slave_info.rx_desc_lim.nb_max < max_rx_desc_lim) + max_rx_desc_lim = slave_info.rx_desc_lim.nb_max; + + if (slave_info.tx_desc_lim.nb_max < max_tx_desc_lim) + max_tx_desc_lim = slave_info.tx_desc_lim.nb_max; + } + } + + dev_info->max_rx_queues = max_nb_rx_queues; + dev_info->max_tx_queues = max_nb_tx_queues; + + memcpy(&dev_info->default_rxconf, &internals->default_rxconf, + sizeof(dev_info->default_rxconf)); + memcpy(&dev_info->default_txconf, &internals->default_txconf, + sizeof(dev_info->default_txconf)); + + dev_info->rx_desc_lim.nb_max = max_rx_desc_lim; + dev_info->tx_desc_lim.nb_max = max_tx_desc_lim; + + /** + * If dedicated hw queues enabled for link bonding device in LACP mode + * then we need to reduce the maximum number of data path queues by 1. 
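+ * (the queue pair reserved by the dedicated queues option carries only
+ * LACP control traffic)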
+ */ + if (internals->mode == BONDING_MODE_8023AD && + internals->mode4.dedicated_queues.enabled == 1) { + dev_info->max_rx_queues--; + dev_info->max_tx_queues--; + } + + dev_info->min_rx_bufsize = 0; + + dev_info->rx_offload_capa = internals->rx_offload_capa; + dev_info->tx_offload_capa = internals->tx_offload_capa; + dev_info->rx_queue_offload_capa = internals->rx_queue_offload_capa; + dev_info->tx_queue_offload_capa = internals->tx_queue_offload_capa; + dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads; + + dev_info->reta_size = internals->reta_size; + + return 0; +} + +static int +bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on) +{ + int res; + uint16_t i; + struct bond_dev_private *internals = dev->data->dev_private; + + /* don't do this while a slave is being added */ + rte_spinlock_lock(&internals->lock); + + if (on) + rte_bitmap_set(internals->vlan_filter_bmp, vlan_id); + else + rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id); + + for (i = 0; i < internals->slave_count; i++) { + uint16_t port_id = internals->slaves[i].port_id; + + res = rte_eth_dev_vlan_filter(port_id, vlan_id, on); + if (res == ENOTSUP) + RTE_BOND_LOG(WARNING, + "Setting VLAN filter on slave port %u not supported.", + port_id); + } + + rte_spinlock_unlock(&internals->lock); + return 0; +} + +static int +bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id, + uint16_t nb_rx_desc, unsigned int socket_id __rte_unused, + const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool) +{ + struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *) + rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue), + 0, dev->data->numa_node); + if (bd_rx_q == NULL) + return -1; + + bd_rx_q->queue_id = rx_queue_id; + bd_rx_q->dev_private = dev->data->dev_private; + + bd_rx_q->nb_rx_desc = nb_rx_desc; + + memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf)); + bd_rx_q->mb_pool = mb_pool; + + dev->data->rx_queues[rx_queue_id] = bd_rx_q; + + return 0; +} + +static int +bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id, + uint16_t nb_tx_desc, unsigned int socket_id __rte_unused, + const struct rte_eth_txconf *tx_conf) +{ + struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *) + rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue), + 0, dev->data->numa_node); + + if (bd_tx_q == NULL) + return -1; + + bd_tx_q->queue_id = tx_queue_id; + bd_tx_q->dev_private = dev->data->dev_private; + + bd_tx_q->nb_tx_desc = nb_tx_desc; + memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf)); + + dev->data->tx_queues[tx_queue_id] = bd_tx_q; + + return 0; +} + +static void +bond_ethdev_rx_queue_release(void *queue) +{ + if (queue == NULL) + return; + + rte_free(queue); +} + +static void +bond_ethdev_tx_queue_release(void *queue) +{ + if (queue == NULL) + return; + + rte_free(queue); +} + +static void +bond_ethdev_slave_link_status_change_monitor(void *cb_arg) +{ + struct rte_eth_dev *bonded_ethdev, *slave_ethdev; + struct bond_dev_private *internals; + + /* Default value for polling slave found is true as we don't want to + * disable the polling thread if we cannot get the lock */ + int i, polling_slave_found = 1; + + if (cb_arg == NULL) + return; + + bonded_ethdev = cb_arg; + internals = bonded_ethdev->data->dev_private; + + if (!bonded_ethdev->data->dev_started || + !internals->link_status_polling_enabled) + return; + + /* If device is currently being configured then don't check slaves link + * status, wait until next period */ + 
if (rte_spinlock_trylock(&internals->lock)) { + if (internals->slave_count > 0) + polling_slave_found = 0; + + for (i = 0; i < internals->slave_count; i++) { + if (!internals->slaves[i].link_status_poll_enabled) + continue; + + slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id]; + polling_slave_found = 1; + + /* Update slave link status */ + (*slave_ethdev->dev_ops->link_update)(slave_ethdev, + internals->slaves[i].link_status_wait_to_complete); + + /* if link status has changed since last checked then call lsc + * event callback */ + if (slave_ethdev->data->dev_link.link_status != + internals->slaves[i].last_link_status) { + internals->slaves[i].last_link_status = + slave_ethdev->data->dev_link.link_status; + + bond_ethdev_lsc_event_callback(internals->slaves[i].port_id, + RTE_ETH_EVENT_INTR_LSC, + &bonded_ethdev->data->port_id, + NULL); + } + } + rte_spinlock_unlock(&internals->lock); + } + + if (polling_slave_found) + /* Set alarm to continue monitoring link status of slave ethdev's */ + rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000, + bond_ethdev_slave_link_status_change_monitor, cb_arg); +} + +static int +bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete) +{ + int (*link_update)(uint16_t port_id, struct rte_eth_link *eth_link); + + struct bond_dev_private *bond_ctx; + struct rte_eth_link slave_link; + + bool one_link_update_succeeded; + uint32_t idx; + int ret; + + bond_ctx = ethdev->data->dev_private; + + ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE; + + if (ethdev->data->dev_started == 0 || + bond_ctx->active_slave_count == 0) { + ethdev->data->dev_link.link_status = ETH_LINK_DOWN; + return 0; + } + + ethdev->data->dev_link.link_status = ETH_LINK_UP; + + if (wait_to_complete) + link_update = rte_eth_link_get; + else + link_update = rte_eth_link_get_nowait; + + switch (bond_ctx->mode) { + case BONDING_MODE_BROADCAST: + /** + * Setting link speed to UINT32_MAX to ensure we pick up the + * value of the first active slave + */ + ethdev->data->dev_link.link_speed = UINT32_MAX; + + /** + * link speed is minimum value of all the slaves link speed as + * packet loss will occur on this slave if transmission at rates + * greater than this are attempted + */ + for (idx = 0; idx < bond_ctx->active_slave_count; idx++) { + ret = link_update(bond_ctx->active_slaves[idx], + &slave_link); + if (ret < 0) { + ethdev->data->dev_link.link_speed = + ETH_SPEED_NUM_NONE; + RTE_BOND_LOG(ERR, + "Slave (port %u) link get failed: %s", + bond_ctx->active_slaves[idx], + rte_strerror(-ret)); + return 0; + } + + if (slave_link.link_speed < + ethdev->data->dev_link.link_speed) + ethdev->data->dev_link.link_speed = + slave_link.link_speed; + } + break; + case BONDING_MODE_ACTIVE_BACKUP: + /* Current primary slave */ + ret = link_update(bond_ctx->current_primary_port, &slave_link); + if (ret < 0) { + RTE_BOND_LOG(ERR, "Slave (port %u) link get failed: %s", + bond_ctx->current_primary_port, + rte_strerror(-ret)); + return 0; + } + + ethdev->data->dev_link.link_speed = slave_link.link_speed; + break; + case BONDING_MODE_8023AD: + ethdev->data->dev_link.link_autoneg = + bond_ctx->mode4.slave_link.link_autoneg; + ethdev->data->dev_link.link_duplex = + bond_ctx->mode4.slave_link.link_duplex; + /* fall through */ + /* to update link speed */ + case BONDING_MODE_ROUND_ROBIN: + case BONDING_MODE_BALANCE: + case BONDING_MODE_TLB: + case BONDING_MODE_ALB: + default: + /** + * In theses mode the maximum theoretical link speed is the sum + * of all the slaves + */ 
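+ /* For example, two active 10G slaves report 20G here; slaves whose
+ * link_update fails below are simply left out of the sum.
+ */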
+ ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE; + one_link_update_succeeded = false; + + for (idx = 0; idx < bond_ctx->active_slave_count; idx++) { + ret = link_update(bond_ctx->active_slaves[idx], + &slave_link); + if (ret < 0) { + RTE_BOND_LOG(ERR, + "Slave (port %u) link get failed: %s", + bond_ctx->active_slaves[idx], + rte_strerror(-ret)); + continue; + } + + one_link_update_succeeded = true; + ethdev->data->dev_link.link_speed += + slave_link.link_speed; + } + + if (!one_link_update_succeeded) { + RTE_BOND_LOG(ERR, "All slaves link get failed"); + return 0; + } + } + + + return 0; +} + + +static int +bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) +{ + struct bond_dev_private *internals = dev->data->dev_private; + struct rte_eth_stats slave_stats; + int i, j; + + for (i = 0; i < internals->slave_count; i++) { + rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats); + + stats->ipackets += slave_stats.ipackets; + stats->opackets += slave_stats.opackets; + stats->ibytes += slave_stats.ibytes; + stats->obytes += slave_stats.obytes; + stats->imissed += slave_stats.imissed; + stats->ierrors += slave_stats.ierrors; + stats->oerrors += slave_stats.oerrors; + stats->rx_nombuf += slave_stats.rx_nombuf; + + for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) { + stats->q_ipackets[j] += slave_stats.q_ipackets[j]; + stats->q_opackets[j] += slave_stats.q_opackets[j]; + stats->q_ibytes[j] += slave_stats.q_ibytes[j]; + stats->q_obytes[j] += slave_stats.q_obytes[j]; + stats->q_errors[j] += slave_stats.q_errors[j]; + } + + } + + return 0; +} + +static int +bond_ethdev_stats_reset(struct rte_eth_dev *dev) +{ + struct bond_dev_private *internals = dev->data->dev_private; + int i; + int err; + int ret; + + for (i = 0, err = 0; i < internals->slave_count; i++) { + ret = rte_eth_stats_reset(internals->slaves[i].port_id); + if (ret != 0) + err = ret; + } + + return err; +} + +static int +bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev) +{ + struct bond_dev_private *internals = eth_dev->data->dev_private; + int i; + int ret = 0; + uint16_t port_id; + + switch (internals->mode) { + /* Promiscuous mode is propagated to all slaves */ + case BONDING_MODE_ROUND_ROBIN: + case BONDING_MODE_BALANCE: + case BONDING_MODE_BROADCAST: + case BONDING_MODE_8023AD: { + unsigned int slave_ok = 0; + + for (i = 0; i < internals->slave_count; i++) { + port_id = internals->slaves[i].port_id; + + ret = rte_eth_promiscuous_enable(port_id); + if (ret != 0) + RTE_BOND_LOG(ERR, + "Failed to enable promiscuous mode for port %u: %s", + port_id, rte_strerror(-ret)); + else + slave_ok++; + } + /* + * Report success if operation is successful on at least + * on one slave. Otherwise return last error code. 
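+ * (slave_ok counts the slaves on which the call succeeded)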
+ */ + if (slave_ok > 0) + ret = 0; + break; + } + /* Promiscuous mode is propagated only to primary slave */ + case BONDING_MODE_ACTIVE_BACKUP: + case BONDING_MODE_TLB: + case BONDING_MODE_ALB: + default: + /* Do not touch promisc when there cannot be primary ports */ + if (internals->slave_count == 0) + break; + port_id = internals->current_primary_port; + ret = rte_eth_promiscuous_enable(port_id); + if (ret != 0) + RTE_BOND_LOG(ERR, + "Failed to enable promiscuous mode for port %u: %s", + port_id, rte_strerror(-ret)); + } + + return ret; +} + +static int +bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev) +{ + struct bond_dev_private *internals = dev->data->dev_private; + int i; + int ret = 0; + uint16_t port_id; + + switch (internals->mode) { + /* Promiscuous mode is propagated to all slaves */ + case BONDING_MODE_ROUND_ROBIN: + case BONDING_MODE_BALANCE: + case BONDING_MODE_BROADCAST: + case BONDING_MODE_8023AD: { + unsigned int slave_ok = 0; + + for (i = 0; i < internals->slave_count; i++) { + port_id = internals->slaves[i].port_id; + + if (internals->mode == BONDING_MODE_8023AD && + bond_mode_8023ad_ports[port_id].forced_rx_flags == + BOND_8023AD_FORCED_PROMISC) { + slave_ok++; + continue; + } + ret = rte_eth_promiscuous_disable(port_id); + if (ret != 0) + RTE_BOND_LOG(ERR, + "Failed to disable promiscuous mode for port %u: %s", + port_id, rte_strerror(-ret)); + else + slave_ok++; + } + /* + * Report success if operation is successful on at least + * on one slave. Otherwise return last error code. + */ + if (slave_ok > 0) + ret = 0; + break; + } + /* Promiscuous mode is propagated only to primary slave */ + case BONDING_MODE_ACTIVE_BACKUP: + case BONDING_MODE_TLB: + case BONDING_MODE_ALB: + default: + /* Do not touch promisc when there cannot be primary ports */ + if (internals->slave_count == 0) + break; + port_id = internals->current_primary_port; + ret = rte_eth_promiscuous_disable(port_id); + if (ret != 0) + RTE_BOND_LOG(ERR, + "Failed to disable promiscuous mode for port %u: %s", + port_id, rte_strerror(-ret)); + } + + return ret; +} + +static int +bond_ethdev_allmulticast_enable(struct rte_eth_dev *eth_dev) +{ + struct bond_dev_private *internals = eth_dev->data->dev_private; + int i; + int ret = 0; + uint16_t port_id; + + switch (internals->mode) { + /* allmulti mode is propagated to all slaves */ + case BONDING_MODE_ROUND_ROBIN: + case BONDING_MODE_BALANCE: + case BONDING_MODE_BROADCAST: + case BONDING_MODE_8023AD: { + unsigned int slave_ok = 0; + + for (i = 0; i < internals->slave_count; i++) { + port_id = internals->slaves[i].port_id; + + ret = rte_eth_allmulticast_enable(port_id); + if (ret != 0) + RTE_BOND_LOG(ERR, + "Failed to enable allmulti mode for port %u: %s", + port_id, rte_strerror(-ret)); + else + slave_ok++; + } + /* + * Report success if operation is successful on at least + * on one slave. Otherwise return last error code. 
+ */ + if (slave_ok > 0) + ret = 0; + break; + } + /* allmulti mode is propagated only to primary slave */ + case BONDING_MODE_ACTIVE_BACKUP: + case BONDING_MODE_TLB: + case BONDING_MODE_ALB: + default: + /* Do not touch allmulti when there cannot be primary ports */ + if (internals->slave_count == 0) + break; + port_id = internals->current_primary_port; + ret = rte_eth_allmulticast_enable(port_id); + if (ret != 0) + RTE_BOND_LOG(ERR, + "Failed to enable allmulti mode for port %u: %s", + port_id, rte_strerror(-ret)); + } + + return ret; +} + +static int +bond_ethdev_allmulticast_disable(struct rte_eth_dev *eth_dev) +{ + struct bond_dev_private *internals = eth_dev->data->dev_private; + int i; + int ret = 0; + uint16_t port_id; + + switch (internals->mode) { + /* allmulti mode is propagated to all slaves */ + case BONDING_MODE_ROUND_ROBIN: + case BONDING_MODE_BALANCE: + case BONDING_MODE_BROADCAST: + case BONDING_MODE_8023AD: { + unsigned int slave_ok = 0; + + for (i = 0; i < internals->slave_count; i++) { + uint16_t port_id = internals->slaves[i].port_id; + + if (internals->mode == BONDING_MODE_8023AD && + bond_mode_8023ad_ports[port_id].forced_rx_flags == + BOND_8023AD_FORCED_ALLMULTI) + continue; + + ret = rte_eth_allmulticast_disable(port_id); + if (ret != 0) + RTE_BOND_LOG(ERR, + "Failed to disable allmulti mode for port %u: %s", + port_id, rte_strerror(-ret)); + else + slave_ok++; + } + /* + * Report success if operation is successful on at least + * on one slave. Otherwise return last error code. + */ + if (slave_ok > 0) + ret = 0; + break; + } + /* allmulti mode is propagated only to primary slave */ + case BONDING_MODE_ACTIVE_BACKUP: + case BONDING_MODE_TLB: + case BONDING_MODE_ALB: + default: + /* Do not touch allmulti when there cannot be primary ports */ + if (internals->slave_count == 0) + break; + port_id = internals->current_primary_port; + ret = rte_eth_allmulticast_disable(port_id); + if (ret != 0) + RTE_BOND_LOG(ERR, + "Failed to disable allmulti mode for port %u: %s", + port_id, rte_strerror(-ret)); + } + + return ret; +} + +static void +bond_ethdev_delayed_lsc_propagation(void *arg) +{ + if (arg == NULL) + return; + + _rte_eth_dev_callback_process((struct rte_eth_dev *)arg, + RTE_ETH_EVENT_INTR_LSC, NULL); +} + +int +bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type, + void *param, void *ret_param __rte_unused) +{ + struct rte_eth_dev *bonded_eth_dev; + struct bond_dev_private *internals; + struct rte_eth_link link; + int rc = -1; + int ret; + + uint8_t lsc_flag = 0; + int valid_slave = 0; + uint16_t active_pos; + uint16_t i; + + if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL) + return rc; + + bonded_eth_dev = &rte_eth_devices[*(uint16_t *)param]; + + if (check_for_bonded_ethdev(bonded_eth_dev)) + return rc; + + internals = bonded_eth_dev->data->dev_private; + + /* If the device isn't started don't handle interrupts */ + if (!bonded_eth_dev->data->dev_started) + return rc; + + /* verify that port_id is a valid slave of bonded port */ + for (i = 0; i < internals->slave_count; i++) { + if (internals->slaves[i].port_id == port_id) { + valid_slave = 1; + break; + } + } + + if (!valid_slave) + return rc; + + /* Synchronize lsc callback parallel calls either by real link event + * from the slaves PMDs or by the bonding PMD itself. 
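+ * The lsc_lock spinlock taken below provides that serialisation.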
+ */ + rte_spinlock_lock(&internals->lsc_lock); + + /* Search for port in active port list */ + active_pos = find_slave_by_id(internals->active_slaves, + internals->active_slave_count, port_id); + + ret = rte_eth_link_get_nowait(port_id, &link); + if (ret < 0) + RTE_BOND_LOG(ERR, "Slave (port %u) link get failed", port_id); + + if (ret == 0 && link.link_status) { + if (active_pos < internals->active_slave_count) + goto link_update; + + /* check link state properties if bonded link is up*/ + if (bonded_eth_dev->data->dev_link.link_status == ETH_LINK_UP) { + if (link_properties_valid(bonded_eth_dev, &link) != 0) + RTE_BOND_LOG(ERR, "Invalid link properties " + "for slave %d in bonding mode %d", + port_id, internals->mode); + } else { + /* inherit slave link properties */ + link_properties_set(bonded_eth_dev, &link); + } + + /* If no active slave ports then set this port to be + * the primary port. + */ + if (internals->active_slave_count < 1) { + /* If first active slave, then change link status */ + bonded_eth_dev->data->dev_link.link_status = + ETH_LINK_UP; + internals->current_primary_port = port_id; + lsc_flag = 1; + + mac_address_slaves_update(bonded_eth_dev); + } + + activate_slave(bonded_eth_dev, port_id); + + /* If the user has defined the primary port then default to + * using it. + */ + if (internals->user_defined_primary_port && + internals->primary_port == port_id) + bond_ethdev_primary_set(internals, port_id); + } else { + if (active_pos == internals->active_slave_count) + goto link_update; + + /* Remove from active slave list */ + deactivate_slave(bonded_eth_dev, port_id); + + if (internals->active_slave_count < 1) + lsc_flag = 1; + + /* Update primary id, take first active slave from list or if none + * available set to -1 */ + if (port_id == internals->current_primary_port) { + if (internals->active_slave_count > 0) + bond_ethdev_primary_set(internals, + internals->active_slaves[0]); + else + internals->current_primary_port = internals->primary_port; + } + } + +link_update: + /** + * Update bonded device link properties after any change to active + * slaves + */ + bond_ethdev_link_update(bonded_eth_dev, 0); + + if (lsc_flag) { + /* Cancel any possible outstanding interrupts if delays are enabled */ + if (internals->link_up_delay_ms > 0 || + internals->link_down_delay_ms > 0) + rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation, + bonded_eth_dev); + + if (bonded_eth_dev->data->dev_link.link_status) { + if (internals->link_up_delay_ms > 0) + rte_eal_alarm_set(internals->link_up_delay_ms * 1000, + bond_ethdev_delayed_lsc_propagation, + (void *)bonded_eth_dev); + else + _rte_eth_dev_callback_process(bonded_eth_dev, + RTE_ETH_EVENT_INTR_LSC, + NULL); + + } else { + if (internals->link_down_delay_ms > 0) + rte_eal_alarm_set(internals->link_down_delay_ms * 1000, + bond_ethdev_delayed_lsc_propagation, + (void *)bonded_eth_dev); + else + _rte_eth_dev_callback_process(bonded_eth_dev, + RTE_ETH_EVENT_INTR_LSC, + NULL); + } + } + + rte_spinlock_unlock(&internals->lsc_lock); + + return rc; +} + +static int +bond_ethdev_rss_reta_update(struct rte_eth_dev *dev, + struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size) +{ + unsigned i, j; + int result = 0; + int slave_reta_size; + unsigned reta_count; + struct bond_dev_private *internals = dev->data->dev_private; + + if (reta_size != internals->reta_size) + return -EINVAL; + + /* Copy RETA table */ + reta_count = reta_size / RTE_RETA_GROUP_SIZE; + + for (i = 0; i < reta_count; i++) { + internals->reta_conf[i].mask = 
reta_conf[i].mask; + for (j = 0; j < RTE_RETA_GROUP_SIZE; j++) + if ((reta_conf[i].mask >> j) & 0x01) + internals->reta_conf[i].reta[j] = reta_conf[i].reta[j]; + } + + /* Fill rest of array */ + for (; i < RTE_DIM(internals->reta_conf); i += reta_count) + memcpy(&internals->reta_conf[i], &internals->reta_conf[0], + sizeof(internals->reta_conf[0]) * reta_count); + + /* Propagate RETA over slaves */ + for (i = 0; i < internals->slave_count; i++) { + slave_reta_size = internals->slaves[i].reta_size; + result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id, + &internals->reta_conf[0], slave_reta_size); + if (result < 0) + return result; + } + + return 0; +} + +static int +bond_ethdev_rss_reta_query(struct rte_eth_dev *dev, + struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size) +{ + int i, j; + struct bond_dev_private *internals = dev->data->dev_private; + + if (reta_size != internals->reta_size) + return -EINVAL; + + /* Copy RETA table */ + for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++) + for (j = 0; j < RTE_RETA_GROUP_SIZE; j++) + if ((reta_conf[i].mask >> j) & 0x01) + reta_conf[i].reta[j] = internals->reta_conf[i].reta[j]; + + return 0; +} + +static int +bond_ethdev_rss_hash_update(struct rte_eth_dev *dev, + struct rte_eth_rss_conf *rss_conf) +{ + int i, result = 0; + struct bond_dev_private *internals = dev->data->dev_private; + struct rte_eth_rss_conf bond_rss_conf; + + memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf)); + + bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads; + + if (bond_rss_conf.rss_hf != 0) + dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf; + + if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len < + sizeof(internals->rss_key)) { + if (bond_rss_conf.rss_key_len == 0) + bond_rss_conf.rss_key_len = 40; + internals->rss_key_len = bond_rss_conf.rss_key_len; + memcpy(internals->rss_key, bond_rss_conf.rss_key, + internals->rss_key_len); + } + + for (i = 0; i < internals->slave_count; i++) { + result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id, + &bond_rss_conf); + if (result < 0) + return result; + } + + return 0; +} + +static int +bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev, + struct rte_eth_rss_conf *rss_conf) +{ + struct bond_dev_private *internals = dev->data->dev_private; + + rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf; + rss_conf->rss_key_len = internals->rss_key_len; + if (rss_conf->rss_key) + memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len); + + return 0; +} + +static int +bond_ethdev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu) +{ + struct rte_eth_dev *slave_eth_dev; + struct bond_dev_private *internals = dev->data->dev_private; + int ret, i; + + rte_spinlock_lock(&internals->lock); + + for (i = 0; i < internals->slave_count; i++) { + slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id]; + if (*slave_eth_dev->dev_ops->mtu_set == NULL) { + rte_spinlock_unlock(&internals->lock); + return -ENOTSUP; + } + } + for (i = 0; i < internals->slave_count; i++) { + ret = rte_eth_dev_set_mtu(internals->slaves[i].port_id, mtu); + if (ret < 0) { + rte_spinlock_unlock(&internals->lock); + return ret; + } + } + + rte_spinlock_unlock(&internals->lock); + return 0; +} + +static int +bond_ethdev_mac_address_set(struct rte_eth_dev *dev, + struct rte_ether_addr *addr) +{ + if (mac_address_set(dev, addr)) { + RTE_BOND_LOG(ERR, "Failed to update MAC address"); + return -EINVAL; + } + + return 0; +} + +static int +bond_filter_ctrl(struct 
rte_eth_dev *dev __rte_unused, + enum rte_filter_type type, enum rte_filter_op op, void *arg) +{ + if (type == RTE_ETH_FILTER_GENERIC && op == RTE_ETH_FILTER_GET) { + *(const void **)arg = &bond_flow_ops; + return 0; + } + return -ENOTSUP; +} + +static int +bond_ethdev_mac_addr_add(struct rte_eth_dev *dev, + struct rte_ether_addr *mac_addr, + __rte_unused uint32_t index, uint32_t vmdq) +{ + struct rte_eth_dev *slave_eth_dev; + struct bond_dev_private *internals = dev->data->dev_private; + int ret, i; + + rte_spinlock_lock(&internals->lock); + + for (i = 0; i < internals->slave_count; i++) { + slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id]; + if (*slave_eth_dev->dev_ops->mac_addr_add == NULL || + *slave_eth_dev->dev_ops->mac_addr_remove == NULL) { + ret = -ENOTSUP; + goto end; + } + } + + for (i = 0; i < internals->slave_count; i++) { + ret = rte_eth_dev_mac_addr_add(internals->slaves[i].port_id, + mac_addr, vmdq); + if (ret < 0) { + /* rollback */ + for (i--; i >= 0; i--) + rte_eth_dev_mac_addr_remove( + internals->slaves[i].port_id, mac_addr); + goto end; + } + } + + ret = 0; +end: + rte_spinlock_unlock(&internals->lock); + return ret; +} + +static void +bond_ethdev_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index) +{ + struct rte_eth_dev *slave_eth_dev; + struct bond_dev_private *internals = dev->data->dev_private; + int i; + + rte_spinlock_lock(&internals->lock); + + for (i = 0; i < internals->slave_count; i++) { + slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id]; + if (*slave_eth_dev->dev_ops->mac_addr_remove == NULL) + goto end; + } + + struct rte_ether_addr *mac_addr = &dev->data->mac_addrs[index]; + + for (i = 0; i < internals->slave_count; i++) + rte_eth_dev_mac_addr_remove(internals->slaves[i].port_id, + mac_addr); + +end: + rte_spinlock_unlock(&internals->lock); +} + +const struct eth_dev_ops default_dev_ops = { + .dev_start = bond_ethdev_start, + .dev_stop = bond_ethdev_stop, + .dev_close = bond_ethdev_close, + .dev_configure = bond_ethdev_configure, + .dev_infos_get = bond_ethdev_info, + .vlan_filter_set = bond_ethdev_vlan_filter_set, + .rx_queue_setup = bond_ethdev_rx_queue_setup, + .tx_queue_setup = bond_ethdev_tx_queue_setup, + .rx_queue_release = bond_ethdev_rx_queue_release, + .tx_queue_release = bond_ethdev_tx_queue_release, + .link_update = bond_ethdev_link_update, + .stats_get = bond_ethdev_stats_get, + .stats_reset = bond_ethdev_stats_reset, + .promiscuous_enable = bond_ethdev_promiscuous_enable, + .promiscuous_disable = bond_ethdev_promiscuous_disable, + .allmulticast_enable = bond_ethdev_allmulticast_enable, + .allmulticast_disable = bond_ethdev_allmulticast_disable, + .reta_update = bond_ethdev_rss_reta_update, + .reta_query = bond_ethdev_rss_reta_query, + .rss_hash_update = bond_ethdev_rss_hash_update, + .rss_hash_conf_get = bond_ethdev_rss_hash_conf_get, + .mtu_set = bond_ethdev_mtu_set, + .mac_addr_set = bond_ethdev_mac_address_set, + .mac_addr_add = bond_ethdev_mac_addr_add, + .mac_addr_remove = bond_ethdev_mac_addr_remove, + .filter_ctrl = bond_filter_ctrl +}; + +static int +bond_alloc(struct rte_vdev_device *dev, uint8_t mode) +{ + const char *name = rte_vdev_device_name(dev); + uint8_t socket_id = dev->device.numa_node; + struct bond_dev_private *internals = NULL; + struct rte_eth_dev *eth_dev = NULL; + uint32_t vlan_filter_bmp_size; + + /* now do all data allocation - for eth_dev structure, dummy pci driver + * and internal (private) data + */ + + /* reserve an ethdev entry */ + eth_dev = rte_eth_vdev_allocate(dev, 
sizeof(*internals)); + if (eth_dev == NULL) { + RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev"); + goto err; + } + + internals = eth_dev->data->dev_private; + eth_dev->data->nb_rx_queues = (uint16_t)1; + eth_dev->data->nb_tx_queues = (uint16_t)1; + + /* Allocate memory for storing MAC addresses */ + eth_dev->data->mac_addrs = rte_zmalloc_socket(name, RTE_ETHER_ADDR_LEN * + BOND_MAX_MAC_ADDRS, 0, socket_id); + if (eth_dev->data->mac_addrs == NULL) { + RTE_BOND_LOG(ERR, + "Failed to allocate %u bytes needed to store MAC addresses", + RTE_ETHER_ADDR_LEN * BOND_MAX_MAC_ADDRS); + goto err; + } + + eth_dev->dev_ops = &default_dev_ops; + eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC; + + rte_spinlock_init(&internals->lock); + rte_spinlock_init(&internals->lsc_lock); + + internals->port_id = eth_dev->data->port_id; + internals->mode = BONDING_MODE_INVALID; + internals->current_primary_port = RTE_MAX_ETHPORTS + 1; + internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2; + internals->burst_xmit_hash = burst_xmit_l2_hash; + internals->user_defined_mac = 0; + + internals->link_status_polling_enabled = 0; + + internals->link_status_polling_interval_ms = + DEFAULT_POLLING_INTERVAL_10_MS; + internals->link_down_delay_ms = 0; + internals->link_up_delay_ms = 0; + + internals->slave_count = 0; + internals->active_slave_count = 0; + internals->rx_offload_capa = 0; + internals->tx_offload_capa = 0; + internals->rx_queue_offload_capa = 0; + internals->tx_queue_offload_capa = 0; + internals->candidate_max_rx_pktlen = 0; + internals->max_rx_pktlen = 0; + + /* Initially allow to choose any offload type */ + internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK; + + memset(&internals->default_rxconf, 0, + sizeof(internals->default_rxconf)); + memset(&internals->default_txconf, 0, + sizeof(internals->default_txconf)); + + memset(&internals->rx_desc_lim, 0, sizeof(internals->rx_desc_lim)); + memset(&internals->tx_desc_lim, 0, sizeof(internals->tx_desc_lim)); + + memset(internals->active_slaves, 0, sizeof(internals->active_slaves)); + memset(internals->slaves, 0, sizeof(internals->slaves)); + + TAILQ_INIT(&internals->flow_list); + internals->flow_isolated_valid = 0; + + /* Set mode 4 default configuration */ + bond_mode_8023ad_setup(eth_dev, NULL); + if (bond_ethdev_mode_set(eth_dev, mode)) { + RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode to %d", + eth_dev->data->port_id, mode); + goto err; + } + + vlan_filter_bmp_size = + rte_bitmap_get_memory_footprint(RTE_ETHER_MAX_VLAN_ID + 1); + internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size, + RTE_CACHE_LINE_SIZE); + if (internals->vlan_filter_bmpmem == NULL) { + RTE_BOND_LOG(ERR, + "Failed to allocate vlan bitmap for bonded device %u", + eth_dev->data->port_id); + goto err; + } + + internals->vlan_filter_bmp = rte_bitmap_init(RTE_ETHER_MAX_VLAN_ID + 1, + internals->vlan_filter_bmpmem, vlan_filter_bmp_size); + if (internals->vlan_filter_bmp == NULL) { + RTE_BOND_LOG(ERR, + "Failed to init vlan bitmap for bonded device %u", + eth_dev->data->port_id); + rte_free(internals->vlan_filter_bmpmem); + goto err; + } + + return eth_dev->data->port_id; + +err: + rte_free(internals); + if (eth_dev != NULL) + eth_dev->data->dev_private = NULL; + rte_eth_dev_release_port(eth_dev); + return -1; +} + +static int +bond_probe(struct rte_vdev_device *dev) +{ + const char *name; + struct bond_dev_private *internals; + struct rte_kvargs *kvlist; + uint8_t bonding_mode, socket_id/*, agg_mode*/; + int arg_count, port_id; + uint8_t agg_mode; + struct 
rte_eth_dev *eth_dev; + + if (!dev) + return -EINVAL; + + name = rte_vdev_device_name(dev); + RTE_BOND_LOG(INFO, "Initializing pmd_bond for %s", name); + + if (rte_eal_process_type() == RTE_PROC_SECONDARY) { + eth_dev = rte_eth_dev_attach_secondary(name); + if (!eth_dev) { + RTE_BOND_LOG(ERR, "Failed to probe %s", name); + return -1; + } + /* TODO: request info from primary to set up Rx and Tx */ + eth_dev->dev_ops = &default_dev_ops; + eth_dev->device = &dev->device; + rte_eth_dev_probing_finish(eth_dev); + return 0; + } + + kvlist = rte_kvargs_parse(rte_vdev_device_args(dev), + pmd_bond_init_valid_arguments); + if (kvlist == NULL) + return -1; + + /* Parse link bonding mode */ + if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) { + if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG, + &bond_ethdev_parse_slave_mode_kvarg, + &bonding_mode) != 0) { + RTE_BOND_LOG(ERR, "Invalid mode for bonded device %s", + name); + goto parse_error; + } + } else { + RTE_BOND_LOG(ERR, "Mode must be specified only once for bonded " + "device %s", name); + goto parse_error; + } + + /* Parse socket id to create bonding device on */ + arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG); + if (arg_count == 1) { + if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG, + &bond_ethdev_parse_socket_id_kvarg, &socket_id) + != 0) { + RTE_BOND_LOG(ERR, "Invalid socket Id specified for " + "bonded device %s", name); + goto parse_error; + } + } else if (arg_count > 1) { + RTE_BOND_LOG(ERR, "Socket Id can be specified only once for " + "bonded device %s", name); + goto parse_error; + } else { + socket_id = rte_socket_id(); + } + + dev->device.numa_node = socket_id; + + /* Create link bonding eth device */ + port_id = bond_alloc(dev, bonding_mode); + if (port_id < 0) { + RTE_BOND_LOG(ERR, "Failed to create socket %s in mode %u on " + "socket %u.", name, bonding_mode, socket_id); + goto parse_error; + } + internals = rte_eth_devices[port_id].data->dev_private; + internals->kvlist = kvlist; + + if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) { + if (rte_kvargs_process(kvlist, + PMD_BOND_AGG_MODE_KVARG, + &bond_ethdev_parse_slave_agg_mode_kvarg, + &agg_mode) != 0) { + RTE_BOND_LOG(ERR, + "Failed to parse agg selection mode for bonded device %s", + name); + goto parse_error; + } + + if (internals->mode == BONDING_MODE_8023AD) + internals->mode4.agg_selection = agg_mode; + } else { + internals->mode4.agg_selection = AGG_STABLE; + } + + rte_eth_dev_probing_finish(&rte_eth_devices[port_id]); + RTE_BOND_LOG(INFO, "Create bonded device %s on port %d in mode %u on " + "socket %u.", name, port_id, bonding_mode, socket_id); + return 0; + +parse_error: + rte_kvargs_free(kvlist); + + return -1; +} + +static int +bond_remove(struct rte_vdev_device *dev) +{ + struct rte_eth_dev *eth_dev; + struct bond_dev_private *internals; + const char *name; + + if (!dev) + return -EINVAL; + + name = rte_vdev_device_name(dev); + RTE_BOND_LOG(INFO, "Uninitializing pmd_bond for %s", name); + + /* now free all data allocation - for eth_dev structure, + * dummy pci driver and internal (private) data + */ + + /* find an ethdev entry */ + eth_dev = rte_eth_dev_allocated(name); + if (eth_dev == NULL) + return -ENODEV; + + if (rte_eal_process_type() != RTE_PROC_PRIMARY) + return rte_eth_dev_release_port(eth_dev); + + RTE_ASSERT(eth_dev->device == &dev->device); + + internals = eth_dev->data->dev_private; + if (internals->slave_count != 0) + return -EBUSY; + + if (eth_dev->data->dev_started == 1) { + bond_ethdev_stop(eth_dev); + 
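+		/* If the port was left running, stop and close it here so the
+		 * datapath is quiesced before the burst handlers are detached
+		 * and the private resources are freed below. */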
bond_ethdev_close(eth_dev);
+	}
+
+	eth_dev->dev_ops = NULL;
+	eth_dev->rx_pkt_burst = NULL;
+	eth_dev->tx_pkt_burst = NULL;
+
+	internals = eth_dev->data->dev_private;
+	/* Try to release the mempool used in mode 6. If the bonded
+	 * device is not in mode 6, freeing the NULL pointer is harmless.
+	 */
+	rte_mempool_free(internals->mode6.mempool);
+	rte_bitmap_free(internals->vlan_filter_bmp);
+	rte_free(internals->vlan_filter_bmpmem);
+
+	rte_eth_dev_release_port(eth_dev);
+
+	return 0;
+}
+
+/* This part resolves the slave port ids after all the other pdevs and vdevs
+ * have been allocated. */
+static int
+bond_ethdev_configure(struct rte_eth_dev *dev)
+{
+	const char *name = dev->device->name;
+	struct bond_dev_private *internals = dev->data->dev_private;
+	struct rte_kvargs *kvlist = internals->kvlist;
+	int arg_count;
+	uint16_t port_id = dev - rte_eth_devices;
+	uint8_t agg_mode;
+
+	static const uint8_t default_rss_key[40] = {
+		0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
+		0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
+		0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
+		0xBE, 0xAC, 0x01, 0xFA
+	};
+
+	unsigned i, j;
+
+	/*
+	 * If RSS is enabled, fill the table with default values and
+	 * set the key to the value specified in the port RSS configuration.
+	 * Fall back to the default RSS key if no key is specified.
+	 */
+	if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
+		if (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key != NULL) {
+			internals->rss_key_len =
+				dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
+			memcpy(internals->rss_key,
+				dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key,
+				internals->rss_key_len);
+		} else {
+			internals->rss_key_len = sizeof(default_rss_key);
+			memcpy(internals->rss_key, default_rss_key,
+				internals->rss_key_len);
+		}
+
+		for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
+			internals->reta_conf[i].mask = ~0LL;
+			for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
+				internals->reta_conf[i].reta[j] =
+					(i * RTE_RETA_GROUP_SIZE + j) %
+					dev->data->nb_rx_queues;
+		}
+	}
+
+	/* set the max_rx_pktlen */
+	internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;
+
+	/*
+	 * If there is no kvlist, this bonded device was created
+	 * through the bonding API.
+ */ + if (!kvlist) + return 0; + + /* Parse MAC address for bonded device */ + arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG); + if (arg_count == 1) { + struct rte_ether_addr bond_mac; + + if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG, + &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) { + RTE_BOND_LOG(INFO, "Invalid mac address for bonded device %s", + name); + return -1; + } + + /* Set MAC address */ + if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) { + RTE_BOND_LOG(ERR, + "Failed to set mac address on bonded device %s", + name); + return -1; + } + } else if (arg_count > 1) { + RTE_BOND_LOG(ERR, + "MAC address can be specified only once for bonded device %s", + name); + return -1; + } + + /* Parse/set balance mode transmit policy */ + arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG); + if (arg_count == 1) { + uint8_t xmit_policy; + + if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG, + &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) != + 0) { + RTE_BOND_LOG(INFO, + "Invalid xmit policy specified for bonded device %s", + name); + return -1; + } + + /* Set balance mode transmit policy*/ + if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) { + RTE_BOND_LOG(ERR, + "Failed to set balance xmit policy on bonded device %s", + name); + return -1; + } + } else if (arg_count > 1) { + RTE_BOND_LOG(ERR, + "Transmit policy can be specified only once for bonded device %s", + name); + return -1; + } + + if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) { + if (rte_kvargs_process(kvlist, + PMD_BOND_AGG_MODE_KVARG, + &bond_ethdev_parse_slave_agg_mode_kvarg, + &agg_mode) != 0) { + RTE_BOND_LOG(ERR, + "Failed to parse agg selection mode for bonded device %s", + name); + } + if (internals->mode == BONDING_MODE_8023AD) { + int ret = rte_eth_bond_8023ad_agg_selection_set(port_id, + agg_mode); + if (ret < 0) { + RTE_BOND_LOG(ERR, + "Invalid args for agg selection set for bonded device %s", + name); + return -1; + } + } + } + + /* Parse/add slave ports to bonded device */ + if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) { + struct bond_ethdev_slave_ports slave_ports; + unsigned i; + + memset(&slave_ports, 0, sizeof(slave_ports)); + + if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG, + &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) { + RTE_BOND_LOG(ERR, + "Failed to parse slave ports for bonded device %s", + name); + return -1; + } + + for (i = 0; i < slave_ports.slave_count; i++) { + if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) { + RTE_BOND_LOG(ERR, + "Failed to add port %d as slave to bonded device %s", + slave_ports.slaves[i], name); + } + } + + } else { + RTE_BOND_LOG(INFO, "No slaves specified for bonded device %s", name); + return -1; + } + + /* Parse/set primary slave port id*/ + arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG); + if (arg_count == 1) { + uint16_t primary_slave_port_id; + + if (rte_kvargs_process(kvlist, + PMD_BOND_PRIMARY_SLAVE_KVARG, + &bond_ethdev_parse_primary_slave_port_id_kvarg, + &primary_slave_port_id) < 0) { + RTE_BOND_LOG(INFO, + "Invalid primary slave port id specified for bonded device %s", + name); + return -1; + } + + /* Set balance mode transmit policy*/ + if (rte_eth_bond_primary_set(port_id, primary_slave_port_id) + != 0) { + RTE_BOND_LOG(ERR, + "Failed to set primary slave port %d on bonded device %s", + primary_slave_port_id, name); + return -1; + } + } else if (arg_count > 1) { + RTE_BOND_LOG(INFO, + 
"Primary slave can be specified only once for bonded device %s", + name); + return -1; + } + + /* Parse link status monitor polling interval */ + arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG); + if (arg_count == 1) { + uint32_t lsc_poll_interval_ms; + + if (rte_kvargs_process(kvlist, + PMD_BOND_LSC_POLL_PERIOD_KVARG, + &bond_ethdev_parse_time_ms_kvarg, + &lsc_poll_interval_ms) < 0) { + RTE_BOND_LOG(INFO, + "Invalid lsc polling interval value specified for bonded" + " device %s", name); + return -1; + } + + if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms) + != 0) { + RTE_BOND_LOG(ERR, + "Failed to set lsc monitor polling interval (%u ms) on bonded device %s", + lsc_poll_interval_ms, name); + return -1; + } + } else if (arg_count > 1) { + RTE_BOND_LOG(INFO, + "LSC polling interval can be specified only once for bonded" + " device %s", name); + return -1; + } + + /* Parse link up interrupt propagation delay */ + arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG); + if (arg_count == 1) { + uint32_t link_up_delay_ms; + + if (rte_kvargs_process(kvlist, + PMD_BOND_LINK_UP_PROP_DELAY_KVARG, + &bond_ethdev_parse_time_ms_kvarg, + &link_up_delay_ms) < 0) { + RTE_BOND_LOG(INFO, + "Invalid link up propagation delay value specified for" + " bonded device %s", name); + return -1; + } + + /* Set balance mode transmit policy*/ + if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms) + != 0) { + RTE_BOND_LOG(ERR, + "Failed to set link up propagation delay (%u ms) on bonded" + " device %s", link_up_delay_ms, name); + return -1; + } + } else if (arg_count > 1) { + RTE_BOND_LOG(INFO, + "Link up propagation delay can be specified only once for" + " bonded device %s", name); + return -1; + } + + /* Parse link down interrupt propagation delay */ + arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG); + if (arg_count == 1) { + uint32_t link_down_delay_ms; + + if (rte_kvargs_process(kvlist, + PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG, + &bond_ethdev_parse_time_ms_kvarg, + &link_down_delay_ms) < 0) { + RTE_BOND_LOG(INFO, + "Invalid link down propagation delay value specified for" + " bonded device %s", name); + return -1; + } + + /* Set balance mode transmit policy*/ + if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms) + != 0) { + RTE_BOND_LOG(ERR, + "Failed to set link down propagation delay (%u ms) on bonded device %s", + link_down_delay_ms, name); + return -1; + } + } else if (arg_count > 1) { + RTE_BOND_LOG(INFO, + "Link down propagation delay can be specified only once for bonded device %s", + name); + return -1; + } + + return 0; +} + +struct rte_vdev_driver pmd_bond_drv = { + .probe = bond_probe, + .remove = bond_remove, +}; + +RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv); +RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond); + +RTE_PMD_REGISTER_PARAM_STRING(net_bonding, + "slave=<ifc> " + "primary=<ifc> " + "mode=[0-6] " + "xmit_policy=[l2 | l23 | l34] " + "agg_mode=[count | stable | bandwidth] " + "socket_id=<int> " + "mac=<mac addr> " + "lsc_poll_period_ms=<int> " + "up_delay=<int> " + "down_delay=<int>"); + +int bond_logtype; + +RTE_INIT(bond_init_log) +{ + bond_logtype = rte_log_register("pmd.net.bond"); + if (bond_logtype >= 0) + rte_log_set_level(bond_logtype, RTE_LOG_NOTICE); +} diff --git a/src/spdk/dpdk/drivers/net/bonding/rte_pmd_bond_version.map b/src/spdk/dpdk/drivers/net/bonding/rte_pmd_bond_version.map new file mode 100644 index 000000000..270c7d5d5 --- /dev/null +++ 
b/src/spdk/dpdk/drivers/net/bonding/rte_pmd_bond_version.map
@@ -0,0 +1,33 @@
+DPDK_20.0 {
+	global:
+
+	rte_eth_bond_8023ad_agg_selection_get;
+	rte_eth_bond_8023ad_agg_selection_set;
+	rte_eth_bond_8023ad_conf_get;
+	rte_eth_bond_8023ad_dedicated_queues_disable;
+	rte_eth_bond_8023ad_dedicated_queues_enable;
+	rte_eth_bond_8023ad_ext_collect;
+	rte_eth_bond_8023ad_ext_collect_get;
+	rte_eth_bond_8023ad_ext_distrib;
+	rte_eth_bond_8023ad_ext_distrib_get;
+	rte_eth_bond_8023ad_ext_slowtx;
+	rte_eth_bond_8023ad_setup;
+	rte_eth_bond_8023ad_slave_info;
+	rte_eth_bond_active_slaves_get;
+	rte_eth_bond_create;
+	rte_eth_bond_free;
+	rte_eth_bond_link_monitoring_set;
+	rte_eth_bond_mac_address_reset;
+	rte_eth_bond_mac_address_set;
+	rte_eth_bond_mode_get;
+	rte_eth_bond_mode_set;
+	rte_eth_bond_primary_get;
+	rte_eth_bond_primary_set;
+	rte_eth_bond_slave_add;
+	rte_eth_bond_slave_remove;
+	rte_eth_bond_slaves_get;
+	rte_eth_bond_xmit_policy_get;
+	rte_eth_bond_xmit_policy_set;
+
+	local: *;
+};
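For reference, the API exported above can be exercised either through the vdev arguments registered by the PMD (for example --vdev 'net_bonding0,mode=1,slave=0000:00:04.0,slave=0000:00:05.0,primary=0000:00:04.0', with device names per the slave=<ifc>/primary=<ifc> parameters) or programmatically. Below is a minimal sketch, assuming DPDK 20.x headers and two already-probed slave ports with ids 0 and 1; the helper name create_bonded_port and the device name "net_bonding0" are illustrative and not part of this driver. The returned bonded port is then configured, given queues, and started like any other ethdev.

#include <rte_ethdev.h>
#include <rte_lcore.h>
#include <rte_eth_bond.h>

/* Illustrative sketch, not part of this commit: build an active-backup
 * bonded port over two already-probed ethdev ports (ids 0 and 1 are
 * assumed valid here). */
static int
create_bonded_port(void)
{
	int bond_port;

	/* Create the bonded vdev on the caller's NUMA socket. */
	bond_port = rte_eth_bond_create("net_bonding0",
			BONDING_MODE_ACTIVE_BACKUP, rte_socket_id());
	if (bond_port < 0)
		return bond_port;

	/* Attach the slave ports; both must already be probed. */
	if (rte_eth_bond_slave_add(bond_port, 0) != 0 ||
			rte_eth_bond_slave_add(bond_port, 1) != 0)
		return -1;

	/* Prefer port 0 while its link is up. */
	rte_eth_bond_primary_set(bond_port, 0);

	return bond_port;
}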