diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 18:45:59 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 18:45:59 +0000 |
commit | 19fcec84d8d7d21e796c7624e521b60d28ee21ed (patch) | |
tree | 42d26aa27d1e3f7c0b8bd3fd14e7d7082f5008dc /src/seastar/dpdk/drivers/net/netvsc | |
parent | Initial commit. (diff) | |
download | ceph-19fcec84d8d7d21e796c7624e521b60d28ee21ed.tar.xz ceph-19fcec84d8d7d21e796c7624e521b60d28ee21ed.zip |
Adding upstream version 16.2.11+ds.upstream/16.2.11+dsupstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/seastar/dpdk/drivers/net/netvsc')
-rw-r--r-- | src/seastar/dpdk/drivers/net/netvsc/Makefile | 24 | ||||
-rw-r--r-- | src/seastar/dpdk/drivers/net/netvsc/hn_ethdev.c | 904 | ||||
-rw-r--r-- | src/seastar/dpdk/drivers/net/netvsc/hn_logs.h | 36 | ||||
-rw-r--r-- | src/seastar/dpdk/drivers/net/netvsc/hn_nvs.c | 550 | ||||
-rw-r--r-- | src/seastar/dpdk/drivers/net/netvsc/hn_nvs.h | 238 | ||||
-rw-r--r-- | src/seastar/dpdk/drivers/net/netvsc/hn_rndis.c | 1134 | ||||
-rw-r--r-- | src/seastar/dpdk/drivers/net/netvsc/hn_rndis.h | 33 | ||||
-rw-r--r-- | src/seastar/dpdk/drivers/net/netvsc/hn_rxtx.c | 1470 | ||||
-rw-r--r-- | src/seastar/dpdk/drivers/net/netvsc/hn_var.h | 241 | ||||
-rw-r--r-- | src/seastar/dpdk/drivers/net/netvsc/hn_vf.c | 555 | ||||
-rw-r--r-- | src/seastar/dpdk/drivers/net/netvsc/meson.build | 10 | ||||
-rw-r--r-- | src/seastar/dpdk/drivers/net/netvsc/ndis.h | 378 | ||||
-rw-r--r-- | src/seastar/dpdk/drivers/net/netvsc/rndis.h | 414 | ||||
-rw-r--r-- | src/seastar/dpdk/drivers/net/netvsc/rte_pmd_netvsc_version.map | 5 |
14 files changed, 5992 insertions, 0 deletions
diff --git a/src/seastar/dpdk/drivers/net/netvsc/Makefile b/src/seastar/dpdk/drivers/net/netvsc/Makefile new file mode 100644 index 000000000..71482591a --- /dev/null +++ b/src/seastar/dpdk/drivers/net/netvsc/Makefile @@ -0,0 +1,24 @@ +# SPDX-License-Identifier: BSD-3-Clause + +include $(RTE_SDK)/mk/rte.vars.mk + +LIB = librte_pmd_netvsc.a + +CFLAGS += -O3 $(WERROR_FLAGS) +CFLAGS += -DALLOW_EXPERIMENTAL_API + +EXPORT_MAP := rte_pmd_netvsc_version.map + +LIBABIVER := 1 + +SRCS-$(CONFIG_RTE_LIBRTE_NETVSC_PMD) += hn_ethdev.c +SRCS-$(CONFIG_RTE_LIBRTE_NETVSC_PMD) += hn_rxtx.c +SRCS-$(CONFIG_RTE_LIBRTE_NETVSC_PMD) += hn_rndis.c +SRCS-$(CONFIG_RTE_LIBRTE_NETVSC_PMD) += hn_nvs.c +SRCS-$(CONFIG_RTE_LIBRTE_NETVSC_PMD) += hn_vf.c + +LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring +LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs +LDLIBS += -lrte_bus_vmbus + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/src/seastar/dpdk/drivers/net/netvsc/hn_ethdev.c b/src/seastar/dpdk/drivers/net/netvsc/hn_ethdev.c new file mode 100644 index 000000000..553cb06f6 --- /dev/null +++ b/src/seastar/dpdk/drivers/net/netvsc/hn_ethdev.c @@ -0,0 +1,904 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2016-2018 Microsoft Corporation + * Copyright(c) 2013-2016 Brocade Communications Systems, Inc. + * All rights reserved. + */ + +#include <stdint.h> +#include <string.h> +#include <stdio.h> +#include <errno.h> +#include <unistd.h> + +#include <rte_ethdev.h> +#include <rte_memcpy.h> +#include <rte_string_fns.h> +#include <rte_memzone.h> +#include <rte_devargs.h> +#include <rte_malloc.h> +#include <rte_kvargs.h> +#include <rte_atomic.h> +#include <rte_branch_prediction.h> +#include <rte_ether.h> +#include <rte_ethdev_driver.h> +#include <rte_cycles.h> +#include <rte_errno.h> +#include <rte_memory.h> +#include <rte_eal.h> +#include <rte_dev.h> +#include <rte_bus_vmbus.h> + +#include "hn_logs.h" +#include "hn_var.h" +#include "hn_rndis.h" +#include "hn_nvs.h" +#include "ndis.h" + +#define HN_TX_OFFLOAD_CAPS (DEV_TX_OFFLOAD_IPV4_CKSUM | \ + DEV_TX_OFFLOAD_TCP_CKSUM | \ + DEV_TX_OFFLOAD_UDP_CKSUM | \ + DEV_TX_OFFLOAD_TCP_TSO | \ + DEV_TX_OFFLOAD_MULTI_SEGS | \ + DEV_TX_OFFLOAD_VLAN_INSERT) + +#define HN_RX_OFFLOAD_CAPS (DEV_RX_OFFLOAD_CHECKSUM | \ + DEV_RX_OFFLOAD_VLAN_STRIP) + +int hn_logtype_init; +int hn_logtype_driver; + +struct hn_xstats_name_off { + char name[RTE_ETH_XSTATS_NAME_SIZE]; + unsigned int offset; +}; + +static const struct hn_xstats_name_off hn_stat_strings[] = { + { "good_packets", offsetof(struct hn_stats, packets) }, + { "good_bytes", offsetof(struct hn_stats, bytes) }, + { "errors", offsetof(struct hn_stats, errors) }, + { "ring full", offsetof(struct hn_stats, ring_full) }, + { "multicast_packets", offsetof(struct hn_stats, multicast) }, + { "broadcast_packets", offsetof(struct hn_stats, broadcast) }, + { "undersize_packets", offsetof(struct hn_stats, size_bins[0]) }, + { "size_64_packets", offsetof(struct hn_stats, size_bins[1]) }, + { "size_65_127_packets", offsetof(struct hn_stats, size_bins[2]) }, + { "size_128_255_packets", offsetof(struct hn_stats, size_bins[3]) }, + { "size_256_511_packets", offsetof(struct hn_stats, size_bins[4]) }, + { "size_512_1023_packets", offsetof(struct hn_stats, size_bins[5]) }, + { "size_1024_1518_packets", offsetof(struct hn_stats, size_bins[6]) }, + { "size_1519_max_packets", offsetof(struct hn_stats, size_bins[7]) }, +}; + +static struct rte_eth_dev * +eth_dev_vmbus_allocate(struct rte_vmbus_device *dev, size_t private_data_size) +{ + struct rte_eth_dev *eth_dev; + const char *name; + + if (!dev) + return NULL; + + name = dev->device.name; + + if (rte_eal_process_type() == RTE_PROC_PRIMARY) { + eth_dev = rte_eth_dev_allocate(name); + if (!eth_dev) { + PMD_DRV_LOG(NOTICE, "can not allocate rte ethdev"); + return NULL; + } + + if (private_data_size) { + eth_dev->data->dev_private = + rte_zmalloc_socket(name, private_data_size, + RTE_CACHE_LINE_SIZE, dev->device.numa_node); + if (!eth_dev->data->dev_private) { + PMD_DRV_LOG(NOTICE, "can not allocate driver data"); + rte_eth_dev_release_port(eth_dev); + return NULL; + } + } + } else { + eth_dev = rte_eth_dev_attach_secondary(name); + if (!eth_dev) { + PMD_DRV_LOG(NOTICE, "can not attach secondary"); + return NULL; + } + } + + eth_dev->device = &dev->device; + + /* interrupt is simulated */ + dev->intr_handle.type = RTE_INTR_HANDLE_EXT; + eth_dev->data->dev_flags |= RTE_ETH_DEV_INTR_LSC; + eth_dev->intr_handle = &dev->intr_handle; + + /* allow ethdev to remove on close */ + eth_dev->data->dev_flags |= RTE_ETH_DEV_CLOSE_REMOVE; + + return eth_dev; +} + +static void +eth_dev_vmbus_release(struct rte_eth_dev *eth_dev) +{ + /* mac_addrs must not be freed alone because part of dev_private */ + eth_dev->data->mac_addrs = NULL; + /* free ether device */ + rte_eth_dev_release_port(eth_dev); + + eth_dev->device = NULL; + eth_dev->intr_handle = NULL; +} + +/* handle "latency=X" from devargs */ +static int hn_set_latency(const char *key, const char *value, void *opaque) +{ + struct hn_data *hv = opaque; + char *endp = NULL; + unsigned long lat; + + errno = 0; + lat = strtoul(value, &endp, 0); + + if (*value == '\0' || *endp != '\0') { + PMD_DRV_LOG(ERR, "invalid parameter %s=%s", key, value); + return -EINVAL; + } + + PMD_DRV_LOG(DEBUG, "set latency %lu usec", lat); + + hv->latency = lat * 1000; /* usec to nsec */ + return 0; +} + +/* Parse device arguments */ +static int hn_parse_args(const struct rte_eth_dev *dev) +{ + struct hn_data *hv = dev->data->dev_private; + struct rte_devargs *devargs = dev->device->devargs; + static const char * const valid_keys[] = { + "latency", + NULL + }; + struct rte_kvargs *kvlist; + int ret; + + if (!devargs) + return 0; + + PMD_INIT_LOG(DEBUG, "device args %s %s", + devargs->name, devargs->args); + + kvlist = rte_kvargs_parse(devargs->args, valid_keys); + if (!kvlist) { + PMD_DRV_LOG(NOTICE, "invalid parameters"); + return -EINVAL; + } + + ret = rte_kvargs_process(kvlist, "latency", hn_set_latency, hv); + if (ret) + PMD_DRV_LOG(ERR, "Unable to process latency arg\n"); + + rte_kvargs_free(kvlist); + return ret; +} + +/* Update link status. + * Note: the DPDK definition of "wait_to_complete" + * means block this call until link is up. + * which is not worth supporting. + */ +int +hn_dev_link_update(struct rte_eth_dev *dev, + int wait_to_complete) +{ + struct hn_data *hv = dev->data->dev_private; + struct rte_eth_link link, old; + int error; + + old = dev->data->dev_link; + + error = hn_rndis_get_linkstatus(hv); + if (error) + return error; + + hn_rndis_get_linkspeed(hv); + + hn_vf_link_update(dev, wait_to_complete); + + link = (struct rte_eth_link) { + .link_duplex = ETH_LINK_FULL_DUPLEX, + .link_autoneg = ETH_LINK_SPEED_FIXED, + .link_speed = hv->link_speed / 10000, + }; + + if (hv->link_status == NDIS_MEDIA_STATE_CONNECTED) + link.link_status = ETH_LINK_UP; + else + link.link_status = ETH_LINK_DOWN; + + if (old.link_status == link.link_status) + return 0; + + PMD_INIT_LOG(DEBUG, "Port %d is %s", dev->data->port_id, + (link.link_status == ETH_LINK_UP) ? "up" : "down"); + + return rte_eth_linkstatus_set(dev, &link); +} + +static void hn_dev_info_get(struct rte_eth_dev *dev, + struct rte_eth_dev_info *dev_info) +{ + struct hn_data *hv = dev->data->dev_private; + + dev_info->speed_capa = ETH_LINK_SPEED_10G; + dev_info->min_rx_bufsize = HN_MIN_RX_BUF_SIZE; + dev_info->max_rx_pktlen = HN_MAX_XFER_LEN; + dev_info->max_mac_addrs = 1; + + dev_info->hash_key_size = NDIS_HASH_KEYSIZE_TOEPLITZ; + dev_info->flow_type_rss_offloads = + ETH_RSS_IPV4 | ETH_RSS_IPV6 | ETH_RSS_TCP | ETH_RSS_UDP; + + dev_info->max_rx_queues = hv->max_queues; + dev_info->max_tx_queues = hv->max_queues; + + hn_rndis_get_offload(hv, dev_info); + hn_vf_info_get(hv, dev_info); +} + +static void +hn_dev_promiscuous_enable(struct rte_eth_dev *dev) +{ + struct hn_data *hv = dev->data->dev_private; + + hn_rndis_set_rxfilter(hv, NDIS_PACKET_TYPE_PROMISCUOUS); + hn_vf_promiscuous_enable(dev); +} + +static void +hn_dev_promiscuous_disable(struct rte_eth_dev *dev) +{ + struct hn_data *hv = dev->data->dev_private; + uint32_t filter; + + filter = NDIS_PACKET_TYPE_DIRECTED | NDIS_PACKET_TYPE_BROADCAST; + if (dev->data->all_multicast) + filter |= NDIS_PACKET_TYPE_ALL_MULTICAST; + hn_rndis_set_rxfilter(hv, filter); + hn_vf_promiscuous_disable(dev); +} + +static void +hn_dev_allmulticast_enable(struct rte_eth_dev *dev) +{ + struct hn_data *hv = dev->data->dev_private; + + hn_rndis_set_rxfilter(hv, NDIS_PACKET_TYPE_DIRECTED | + NDIS_PACKET_TYPE_ALL_MULTICAST | + NDIS_PACKET_TYPE_BROADCAST); + hn_vf_allmulticast_enable(dev); +} + +static void +hn_dev_allmulticast_disable(struct rte_eth_dev *dev) +{ + struct hn_data *hv = dev->data->dev_private; + + hn_rndis_set_rxfilter(hv, NDIS_PACKET_TYPE_DIRECTED | + NDIS_PACKET_TYPE_BROADCAST); + hn_vf_allmulticast_disable(dev); +} + +static int +hn_dev_mc_addr_list(struct rte_eth_dev *dev, + struct ether_addr *mc_addr_set, + uint32_t nb_mc_addr) +{ + /* No filtering on the synthetic path, but can do it on VF */ + return hn_vf_mc_addr_list(dev, mc_addr_set, nb_mc_addr); +} + +/* Setup shared rx/tx queue data */ +static int hn_subchan_configure(struct hn_data *hv, + uint32_t subchan) +{ + struct vmbus_channel *primary = hn_primary_chan(hv); + int err; + unsigned int retry = 0; + + PMD_DRV_LOG(DEBUG, + "open %u subchannels", subchan); + + /* Send create sub channels command */ + err = hn_nvs_alloc_subchans(hv, &subchan); + if (err) + return err; + + while (subchan > 0) { + struct vmbus_channel *new_sc; + uint16_t chn_index; + + err = rte_vmbus_subchan_open(primary, &new_sc); + if (err == -ENOENT && ++retry < 1000) { + /* This can happen if not ready yet */ + rte_delay_ms(10); + continue; + } + + if (err) { + PMD_DRV_LOG(ERR, + "open subchannel failed: %d", err); + return err; + } + + rte_vmbus_set_latency(hv->vmbus, new_sc, hv->latency); + + retry = 0; + chn_index = rte_vmbus_sub_channel_index(new_sc); + if (chn_index == 0 || chn_index > hv->max_queues) { + PMD_DRV_LOG(ERR, + "Invalid subchannel offermsg channel %u", + chn_index); + return -EIO; + } + + PMD_DRV_LOG(DEBUG, "new sub channel %u", chn_index); + hv->channels[chn_index] = new_sc; + --subchan; + } + + return err; +} + +static int hn_dev_configure(struct rte_eth_dev *dev) +{ + const struct rte_eth_conf *dev_conf = &dev->data->dev_conf; + const struct rte_eth_rxmode *rxmode = &dev_conf->rxmode; + const struct rte_eth_txmode *txmode = &dev_conf->txmode; + + const struct rte_eth_rss_conf *rss_conf = + &dev_conf->rx_adv_conf.rss_conf; + struct hn_data *hv = dev->data->dev_private; + uint64_t unsupported; + int err, subchan; + + PMD_INIT_FUNC_TRACE(); + + unsupported = txmode->offloads & ~HN_TX_OFFLOAD_CAPS; + if (unsupported) { + PMD_DRV_LOG(NOTICE, + "unsupported TX offload: %#" PRIx64, + unsupported); + return -EINVAL; + } + + unsupported = rxmode->offloads & ~HN_RX_OFFLOAD_CAPS; + if (unsupported) { + PMD_DRV_LOG(NOTICE, + "unsupported RX offload: %#" PRIx64, + rxmode->offloads); + return -EINVAL; + } + + hv->vlan_strip = !!(rxmode->offloads & DEV_RX_OFFLOAD_VLAN_STRIP); + + err = hn_rndis_conf_offload(hv, txmode->offloads, + rxmode->offloads); + if (err) { + PMD_DRV_LOG(NOTICE, + "offload configure failed"); + return err; + } + + hv->num_queues = RTE_MAX(dev->data->nb_rx_queues, + dev->data->nb_tx_queues); + subchan = hv->num_queues - 1; + if (subchan > 0) { + err = hn_subchan_configure(hv, subchan); + if (err) { + PMD_DRV_LOG(NOTICE, + "subchannel configuration failed"); + return err; + } + + err = hn_rndis_conf_rss(hv, rss_conf); + if (err) { + PMD_DRV_LOG(NOTICE, + "rss configuration failed"); + return err; + } + } + + return hn_vf_configure(dev, dev_conf); +} + +static int hn_dev_stats_get(struct rte_eth_dev *dev, + struct rte_eth_stats *stats) +{ + unsigned int i; + + hn_vf_stats_get(dev, stats); + + for (i = 0; i < dev->data->nb_tx_queues; i++) { + const struct hn_tx_queue *txq = dev->data->tx_queues[i]; + + if (!txq) + continue; + + stats->opackets += txq->stats.packets; + stats->obytes += txq->stats.bytes; + stats->oerrors += txq->stats.errors; + + if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) { + stats->q_opackets[i] = txq->stats.packets; + stats->q_obytes[i] = txq->stats.bytes; + } + } + + for (i = 0; i < dev->data->nb_rx_queues; i++) { + const struct hn_rx_queue *rxq = dev->data->rx_queues[i]; + + if (!rxq) + continue; + + stats->ipackets += rxq->stats.packets; + stats->ibytes += rxq->stats.bytes; + stats->ierrors += rxq->stats.errors; + stats->imissed += rxq->stats.ring_full; + + if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) { + stats->q_ipackets[i] = rxq->stats.packets; + stats->q_ibytes[i] = rxq->stats.bytes; + } + } + + stats->rx_nombuf = dev->data->rx_mbuf_alloc_failed; + return 0; +} + +static void +hn_dev_stats_reset(struct rte_eth_dev *dev) +{ + unsigned int i; + + PMD_INIT_FUNC_TRACE(); + + for (i = 0; i < dev->data->nb_tx_queues; i++) { + struct hn_tx_queue *txq = dev->data->tx_queues[i]; + + if (!txq) + continue; + memset(&txq->stats, 0, sizeof(struct hn_stats)); + } + + for (i = 0; i < dev->data->nb_rx_queues; i++) { + struct hn_rx_queue *rxq = dev->data->rx_queues[i]; + + if (!rxq) + continue; + + memset(&rxq->stats, 0, sizeof(struct hn_stats)); + } +} + +static void +hn_dev_xstats_reset(struct rte_eth_dev *dev) +{ + hn_dev_stats_reset(dev); + hn_vf_xstats_reset(dev); +} + +static int +hn_dev_xstats_count(struct rte_eth_dev *dev) +{ + int ret, count; + + count = dev->data->nb_tx_queues * RTE_DIM(hn_stat_strings); + count += dev->data->nb_rx_queues * RTE_DIM(hn_stat_strings); + + ret = hn_vf_xstats_get_names(dev, NULL, 0); + if (ret < 0) + return ret; + + return count + ret; +} + +static int +hn_dev_xstats_get_names(struct rte_eth_dev *dev, + struct rte_eth_xstat_name *xstats_names, + unsigned int limit) +{ + unsigned int i, t, count = 0; + int ret; + + if (!xstats_names) + return hn_dev_xstats_count(dev); + + /* Note: limit checked in rte_eth_xstats_names() */ + for (i = 0; i < dev->data->nb_tx_queues; i++) { + const struct hn_tx_queue *txq = dev->data->tx_queues[i]; + + if (!txq) + continue; + + if (count >= limit) + break; + + for (t = 0; t < RTE_DIM(hn_stat_strings); t++) + snprintf(xstats_names[count++].name, + RTE_ETH_XSTATS_NAME_SIZE, + "tx_q%u_%s", i, hn_stat_strings[t].name); + } + + for (i = 0; i < dev->data->nb_rx_queues; i++) { + const struct hn_rx_queue *rxq = dev->data->rx_queues[i]; + + if (!rxq) + continue; + + if (count >= limit) + break; + + for (t = 0; t < RTE_DIM(hn_stat_strings); t++) + snprintf(xstats_names[count++].name, + RTE_ETH_XSTATS_NAME_SIZE, + "rx_q%u_%s", i, + hn_stat_strings[t].name); + } + + ret = hn_vf_xstats_get_names(dev, xstats_names + count, + limit - count); + if (ret < 0) + return ret; + + return count + ret; +} + +static int +hn_dev_xstats_get(struct rte_eth_dev *dev, + struct rte_eth_xstat *xstats, + unsigned int n) +{ + unsigned int i, t, count = 0; + const unsigned int nstats = hn_dev_xstats_count(dev); + const char *stats; + int ret; + + PMD_INIT_FUNC_TRACE(); + + if (n < nstats) + return nstats; + + for (i = 0; i < dev->data->nb_tx_queues; i++) { + const struct hn_tx_queue *txq = dev->data->tx_queues[i]; + + if (!txq) + continue; + + stats = (const char *)&txq->stats; + for (t = 0; t < RTE_DIM(hn_stat_strings); t++) + xstats[count++].value = *(const uint64_t *) + (stats + hn_stat_strings[t].offset); + } + + for (i = 0; i < dev->data->nb_rx_queues; i++) { + const struct hn_rx_queue *rxq = dev->data->rx_queues[i]; + + if (!rxq) + continue; + + stats = (const char *)&rxq->stats; + for (t = 0; t < RTE_DIM(hn_stat_strings); t++) + xstats[count++].value = *(const uint64_t *) + (stats + hn_stat_strings[t].offset); + } + + ret = hn_vf_xstats_get(dev, xstats + count, n - count); + if (ret < 0) + return ret; + + return count + ret; +} + +static int +hn_dev_start(struct rte_eth_dev *dev) +{ + struct hn_data *hv = dev->data->dev_private; + int error; + + PMD_INIT_FUNC_TRACE(); + + error = hn_rndis_set_rxfilter(hv, + NDIS_PACKET_TYPE_BROADCAST | + NDIS_PACKET_TYPE_ALL_MULTICAST | + NDIS_PACKET_TYPE_DIRECTED); + if (error) + return error; + + error = hn_vf_start(dev); + if (error) + hn_rndis_set_rxfilter(hv, 0); + + return error; +} + +static void +hn_dev_stop(struct rte_eth_dev *dev) +{ + struct hn_data *hv = dev->data->dev_private; + + PMD_INIT_FUNC_TRACE(); + + hn_rndis_set_rxfilter(hv, 0); + hn_vf_stop(dev); +} + +static void +hn_dev_close(struct rte_eth_dev *dev) +{ + PMD_INIT_FUNC_TRACE(); + + hn_vf_close(dev); + hn_dev_free_queues(dev); +} + +static const struct eth_dev_ops hn_eth_dev_ops = { + .dev_configure = hn_dev_configure, + .dev_start = hn_dev_start, + .dev_stop = hn_dev_stop, + .dev_close = hn_dev_close, + .dev_infos_get = hn_dev_info_get, + .dev_supported_ptypes_get = hn_vf_supported_ptypes, + .promiscuous_enable = hn_dev_promiscuous_enable, + .promiscuous_disable = hn_dev_promiscuous_disable, + .allmulticast_enable = hn_dev_allmulticast_enable, + .allmulticast_disable = hn_dev_allmulticast_disable, + .set_mc_addr_list = hn_dev_mc_addr_list, + .tx_queue_setup = hn_dev_tx_queue_setup, + .tx_queue_release = hn_dev_tx_queue_release, + .tx_done_cleanup = hn_dev_tx_done_cleanup, + .rx_queue_setup = hn_dev_rx_queue_setup, + .rx_queue_release = hn_dev_rx_queue_release, + .link_update = hn_dev_link_update, + .stats_get = hn_dev_stats_get, + .stats_reset = hn_dev_stats_reset, + .xstats_get = hn_dev_xstats_get, + .xstats_get_names = hn_dev_xstats_get_names, + .xstats_reset = hn_dev_xstats_reset, +}; + +/* + * Setup connection between PMD and kernel. + */ +static int +hn_attach(struct hn_data *hv, unsigned int mtu) +{ + int error; + + /* Attach NVS */ + error = hn_nvs_attach(hv, mtu); + if (error) + goto failed_nvs; + + /* Attach RNDIS */ + error = hn_rndis_attach(hv); + if (error) + goto failed_rndis; + + /* + * NOTE: + * Under certain conditions on certain versions of Hyper-V, + * the RNDIS rxfilter is _not_ zero on the hypervisor side + * after the successful RNDIS initialization. + */ + hn_rndis_set_rxfilter(hv, NDIS_PACKET_TYPE_NONE); + return 0; +failed_rndis: + hn_nvs_detach(hv); +failed_nvs: + return error; +} + +static void +hn_detach(struct hn_data *hv) +{ + hn_nvs_detach(hv); + hn_rndis_detach(hv); +} + +static int +eth_hn_dev_init(struct rte_eth_dev *eth_dev) +{ + struct hn_data *hv = eth_dev->data->dev_private; + struct rte_device *device = eth_dev->device; + struct rte_vmbus_device *vmbus; + unsigned int rxr_cnt; + int err, max_chan; + + PMD_INIT_FUNC_TRACE(); + + vmbus = container_of(device, struct rte_vmbus_device, device); + eth_dev->dev_ops = &hn_eth_dev_ops; + eth_dev->tx_pkt_burst = &hn_xmit_pkts; + eth_dev->rx_pkt_burst = &hn_recv_pkts; + + /* + * for secondary processes, we don't initialize any further as primary + * has already done this work. + */ + if (rte_eal_process_type() != RTE_PROC_PRIMARY) + return 0; + + /* Since Hyper-V only supports one MAC address, just use local data */ + eth_dev->data->mac_addrs = &hv->mac_addr; + + hv->vmbus = vmbus; + hv->rxbuf_res = &vmbus->resource[HV_RECV_BUF_MAP]; + hv->chim_res = &vmbus->resource[HV_SEND_BUF_MAP]; + hv->port_id = eth_dev->data->port_id; + hv->latency = HN_CHAN_LATENCY_NS; + hv->max_queues = 1; + hv->vf_port = HN_INVALID_PORT; + + err = hn_parse_args(eth_dev); + if (err) + return err; + + strlcpy(hv->owner.name, eth_dev->device->name, + RTE_ETH_MAX_OWNER_NAME_LEN); + err = rte_eth_dev_owner_new(&hv->owner.id); + if (err) { + PMD_INIT_LOG(ERR, "Can not get owner id"); + return err; + } + + /* Initialize primary channel input for control operations */ + err = rte_vmbus_chan_open(vmbus, &hv->channels[0]); + if (err) + return err; + + rte_vmbus_set_latency(hv->vmbus, hv->channels[0], hv->latency); + + hv->primary = hn_rx_queue_alloc(hv, 0, + eth_dev->device->numa_node); + + if (!hv->primary) + return -ENOMEM; + + err = hn_attach(hv, ETHER_MTU); + if (err) + goto failed; + + err = hn_tx_pool_init(eth_dev); + if (err) + goto failed; + + err = hn_rndis_get_eaddr(hv, hv->mac_addr.addr_bytes); + if (err) + goto failed; + + /* Multi queue requires later versions of windows server */ + if (hv->nvs_ver < NVS_VERSION_5) + return 0; + + max_chan = rte_vmbus_max_channels(vmbus); + PMD_INIT_LOG(DEBUG, "VMBus max channels %d", max_chan); + if (max_chan <= 0) + goto failed; + + if (hn_rndis_query_rsscaps(hv, &rxr_cnt) != 0) + rxr_cnt = 1; + + hv->max_queues = RTE_MIN(rxr_cnt, (unsigned int)max_chan); + + /* If VF was reported but not added, do it now */ + if (hv->vf_present && !hn_vf_attached(hv)) { + PMD_INIT_LOG(DEBUG, "Adding VF device"); + + err = hn_vf_add(eth_dev, hv); + if (err) + hv->vf_present = 0; + } + + return 0; + +failed: + PMD_INIT_LOG(NOTICE, "device init failed"); + + hn_tx_pool_uninit(eth_dev); + hn_detach(hv); + return err; +} + +static int +eth_hn_dev_uninit(struct rte_eth_dev *eth_dev) +{ + struct hn_data *hv = eth_dev->data->dev_private; + + PMD_INIT_FUNC_TRACE(); + + if (rte_eal_process_type() != RTE_PROC_PRIMARY) + return 0; + + hn_dev_stop(eth_dev); + hn_dev_close(eth_dev); + + eth_dev->dev_ops = NULL; + eth_dev->tx_pkt_burst = NULL; + eth_dev->rx_pkt_burst = NULL; + + hn_detach(hv); + hn_tx_pool_uninit(eth_dev); + rte_vmbus_chan_close(hv->primary->chan); + rte_free(hv->primary); + rte_eth_dev_owner_delete(hv->owner.id); + + return 0; +} + +static int eth_hn_probe(struct rte_vmbus_driver *drv __rte_unused, + struct rte_vmbus_device *dev) +{ + struct rte_eth_dev *eth_dev; + int ret; + + PMD_INIT_FUNC_TRACE(); + + eth_dev = eth_dev_vmbus_allocate(dev, sizeof(struct hn_data)); + if (!eth_dev) + return -ENOMEM; + + ret = eth_hn_dev_init(eth_dev); + if (ret) + eth_dev_vmbus_release(eth_dev); + else + rte_eth_dev_probing_finish(eth_dev); + + return ret; +} + +static int eth_hn_remove(struct rte_vmbus_device *dev) +{ + struct rte_eth_dev *eth_dev; + int ret; + + PMD_INIT_FUNC_TRACE(); + + eth_dev = rte_eth_dev_allocated(dev->device.name); + if (!eth_dev) + return -ENODEV; + + ret = eth_hn_dev_uninit(eth_dev); + if (ret) + return ret; + + eth_dev_vmbus_release(eth_dev); + return 0; +} + +/* Network device GUID */ +static const rte_uuid_t hn_net_ids[] = { + /* f8615163-df3e-46c5-913f-f2d2f965ed0e */ + RTE_UUID_INIT(0xf8615163, 0xdf3e, 0x46c5, 0x913f, 0xf2d2f965ed0eULL), + { 0 } +}; + +static struct rte_vmbus_driver rte_netvsc_pmd = { + .id_table = hn_net_ids, + .probe = eth_hn_probe, + .remove = eth_hn_remove, +}; + +RTE_PMD_REGISTER_VMBUS(net_netvsc, rte_netvsc_pmd); +RTE_PMD_REGISTER_KMOD_DEP(net_netvsc, "* uio_hv_generic"); + +RTE_INIT(hn_init_log) +{ + hn_logtype_init = rte_log_register("pmd.net.netvsc.init"); + if (hn_logtype_init >= 0) + rte_log_set_level(hn_logtype_init, RTE_LOG_NOTICE); + hn_logtype_driver = rte_log_register("pmd.net.netvsc.driver"); + if (hn_logtype_driver >= 0) + rte_log_set_level(hn_logtype_driver, RTE_LOG_NOTICE); +} diff --git a/src/seastar/dpdk/drivers/net/netvsc/hn_logs.h b/src/seastar/dpdk/drivers/net/netvsc/hn_logs.h new file mode 100644 index 000000000..cddadef09 --- /dev/null +++ b/src/seastar/dpdk/drivers/net/netvsc/hn_logs.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ + +#ifndef _HN_LOGS_H_ +#define _HN_LOGS_H_ + +#include <rte_log.h> + +extern int hn_logtype_init; +extern int hn_logtype_driver; + +#define PMD_INIT_LOG(level, fmt, args...) \ + rte_log(RTE_LOG_ ## level, hn_logtype_init, "%s(): " fmt "\n",\ + __func__, ## args) +#define PMD_INIT_FUNC_TRACE() PMD_INIT_LOG(DEBUG, " >>") + +#ifdef RTE_LIBRTE_NETVSC_DEBUG_RX +#define PMD_RX_LOG(level, fmt, args...) \ + rte_log(RTE_LOG_ ## level, hn_logtype_driver, \ + "%s() rx: " fmt "\n", __func__, ## args) +#else +#define PMD_RX_LOG(level, fmt, args...) do { } while (0) +#endif + +#ifdef RTE_LIBRTE_NETVSC_DEBUG_TX +#define PMD_TX_LOG(level, fmt, args...) \ + rte_log(RTE_LOG_ ## level, hn_logtype_driver, \ + "%s() tx: " fmt "\n", __func__, ## args) +#else +#define PMD_TX_LOG(level, fmt, args...) do { } while (0) +#endif + +#define PMD_DRV_LOG(level, fmt, args...) \ + rte_log(RTE_LOG_ ## level, hn_logtype_driver, "%s(): " fmt "\n", \ + __func__, ## args) + +#endif /* _HN_LOGS_H_ */ diff --git a/src/seastar/dpdk/drivers/net/netvsc/hn_nvs.c b/src/seastar/dpdk/drivers/net/netvsc/hn_nvs.c new file mode 100644 index 000000000..d58770e04 --- /dev/null +++ b/src/seastar/dpdk/drivers/net/netvsc/hn_nvs.c @@ -0,0 +1,550 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2018 Microsoft Corp. + * Copyright (c) 2010-2012 Citrix Inc. + * Copyright (c) 2012 NetApp Inc. + * All rights reserved. + */ + +/* + * Network Virtualization Service. + */ + + +#include <stdint.h> +#include <string.h> +#include <stdio.h> +#include <errno.h> +#include <unistd.h> + +#include <rte_ethdev.h> +#include <rte_string_fns.h> +#include <rte_memzone.h> +#include <rte_malloc.h> +#include <rte_atomic.h> +#include <rte_branch_prediction.h> +#include <rte_ether.h> +#include <rte_common.h> +#include <rte_errno.h> +#include <rte_cycles.h> +#include <rte_memory.h> +#include <rte_eal.h> +#include <rte_dev.h> +#include <rte_bus_vmbus.h> + +#include "hn_logs.h" +#include "hn_var.h" +#include "hn_nvs.h" + +static const uint32_t hn_nvs_version[] = { + NVS_VERSION_61, + NVS_VERSION_6, + NVS_VERSION_5, + NVS_VERSION_4, + NVS_VERSION_2, + NVS_VERSION_1 +}; + +static int hn_nvs_req_send(struct hn_data *hv, + void *req, uint32_t reqlen) +{ + return rte_vmbus_chan_send(hn_primary_chan(hv), + VMBUS_CHANPKT_TYPE_INBAND, + req, reqlen, 0, + VMBUS_CHANPKT_FLAG_NONE, NULL); +} + +static int +hn_nvs_execute(struct hn_data *hv, + void *req, uint32_t reqlen, + void *resp, uint32_t resplen, + uint32_t type) +{ + struct vmbus_channel *chan = hn_primary_chan(hv); + char buffer[NVS_RESPSIZE_MAX]; + const struct hn_nvs_hdr *hdr; + uint32_t len; + int ret; + + /* Send request to ring buffer */ + ret = rte_vmbus_chan_send(chan, VMBUS_CHANPKT_TYPE_INBAND, + req, reqlen, 0, + VMBUS_CHANPKT_FLAG_RC, NULL); + + if (ret) { + PMD_DRV_LOG(ERR, "send request failed: %d", ret); + return ret; + } + + retry: + len = sizeof(buffer); + ret = rte_vmbus_chan_recv(chan, buffer, &len, NULL); + if (ret == -EAGAIN) { + rte_delay_us(HN_CHAN_INTERVAL_US); + goto retry; + } + + if (ret < 0) { + PMD_DRV_LOG(ERR, "recv response failed: %d", ret); + return ret; + } + + hdr = (struct hn_nvs_hdr *)buffer; + if (hdr->type != type) { + PMD_DRV_LOG(ERR, "unexpected NVS resp %#x, expect %#x", + hdr->type, type); + return -EINVAL; + } + + if (len < resplen) { + PMD_DRV_LOG(ERR, + "invalid NVS resp len %u (expect %u)", + len, resplen); + return -EINVAL; + } + + memcpy(resp, buffer, resplen); + + /* All pass! */ + return 0; +} + +static int +hn_nvs_doinit(struct hn_data *hv, uint32_t nvs_ver) +{ + struct hn_nvs_init init; + struct hn_nvs_init_resp resp; + uint32_t status; + int error; + + memset(&init, 0, sizeof(init)); + init.type = NVS_TYPE_INIT; + init.ver_min = nvs_ver; + init.ver_max = nvs_ver; + + error = hn_nvs_execute(hv, &init, sizeof(init), + &resp, sizeof(resp), + NVS_TYPE_INIT_RESP); + if (error) + return error; + + status = resp.status; + if (status != NVS_STATUS_OK) { + /* Not fatal, try other versions */ + PMD_INIT_LOG(DEBUG, "nvs init failed for ver 0x%x", + nvs_ver); + return -EINVAL; + } + + return 0; +} + +static int +hn_nvs_conn_rxbuf(struct hn_data *hv) +{ + struct hn_nvs_rxbuf_conn conn; + struct hn_nvs_rxbuf_connresp resp; + uint32_t status; + int error; + + /* Kernel has already setup RXBUF on primary channel. */ + + /* + * Connect RXBUF to NVS. + */ + conn.type = NVS_TYPE_RXBUF_CONN; + conn.gpadl = hv->rxbuf_res->phys_addr; + conn.sig = NVS_RXBUF_SIG; + PMD_DRV_LOG(DEBUG, "connect rxbuff va=%p gpad=%#" PRIx64, + hv->rxbuf_res->addr, + hv->rxbuf_res->phys_addr); + + error = hn_nvs_execute(hv, &conn, sizeof(conn), + &resp, sizeof(resp), + NVS_TYPE_RXBUF_CONNRESP); + if (error) { + PMD_DRV_LOG(ERR, + "exec nvs rxbuf conn failed: %d", + error); + return error; + } + + status = resp.status; + if (status != NVS_STATUS_OK) { + PMD_DRV_LOG(ERR, + "nvs rxbuf conn failed: %x", status); + return -EIO; + } + if (resp.nsect != 1) { + PMD_DRV_LOG(ERR, + "nvs rxbuf response num sections %u != 1", + resp.nsect); + return -EIO; + } + + PMD_DRV_LOG(INFO, + "receive buffer size %u count %u", + resp.nvs_sect[0].slotsz, + resp.nvs_sect[0].slotcnt); + hv->rxbuf_section_cnt = resp.nvs_sect[0].slotcnt; + + hv->rxbuf_info = rte_calloc("HN_RXBUF_INFO", hv->rxbuf_section_cnt, + sizeof(*hv->rxbuf_info), RTE_CACHE_LINE_SIZE); + if (!hv->rxbuf_info) { + PMD_DRV_LOG(ERR, + "could not allocate rxbuf info"); + return -ENOMEM; + } + + return 0; +} + +static void +hn_nvs_disconn_rxbuf(struct hn_data *hv) +{ + struct hn_nvs_rxbuf_disconn disconn; + int error; + + /* + * Disconnect RXBUF from NVS. + */ + memset(&disconn, 0, sizeof(disconn)); + disconn.type = NVS_TYPE_RXBUF_DISCONN; + disconn.sig = NVS_RXBUF_SIG; + + /* NOTE: No response. */ + error = hn_nvs_req_send(hv, &disconn, sizeof(disconn)); + if (error) { + PMD_DRV_LOG(ERR, + "send nvs rxbuf disconn failed: %d", + error); + } + + rte_free(hv->rxbuf_info); + /* + * Linger long enough for NVS to disconnect RXBUF. + */ + rte_delay_ms(200); +} + +static void +hn_nvs_disconn_chim(struct hn_data *hv) +{ + int error; + + if (hv->chim_cnt != 0) { + struct hn_nvs_chim_disconn disconn; + + /* Disconnect chimney sending buffer from NVS. */ + memset(&disconn, 0, sizeof(disconn)); + disconn.type = NVS_TYPE_CHIM_DISCONN; + disconn.sig = NVS_CHIM_SIG; + + /* NOTE: No response. */ + error = hn_nvs_req_send(hv, &disconn, sizeof(disconn)); + + if (error) { + PMD_DRV_LOG(ERR, + "send nvs chim disconn failed: %d", error); + } + + hv->chim_cnt = 0; + /* + * Linger long enough for NVS to disconnect chimney + * sending buffer. + */ + rte_delay_ms(200); + } +} + +static int +hn_nvs_conn_chim(struct hn_data *hv) +{ + struct hn_nvs_chim_conn chim; + struct hn_nvs_chim_connresp resp; + uint32_t sectsz; + unsigned long len = hv->chim_res->len; + int error; + + /* Connect chimney sending buffer to NVS */ + memset(&chim, 0, sizeof(chim)); + chim.type = NVS_TYPE_CHIM_CONN; + chim.gpadl = hv->chim_res->phys_addr; + chim.sig = NVS_CHIM_SIG; + PMD_DRV_LOG(DEBUG, "connect send buf va=%p gpad=%#" PRIx64, + hv->chim_res->addr, + hv->chim_res->phys_addr); + + error = hn_nvs_execute(hv, &chim, sizeof(chim), + &resp, sizeof(resp), + NVS_TYPE_CHIM_CONNRESP); + if (error) { + PMD_DRV_LOG(ERR, "exec nvs chim conn failed"); + return error; + } + + if (resp.status != NVS_STATUS_OK) { + PMD_DRV_LOG(ERR, "nvs chim conn failed: %x", + resp.status); + return -EIO; + } + + sectsz = resp.sectsz; + if (sectsz == 0 || sectsz & (sizeof(uint32_t) - 1)) { + /* Can't use chimney sending buffer; done! */ + PMD_DRV_LOG(NOTICE, + "invalid chimney sending buffer section size: %u", + sectsz); + error = -EINVAL; + goto cleanup; + } + + hv->chim_szmax = sectsz; + hv->chim_cnt = len / sectsz; + + PMD_DRV_LOG(INFO, "send buffer %lu section size:%u, count:%u", + len, hv->chim_szmax, hv->chim_cnt); + + /* Done! */ + return 0; + +cleanup: + hn_nvs_disconn_chim(hv); + return error; +} + +/* + * Configure MTU and enable VLAN. + */ +static int +hn_nvs_conf_ndis(struct hn_data *hv, unsigned int mtu) +{ + struct hn_nvs_ndis_conf conf; + int error; + + memset(&conf, 0, sizeof(conf)); + conf.type = NVS_TYPE_NDIS_CONF; + conf.mtu = mtu + ETHER_HDR_LEN; + conf.caps = NVS_NDIS_CONF_VLAN; + + /* enable SRIOV */ + if (hv->nvs_ver >= NVS_VERSION_5) + conf.caps |= NVS_NDIS_CONF_SRIOV; + + /* NOTE: No response. */ + error = hn_nvs_req_send(hv, &conf, sizeof(conf)); + if (error) { + PMD_DRV_LOG(ERR, + "send nvs ndis conf failed: %d", error); + return error; + } + + return 0; +} + +static int +hn_nvs_init_ndis(struct hn_data *hv) +{ + struct hn_nvs_ndis_init ndis; + int error; + + memset(&ndis, 0, sizeof(ndis)); + ndis.type = NVS_TYPE_NDIS_INIT; + ndis.ndis_major = NDIS_VERSION_MAJOR(hv->ndis_ver); + ndis.ndis_minor = NDIS_VERSION_MINOR(hv->ndis_ver); + + /* NOTE: No response. */ + error = hn_nvs_req_send(hv, &ndis, sizeof(ndis)); + if (error) + PMD_DRV_LOG(ERR, + "send nvs ndis init failed: %d", error); + + return error; +} + +static int +hn_nvs_init(struct hn_data *hv) +{ + unsigned int i; + int error; + + /* + * Find the supported NVS version and set NDIS version accordingly. + */ + for (i = 0; i < RTE_DIM(hn_nvs_version); ++i) { + error = hn_nvs_doinit(hv, hn_nvs_version[i]); + if (error) { + PMD_INIT_LOG(DEBUG, "version %#x error %d", + hn_nvs_version[i], error); + continue; + } + + hv->nvs_ver = hn_nvs_version[i]; + + /* Set NDIS version according to NVS version. */ + hv->ndis_ver = NDIS_VERSION_6_30; + if (hv->nvs_ver <= NVS_VERSION_4) + hv->ndis_ver = NDIS_VERSION_6_1; + + PMD_INIT_LOG(DEBUG, + "NVS version %#x, NDIS version %u.%u", + hv->nvs_ver, NDIS_VERSION_MAJOR(hv->ndis_ver), + NDIS_VERSION_MINOR(hv->ndis_ver)); + return 0; + } + + PMD_DRV_LOG(ERR, + "no NVS compatible version available"); + return -ENXIO; +} + +int +hn_nvs_attach(struct hn_data *hv, unsigned int mtu) +{ + int error; + + /* + * Initialize NVS. + */ + error = hn_nvs_init(hv); + if (error) + return error; + + /** Configure NDIS before initializing it. */ + if (hv->nvs_ver >= NVS_VERSION_2) { + error = hn_nvs_conf_ndis(hv, mtu); + if (error) + return error; + } + + /* + * Initialize NDIS. + */ + error = hn_nvs_init_ndis(hv); + if (error) + return error; + + /* + * Connect RXBUF. + */ + error = hn_nvs_conn_rxbuf(hv); + if (error) + return error; + + /* + * Connect chimney sending buffer. + */ + error = hn_nvs_conn_chim(hv); + if (error) { + hn_nvs_disconn_rxbuf(hv); + return error; + } + + return 0; +} + +void +hn_nvs_detach(struct hn_data *hv __rte_unused) +{ + PMD_INIT_FUNC_TRACE(); + + /* NOTE: there are no requests to stop the NVS. */ + hn_nvs_disconn_rxbuf(hv); + hn_nvs_disconn_chim(hv); +} + +/* + * Ack the consumed RXBUF associated w/ this channel packet, + * so that this RXBUF can be recycled by the hypervisor. + */ +void +hn_nvs_ack_rxbuf(struct vmbus_channel *chan, uint64_t tid) +{ + unsigned int retries = 0; + struct hn_nvs_rndis_ack ack = { + .type = NVS_TYPE_RNDIS_ACK, + .status = NVS_STATUS_OK, + }; + int error; + + PMD_RX_LOG(DEBUG, "ack RX id %" PRIu64, tid); + + again: + error = rte_vmbus_chan_send(chan, VMBUS_CHANPKT_TYPE_COMP, + &ack, sizeof(ack), tid, + VMBUS_CHANPKT_FLAG_NONE, NULL); + + if (error == 0) + return; + + if (error == -EAGAIN) { + /* + * NOTE: + * This should _not_ happen in real world, since the + * consumption of the TX bufring from the TX path is + * controlled. + */ + PMD_RX_LOG(NOTICE, "RXBUF ack retry"); + if (++retries < 10) { + rte_delay_ms(1); + goto again; + } + } + /* RXBUF leaks! */ + PMD_DRV_LOG(ERR, "RXBUF ack failed"); +} + +int +hn_nvs_alloc_subchans(struct hn_data *hv, uint32_t *nsubch) +{ + struct hn_nvs_subch_req req; + struct hn_nvs_subch_resp resp; + int error; + + memset(&req, 0, sizeof(req)); + req.type = NVS_TYPE_SUBCH_REQ; + req.op = NVS_SUBCH_OP_ALLOC; + req.nsubch = *nsubch; + + error = hn_nvs_execute(hv, &req, sizeof(req), + &resp, sizeof(resp), + NVS_TYPE_SUBCH_RESP); + if (error) + return error; + + if (resp.status != NVS_STATUS_OK) { + PMD_INIT_LOG(ERR, + "nvs subch alloc failed: %#x", + resp.status); + return -EIO; + } + + if (resp.nsubch > *nsubch) { + PMD_INIT_LOG(NOTICE, + "%u subchans are allocated, requested %u", + resp.nsubch, *nsubch); + } + *nsubch = resp.nsubch; + + return 0; +} + +void +hn_nvs_set_datapath(struct hn_data *hv, uint32_t path) +{ + struct hn_nvs_datapath dp; + int error; + + PMD_DRV_LOG(DEBUG, "set datapath %s", + path ? "VF" : "Synthetic"); + + memset(&dp, 0, sizeof(dp)); + dp.type = NVS_TYPE_SET_DATAPATH; + dp.active_path = path; + + error = hn_nvs_req_send(hv, &dp, sizeof(dp)); + if (error) { + PMD_DRV_LOG(ERR, + "send set datapath failed: %d", + error); + } +} diff --git a/src/seastar/dpdk/drivers/net/netvsc/hn_nvs.h b/src/seastar/dpdk/drivers/net/netvsc/hn_nvs.h new file mode 100644 index 000000000..2563fd8d8 --- /dev/null +++ b/src/seastar/dpdk/drivers/net/netvsc/hn_nvs.h @@ -0,0 +1,238 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2018 Microsoft Corp. + * All rights reserved. + */ + +/* + * The indirection table message is the largest message + * received from host, and that is 112 bytes. + */ +#define NVS_RESPSIZE_MAX 256 + +/* + * NDIS protocol version numbers + */ +#define NDIS_VERSION_6_1 0x00060001 +#define NDIS_VERSION_6_20 0x00060014 +#define NDIS_VERSION_6_30 0x0006001e +#define NDIS_VERSION_MAJOR(ver) (((ver) & 0xffff0000) >> 16) +#define NDIS_VERSION_MINOR(ver) ((ver) & 0xffff) + +/* + * NVS versions. + */ +#define NVS_VERSION_1 0x00002 +#define NVS_VERSION_2 0x30002 +#define NVS_VERSION_4 0x40000 +#define NVS_VERSION_5 0x50000 +#define NVS_VERSION_6 0x60000 +#define NVS_VERSION_61 0x60001 + +#define NVS_RXBUF_SIG 0xcafe +#define NVS_CHIM_SIG 0xface + +#define NVS_CHIM_IDX_INVALID 0xffffffff + +#define NVS_RNDIS_MTYPE_DATA 0 +#define NVS_RNDIS_MTYPE_CTRL 1 + +/* + * NVS message transacion status codes. + */ +#define NVS_STATUS_OK 1 +#define NVS_STATUS_FAILED 2 + +/* + * NVS request/response message types. + */ +#define NVS_TYPE_INIT 1 +#define NVS_TYPE_INIT_RESP 2 + +#define NVS_TYPE_NDIS_INIT 100 +#define NVS_TYPE_RXBUF_CONN 101 +#define NVS_TYPE_RXBUF_CONNRESP 102 +#define NVS_TYPE_RXBUF_DISCONN 103 +#define NVS_TYPE_CHIM_CONN 104 +#define NVS_TYPE_CHIM_CONNRESP 105 +#define NVS_TYPE_CHIM_DISCONN 106 +#define NVS_TYPE_RNDIS 107 +#define NVS_TYPE_RNDIS_ACK 108 + +#define NVS_TYPE_NDIS_CONF 125 +#define NVS_TYPE_VFASSOC_NOTE 128 /* notification */ +#define NVS_TYPE_SET_DATAPATH 129 +#define NVS_TYPE_SUBCH_REQ 133 +#define NVS_TYPE_SUBCH_RESP 133 /* same as SUBCH_REQ */ +#define NVS_TYPE_TXTBL_NOTE 134 /* notification */ + + +/* NVS message common header */ +struct hn_nvs_hdr { + uint32_t type; +} __rte_packed; + +struct hn_nvs_init { + uint32_t type; /* NVS_TYPE_INIT */ + uint32_t ver_min; + uint32_t ver_max; + uint8_t rsvd[28]; +} __rte_packed; + +struct hn_nvs_init_resp { + uint32_t type; /* NVS_TYPE_INIT_RESP */ + uint32_t ver; /* deprecated */ + uint32_t rsvd; + uint32_t status; /* NVS_STATUS_ */ +} __rte_packed; + +/* No response */ +struct hn_nvs_ndis_conf { + uint32_t type; /* NVS_TYPE_NDIS_CONF */ + uint32_t mtu; + uint32_t rsvd; + uint64_t caps; /* NVS_NDIS_CONF_ */ + uint8_t rsvd1[20]; +} __rte_packed; + +#define NVS_NDIS_CONF_SRIOV 0x0004 +#define NVS_NDIS_CONF_VLAN 0x0008 + +/* No response */ +struct hn_nvs_ndis_init { + uint32_t type; /* NVS_TYPE_NDIS_INIT */ + uint32_t ndis_major; /* NDIS_VERSION_MAJOR_ */ + uint32_t ndis_minor; /* NDIS_VERSION_MINOR_ */ + uint8_t rsvd[28]; +} __rte_packed; + +struct hn_nvs_vf_association { + uint32_t type; /* NVS_TYPE_VFASSOC_NOTE */ + uint32_t allocated; + uint32_t serial; +} __rte_packed; + +#define NVS_DATAPATH_SYNTHETIC 0 +#define NVS_DATAPATH_VF 1 + +/* No response */ +struct hn_nvs_datapath { + uint32_t type; /* NVS_TYPE_SET_DATAPATH */ + uint32_t active_path;/* NVS_DATAPATH_* */ + uint8_t rsvd[32]; +} __rte_packed; + +struct hn_nvs_rxbuf_conn { + uint32_t type; /* NVS_TYPE_RXBUF_CONN */ + uint32_t gpadl; /* RXBUF vmbus GPADL */ + uint16_t sig; /* NVS_RXBUF_SIG */ + uint8_t rsvd[30]; +} __rte_packed; + +struct hn_nvs_rxbuf_sect { + uint32_t start; + uint32_t slotsz; + uint32_t slotcnt; + uint32_t end; +} __rte_packed; + +struct hn_nvs_rxbuf_connresp { + uint32_t type; /* NVS_TYPE_RXBUF_CONNRESP */ + uint32_t status; /* NVS_STATUS_ */ + uint32_t nsect; /* # of elem in nvs_sect */ + struct hn_nvs_rxbuf_sect nvs_sect[1]; +} __rte_packed; + +/* No response */ +struct hn_nvs_rxbuf_disconn { + uint32_t type; /* NVS_TYPE_RXBUF_DISCONN */ + uint16_t sig; /* NVS_RXBUF_SIG */ + uint8_t rsvd[34]; +} __rte_packed; + +struct hn_nvs_chim_conn { + uint32_t type; /* NVS_TYPE_CHIM_CONN */ + uint32_t gpadl; /* chimney buf vmbus GPADL */ + uint16_t sig; /* NDIS_NVS_CHIM_SIG */ + uint8_t rsvd[30]; +} __rte_packed; + +struct hn_nvs_chim_connresp { + uint32_t type; /* NVS_TYPE_CHIM_CONNRESP */ + uint32_t status; /* NVS_STATUS_ */ + uint32_t sectsz; /* section size */ +} __rte_packed; + +/* No response */ +struct hn_nvs_chim_disconn { + uint32_t type; /* NVS_TYPE_CHIM_DISCONN */ + uint16_t sig; /* NVS_CHIM_SIG */ + uint8_t rsvd[34]; +} __rte_packed; + +#define NVS_SUBCH_OP_ALLOC 1 + +struct hn_nvs_subch_req { + uint32_t type; /* NVS_TYPE_SUBCH_REQ */ + uint32_t op; /* NVS_SUBCH_OP_ */ + uint32_t nsubch; + uint8_t rsvd[28]; +} __rte_packed; + +struct hn_nvs_subch_resp { + uint32_t type; /* NVS_TYPE_SUBCH_RESP */ + uint32_t status; /* NVS_STATUS_ */ + uint32_t nsubch; + uint8_t rsvd[28]; +} __rte_packed; + +struct hn_nvs_rndis { + uint32_t type; /* NVS_TYPE_RNDIS */ + uint32_t rndis_mtype;/* NVS_RNDIS_MTYPE_ */ + /* + * Chimney sending buffer index and size. + * + * NOTE: + * If nvs_chim_idx is set to NVS_CHIM_IDX_INVALID + * and nvs_chim_sz is set to 0, then chimney sending + * buffer is _not_ used by this RNDIS message. + */ + uint32_t chim_idx; + uint32_t chim_sz; + uint8_t rsvd[24]; +} __rte_packed; + +struct hn_nvs_rndis_ack { + uint32_t type; /* NVS_TYPE_RNDIS_ACK */ + uint32_t status; /* NVS_STATUS_ */ + uint8_t rsvd[32]; +} __rte_packed; + + +int hn_nvs_attach(struct hn_data *hv, unsigned int mtu); +void hn_nvs_detach(struct hn_data *hv); +void hn_nvs_ack_rxbuf(struct vmbus_channel *chan, uint64_t tid); +int hn_nvs_alloc_subchans(struct hn_data *hv, uint32_t *nsubch); +void hn_nvs_set_datapath(struct hn_data *hv, uint32_t path); +void hn_nvs_handle_vfassoc(struct rte_eth_dev *dev, + const struct vmbus_chanpkt_hdr *hdr, + const void *data); + +static inline int +hn_nvs_send(struct vmbus_channel *chan, uint16_t flags, + void *nvs_msg, int nvs_msglen, uintptr_t sndc, + bool *need_sig) +{ + return rte_vmbus_chan_send(chan, VMBUS_CHANPKT_TYPE_INBAND, + nvs_msg, nvs_msglen, (uint64_t)sndc, + flags, need_sig); +} + +static inline int +hn_nvs_send_sglist(struct vmbus_channel *chan, + struct vmbus_gpa sg[], unsigned int sglen, + void *nvs_msg, int nvs_msglen, + uintptr_t sndc, bool *need_sig) +{ + return rte_vmbus_chan_send_sglist(chan, sg, sglen, nvs_msg, nvs_msglen, + (uint64_t)sndc, need_sig); +} diff --git a/src/seastar/dpdk/drivers/net/netvsc/hn_rndis.c b/src/seastar/dpdk/drivers/net/netvsc/hn_rndis.c new file mode 100644 index 000000000..0134ecb67 --- /dev/null +++ b/src/seastar/dpdk/drivers/net/netvsc/hn_rndis.c @@ -0,0 +1,1134 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2009-2018 Microsoft Corp. + * Copyright (c) 2010-2012 Citrix Inc. + * Copyright (c) 2012 NetApp Inc. + * All rights reserved. + */ + +#include <stdint.h> +#include <string.h> +#include <stdio.h> +#include <errno.h> +#include <unistd.h> + +#include <rte_ethdev_driver.h> +#include <rte_ethdev.h> +#include <rte_string_fns.h> +#include <rte_memzone.h> +#include <rte_malloc.h> +#include <rte_atomic.h> +#include <rte_branch_prediction.h> +#include <rte_ether.h> +#include <rte_common.h> +#include <rte_errno.h> +#include <rte_cycles.h> +#include <rte_memory.h> +#include <rte_eal.h> +#include <rte_dev.h> +#include <rte_bus_vmbus.h> + +#include "hn_logs.h" +#include "hn_var.h" +#include "hn_nvs.h" +#include "hn_rndis.h" +#include "ndis.h" + +#define HN_RNDIS_XFER_SIZE 0x4000 + +#define HN_NDIS_TXCSUM_CAP_IP4 \ + (NDIS_TXCSUM_CAP_IP4 | NDIS_TXCSUM_CAP_IP4OPT) +#define HN_NDIS_TXCSUM_CAP_TCP4 \ + (NDIS_TXCSUM_CAP_TCP4 | NDIS_TXCSUM_CAP_TCP4OPT) +#define HN_NDIS_TXCSUM_CAP_TCP6 \ + (NDIS_TXCSUM_CAP_TCP6 | NDIS_TXCSUM_CAP_TCP6OPT | \ + NDIS_TXCSUM_CAP_IP6EXT) +#define HN_NDIS_TXCSUM_CAP_UDP6 \ + (NDIS_TXCSUM_CAP_UDP6 | NDIS_TXCSUM_CAP_IP6EXT) +#define HN_NDIS_LSOV2_CAP_IP6 \ + (NDIS_LSOV2_CAP_IP6EXT | NDIS_LSOV2_CAP_TCP6OPT) + +/* Get unique request id */ +static inline uint32_t +hn_rndis_rid(struct hn_data *hv) +{ + uint32_t rid; + + do { + rid = rte_atomic32_add_return(&hv->rndis_req_id, 1); + } while (rid == 0); + + return rid; +} + +static void *hn_rndis_alloc(struct hn_data *hv, size_t size) +{ + return rte_zmalloc_socket("RNDIS", size, PAGE_SIZE, + hv->vmbus->device.numa_node); +} + +#ifdef RTE_LIBRTE_NETVSC_DEBUG_DUMP +void hn_rndis_dump(const void *buf) +{ + const union { + struct rndis_msghdr hdr; + struct rndis_packet_msg pkt; + struct rndis_init_req init_request; + struct rndis_init_comp init_complete; + struct rndis_halt_req halt; + struct rndis_query_req query_request; + struct rndis_query_comp query_complete; + struct rndis_set_req set_request; + struct rndis_set_comp set_complete; + struct rndis_reset_req reset_request; + struct rndis_reset_comp reset_complete; + struct rndis_keepalive_req keepalive_request; + struct rndis_keepalive_comp keepalive_complete; + struct rndis_status_msg indicate_status; + } *rndis_msg = buf; + + switch (rndis_msg->hdr.type) { + case RNDIS_PACKET_MSG: { + const struct rndis_pktinfo *ppi; + unsigned int ppi_len; + + rte_log(RTE_LOG_DEBUG, hn_logtype_driver, + "RNDIS_MSG_PACKET (len %u, data %u:%u, # oob %u %u:%u, pkt %u:%u)\n", + rndis_msg->pkt.len, + rndis_msg->pkt.dataoffset, + rndis_msg->pkt.datalen, + rndis_msg->pkt.oobdataelements, + rndis_msg->pkt.oobdataoffset, + rndis_msg->pkt.oobdatalen, + rndis_msg->pkt.pktinfooffset, + rndis_msg->pkt.pktinfolen); + + ppi = (const struct rndis_pktinfo *) + ((const char *)buf + + RNDIS_PACKET_MSG_OFFSET_ABS(rndis_msg->pkt.pktinfooffset)); + + ppi_len = rndis_msg->pkt.pktinfolen; + while (ppi_len > 0) { + const void *ppi_data; + + ppi_data = ppi->data; + + rte_log(RTE_LOG_DEBUG, hn_logtype_driver, + " PPI (size %u, type %u, offs %u data %#x)\n", + ppi->size, ppi->type, ppi->offset, + *(const uint32_t *)ppi_data); + if (ppi->size == 0) + break; + ppi_len -= ppi->size; + ppi = (const struct rndis_pktinfo *) + ((const char *)ppi + ppi->size); + } + break; + } + case RNDIS_INITIALIZE_MSG: + rte_log(RTE_LOG_DEBUG, hn_logtype_driver, + "RNDIS_MSG_INIT (len %u id %#x, ver %u.%u max xfer %u)\n", + rndis_msg->init_request.len, + rndis_msg->init_request.rid, + rndis_msg->init_request.ver_major, + rndis_msg->init_request.ver_minor, + rndis_msg->init_request.max_xfersz); + break; + + case RNDIS_INITIALIZE_CMPLT: + rte_log(RTE_LOG_DEBUG, hn_logtype_driver, + "RNDIS_MSG_INIT_C (len %u, id %#x, status 0x%x, vers %u.%u, " + "flags %d, max xfer %u, max pkts %u, aligned %u)\n", + rndis_msg->init_complete.len, + rndis_msg->init_complete.rid, + rndis_msg->init_complete.status, + rndis_msg->init_complete.ver_major, + rndis_msg->init_complete.ver_minor, + rndis_msg->init_complete.devflags, + rndis_msg->init_complete.pktmaxsz, + rndis_msg->init_complete.pktmaxcnt, + rndis_msg->init_complete.align); + break; + + case RNDIS_HALT_MSG: + rte_log(RTE_LOG_DEBUG, hn_logtype_driver, + "RNDIS_HALT (len %u id %#x)\n", + rndis_msg->halt.len, rndis_msg->halt.rid); + break; + + case RNDIS_QUERY_MSG: + rte_log(RTE_LOG_DEBUG, hn_logtype_driver, + "RNDIS_QUERY (len %u, id %#x, oid %#x, info %u:%u)\n", + rndis_msg->query_request.len, + rndis_msg->query_request.rid, + rndis_msg->query_request.oid, + rndis_msg->query_request.infobuflen, + rndis_msg->query_request.infobufoffset); + break; + + case RNDIS_QUERY_CMPLT: + rte_log(RTE_LOG_DEBUG, hn_logtype_driver, + "RNDIS_MSG_QUERY_C (len %u, id %#x, status 0x%x, buf %u:%u)\n", + rndis_msg->query_complete.len, + rndis_msg->query_complete.rid, + rndis_msg->query_complete.status, + rndis_msg->query_complete.infobuflen, + rndis_msg->query_complete.infobufoffset); + break; + + case RNDIS_SET_MSG: + rte_log(RTE_LOG_DEBUG, hn_logtype_driver, + "RNDIS_SET (len %u, id %#x, oid %#x, info %u:%u)\n", + rndis_msg->set_request.len, + rndis_msg->set_request.rid, + rndis_msg->set_request.oid, + rndis_msg->set_request.infobuflen, + rndis_msg->set_request.infobufoffset); + break; + + case RNDIS_SET_CMPLT: + rte_log(RTE_LOG_DEBUG, hn_logtype_driver, + "RNDIS_MSG_SET_C (len %u, id 0x%x, status 0x%x)\n", + rndis_msg->set_complete.len, + rndis_msg->set_complete.rid, + rndis_msg->set_complete.status); + break; + + case RNDIS_INDICATE_STATUS_MSG: + rte_log(RTE_LOG_DEBUG, hn_logtype_driver, + "RNDIS_MSG_INDICATE (len %u, status %#x, buf len %u, buf offset %u)\n", + rndis_msg->indicate_status.len, + rndis_msg->indicate_status.status, + rndis_msg->indicate_status.stbuflen, + rndis_msg->indicate_status.stbufoffset); + break; + + case RNDIS_RESET_MSG: + rte_log(RTE_LOG_DEBUG, hn_logtype_driver, + "RNDIS_RESET (len %u, id %#x)\n", + rndis_msg->reset_request.len, + rndis_msg->reset_request.rid); + break; + + case RNDIS_RESET_CMPLT: + rte_log(RTE_LOG_DEBUG, hn_logtype_driver, + "RNDIS_RESET_C (len %u, status %#x address %#x)\n", + rndis_msg->reset_complete.len, + rndis_msg->reset_complete.status, + rndis_msg->reset_complete.adrreset); + break; + + case RNDIS_KEEPALIVE_MSG: + rte_log(RTE_LOG_DEBUG, hn_logtype_driver, + "RNDIS_KEEPALIVE (len %u, id %#x)\n", + rndis_msg->keepalive_request.len, + rndis_msg->keepalive_request.rid); + break; + + case RNDIS_KEEPALIVE_CMPLT: + rte_log(RTE_LOG_DEBUG, hn_logtype_driver, + "RNDIS_KEEPALIVE_C (len %u, id %#x address %#x)\n", + rndis_msg->keepalive_complete.len, + rndis_msg->keepalive_complete.rid, + rndis_msg->keepalive_complete.status); + break; + + default: + rte_log(RTE_LOG_DEBUG, hn_logtype_driver, + "RNDIS type %#x len %u\n", + rndis_msg->hdr.type, + rndis_msg->hdr.len); + break; + } +} +#endif + +static int hn_nvs_send_rndis_ctrl(struct vmbus_channel *chan, + const void *req, uint32_t reqlen) + +{ + struct hn_nvs_rndis nvs_rndis = { + .type = NVS_TYPE_RNDIS, + .rndis_mtype = NVS_RNDIS_MTYPE_CTRL, + .chim_idx = NVS_CHIM_IDX_INVALID, + .chim_sz = 0 + }; + struct vmbus_gpa sg; + rte_iova_t addr; + + addr = rte_malloc_virt2iova(req); + if (unlikely(addr == RTE_BAD_IOVA)) { + PMD_DRV_LOG(ERR, "RNDIS send request can not get iova"); + return -EINVAL; + } + + if (unlikely(reqlen > PAGE_SIZE)) { + PMD_DRV_LOG(ERR, "RNDIS request %u greater than page size", + reqlen); + return -EINVAL; + } + + sg.page = addr / PAGE_SIZE; + sg.ofs = addr & PAGE_MASK; + sg.len = reqlen; + + if (sg.ofs + reqlen > PAGE_SIZE) { + PMD_DRV_LOG(ERR, "RNDIS request crosses page bounary"); + return -EINVAL; + } + + hn_rndis_dump(req); + + return hn_nvs_send_sglist(chan, &sg, 1, + &nvs_rndis, sizeof(nvs_rndis), 0U, NULL); +} + +void hn_rndis_link_status(struct rte_eth_dev *dev, const void *msg) +{ + const struct rndis_status_msg *indicate = msg; + + hn_rndis_dump(msg); + + PMD_DRV_LOG(DEBUG, "link status %#x", indicate->status); + + switch (indicate->status) { + case RNDIS_STATUS_NETWORK_CHANGE: + case RNDIS_STATUS_TASK_OFFLOAD_CURRENT_CONFIG: + /* ignore not in DPDK API */ + break; + + case RNDIS_STATUS_LINK_SPEED_CHANGE: + case RNDIS_STATUS_MEDIA_CONNECT: + case RNDIS_STATUS_MEDIA_DISCONNECT: + if (dev->data->dev_conf.intr_conf.lsc && + hn_dev_link_update(dev, 0) == 0) + _rte_eth_dev_callback_process(dev, + RTE_ETH_EVENT_INTR_LSC, + NULL); + break; + default: + PMD_DRV_LOG(NOTICE, "unknown RNDIS indication: %#x", + indicate->status); + } +} + +/* Callback from hn_process_events when response is visible */ +void hn_rndis_receive_response(struct hn_data *hv, + const void *data, uint32_t len) +{ + const struct rndis_init_comp *hdr = data; + + hn_rndis_dump(data); + + if (len < sizeof(3 * sizeof(uint32_t))) { + PMD_DRV_LOG(ERR, + "missing RNDIS header %u", len); + return; + } + + if (len < hdr->len) { + PMD_DRV_LOG(ERR, + "truncated RNDIS response %u", len); + return; + } + + if (len > sizeof(hv->rndis_resp)) { + PMD_DRV_LOG(NOTICE, + "RNDIS response exceeds buffer"); + len = sizeof(hv->rndis_resp); + } + + if (hdr->rid == 0) { + PMD_DRV_LOG(NOTICE, + "RNDIS response id zero!"); + } + + memcpy(hv->rndis_resp, data, len); + + /* make sure response copied before update */ + rte_smp_wmb(); + + if (rte_atomic32_cmpset(&hv->rndis_pending, hdr->rid, 0) == 0) { + PMD_DRV_LOG(ERR, + "received id %#x pending id %#x", + hdr->rid, (uint32_t)hv->rndis_pending); + } +} + +/* Do request/response transaction */ +static int hn_rndis_exec1(struct hn_data *hv, + const void *req, uint32_t reqlen, + void *comp, uint32_t comp_len) +{ + const struct rndis_halt_req *hdr = req; + uint32_t rid = hdr->rid; + struct vmbus_channel *chan = hn_primary_chan(hv); + int error; + + if (comp_len > sizeof(hv->rndis_resp)) { + PMD_DRV_LOG(ERR, + "Expected completion size %u exceeds buffer %zu", + comp_len, sizeof(hv->rndis_resp)); + return -EIO; + } + + if (comp != NULL && + rte_atomic32_cmpset(&hv->rndis_pending, 0, rid) == 0) { + PMD_DRV_LOG(ERR, + "Request already pending"); + return -EBUSY; + } + + error = hn_nvs_send_rndis_ctrl(chan, req, reqlen); + if (error) { + PMD_DRV_LOG(ERR, "RNDIS ctrl send failed: %d", error); + return error; + } + + if (comp) { + /* Poll primary channel until response received */ + while (hv->rndis_pending == rid) + hn_process_events(hv, 0, 1); + + memcpy(comp, hv->rndis_resp, comp_len); + } + + return 0; +} + +/* Do transaction and validate response */ +static int hn_rndis_execute(struct hn_data *hv, uint32_t rid, + const void *req, uint32_t reqlen, + void *comp, uint32_t comp_len, uint32_t comp_type) +{ + const struct rndis_comp_hdr *hdr = comp; + int ret; + + memset(comp, 0, comp_len); + + ret = hn_rndis_exec1(hv, req, reqlen, comp, comp_len); + if (ret < 0) + return ret; + /* + * Check this RNDIS complete message. + */ + if (unlikely(hdr->type != comp_type)) { + PMD_DRV_LOG(ERR, + "unexpected RNDIS response complete %#x expect %#x", + hdr->type, comp_type); + + return -ENXIO; + } + if (unlikely(hdr->rid != rid)) { + PMD_DRV_LOG(ERR, + "RNDIS comp rid mismatch %#x, expect %#x", + hdr->rid, rid); + return -EINVAL; + } + + /* All pass! */ + return 0; +} + +static int +hn_rndis_query(struct hn_data *hv, uint32_t oid, + const void *idata, uint32_t idlen, + void *odata, uint32_t odlen) +{ + struct rndis_query_req *req; + struct rndis_query_comp *comp; + uint32_t reqlen, comp_len; + int error = -EIO; + unsigned int ofs; + uint32_t rid; + + reqlen = sizeof(*req) + idlen; + req = hn_rndis_alloc(hv, reqlen); + if (req == NULL) + return -ENOMEM; + + comp_len = sizeof(*comp) + odlen; + comp = rte_zmalloc("QUERY", comp_len, PAGE_SIZE); + if (!comp) { + error = -ENOMEM; + goto done; + } + comp->status = RNDIS_STATUS_PENDING; + + rid = hn_rndis_rid(hv); + + req->type = RNDIS_QUERY_MSG; + req->len = reqlen; + req->rid = rid; + req->oid = oid; + req->infobufoffset = RNDIS_QUERY_REQ_INFOBUFOFFSET; + req->infobuflen = idlen; + + /* Input data immediately follows RNDIS query. */ + memcpy(req + 1, idata, idlen); + + error = hn_rndis_execute(hv, rid, req, reqlen, + comp, comp_len, RNDIS_QUERY_CMPLT); + + if (error) + goto done; + + if (comp->status != RNDIS_STATUS_SUCCESS) { + PMD_DRV_LOG(ERR, "RNDIS query 0x%08x failed: status 0x%08x", + oid, comp->status); + error = -EINVAL; + goto done; + } + + if (comp->infobuflen == 0 || comp->infobufoffset == 0) { + /* No output data! */ + PMD_DRV_LOG(ERR, "RNDIS query 0x%08x, no data", oid); + error = 0; + goto done; + } + + /* + * Check output data length and offset. + */ + /* ofs is the offset from the beginning of comp. */ + ofs = RNDIS_QUERY_COMP_INFOBUFOFFSET_ABS(comp->infobufoffset); + if (ofs < sizeof(*comp) || ofs + comp->infobuflen > comp_len) { + PMD_DRV_LOG(ERR, "RNDIS query invalid comp ib off/len, %u/%u", + comp->infobufoffset, comp->infobuflen); + error = -EINVAL; + goto done; + } + + /* Save output data. */ + if (comp->infobuflen < odlen) + odlen = comp->infobuflen; + + /* ofs is the offset from the beginning of comp. */ + memcpy(odata, (const char *)comp + ofs, odlen); + + error = 0; +done: + rte_free(comp); + rte_free(req); + return error; +} + +static int +hn_rndis_halt(struct hn_data *hv) +{ + struct rndis_halt_req *halt; + + halt = hn_rndis_alloc(hv, sizeof(*halt)); + if (halt == NULL) + return -ENOMEM; + + halt->type = RNDIS_HALT_MSG; + halt->len = sizeof(*halt); + halt->rid = hn_rndis_rid(hv); + + /* No RNDIS completion; rely on NVS message send completion */ + hn_rndis_exec1(hv, halt, sizeof(*halt), NULL, 0); + + rte_free(halt); + + PMD_INIT_LOG(DEBUG, "RNDIS halt done"); + return 0; +} + +static int +hn_rndis_query_hwcaps(struct hn_data *hv, struct ndis_offload *caps) +{ + struct ndis_offload in; + uint32_t caps_len, size; + int error; + + memset(caps, 0, sizeof(*caps)); + memset(&in, 0, sizeof(in)); + in.ndis_hdr.ndis_type = NDIS_OBJTYPE_OFFLOAD; + + if (hv->ndis_ver >= NDIS_VERSION_6_30) { + in.ndis_hdr.ndis_rev = NDIS_OFFLOAD_REV_3; + size = NDIS_OFFLOAD_SIZE; + } else if (hv->ndis_ver >= NDIS_VERSION_6_1) { + in.ndis_hdr.ndis_rev = NDIS_OFFLOAD_REV_2; + size = NDIS_OFFLOAD_SIZE_6_1; + } else { + in.ndis_hdr.ndis_rev = NDIS_OFFLOAD_REV_1; + size = NDIS_OFFLOAD_SIZE_6_0; + } + in.ndis_hdr.ndis_size = size; + + caps_len = NDIS_OFFLOAD_SIZE; + error = hn_rndis_query(hv, OID_TCP_OFFLOAD_HARDWARE_CAPABILITIES, + &in, size, caps, caps_len); + if (error) + return error; + + /* Preliminary verification. */ + if (caps->ndis_hdr.ndis_type != NDIS_OBJTYPE_OFFLOAD) { + PMD_DRV_LOG(NOTICE, "invalid NDIS objtype 0x%02x", + caps->ndis_hdr.ndis_type); + return -EINVAL; + } + if (caps->ndis_hdr.ndis_rev < NDIS_OFFLOAD_REV_1) { + PMD_DRV_LOG(NOTICE, "invalid NDIS objrev 0x%02x", + caps->ndis_hdr.ndis_rev); + return -EINVAL; + } + if (caps->ndis_hdr.ndis_size > caps_len) { + PMD_DRV_LOG(NOTICE, "invalid NDIS objsize %u, data size %u", + caps->ndis_hdr.ndis_size, caps_len); + return -EINVAL; + } else if (caps->ndis_hdr.ndis_size < NDIS_OFFLOAD_SIZE_6_0) { + PMD_DRV_LOG(NOTICE, "invalid NDIS objsize %u", + caps->ndis_hdr.ndis_size); + return -EINVAL; + } + + return 0; +} + +int +hn_rndis_query_rsscaps(struct hn_data *hv, + unsigned int *rxr_cnt0) +{ + struct ndis_rss_caps in, caps; + unsigned int indsz, rxr_cnt; + uint32_t caps_len; + int error; + + *rxr_cnt0 = 0; + + if (hv->ndis_ver < NDIS_VERSION_6_20) { + PMD_DRV_LOG(DEBUG, "RSS not supported on this host"); + return -EOPNOTSUPP; + } + + memset(&in, 0, sizeof(in)); + in.ndis_hdr.ndis_type = NDIS_OBJTYPE_RSS_CAPS; + in.ndis_hdr.ndis_rev = NDIS_RSS_CAPS_REV_2; + in.ndis_hdr.ndis_size = NDIS_RSS_CAPS_SIZE; + + caps_len = NDIS_RSS_CAPS_SIZE; + error = hn_rndis_query(hv, OID_GEN_RECEIVE_SCALE_CAPABILITIES, + &in, NDIS_RSS_CAPS_SIZE, + &caps, caps_len); + if (error) + return error; + + PMD_INIT_LOG(DEBUG, "RX rings %u indirect %u caps %#x", + caps.ndis_nrxr, caps.ndis_nind, caps.ndis_caps); + /* + * Preliminary verification. + */ + if (caps.ndis_hdr.ndis_type != NDIS_OBJTYPE_RSS_CAPS) { + PMD_DRV_LOG(ERR, "invalid NDIS objtype 0x%02x", + caps.ndis_hdr.ndis_type); + return -EINVAL; + } + if (caps.ndis_hdr.ndis_rev < NDIS_RSS_CAPS_REV_1) { + PMD_DRV_LOG(ERR, "invalid NDIS objrev 0x%02x", + caps.ndis_hdr.ndis_rev); + return -EINVAL; + } + if (caps.ndis_hdr.ndis_size > caps_len) { + PMD_DRV_LOG(ERR, + "invalid NDIS objsize %u, data size %u", + caps.ndis_hdr.ndis_size, caps_len); + return -EINVAL; + } else if (caps.ndis_hdr.ndis_size < NDIS_RSS_CAPS_SIZE_6_0) { + PMD_DRV_LOG(ERR, "invalid NDIS objsize %u", + caps.ndis_hdr.ndis_size); + return -EINVAL; + } + + /* + * Save information for later RSS configuration. + */ + if (caps.ndis_nrxr == 0) { + PMD_DRV_LOG(ERR, "0 RX rings!?"); + return -EINVAL; + } + rxr_cnt = caps.ndis_nrxr; + + if (caps.ndis_hdr.ndis_size == NDIS_RSS_CAPS_SIZE && + caps.ndis_hdr.ndis_rev >= NDIS_RSS_CAPS_REV_2) { + if (caps.ndis_nind > NDIS_HASH_INDCNT) { + PMD_DRV_LOG(ERR, + "too many RSS indirect table entries %u", + caps.ndis_nind); + return -EOPNOTSUPP; + } + if (!rte_is_power_of_2(caps.ndis_nind)) { + PMD_DRV_LOG(ERR, + "RSS indirect table size is not power-of-2 %u", + caps.ndis_nind); + } + + indsz = caps.ndis_nind; + } else { + indsz = NDIS_HASH_INDCNT; + } + + if (indsz < rxr_cnt) { + PMD_DRV_LOG(NOTICE, + "# of RX rings (%d) > RSS indirect table size %d", + rxr_cnt, indsz); + rxr_cnt = indsz; + } + + hv->rss_offloads = 0; + if (caps.ndis_caps & NDIS_RSS_CAP_IPV4) + hv->rss_offloads |= ETH_RSS_IPV4 + | ETH_RSS_NONFRAG_IPV4_TCP + | ETH_RSS_NONFRAG_IPV4_UDP; + if (caps.ndis_caps & NDIS_RSS_CAP_IPV6) + hv->rss_offloads |= ETH_RSS_IPV6 + | ETH_RSS_NONFRAG_IPV6_TCP; + if (caps.ndis_caps & NDIS_RSS_CAP_IPV6_EX) + hv->rss_offloads |= ETH_RSS_IPV6_EX + | ETH_RSS_IPV6_TCP_EX; + + /* Commit! */ + *rxr_cnt0 = rxr_cnt; + + return 0; +} + +static int +hn_rndis_set(struct hn_data *hv, uint32_t oid, const void *data, uint32_t dlen) +{ + struct rndis_set_req *req; + struct rndis_set_comp comp; + uint32_t reqlen, comp_len; + uint32_t rid; + int error; + + reqlen = sizeof(*req) + dlen; + req = rte_zmalloc("RNDIS_SET", reqlen, PAGE_SIZE); + if (!req) + return -ENOMEM; + + rid = hn_rndis_rid(hv); + req->type = RNDIS_SET_MSG; + req->len = reqlen; + req->rid = rid; + req->oid = oid; + req->infobuflen = dlen; + req->infobufoffset = RNDIS_SET_REQ_INFOBUFOFFSET; + + /* Data immediately follows RNDIS set. */ + memcpy(req + 1, data, dlen); + + comp_len = sizeof(comp); + error = hn_rndis_execute(hv, rid, req, reqlen, + &comp, comp_len, + RNDIS_SET_CMPLT); + if (error) { + PMD_DRV_LOG(ERR, "exec RNDIS set %#" PRIx32 " failed", + oid); + error = EIO; + goto done; + } + + if (comp.status != RNDIS_STATUS_SUCCESS) { + PMD_DRV_LOG(ERR, + "RNDIS set %#" PRIx32 " failed: status %#" PRIx32, + oid, comp.status); + error = EIO; + goto done; + } + +done: + rte_free(req); + return error; +} + +int hn_rndis_conf_offload(struct hn_data *hv, + uint64_t tx_offloads, uint64_t rx_offloads) +{ + struct ndis_offload_params params; + struct ndis_offload hwcaps; + int error; + + error = hn_rndis_query_hwcaps(hv, &hwcaps); + if (error) { + PMD_DRV_LOG(ERR, "hwcaps query failed: %d", error); + return error; + } + + /* NOTE: 0 means "no change" */ + memset(¶ms, 0, sizeof(params)); + + params.ndis_hdr.ndis_type = NDIS_OBJTYPE_DEFAULT; + if (hv->ndis_ver < NDIS_VERSION_6_30) { + params.ndis_hdr.ndis_rev = NDIS_OFFLOAD_PARAMS_REV_2; + params.ndis_hdr.ndis_size = NDIS_OFFLOAD_PARAMS_SIZE_6_1; + } else { + params.ndis_hdr.ndis_rev = NDIS_OFFLOAD_PARAMS_REV_3; + params.ndis_hdr.ndis_size = NDIS_OFFLOAD_PARAMS_SIZE; + } + + if (tx_offloads & DEV_TX_OFFLOAD_TCP_CKSUM) { + if (hwcaps.ndis_csum.ndis_ip4_txcsum & NDIS_TXCSUM_CAP_TCP4) + params.ndis_tcp4csum = NDIS_OFFLOAD_PARAM_TX; + else + goto unsupported; + + if (hwcaps.ndis_csum.ndis_ip6_txcsum & NDIS_TXCSUM_CAP_TCP6) + params.ndis_tcp6csum = NDIS_OFFLOAD_PARAM_TX; + else + goto unsupported; + } + + if (rx_offloads & DEV_RX_OFFLOAD_TCP_CKSUM) { + if ((hwcaps.ndis_csum.ndis_ip4_rxcsum & NDIS_RXCSUM_CAP_TCP4) + == NDIS_RXCSUM_CAP_TCP4) + params.ndis_tcp4csum |= NDIS_OFFLOAD_PARAM_RX; + else + goto unsupported; + + if ((hwcaps.ndis_csum.ndis_ip6_rxcsum & NDIS_RXCSUM_CAP_TCP6) + == NDIS_RXCSUM_CAP_TCP6) + params.ndis_tcp6csum |= NDIS_OFFLOAD_PARAM_RX; + else + goto unsupported; + } + + if (tx_offloads & DEV_TX_OFFLOAD_UDP_CKSUM) { + if (hwcaps.ndis_csum.ndis_ip4_txcsum & NDIS_TXCSUM_CAP_UDP4) + params.ndis_udp4csum = NDIS_OFFLOAD_PARAM_TX; + else + goto unsupported; + + if ((hwcaps.ndis_csum.ndis_ip6_txcsum & NDIS_TXCSUM_CAP_UDP6) + == NDIS_TXCSUM_CAP_UDP6) + params.ndis_udp6csum = NDIS_OFFLOAD_PARAM_TX; + else + goto unsupported; + } + + if (rx_offloads & DEV_TX_OFFLOAD_UDP_CKSUM) { + if (hwcaps.ndis_csum.ndis_ip4_rxcsum & NDIS_RXCSUM_CAP_UDP4) + params.ndis_udp4csum |= NDIS_OFFLOAD_PARAM_RX; + else + goto unsupported; + + if (hwcaps.ndis_csum.ndis_ip6_rxcsum & NDIS_RXCSUM_CAP_UDP6) + params.ndis_udp6csum |= NDIS_OFFLOAD_PARAM_RX; + else + goto unsupported; + } + + if (tx_offloads & DEV_TX_OFFLOAD_IPV4_CKSUM) { + if ((hwcaps.ndis_csum.ndis_ip4_txcsum & NDIS_TXCSUM_CAP_IP4) + == NDIS_TXCSUM_CAP_IP4) + params.ndis_ip4csum = NDIS_OFFLOAD_PARAM_TX; + else + goto unsupported; + } + if (rx_offloads & DEV_RX_OFFLOAD_IPV4_CKSUM) { + if (hwcaps.ndis_csum.ndis_ip4_rxcsum & NDIS_RXCSUM_CAP_IP4) + params.ndis_ip4csum |= NDIS_OFFLOAD_PARAM_RX; + else + goto unsupported; + } + + if (tx_offloads & DEV_TX_OFFLOAD_TCP_TSO) { + if (hwcaps.ndis_lsov2.ndis_ip4_encap & NDIS_OFFLOAD_ENCAP_8023) + params.ndis_lsov2_ip4 = NDIS_OFFLOAD_LSOV2_ON; + else + goto unsupported; + + if ((hwcaps.ndis_lsov2.ndis_ip6_opts & HN_NDIS_LSOV2_CAP_IP6) + == HN_NDIS_LSOV2_CAP_IP6) + params.ndis_lsov2_ip6 = NDIS_OFFLOAD_LSOV2_ON; + else + goto unsupported; + } + + error = hn_rndis_set(hv, OID_TCP_OFFLOAD_PARAMETERS, ¶ms, + params.ndis_hdr.ndis_size); + if (error) { + PMD_DRV_LOG(ERR, "offload config failed"); + return error; + } + + return 0; + unsupported: + PMD_DRV_LOG(NOTICE, + "offload tx:%" PRIx64 " rx:%" PRIx64 " not supported by this version", + tx_offloads, rx_offloads); + return -EINVAL; +} + +int hn_rndis_get_offload(struct hn_data *hv, + struct rte_eth_dev_info *dev_info) +{ + struct ndis_offload hwcaps; + int error; + + memset(&hwcaps, 0, sizeof(hwcaps)); + + error = hn_rndis_query_hwcaps(hv, &hwcaps); + if (error) { + PMD_DRV_LOG(ERR, "hwcaps query failed: %d", error); + return error; + } + + dev_info->tx_offload_capa = DEV_TX_OFFLOAD_MULTI_SEGS | + DEV_TX_OFFLOAD_VLAN_INSERT; + + if ((hwcaps.ndis_csum.ndis_ip4_txcsum & HN_NDIS_TXCSUM_CAP_IP4) + == HN_NDIS_TXCSUM_CAP_IP4) + dev_info->tx_offload_capa |= DEV_TX_OFFLOAD_IPV4_CKSUM; + + if ((hwcaps.ndis_csum.ndis_ip4_txcsum & HN_NDIS_TXCSUM_CAP_TCP4) + == HN_NDIS_TXCSUM_CAP_TCP4 && + (hwcaps.ndis_csum.ndis_ip6_txcsum & HN_NDIS_TXCSUM_CAP_TCP6) + == HN_NDIS_TXCSUM_CAP_TCP6) + dev_info->tx_offload_capa |= DEV_TX_OFFLOAD_TCP_CKSUM; + + if ((hwcaps.ndis_csum.ndis_ip4_txcsum & NDIS_TXCSUM_CAP_UDP4) && + (hwcaps.ndis_csum.ndis_ip6_txcsum & NDIS_TXCSUM_CAP_UDP6)) + dev_info->tx_offload_capa |= DEV_TX_OFFLOAD_UDP_CKSUM; + + if ((hwcaps.ndis_lsov2.ndis_ip4_encap & NDIS_OFFLOAD_ENCAP_8023) && + (hwcaps.ndis_lsov2.ndis_ip6_opts & HN_NDIS_LSOV2_CAP_IP6) + == HN_NDIS_LSOV2_CAP_IP6) + dev_info->tx_offload_capa |= DEV_TX_OFFLOAD_TCP_TSO; + + dev_info->rx_offload_capa = DEV_RX_OFFLOAD_VLAN_STRIP; + + if (hwcaps.ndis_csum.ndis_ip4_rxcsum & NDIS_RXCSUM_CAP_IP4) + dev_info->rx_offload_capa |= DEV_RX_OFFLOAD_IPV4_CKSUM; + + if ((hwcaps.ndis_csum.ndis_ip4_rxcsum & NDIS_RXCSUM_CAP_TCP4) && + (hwcaps.ndis_csum.ndis_ip6_rxcsum & NDIS_RXCSUM_CAP_TCP6)) + dev_info->rx_offload_capa |= DEV_RX_OFFLOAD_TCP_CKSUM; + + if ((hwcaps.ndis_csum.ndis_ip4_rxcsum & NDIS_RXCSUM_CAP_UDP4) && + (hwcaps.ndis_csum.ndis_ip6_rxcsum & NDIS_RXCSUM_CAP_UDP6)) + dev_info->rx_offload_capa |= DEV_RX_OFFLOAD_UDP_CKSUM; + + return 0; +} + +uint32_t +hn_rndis_get_ptypes(struct hn_data *hv) +{ + struct ndis_offload hwcaps; + uint32_t ptypes; + int error; + + memset(&hwcaps, 0, sizeof(hwcaps)); + + error = hn_rndis_query_hwcaps(hv, &hwcaps); + if (error) { + PMD_DRV_LOG(ERR, "hwcaps query failed: %d", error); + return RTE_PTYPE_L2_ETHER; + } + + ptypes = RTE_PTYPE_L2_ETHER; + + if (hwcaps.ndis_csum.ndis_ip4_rxcsum & NDIS_RXCSUM_CAP_IP4) + ptypes |= RTE_PTYPE_L3_IPV4; + + if ((hwcaps.ndis_csum.ndis_ip4_rxcsum & NDIS_RXCSUM_CAP_TCP4) || + (hwcaps.ndis_csum.ndis_ip6_rxcsum & NDIS_RXCSUM_CAP_TCP6)) + ptypes |= RTE_PTYPE_L4_TCP; + + if ((hwcaps.ndis_csum.ndis_ip4_rxcsum & NDIS_RXCSUM_CAP_UDP4) || + (hwcaps.ndis_csum.ndis_ip6_rxcsum & NDIS_RXCSUM_CAP_UDP6)) + ptypes |= RTE_PTYPE_L4_UDP; + + return ptypes; +} + +int +hn_rndis_set_rxfilter(struct hn_data *hv, uint32_t filter) +{ + int error; + + error = hn_rndis_set(hv, OID_GEN_CURRENT_PACKET_FILTER, + &filter, sizeof(filter)); + if (error) { + PMD_DRV_LOG(ERR, "set RX filter %#" PRIx32 " failed: %d", + filter, error); + } else { + PMD_DRV_LOG(DEBUG, "set RX filter %#" PRIx32 " done", filter); + } + + return error; +} + +/* The default RSS key. + * This value is the same as MLX5 so that flows will be + * received on same path for both VF ans synthetic NIC. + */ +static const uint8_t rss_default_key[NDIS_HASH_KEYSIZE_TOEPLITZ] = { + 0x2c, 0xc6, 0x81, 0xd1, 0x5b, 0xdb, 0xf4, 0xf7, + 0xfc, 0xa2, 0x83, 0x19, 0xdb, 0x1a, 0x3e, 0x94, + 0x6b, 0x9e, 0x38, 0xd9, 0x2c, 0x9c, 0x03, 0xd1, + 0xad, 0x99, 0x44, 0xa7, 0xd9, 0x56, 0x3d, 0x59, + 0x06, 0x3c, 0x25, 0xf3, 0xfc, 0x1f, 0xdc, 0x2a, +}; + +int hn_rndis_conf_rss(struct hn_data *hv, + const struct rte_eth_rss_conf *rss_conf) +{ + struct ndis_rssprm_toeplitz rssp; + struct ndis_rss_params *prm = &rssp.rss_params; + const uint8_t *rss_key = rss_conf->rss_key ? : rss_default_key; + uint32_t rss_hash; + unsigned int i; + int error; + + PMD_INIT_FUNC_TRACE(); + + memset(&rssp, 0, sizeof(rssp)); + + prm->ndis_hdr.ndis_type = NDIS_OBJTYPE_RSS_PARAMS; + prm->ndis_hdr.ndis_rev = NDIS_RSS_PARAMS_REV_2; + prm->ndis_hdr.ndis_size = sizeof(*prm); + prm->ndis_flags = 0; + + rss_hash = NDIS_HASH_FUNCTION_TOEPLITZ; + if (rss_conf->rss_hf & ETH_RSS_IPV4) + rss_hash |= NDIS_HASH_IPV4; + if (rss_conf->rss_hf & ETH_RSS_NONFRAG_IPV4_TCP) + rss_hash |= NDIS_HASH_TCP_IPV4; + if (rss_conf->rss_hf & ETH_RSS_IPV6) + rss_hash |= NDIS_HASH_IPV6; + if (rss_conf->rss_hf & ETH_RSS_NONFRAG_IPV6_TCP) + rss_hash |= NDIS_HASH_TCP_IPV6; + + prm->ndis_hash = rss_hash; + prm->ndis_indsize = sizeof(rssp.rss_ind[0]) * NDIS_HASH_INDCNT; + prm->ndis_indoffset = offsetof(struct ndis_rssprm_toeplitz, rss_ind[0]); + prm->ndis_keysize = NDIS_HASH_KEYSIZE_TOEPLITZ; + prm->ndis_keyoffset = offsetof(struct ndis_rssprm_toeplitz, rss_key[0]); + + for (i = 0; i < NDIS_HASH_INDCNT; i++) + rssp.rss_ind[i] = i % hv->num_queues; + + /* Set hask key values */ + memcpy(&rssp.rss_key, rss_key, NDIS_HASH_KEYSIZE_TOEPLITZ); + + error = hn_rndis_set(hv, OID_GEN_RECEIVE_SCALE_PARAMETERS, + &rssp, sizeof(rssp)); + if (error) { + PMD_DRV_LOG(ERR, + "RSS config num queues=%u failed: %d", + hv->num_queues, error); + } + return error; +} + +static int hn_rndis_init(struct hn_data *hv) +{ + struct rndis_init_req *req; + struct rndis_init_comp comp; + uint32_t comp_len, rid; + int error; + + req = hn_rndis_alloc(hv, sizeof(*req)); + if (!req) { + PMD_DRV_LOG(ERR, "no memory for RNDIS init"); + return -ENXIO; + } + + rid = hn_rndis_rid(hv); + req->type = RNDIS_INITIALIZE_MSG; + req->len = sizeof(*req); + req->rid = rid; + req->ver_major = RNDIS_VERSION_MAJOR; + req->ver_minor = RNDIS_VERSION_MINOR; + req->max_xfersz = HN_RNDIS_XFER_SIZE; + + comp_len = RNDIS_INIT_COMP_SIZE_MIN; + error = hn_rndis_execute(hv, rid, req, sizeof(*req), + &comp, comp_len, + RNDIS_INITIALIZE_CMPLT); + if (error) + goto done; + + if (comp.status != RNDIS_STATUS_SUCCESS) { + PMD_DRV_LOG(ERR, "RNDIS init failed: status 0x%08x", + comp.status); + error = -EIO; + goto done; + } + + hv->rndis_agg_size = comp.pktmaxsz; + hv->rndis_agg_pkts = comp.pktmaxcnt; + hv->rndis_agg_align = 1U << comp.align; + + if (hv->rndis_agg_align < sizeof(uint32_t)) { + /* + * The RNDIS packet message encap assumes that the RNDIS + * packet message is at least 4 bytes aligned. Fix up the + * alignment here, if the remote side sets the alignment + * too low. + */ + PMD_DRV_LOG(NOTICE, + "fixup RNDIS aggpkt align: %u -> %zu", + hv->rndis_agg_align, sizeof(uint32_t)); + hv->rndis_agg_align = sizeof(uint32_t); + } + + PMD_INIT_LOG(INFO, + "RNDIS ver %u.%u, aggpkt size %u, aggpkt cnt %u, aggpkt align %u", + comp.ver_major, comp.ver_minor, + hv->rndis_agg_size, hv->rndis_agg_pkts, + hv->rndis_agg_align); + error = 0; +done: + rte_free(req); + return error; +} + +int +hn_rndis_get_eaddr(struct hn_data *hv, uint8_t *eaddr) +{ + uint32_t eaddr_len; + int error; + + eaddr_len = ETHER_ADDR_LEN; + error = hn_rndis_query(hv, OID_802_3_PERMANENT_ADDRESS, NULL, 0, + eaddr, eaddr_len); + if (error) + return error; + + PMD_DRV_LOG(INFO, "MAC address %02x:%02x:%02x:%02x:%02x:%02x", + eaddr[0], eaddr[1], eaddr[2], + eaddr[3], eaddr[4], eaddr[5]); + return 0; +} + +int +hn_rndis_get_linkstatus(struct hn_data *hv) +{ + return hn_rndis_query(hv, OID_GEN_MEDIA_CONNECT_STATUS, NULL, 0, + &hv->link_status, sizeof(uint32_t)); +} + +int +hn_rndis_get_linkspeed(struct hn_data *hv) +{ + return hn_rndis_query(hv, OID_GEN_LINK_SPEED, NULL, 0, + &hv->link_speed, sizeof(uint32_t)); +} + +int +hn_rndis_attach(struct hn_data *hv) +{ + /* Initialize RNDIS. */ + return hn_rndis_init(hv); +} + +void +hn_rndis_detach(struct hn_data *hv) +{ + /* Halt the RNDIS. */ + hn_rndis_halt(hv); +} diff --git a/src/seastar/dpdk/drivers/net/netvsc/hn_rndis.h b/src/seastar/dpdk/drivers/net/netvsc/hn_rndis.h new file mode 100644 index 000000000..319b497a7 --- /dev/null +++ b/src/seastar/dpdk/drivers/net/netvsc/hn_rndis.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ + +#include "rndis.h" + +struct hn_data; + +void hn_rndis_receive_response(struct hn_data *hv, + const void *data, uint32_t len); +void hn_rndis_link_status(struct rte_eth_dev *dev, const void *msg); +int hn_rndis_attach(struct hn_data *hv); +void hn_rndis_detach(struct hn_data *hv); +int hn_rndis_get_eaddr(struct hn_data *hv, uint8_t *eaddr); +int hn_rndis_get_linkstatus(struct hn_data *hv); +int hn_rndis_get_linkspeed(struct hn_data *hv); +int hn_rndis_set_rxfilter(struct hn_data *hv, uint32_t filter); +void hn_rndis_rx_ctrl(struct hn_data *hv, const void *data, + int dlen); +int hn_rndis_get_offload(struct hn_data *hv, + struct rte_eth_dev_info *dev_info); +int hn_rndis_conf_offload(struct hn_data *hv, + uint64_t tx_offloads, + uint64_t rx_offloads); +int hn_rndis_query_rsscaps(struct hn_data *hv, + unsigned int *rxr_cnt0); +int hn_rndis_conf_rss(struct hn_data *hv, + const struct rte_eth_rss_conf *rss_conf); +uint32_t hn_rndis_get_ptypes(struct hn_data *hv); + +#ifdef RTE_LIBRTE_NETVSC_DEBUG_DUMP +void hn_rndis_dump(const void *buf); +#else +#define hn_rndis_dump(buf) +#endif diff --git a/src/seastar/dpdk/drivers/net/netvsc/hn_rxtx.c b/src/seastar/dpdk/drivers/net/netvsc/hn_rxtx.c new file mode 100644 index 000000000..7d7b55778 --- /dev/null +++ b/src/seastar/dpdk/drivers/net/netvsc/hn_rxtx.c @@ -0,0 +1,1470 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2016-2018 Microsoft Corporation + * Copyright(c) 2013-2016 Brocade Communications Systems, Inc. + * All rights reserved. + */ + +#include <stdint.h> +#include <string.h> +#include <stdio.h> +#include <errno.h> +#include <unistd.h> +#include <strings.h> +#include <malloc.h> + +#include <rte_ethdev.h> +#include <rte_memcpy.h> +#include <rte_string_fns.h> +#include <rte_memzone.h> +#include <rte_malloc.h> +#include <rte_atomic.h> +#include <rte_branch_prediction.h> +#include <rte_ether.h> +#include <rte_common.h> +#include <rte_errno.h> +#include <rte_memory.h> +#include <rte_eal.h> +#include <rte_dev.h> +#include <rte_net.h> +#include <rte_bus_vmbus.h> +#include <rte_spinlock.h> + +#include "hn_logs.h" +#include "hn_var.h" +#include "hn_rndis.h" +#include "hn_nvs.h" +#include "ndis.h" + +#define HN_NVS_SEND_MSG_SIZE \ + (sizeof(struct vmbus_chanpkt_hdr) + sizeof(struct hn_nvs_rndis)) + +#define HN_TXD_CACHE_SIZE 32 /* per cpu tx_descriptor pool cache */ +#define HN_TXCOPY_THRESHOLD 512 + +#define HN_RXCOPY_THRESHOLD 256 +#define HN_RXQ_EVENT_DEFAULT 2048 + +struct hn_rxinfo { + uint32_t vlan_info; + uint32_t csum_info; + uint32_t hash_info; + uint32_t hash_value; +}; + +#define HN_RXINFO_VLAN 0x0001 +#define HN_RXINFO_CSUM 0x0002 +#define HN_RXINFO_HASHINF 0x0004 +#define HN_RXINFO_HASHVAL 0x0008 +#define HN_RXINFO_ALL \ + (HN_RXINFO_VLAN | \ + HN_RXINFO_CSUM | \ + HN_RXINFO_HASHINF | \ + HN_RXINFO_HASHVAL) + +#define HN_NDIS_VLAN_INFO_INVALID 0xffffffff +#define HN_NDIS_RXCSUM_INFO_INVALID 0 +#define HN_NDIS_HASH_INFO_INVALID 0 + +/* + * Per-transmit book keeping. + * A slot in transmit ring (chim_index) is reserved for each transmit. + * + * There are two types of transmit: + * - buffered transmit where chimney buffer is used and RNDIS header + * is in the buffer. mbuf == NULL for this case. + * + * - direct transmit where RNDIS header is in the in rndis_pkt + * mbuf is freed after transmit. + * + * Descriptors come from per-port pool which is used + * to limit number of outstanding requests per device. + */ +struct hn_txdesc { + struct rte_mbuf *m; + + uint16_t queue_id; + uint16_t chim_index; + uint32_t chim_size; + uint32_t data_size; + uint32_t packets; + + struct rndis_packet_msg *rndis_pkt; +}; + +#define HN_RNDIS_PKT_LEN \ + (sizeof(struct rndis_packet_msg) + \ + RNDIS_PKTINFO_SIZE(NDIS_HASH_VALUE_SIZE) + \ + RNDIS_PKTINFO_SIZE(NDIS_VLAN_INFO_SIZE) + \ + RNDIS_PKTINFO_SIZE(NDIS_LSO2_INFO_SIZE) + \ + RNDIS_PKTINFO_SIZE(NDIS_TXCSUM_INFO_SIZE)) + +/* Minimum space required for a packet */ +#define HN_PKTSIZE_MIN(align) \ + RTE_ALIGN(ETHER_MIN_LEN + HN_RNDIS_PKT_LEN, align) + +#define DEFAULT_TX_FREE_THRESH 32U + +static void +hn_update_packet_stats(struct hn_stats *stats, const struct rte_mbuf *m) +{ + uint32_t s = m->pkt_len; + const struct ether_addr *ea; + + if (s == 64) { + stats->size_bins[1]++; + } else if (s > 64 && s < 1024) { + uint32_t bin; + + /* count zeros, and offset into correct bin */ + bin = (sizeof(s) * 8) - __builtin_clz(s) - 5; + stats->size_bins[bin]++; + } else { + if (s < 64) + stats->size_bins[0]++; + else if (s < 1519) + stats->size_bins[6]++; + else + stats->size_bins[7]++; + } + + ea = rte_pktmbuf_mtod(m, const struct ether_addr *); + if (is_multicast_ether_addr(ea)) { + if (is_broadcast_ether_addr(ea)) + stats->broadcast++; + else + stats->multicast++; + } +} + +static inline unsigned int hn_rndis_pktlen(const struct rndis_packet_msg *pkt) +{ + return pkt->pktinfooffset + pkt->pktinfolen; +} + +static inline uint32_t +hn_rndis_pktmsg_offset(uint32_t ofs) +{ + return ofs - offsetof(struct rndis_packet_msg, dataoffset); +} + +static void hn_txd_init(struct rte_mempool *mp __rte_unused, + void *opaque, void *obj, unsigned int idx) +{ + struct hn_txdesc *txd = obj; + struct rte_eth_dev *dev = opaque; + struct rndis_packet_msg *pkt; + + memset(txd, 0, sizeof(*txd)); + txd->chim_index = idx; + + pkt = rte_malloc_socket("RNDIS_TX", HN_RNDIS_PKT_LEN, + rte_align32pow2(HN_RNDIS_PKT_LEN), + dev->device->numa_node); + if (!pkt) + rte_exit(EXIT_FAILURE, "can not allocate RNDIS header"); + + txd->rndis_pkt = pkt; +} + +/* + * Unlike Linux and FreeBSD, this driver uses a mempool + * to limit outstanding transmits and reserve buffers + */ +int +hn_tx_pool_init(struct rte_eth_dev *dev) +{ + struct hn_data *hv = dev->data->dev_private; + char name[RTE_MEMPOOL_NAMESIZE]; + struct rte_mempool *mp; + + snprintf(name, sizeof(name), + "hn_txd_%u", dev->data->port_id); + + PMD_INIT_LOG(DEBUG, "create a TX send pool %s n=%u size=%zu socket=%d", + name, hv->chim_cnt, sizeof(struct hn_txdesc), + dev->device->numa_node); + + mp = rte_mempool_create(name, hv->chim_cnt, sizeof(struct hn_txdesc), + HN_TXD_CACHE_SIZE, 0, + NULL, NULL, + hn_txd_init, dev, + dev->device->numa_node, 0); + if (!mp) { + PMD_DRV_LOG(ERR, + "mempool %s create failed: %d", name, rte_errno); + return -rte_errno; + } + + hv->tx_pool = mp; + return 0; +} + +void +hn_tx_pool_uninit(struct rte_eth_dev *dev) +{ + struct hn_data *hv = dev->data->dev_private; + + if (hv->tx_pool) { + rte_mempool_free(hv->tx_pool); + hv->tx_pool = NULL; + } +} + +static void hn_reset_txagg(struct hn_tx_queue *txq) +{ + txq->agg_szleft = txq->agg_szmax; + txq->agg_pktleft = txq->agg_pktmax; + txq->agg_txd = NULL; + txq->agg_prevpkt = NULL; +} + +int +hn_dev_tx_queue_setup(struct rte_eth_dev *dev, + uint16_t queue_idx, uint16_t nb_desc __rte_unused, + unsigned int socket_id, + const struct rte_eth_txconf *tx_conf) + +{ + struct hn_data *hv = dev->data->dev_private; + struct hn_tx_queue *txq; + uint32_t tx_free_thresh; + int err; + + PMD_INIT_FUNC_TRACE(); + + txq = rte_zmalloc_socket("HN_TXQ", sizeof(*txq), RTE_CACHE_LINE_SIZE, + socket_id); + if (!txq) + return -ENOMEM; + + txq->hv = hv; + txq->chan = hv->channels[queue_idx]; + txq->port_id = dev->data->port_id; + txq->queue_id = queue_idx; + + tx_free_thresh = tx_conf->tx_free_thresh; + if (tx_free_thresh == 0) + tx_free_thresh = RTE_MIN(hv->chim_cnt / 4, + DEFAULT_TX_FREE_THRESH); + + if (tx_free_thresh >= hv->chim_cnt - 3) + tx_free_thresh = hv->chim_cnt - 3; + + txq->free_thresh = tx_free_thresh; + + txq->agg_szmax = RTE_MIN(hv->chim_szmax, hv->rndis_agg_size); + txq->agg_pktmax = hv->rndis_agg_pkts; + txq->agg_align = hv->rndis_agg_align; + + hn_reset_txagg(txq); + + err = hn_vf_tx_queue_setup(dev, queue_idx, nb_desc, + socket_id, tx_conf); + if (err) { + rte_free(txq); + return err; + } + + dev->data->tx_queues[queue_idx] = txq; + return 0; +} + +void +hn_dev_tx_queue_release(void *arg) +{ + struct hn_tx_queue *txq = arg; + struct hn_txdesc *txd; + + PMD_INIT_FUNC_TRACE(); + + if (!txq) + return; + + /* If any pending data is still present just drop it */ + txd = txq->agg_txd; + if (txd) + rte_mempool_put(txq->hv->tx_pool, txd); + + rte_free(txq); +} + +static void +hn_nvs_send_completed(struct rte_eth_dev *dev, uint16_t queue_id, + unsigned long xactid, const struct hn_nvs_rndis_ack *ack) +{ + struct hn_txdesc *txd = (struct hn_txdesc *)xactid; + struct hn_tx_queue *txq; + + /* Control packets are sent with xacid == 0 */ + if (!txd) + return; + + txq = dev->data->tx_queues[queue_id]; + if (likely(ack->status == NVS_STATUS_OK)) { + PMD_TX_LOG(DEBUG, "port %u:%u complete tx %u packets %u bytes %u", + txq->port_id, txq->queue_id, txd->chim_index, + txd->packets, txd->data_size); + txq->stats.bytes += txd->data_size; + txq->stats.packets += txd->packets; + } else { + PMD_TX_LOG(NOTICE, "port %u:%u complete tx %u failed status %u", + txq->port_id, txq->queue_id, txd->chim_index, ack->status); + ++txq->stats.errors; + } + + rte_pktmbuf_free(txd->m); + + rte_mempool_put(txq->hv->tx_pool, txd); +} + +/* Handle transmit completion events */ +static void +hn_nvs_handle_comp(struct rte_eth_dev *dev, uint16_t queue_id, + const struct vmbus_chanpkt_hdr *pkt, + const void *data) +{ + const struct hn_nvs_hdr *hdr = data; + + switch (hdr->type) { + case NVS_TYPE_RNDIS_ACK: + hn_nvs_send_completed(dev, queue_id, pkt->xactid, data); + break; + + default: + PMD_TX_LOG(NOTICE, + "unexpected send completion type %u", + hdr->type); + } +} + +/* Parse per-packet info (meta data) */ +static int +hn_rndis_rxinfo(const void *info_data, unsigned int info_dlen, + struct hn_rxinfo *info) +{ + const struct rndis_pktinfo *pi = info_data; + uint32_t mask = 0; + + while (info_dlen != 0) { + const void *data; + uint32_t dlen; + + if (unlikely(info_dlen < sizeof(*pi))) + return -EINVAL; + + if (unlikely(info_dlen < pi->size)) + return -EINVAL; + info_dlen -= pi->size; + + if (unlikely(pi->size & RNDIS_PKTINFO_SIZE_ALIGNMASK)) + return -EINVAL; + if (unlikely(pi->size < pi->offset)) + return -EINVAL; + + dlen = pi->size - pi->offset; + data = pi->data; + + switch (pi->type) { + case NDIS_PKTINFO_TYPE_VLAN: + if (unlikely(dlen < NDIS_VLAN_INFO_SIZE)) + return -EINVAL; + info->vlan_info = *((const uint32_t *)data); + mask |= HN_RXINFO_VLAN; + break; + + case NDIS_PKTINFO_TYPE_CSUM: + if (unlikely(dlen < NDIS_RXCSUM_INFO_SIZE)) + return -EINVAL; + info->csum_info = *((const uint32_t *)data); + mask |= HN_RXINFO_CSUM; + break; + + case NDIS_PKTINFO_TYPE_HASHVAL: + if (unlikely(dlen < NDIS_HASH_VALUE_SIZE)) + return -EINVAL; + info->hash_value = *((const uint32_t *)data); + mask |= HN_RXINFO_HASHVAL; + break; + + case NDIS_PKTINFO_TYPE_HASHINF: + if (unlikely(dlen < NDIS_HASH_INFO_SIZE)) + return -EINVAL; + info->hash_info = *((const uint32_t *)data); + mask |= HN_RXINFO_HASHINF; + break; + + default: + goto next; + } + + if (mask == HN_RXINFO_ALL) + break; /* All found; done */ +next: + pi = (const struct rndis_pktinfo *) + ((const uint8_t *)pi + pi->size); + } + + /* + * Final fixup. + * - If there is no hash value, invalidate the hash info. + */ + if (!(mask & HN_RXINFO_HASHVAL)) + info->hash_info = HN_NDIS_HASH_INFO_INVALID; + return 0; +} + +/* + * Ack the consumed RXBUF associated w/ this channel packet, + * so that this RXBUF can be recycled by the hypervisor. + */ +static void hn_rx_buf_release(struct hn_rx_bufinfo *rxb) +{ + struct rte_mbuf_ext_shared_info *shinfo = &rxb->shinfo; + struct hn_data *hv = rxb->hv; + + if (rte_mbuf_ext_refcnt_update(shinfo, -1) == 0) { + hn_nvs_ack_rxbuf(rxb->chan, rxb->xactid); + --hv->rxbuf_outstanding; + } +} + +static void hn_rx_buf_free_cb(void *buf __rte_unused, void *opaque) +{ + hn_rx_buf_release(opaque); +} + +static struct hn_rx_bufinfo *hn_rx_buf_init(const struct hn_rx_queue *rxq, + const struct vmbus_chanpkt_rxbuf *pkt) +{ + struct hn_rx_bufinfo *rxb; + + rxb = rxq->hv->rxbuf_info + pkt->hdr.xactid; + rxb->chan = rxq->chan; + rxb->xactid = pkt->hdr.xactid; + rxb->hv = rxq->hv; + + rxb->shinfo.free_cb = hn_rx_buf_free_cb; + rxb->shinfo.fcb_opaque = rxb; + rte_mbuf_ext_refcnt_set(&rxb->shinfo, 1); + return rxb; +} + +static void hn_rxpkt(struct hn_rx_queue *rxq, struct hn_rx_bufinfo *rxb, + uint8_t *data, unsigned int headroom, unsigned int dlen, + const struct hn_rxinfo *info) +{ + struct hn_data *hv = rxq->hv; + struct rte_mbuf *m; + + m = rte_pktmbuf_alloc(rxq->mb_pool); + if (unlikely(!m)) { + struct rte_eth_dev *dev = + &rte_eth_devices[rxq->port_id]; + + dev->data->rx_mbuf_alloc_failed++; + return; + } + + /* + * For large packets, avoid copy if possible but need to keep + * some space available in receive area for later packets. + */ + if (dlen >= HN_RXCOPY_THRESHOLD && + hv->rxbuf_outstanding < hv->rxbuf_section_cnt / 2) { + struct rte_mbuf_ext_shared_info *shinfo; + const void *rxbuf; + rte_iova_t iova; + + /* + * Build an external mbuf that points to recveive area. + * Use refcount to handle multiple packets in same + * receive buffer section. + */ + rxbuf = hv->rxbuf_res->addr; + iova = rte_mem_virt2iova(rxbuf) + RTE_PTR_DIFF(data, rxbuf); + shinfo = &rxb->shinfo; + + if (rte_mbuf_ext_refcnt_update(shinfo, 1) == 1) + ++hv->rxbuf_outstanding; + + rte_pktmbuf_attach_extbuf(m, data, iova, + dlen + headroom, shinfo); + m->data_off = headroom; + } else { + /* Mbuf's in pool must be large enough to hold small packets */ + if (unlikely(rte_pktmbuf_tailroom(m) < dlen)) { + rte_pktmbuf_free_seg(m); + ++rxq->stats.errors; + return; + } + rte_memcpy(rte_pktmbuf_mtod(m, void *), + data + headroom, dlen); + } + + m->port = rxq->port_id; + m->pkt_len = dlen; + m->data_len = dlen; + m->packet_type = rte_net_get_ptype(m, NULL, + RTE_PTYPE_L2_MASK | + RTE_PTYPE_L3_MASK | + RTE_PTYPE_L4_MASK); + + if (info->vlan_info != HN_NDIS_VLAN_INFO_INVALID) { + m->vlan_tci = info->vlan_info; + m->ol_flags |= PKT_RX_VLAN_STRIPPED | PKT_RX_VLAN; + + /* NDIS always strips tag, put it back if necessary */ + if (!hv->vlan_strip && rte_vlan_insert(&m)) { + PMD_DRV_LOG(DEBUG, "vlan insert failed"); + ++rxq->stats.errors; + rte_pktmbuf_free(m); + return; + } + } + + if (info->csum_info != HN_NDIS_RXCSUM_INFO_INVALID) { + if (info->csum_info & NDIS_RXCSUM_INFO_IPCS_OK) + m->ol_flags |= PKT_RX_IP_CKSUM_GOOD; + + if (info->csum_info & (NDIS_RXCSUM_INFO_UDPCS_OK + | NDIS_RXCSUM_INFO_TCPCS_OK)) + m->ol_flags |= PKT_RX_L4_CKSUM_GOOD; + else if (info->csum_info & (NDIS_RXCSUM_INFO_TCPCS_FAILED + | NDIS_RXCSUM_INFO_UDPCS_FAILED)) + m->ol_flags |= PKT_RX_L4_CKSUM_BAD; + } + + if (info->hash_info != HN_NDIS_HASH_INFO_INVALID) { + m->ol_flags |= PKT_RX_RSS_HASH; + m->hash.rss = info->hash_value; + } + + PMD_RX_LOG(DEBUG, + "port %u:%u RX id %"PRIu64" size %u type %#x ol_flags %#"PRIx64, + rxq->port_id, rxq->queue_id, rxb->xactid, + m->pkt_len, m->packet_type, m->ol_flags); + + ++rxq->stats.packets; + rxq->stats.bytes += m->pkt_len; + hn_update_packet_stats(&rxq->stats, m); + + if (unlikely(rte_ring_sp_enqueue(rxq->rx_ring, m) != 0)) { + ++rxq->stats.ring_full; + rte_pktmbuf_free(m); + } +} + +static void hn_rndis_rx_data(struct hn_rx_queue *rxq, + struct hn_rx_bufinfo *rxb, + void *data, uint32_t dlen) +{ + unsigned int data_off, data_len, pktinfo_off, pktinfo_len; + const struct rndis_packet_msg *pkt = data; + struct hn_rxinfo info = { + .vlan_info = HN_NDIS_VLAN_INFO_INVALID, + .csum_info = HN_NDIS_RXCSUM_INFO_INVALID, + .hash_info = HN_NDIS_HASH_INFO_INVALID, + }; + int err; + + hn_rndis_dump(pkt); + + if (unlikely(dlen < sizeof(*pkt))) + goto error; + + if (unlikely(dlen < pkt->len)) + goto error; /* truncated RNDIS from host */ + + if (unlikely(pkt->len < pkt->datalen + + pkt->oobdatalen + pkt->pktinfolen)) + goto error; + + if (unlikely(pkt->datalen == 0)) + goto error; + + /* Check offsets. */ + if (unlikely(pkt->dataoffset < RNDIS_PACKET_MSG_OFFSET_MIN)) + goto error; + + if (likely(pkt->pktinfooffset > 0) && + unlikely(pkt->pktinfooffset < RNDIS_PACKET_MSG_OFFSET_MIN || + (pkt->pktinfooffset & RNDIS_PACKET_MSG_OFFSET_ALIGNMASK))) + goto error; + + data_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->dataoffset); + data_len = pkt->datalen; + pktinfo_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->pktinfooffset); + pktinfo_len = pkt->pktinfolen; + + if (likely(pktinfo_len > 0)) { + err = hn_rndis_rxinfo((const uint8_t *)pkt + pktinfo_off, + pktinfo_len, &info); + if (err) + goto error; + } + + if (unlikely(data_off + data_len > pkt->len)) + goto error; + + if (unlikely(data_len < ETHER_HDR_LEN)) + goto error; + + hn_rxpkt(rxq, rxb, data, data_off, data_len, &info); + return; +error: + ++rxq->stats.errors; +} + +static void +hn_rndis_receive(struct rte_eth_dev *dev, struct hn_rx_queue *rxq, + struct hn_rx_bufinfo *rxb, void *buf, uint32_t len) +{ + const struct rndis_msghdr *hdr = buf; + + switch (hdr->type) { + case RNDIS_PACKET_MSG: + if (dev->data->dev_started) + hn_rndis_rx_data(rxq, rxb, buf, len); + break; + + case RNDIS_INDICATE_STATUS_MSG: + hn_rndis_link_status(dev, buf); + break; + + case RNDIS_INITIALIZE_CMPLT: + case RNDIS_QUERY_CMPLT: + case RNDIS_SET_CMPLT: + hn_rndis_receive_response(rxq->hv, buf, len); + break; + + default: + PMD_DRV_LOG(NOTICE, + "unexpected RNDIS message (type %#x len %u)", + hdr->type, len); + break; + } +} + +static void +hn_nvs_handle_rxbuf(struct rte_eth_dev *dev, + struct hn_data *hv, + struct hn_rx_queue *rxq, + const struct vmbus_chanpkt_hdr *hdr, + const void *buf) +{ + const struct vmbus_chanpkt_rxbuf *pkt; + const struct hn_nvs_hdr *nvs_hdr = buf; + uint32_t rxbuf_sz = hv->rxbuf_res->len; + char *rxbuf = hv->rxbuf_res->addr; + unsigned int i, hlen, count; + struct hn_rx_bufinfo *rxb; + + /* At minimum we need type header */ + if (unlikely(vmbus_chanpkt_datalen(hdr) < sizeof(*nvs_hdr))) { + PMD_RX_LOG(ERR, "invalid receive nvs RNDIS"); + return; + } + + /* Make sure that this is a RNDIS message. */ + if (unlikely(nvs_hdr->type != NVS_TYPE_RNDIS)) { + PMD_RX_LOG(ERR, "nvs type %u, not RNDIS", + nvs_hdr->type); + return; + } + + hlen = vmbus_chanpkt_getlen(hdr->hlen); + if (unlikely(hlen < sizeof(*pkt))) { + PMD_RX_LOG(ERR, "invalid rxbuf chanpkt"); + return; + } + + pkt = container_of(hdr, const struct vmbus_chanpkt_rxbuf, hdr); + if (unlikely(pkt->rxbuf_id != NVS_RXBUF_SIG)) { + PMD_RX_LOG(ERR, "invalid rxbuf_id 0x%08x", + pkt->rxbuf_id); + return; + } + + count = pkt->rxbuf_cnt; + if (unlikely(hlen < offsetof(struct vmbus_chanpkt_rxbuf, + rxbuf[count]))) { + PMD_RX_LOG(ERR, "invalid rxbuf_cnt %u", count); + return; + } + + if (pkt->hdr.xactid > hv->rxbuf_section_cnt) { + PMD_RX_LOG(ERR, "invalid rxbuf section id %" PRIx64, + pkt->hdr.xactid); + return; + } + + /* Setup receive buffer info to allow for callback */ + rxb = hn_rx_buf_init(rxq, pkt); + + /* Each range represents 1 RNDIS pkt that contains 1 Ethernet frame */ + for (i = 0; i < count; ++i) { + unsigned int ofs, len; + + ofs = pkt->rxbuf[i].ofs; + len = pkt->rxbuf[i].len; + + if (unlikely(ofs + len > rxbuf_sz)) { + PMD_RX_LOG(ERR, + "%uth RNDIS msg overflow ofs %u, len %u", + i, ofs, len); + continue; + } + + if (unlikely(len == 0)) { + PMD_RX_LOG(ERR, "%uth RNDIS msg len %u", i, len); + continue; + } + + hn_rndis_receive(dev, rxq, rxb, + rxbuf + ofs, len); + } + + /* Send ACK now if external mbuf not used */ + hn_rx_buf_release(rxb); +} + +/* + * Called when NVS inband events are received. + * Send up a two part message with port_id and the NVS message + * to the pipe to the netvsc-vf-event control thread. + */ +static void hn_nvs_handle_notify(struct rte_eth_dev *dev, + const struct vmbus_chanpkt_hdr *pkt, + const void *data) +{ + const struct hn_nvs_hdr *hdr = data; + + switch (hdr->type) { + case NVS_TYPE_TXTBL_NOTE: + /* Transmit indirection table has locking problems + * in DPDK and therefore not implemented + */ + PMD_DRV_LOG(DEBUG, "host notify of transmit indirection table"); + break; + + case NVS_TYPE_VFASSOC_NOTE: + hn_nvs_handle_vfassoc(dev, pkt, data); + break; + + default: + PMD_DRV_LOG(INFO, + "got notify, nvs type %u", hdr->type); + } +} + +struct hn_rx_queue *hn_rx_queue_alloc(struct hn_data *hv, + uint16_t queue_id, + unsigned int socket_id) +{ + struct hn_rx_queue *rxq; + + rxq = rte_zmalloc_socket("HN_RXQ", sizeof(*rxq), + RTE_CACHE_LINE_SIZE, socket_id); + if (!rxq) + return NULL; + + rxq->hv = hv; + rxq->chan = hv->channels[queue_id]; + rte_spinlock_init(&rxq->ring_lock); + rxq->port_id = hv->port_id; + rxq->queue_id = queue_id; + rxq->event_sz = HN_RXQ_EVENT_DEFAULT; + rxq->event_buf = rte_malloc_socket("HN_EVENTS", HN_RXQ_EVENT_DEFAULT, + RTE_CACHE_LINE_SIZE, socket_id); + if (!rxq->event_buf) { + rte_free(rxq); + return NULL; + } + + return rxq; +} + +int +hn_dev_rx_queue_setup(struct rte_eth_dev *dev, + uint16_t queue_idx, uint16_t nb_desc, + unsigned int socket_id, + const struct rte_eth_rxconf *rx_conf, + struct rte_mempool *mp) +{ + struct hn_data *hv = dev->data->dev_private; + char ring_name[RTE_RING_NAMESIZE]; + struct hn_rx_queue *rxq; + unsigned int count; + int error = -ENOMEM; + + PMD_INIT_FUNC_TRACE(); + + if (queue_idx == 0) { + rxq = hv->primary; + } else { + rxq = hn_rx_queue_alloc(hv, queue_idx, socket_id); + if (!rxq) + return -ENOMEM; + } + + rxq->mb_pool = mp; + count = rte_mempool_avail_count(mp) / dev->data->nb_rx_queues; + if (nb_desc == 0 || nb_desc > count) + nb_desc = count; + + /* + * Staging ring from receive event logic to rx_pkts. + * rx_pkts assumes caller is handling multi-thread issue. + * event logic has locking. + */ + snprintf(ring_name, sizeof(ring_name), + "hn_rx_%u_%u", dev->data->port_id, queue_idx); + rxq->rx_ring = rte_ring_create(ring_name, + rte_align32pow2(nb_desc), + socket_id, 0); + if (!rxq->rx_ring) + goto fail; + + error = hn_vf_rx_queue_setup(dev, queue_idx, nb_desc, + socket_id, rx_conf, mp); + if (error) + goto fail; + + dev->data->rx_queues[queue_idx] = rxq; + return 0; + +fail: + rte_ring_free(rxq->rx_ring); + rte_free(rxq->event_buf); + rte_free(rxq); + return error; +} + +static void +hn_rx_queue_free(struct hn_rx_queue *rxq, bool keep_primary) +{ + + if (!rxq) + return; + + rte_ring_free(rxq->rx_ring); + rxq->rx_ring = NULL; + rxq->mb_pool = NULL; + + hn_vf_rx_queue_release(rxq->hv, rxq->queue_id); + + /* Keep primary queue to allow for control operations */ + if (keep_primary && rxq == rxq->hv->primary) + return; + + rte_free(rxq->event_buf); + rte_free(rxq); +} + +void +hn_dev_rx_queue_release(void *arg) +{ + struct hn_rx_queue *rxq = arg; + + PMD_INIT_FUNC_TRACE(); + + hn_rx_queue_free(rxq, true); +} + +int +hn_dev_tx_done_cleanup(void *arg, uint32_t free_cnt) +{ + struct hn_tx_queue *txq = arg; + + return hn_process_events(txq->hv, txq->queue_id, free_cnt); +} + +/* + * Process pending events on the channel. + * Called from both Rx queue poll and Tx cleanup + */ +uint32_t hn_process_events(struct hn_data *hv, uint16_t queue_id, + uint32_t tx_limit) +{ + struct rte_eth_dev *dev = &rte_eth_devices[hv->port_id]; + struct hn_rx_queue *rxq; + uint32_t bytes_read = 0; + uint32_t tx_done = 0; + int ret = 0; + + rxq = queue_id == 0 ? hv->primary : dev->data->rx_queues[queue_id]; + + /* If no pending data then nothing to do */ + if (rte_vmbus_chan_rx_empty(rxq->chan)) + return 0; + + /* + * Since channel is shared between Rx and TX queue need to have a lock + * since DPDK does not force same CPU to be used for Rx/Tx. + */ + if (unlikely(!rte_spinlock_trylock(&rxq->ring_lock))) + return 0; + + for (;;) { + const struct vmbus_chanpkt_hdr *pkt; + uint32_t len = rxq->event_sz; + const void *data; + +retry: + ret = rte_vmbus_chan_recv_raw(rxq->chan, rxq->event_buf, &len); + if (ret == -EAGAIN) + break; /* ring is empty */ + + if (unlikely(ret == -ENOBUFS)) { + /* event buffer not large enough to read ring */ + + PMD_DRV_LOG(DEBUG, + "event buffer expansion (need %u)", len); + rxq->event_sz = len + len / 4; + rxq->event_buf = rte_realloc(rxq->event_buf, rxq->event_sz, + RTE_CACHE_LINE_SIZE); + if (rxq->event_buf) + goto retry; + /* out of memory, no more events now */ + rxq->event_sz = 0; + break; + } + + if (unlikely(ret <= 0)) { + /* This indicates a failure to communicate (or worse) */ + rte_exit(EXIT_FAILURE, + "vmbus ring buffer error: %d", ret); + } + + bytes_read += ret; + pkt = (const struct vmbus_chanpkt_hdr *)rxq->event_buf; + data = (char *)rxq->event_buf + vmbus_chanpkt_getlen(pkt->hlen); + + switch (pkt->type) { + case VMBUS_CHANPKT_TYPE_COMP: + ++tx_done; + hn_nvs_handle_comp(dev, queue_id, pkt, data); + break; + + case VMBUS_CHANPKT_TYPE_RXBUF: + hn_nvs_handle_rxbuf(dev, hv, rxq, pkt, data); + break; + + case VMBUS_CHANPKT_TYPE_INBAND: + hn_nvs_handle_notify(dev, pkt, data); + break; + + default: + PMD_DRV_LOG(ERR, "unknown chan pkt %u", pkt->type); + break; + } + + if (tx_limit && tx_done >= tx_limit) + break; + + if (rxq->rx_ring && rte_ring_full(rxq->rx_ring)) + break; + } + + if (bytes_read > 0) + rte_vmbus_chan_signal_read(rxq->chan, bytes_read); + + rte_spinlock_unlock(&rxq->ring_lock); + + return tx_done; +} + +static void hn_append_to_chim(struct hn_tx_queue *txq, + struct rndis_packet_msg *pkt, + const struct rte_mbuf *m) +{ + struct hn_txdesc *txd = txq->agg_txd; + uint8_t *buf = (uint8_t *)pkt; + unsigned int data_offs; + + hn_rndis_dump(pkt); + + data_offs = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->dataoffset); + txd->chim_size += pkt->len; + txd->data_size += m->pkt_len; + ++txd->packets; + hn_update_packet_stats(&txq->stats, m); + + for (; m; m = m->next) { + uint16_t len = rte_pktmbuf_data_len(m); + + rte_memcpy(buf + data_offs, + rte_pktmbuf_mtod(m, const char *), len); + data_offs += len; + } +} + +/* + * Send pending aggregated data in chimney buffer (if any). + * Returns error if send was unsuccessful because channel ring buffer + * was full. + */ +static int hn_flush_txagg(struct hn_tx_queue *txq, bool *need_sig) + +{ + struct hn_txdesc *txd = txq->agg_txd; + struct hn_nvs_rndis rndis; + int ret; + + if (!txd) + return 0; + + rndis = (struct hn_nvs_rndis) { + .type = NVS_TYPE_RNDIS, + .rndis_mtype = NVS_RNDIS_MTYPE_DATA, + .chim_idx = txd->chim_index, + .chim_sz = txd->chim_size, + }; + + PMD_TX_LOG(DEBUG, "port %u:%u tx %u size %u", + txq->port_id, txq->queue_id, txd->chim_index, txd->chim_size); + + ret = hn_nvs_send(txq->chan, VMBUS_CHANPKT_FLAG_RC, + &rndis, sizeof(rndis), (uintptr_t)txd, need_sig); + + if (likely(ret == 0)) + hn_reset_txagg(txq); + else + PMD_TX_LOG(NOTICE, "port %u:%u send failed: %d", + txq->port_id, txq->queue_id, ret); + + return ret; +} + +static struct hn_txdesc *hn_new_txd(struct hn_data *hv, + struct hn_tx_queue *txq) +{ + struct hn_txdesc *txd; + + if (rte_mempool_get(hv->tx_pool, (void **)&txd)) { + ++txq->stats.ring_full; + PMD_TX_LOG(DEBUG, "tx pool exhausted!"); + return NULL; + } + + txd->m = NULL; + txd->queue_id = txq->queue_id; + txd->packets = 0; + txd->data_size = 0; + txd->chim_size = 0; + + return txd; +} + +static void * +hn_try_txagg(struct hn_data *hv, struct hn_tx_queue *txq, uint32_t pktsize) +{ + struct hn_txdesc *agg_txd = txq->agg_txd; + struct rndis_packet_msg *pkt; + void *chim; + + if (agg_txd) { + unsigned int padding, olen; + + /* + * Update the previous RNDIS packet's total length, + * it can be increased due to the mandatory alignment + * padding for this RNDIS packet. And update the + * aggregating txdesc's chimney sending buffer size + * accordingly. + * + * Zero-out the padding, as required by the RNDIS spec. + */ + pkt = txq->agg_prevpkt; + olen = pkt->len; + padding = RTE_ALIGN(olen, txq->agg_align) - olen; + if (padding > 0) { + agg_txd->chim_size += padding; + pkt->len += padding; + memset((uint8_t *)pkt + olen, 0, padding); + } + + chim = (uint8_t *)pkt + pkt->len; + + txq->agg_pktleft--; + txq->agg_szleft -= pktsize; + if (txq->agg_szleft < HN_PKTSIZE_MIN(txq->agg_align)) { + /* + * Probably can't aggregate more packets, + * flush this aggregating txdesc proactively. + */ + txq->agg_pktleft = 0; + } + } else { + agg_txd = hn_new_txd(hv, txq); + if (!agg_txd) + return NULL; + + chim = (uint8_t *)hv->chim_res->addr + + agg_txd->chim_index * hv->chim_szmax; + + txq->agg_txd = agg_txd; + txq->agg_pktleft = txq->agg_pktmax - 1; + txq->agg_szleft = txq->agg_szmax - pktsize; + } + txq->agg_prevpkt = chim; + + return chim; +} + +static inline void * +hn_rndis_pktinfo_append(struct rndis_packet_msg *pkt, + uint32_t pi_dlen, uint32_t pi_type) +{ + const uint32_t pi_size = RNDIS_PKTINFO_SIZE(pi_dlen); + struct rndis_pktinfo *pi; + + /* + * Per-packet-info does not move; it only grows. + * + * NOTE: + * pktinfooffset in this phase counts from the beginning + * of rndis_packet_msg. + */ + pi = (struct rndis_pktinfo *)((uint8_t *)pkt + hn_rndis_pktlen(pkt)); + + pkt->pktinfolen += pi_size; + + pi->size = pi_size; + pi->type = pi_type; + pi->offset = RNDIS_PKTINFO_OFFSET; + + return pi->data; +} + +/* Put RNDIS header and packet info on packet */ +static void hn_encap(struct rndis_packet_msg *pkt, + uint16_t queue_id, + const struct rte_mbuf *m) +{ + unsigned int hlen = m->l2_len + m->l3_len; + uint32_t *pi_data; + uint32_t pkt_hlen; + + pkt->type = RNDIS_PACKET_MSG; + pkt->len = m->pkt_len; + pkt->dataoffset = 0; + pkt->datalen = m->pkt_len; + pkt->oobdataoffset = 0; + pkt->oobdatalen = 0; + pkt->oobdataelements = 0; + pkt->pktinfooffset = sizeof(*pkt); + pkt->pktinfolen = 0; + pkt->vchandle = 0; + pkt->reserved = 0; + + /* + * Set the hash value for this packet, to the queue_id to cause + * TX done event for this packet on the right channel. + */ + pi_data = hn_rndis_pktinfo_append(pkt, NDIS_HASH_VALUE_SIZE, + NDIS_PKTINFO_TYPE_HASHVAL); + *pi_data = queue_id; + + if (m->ol_flags & PKT_TX_VLAN_PKT) { + pi_data = hn_rndis_pktinfo_append(pkt, NDIS_VLAN_INFO_SIZE, + NDIS_PKTINFO_TYPE_VLAN); + *pi_data = m->vlan_tci; + } + + if (m->ol_flags & PKT_TX_TCP_SEG) { + pi_data = hn_rndis_pktinfo_append(pkt, NDIS_LSO2_INFO_SIZE, + NDIS_PKTINFO_TYPE_LSO); + + if (m->ol_flags & PKT_TX_IPV6) { + *pi_data = NDIS_LSO2_INFO_MAKEIPV6(hlen, + m->tso_segsz); + } else { + *pi_data = NDIS_LSO2_INFO_MAKEIPV4(hlen, + m->tso_segsz); + } + } else if (m->ol_flags & + (PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM | PKT_TX_IP_CKSUM)) { + pi_data = hn_rndis_pktinfo_append(pkt, NDIS_TXCSUM_INFO_SIZE, + NDIS_PKTINFO_TYPE_CSUM); + *pi_data = 0; + + if (m->ol_flags & PKT_TX_IPV6) + *pi_data |= NDIS_TXCSUM_INFO_IPV6; + if (m->ol_flags & PKT_TX_IPV4) { + *pi_data |= NDIS_TXCSUM_INFO_IPV4; + + if (m->ol_flags & PKT_TX_IP_CKSUM) + *pi_data |= NDIS_TXCSUM_INFO_IPCS; + } + + if (m->ol_flags & PKT_TX_TCP_CKSUM) + *pi_data |= NDIS_TXCSUM_INFO_MKTCPCS(hlen); + else if (m->ol_flags & PKT_TX_UDP_CKSUM) + *pi_data |= NDIS_TXCSUM_INFO_MKUDPCS(hlen); + } + + pkt_hlen = pkt->pktinfooffset + pkt->pktinfolen; + /* Fixup RNDIS packet message total length */ + pkt->len += pkt_hlen; + + /* Convert RNDIS packet message offsets */ + pkt->dataoffset = hn_rndis_pktmsg_offset(pkt_hlen); + pkt->pktinfooffset = hn_rndis_pktmsg_offset(pkt->pktinfooffset); +} + +/* How many scatter gather list elements ar needed */ +static unsigned int hn_get_slots(const struct rte_mbuf *m) +{ + unsigned int slots = 1; /* for RNDIS header */ + + while (m) { + unsigned int size = rte_pktmbuf_data_len(m); + unsigned int offs = rte_mbuf_data_iova(m) & PAGE_MASK; + + slots += (offs + size + PAGE_SIZE - 1) / PAGE_SIZE; + m = m->next; + } + + return slots; +} + +/* Build scatter gather list from chained mbuf */ +static unsigned int hn_fill_sg(struct vmbus_gpa *sg, + const struct rte_mbuf *m) +{ + unsigned int segs = 0; + + while (m) { + rte_iova_t addr = rte_mbuf_data_iova(m); + unsigned int page = addr / PAGE_SIZE; + unsigned int offset = addr & PAGE_MASK; + unsigned int len = rte_pktmbuf_data_len(m); + + while (len > 0) { + unsigned int bytes = RTE_MIN(len, PAGE_SIZE - offset); + + sg[segs].page = page; + sg[segs].ofs = offset; + sg[segs].len = bytes; + segs++; + + ++page; + offset = 0; + len -= bytes; + } + m = m->next; + } + + return segs; +} + +/* Transmit directly from mbuf */ +static int hn_xmit_sg(struct hn_tx_queue *txq, + const struct hn_txdesc *txd, const struct rte_mbuf *m, + bool *need_sig) +{ + struct vmbus_gpa sg[hn_get_slots(m)]; + struct hn_nvs_rndis nvs_rndis = { + .type = NVS_TYPE_RNDIS, + .rndis_mtype = NVS_RNDIS_MTYPE_DATA, + .chim_sz = txd->chim_size, + }; + rte_iova_t addr; + unsigned int segs; + + /* attach aggregation data if present */ + if (txd->chim_size > 0) + nvs_rndis.chim_idx = txd->chim_index; + else + nvs_rndis.chim_idx = NVS_CHIM_IDX_INVALID; + + hn_rndis_dump(txd->rndis_pkt); + + /* pass IOVA of rndis header in first segment */ + addr = rte_malloc_virt2iova(txd->rndis_pkt); + if (unlikely(addr == RTE_BAD_IOVA)) { + PMD_DRV_LOG(ERR, "RNDIS transmit can not get iova"); + return -EINVAL; + } + + sg[0].page = addr / PAGE_SIZE; + sg[0].ofs = addr & PAGE_MASK; + sg[0].len = RNDIS_PACKET_MSG_OFFSET_ABS(hn_rndis_pktlen(txd->rndis_pkt)); + segs = 1; + + hn_update_packet_stats(&txq->stats, m); + + segs += hn_fill_sg(sg + 1, m); + + PMD_TX_LOG(DEBUG, "port %u:%u tx %u segs %u size %u", + txq->port_id, txq->queue_id, txd->chim_index, + segs, nvs_rndis.chim_sz); + + return hn_nvs_send_sglist(txq->chan, sg, segs, + &nvs_rndis, sizeof(nvs_rndis), + (uintptr_t)txd, need_sig); +} + +uint16_t +hn_xmit_pkts(void *ptxq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) +{ + struct hn_tx_queue *txq = ptxq; + uint16_t queue_id = txq->queue_id; + struct hn_data *hv = txq->hv; + struct rte_eth_dev *vf_dev; + bool need_sig = false; + uint16_t nb_tx; + int ret; + + if (unlikely(hv->closed)) + return 0; + + /* Transmit over VF if present and up */ + vf_dev = hn_get_vf_dev(hv); + + if (vf_dev && vf_dev->data->dev_started) { + void *sub_q = vf_dev->data->tx_queues[queue_id]; + + return (*vf_dev->tx_pkt_burst)(sub_q, tx_pkts, nb_pkts); + } + + if (rte_mempool_avail_count(hv->tx_pool) <= txq->free_thresh) + hn_process_events(hv, txq->queue_id, 0); + + for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) { + struct rte_mbuf *m = tx_pkts[nb_tx]; + uint32_t pkt_size = m->pkt_len + HN_RNDIS_PKT_LEN; + struct rndis_packet_msg *pkt; + + /* For small packets aggregate them in chimney buffer */ + if (m->pkt_len < HN_TXCOPY_THRESHOLD && pkt_size <= txq->agg_szmax) { + /* If this packet will not fit, then flush */ + if (txq->agg_pktleft == 0 || + RTE_ALIGN(pkt_size, txq->agg_align) > txq->agg_szleft) { + if (hn_flush_txagg(txq, &need_sig)) + goto fail; + } + + pkt = hn_try_txagg(hv, txq, pkt_size); + if (unlikely(!pkt)) + break; + + hn_encap(pkt, queue_id, m); + hn_append_to_chim(txq, pkt, m); + + rte_pktmbuf_free(m); + + /* if buffer is full, flush */ + if (txq->agg_pktleft == 0 && + hn_flush_txagg(txq, &need_sig)) + goto fail; + } else { + struct hn_txdesc *txd; + + /* can send chimney data and large packet at once */ + txd = txq->agg_txd; + if (txd) { + hn_reset_txagg(txq); + } else { + txd = hn_new_txd(hv, txq); + if (unlikely(!txd)) + break; + } + + pkt = txd->rndis_pkt; + txd->m = m; + txd->data_size += m->pkt_len; + ++txd->packets; + + hn_encap(pkt, queue_id, m); + + ret = hn_xmit_sg(txq, txd, m, &need_sig); + if (unlikely(ret != 0)) { + PMD_TX_LOG(NOTICE, "sg send failed: %d", ret); + ++txq->stats.errors; + rte_mempool_put(hv->tx_pool, txd); + goto fail; + } + } + } + + /* If partial buffer left, then try and send it. + * if that fails, then reuse it on next send. + */ + hn_flush_txagg(txq, &need_sig); + +fail: + if (need_sig) + rte_vmbus_chan_signal_tx(txq->chan); + + return nb_tx; +} + +static uint16_t +hn_recv_vf(uint16_t vf_port, const struct hn_rx_queue *rxq, + struct rte_mbuf **rx_pkts, uint16_t nb_pkts) +{ + uint16_t i, n; + + if (unlikely(nb_pkts == 0)) + return 0; + + n = rte_eth_rx_burst(vf_port, rxq->queue_id, rx_pkts, nb_pkts); + + /* relabel the received mbufs */ + for (i = 0; i < n; i++) + rx_pkts[i]->port = rxq->port_id; + + return n; +} + +uint16_t +hn_recv_pkts(void *prxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) +{ + struct hn_rx_queue *rxq = prxq; + struct hn_data *hv = rxq->hv; + struct rte_eth_dev *vf_dev; + uint16_t nb_rcv; + + if (unlikely(hv->closed)) + return 0; + + /* Receive from VF if present and up */ + vf_dev = hn_get_vf_dev(hv); + + /* Check for new completions */ + if (likely(rte_ring_count(rxq->rx_ring) < nb_pkts)) + hn_process_events(hv, rxq->queue_id, 0); + + /* Always check the vmbus path for multicast and new flows */ + nb_rcv = rte_ring_sc_dequeue_burst(rxq->rx_ring, + (void **)rx_pkts, nb_pkts, NULL); + + /* If VF is available, check that as well */ + if (vf_dev && vf_dev->data->dev_started) + nb_rcv += hn_recv_vf(vf_dev->data->port_id, rxq, + rx_pkts + nb_rcv, nb_pkts - nb_rcv); + + return nb_rcv; +} + +void +hn_dev_free_queues(struct rte_eth_dev *dev) +{ + unsigned int i; + + for (i = 0; i < dev->data->nb_rx_queues; i++) { + struct hn_rx_queue *rxq = dev->data->rx_queues[i]; + + hn_rx_queue_free(rxq, false); + dev->data->rx_queues[i] = NULL; + } + dev->data->nb_rx_queues = 0; + + for (i = 0; i < dev->data->nb_tx_queues; i++) { + hn_dev_tx_queue_release(dev->data->tx_queues[i]); + dev->data->tx_queues[i] = NULL; + } + dev->data->nb_tx_queues = 0; +} diff --git a/src/seastar/dpdk/drivers/net/netvsc/hn_var.h b/src/seastar/dpdk/drivers/net/netvsc/hn_var.h new file mode 100644 index 000000000..de885d898 --- /dev/null +++ b/src/seastar/dpdk/drivers/net/netvsc/hn_var.h @@ -0,0 +1,241 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2009-2018 Microsoft Corp. + * Copyright (c) 2016 Brocade Communications Systems, Inc. + * Copyright (c) 2012 NetApp Inc. + * Copyright (c) 2012 Citrix Inc. + * All rights reserved. + */ + +/* + * Tunable ethdev params + */ +#define HN_MIN_RX_BUF_SIZE 1024 +#define HN_MAX_XFER_LEN 2048 +#define HN_MAX_MAC_ADDRS 1 +#define HN_MAX_CHANNELS 64 + +/* Claimed to be 12232B */ +#define HN_MTU_MAX (9 * 1024) + +/* Retry interval */ +#define HN_CHAN_INTERVAL_US 100 + +/* Host monitor interval */ +#define HN_CHAN_LATENCY_NS 50000 + +/* Buffers need to be aligned */ +#ifndef PAGE_SIZE +#define PAGE_SIZE 4096 +#endif + +#ifndef PAGE_MASK +#define PAGE_MASK (PAGE_SIZE - 1) +#endif + +struct hn_data; +struct hn_txdesc; + +struct hn_stats { + uint64_t packets; + uint64_t bytes; + uint64_t errors; + uint64_t ring_full; + uint64_t multicast; + uint64_t broadcast; + /* Size bins in array as RFC 2819, undersized [0], 64 [1], etc */ + uint64_t size_bins[8]; +}; + +struct hn_tx_queue { + struct hn_data *hv; + struct vmbus_channel *chan; + uint16_t port_id; + uint16_t queue_id; + uint32_t free_thresh; + + /* Applied packet transmission aggregation limits. */ + uint32_t agg_szmax; + uint32_t agg_pktmax; + uint32_t agg_align; + + /* Packet transmission aggregation states */ + struct hn_txdesc *agg_txd; + uint32_t agg_pktleft; + uint32_t agg_szleft; + struct rndis_packet_msg *agg_prevpkt; + + struct hn_stats stats; +}; + +struct hn_rx_queue { + struct hn_data *hv; + struct vmbus_channel *chan; + struct rte_mempool *mb_pool; + struct rte_ring *rx_ring; + + rte_spinlock_t ring_lock; + uint32_t event_sz; + uint16_t port_id; + uint16_t queue_id; + struct hn_stats stats; + + void *event_buf; +}; + + +/* multi-packet data from host */ +struct hn_rx_bufinfo { + struct vmbus_channel *chan; + struct hn_data *hv; + uint64_t xactid; + struct rte_mbuf_ext_shared_info shinfo; +} __rte_cache_aligned; + +#define HN_INVALID_PORT UINT16_MAX + +struct hn_data { + struct rte_vmbus_device *vmbus; + struct hn_rx_queue *primary; + rte_spinlock_t vf_lock; + uint16_t port_id; + uint16_t vf_port; + + uint8_t vf_present; + uint8_t closed; + uint8_t vlan_strip; + + uint32_t link_status; + uint32_t link_speed; + + struct rte_mem_resource *rxbuf_res; /* UIO resource for Rx */ + struct hn_rx_bufinfo *rxbuf_info; + uint32_t rxbuf_section_cnt; /* # of Rx sections */ + volatile uint32_t rxbuf_outstanding; + uint16_t max_queues; /* Max available queues */ + uint16_t num_queues; + uint64_t rss_offloads; + + struct rte_mem_resource *chim_res; /* UIO resource for Tx */ + struct rte_mempool *tx_pool; /* Tx descriptors */ + uint32_t chim_szmax; /* Max size per buffer */ + uint32_t chim_cnt; /* Max packets per buffer */ + + uint32_t latency; + uint32_t nvs_ver; + uint32_t ndis_ver; + uint32_t rndis_agg_size; + uint32_t rndis_agg_pkts; + uint32_t rndis_agg_align; + + volatile uint32_t rndis_pending; + rte_atomic32_t rndis_req_id; + uint8_t rndis_resp[256]; + + struct ether_addr mac_addr; + + struct rte_eth_dev_owner owner; + struct rte_intr_handle vf_intr; + + struct vmbus_channel *channels[HN_MAX_CHANNELS]; +}; + +static inline struct vmbus_channel * +hn_primary_chan(const struct hn_data *hv) +{ + return hv->channels[0]; +} + +uint32_t hn_process_events(struct hn_data *hv, uint16_t queue_id, + uint32_t tx_limit); + +uint16_t hn_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); +uint16_t hn_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, + uint16_t nb_pkts); + +int hn_tx_pool_init(struct rte_eth_dev *dev); +void hn_tx_pool_uninit(struct rte_eth_dev *dev); +int hn_dev_link_update(struct rte_eth_dev *dev, int wait); +int hn_dev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, + uint16_t nb_desc, unsigned int socket_id, + const struct rte_eth_txconf *tx_conf); +void hn_dev_tx_queue_release(void *arg); +void hn_dev_tx_queue_info(struct rte_eth_dev *dev, uint16_t queue_idx, + struct rte_eth_txq_info *qinfo); +int hn_dev_tx_done_cleanup(void *arg, uint32_t free_cnt); + +struct hn_rx_queue *hn_rx_queue_alloc(struct hn_data *hv, + uint16_t queue_id, + unsigned int socket_id); +int hn_dev_rx_queue_setup(struct rte_eth_dev *dev, + uint16_t queue_idx, uint16_t nb_desc, + unsigned int socket_id, + const struct rte_eth_rxconf *rx_conf, + struct rte_mempool *mp); +void hn_dev_rx_queue_release(void *arg); +void hn_dev_free_queues(struct rte_eth_dev *dev); + +/* Check if VF is attached */ +static inline bool +hn_vf_attached(const struct hn_data *hv) +{ + return hv->vf_port != HN_INVALID_PORT; +} + +/* Get VF device for existing netvsc device */ +static inline struct rte_eth_dev * +hn_get_vf_dev(const struct hn_data *hv) +{ + uint16_t vf_port = hv->vf_port; + + /* make sure vf_port is loaded */ + rte_smp_rmb(); + + if (vf_port == HN_INVALID_PORT) + return NULL; + else + return &rte_eth_devices[vf_port]; +} + +void hn_vf_info_get(struct hn_data *hv, + struct rte_eth_dev_info *info); +int hn_vf_add(struct rte_eth_dev *dev, struct hn_data *hv); +int hn_vf_configure(struct rte_eth_dev *dev, + const struct rte_eth_conf *dev_conf); +const uint32_t *hn_vf_supported_ptypes(struct rte_eth_dev *dev); +int hn_vf_start(struct rte_eth_dev *dev); +void hn_vf_reset(struct rte_eth_dev *dev); +void hn_vf_stop(struct rte_eth_dev *dev); +void hn_vf_close(struct rte_eth_dev *dev); + +void hn_vf_allmulticast_enable(struct rte_eth_dev *dev); +void hn_vf_allmulticast_disable(struct rte_eth_dev *dev); +void hn_vf_promiscuous_enable(struct rte_eth_dev *dev); +void hn_vf_promiscuous_disable(struct rte_eth_dev *dev); +int hn_vf_mc_addr_list(struct rte_eth_dev *dev, + struct ether_addr *mc_addr_set, + uint32_t nb_mc_addr); + +int hn_vf_link_update(struct rte_eth_dev *dev, + int wait_to_complete); +int hn_vf_tx_queue_setup(struct rte_eth_dev *dev, + uint16_t queue_idx, uint16_t nb_desc, + unsigned int socket_id, + const struct rte_eth_txconf *tx_conf); +void hn_vf_tx_queue_release(struct hn_data *hv, uint16_t queue_id); +int hn_vf_rx_queue_setup(struct rte_eth_dev *dev, + uint16_t queue_idx, uint16_t nb_desc, + unsigned int socket_id, + const struct rte_eth_rxconf *rx_conf, + struct rte_mempool *mp); +void hn_vf_rx_queue_release(struct hn_data *hv, uint16_t queue_id); + +int hn_vf_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats); +void hn_vf_stats_reset(struct rte_eth_dev *dev); +int hn_vf_xstats_get_names(struct rte_eth_dev *dev, + struct rte_eth_xstat_name *xstats_names, + unsigned int size); +int hn_vf_xstats_get(struct rte_eth_dev *dev, + struct rte_eth_xstat *xstats, + unsigned int n); +void hn_vf_xstats_reset(struct rte_eth_dev *dev); diff --git a/src/seastar/dpdk/drivers/net/netvsc/hn_vf.c b/src/seastar/dpdk/drivers/net/netvsc/hn_vf.c new file mode 100644 index 000000000..b980bb8a4 --- /dev/null +++ b/src/seastar/dpdk/drivers/net/netvsc/hn_vf.c @@ -0,0 +1,555 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2018 Microsoft Corp. + * All rights reserved. + */ + +#include <stdio.h> +#include <stdint.h> +#include <string.h> +#include <stdbool.h> +#include <errno.h> +#include <unistd.h> +#include <dirent.h> +#include <fcntl.h> +#include <sys/types.h> +#include <sys/uio.h> + +#include <rte_ether.h> +#include <rte_ethdev.h> +#include <rte_ethdev_driver.h> +#include <rte_lcore.h> +#include <rte_memory.h> +#include <rte_bus_vmbus.h> +#include <rte_pci.h> +#include <rte_bus_pci.h> +#include <rte_log.h> +#include <rte_string_fns.h> + +#include "hn_logs.h" +#include "hn_var.h" +#include "hn_nvs.h" + +/* Search for VF with matching MAC address, return port id */ +static int hn_vf_match(const struct rte_eth_dev *dev) +{ + const struct ether_addr *mac = dev->data->mac_addrs; + int i; + + RTE_ETH_FOREACH_DEV(i) { + const struct rte_eth_dev *vf_dev = &rte_eth_devices[i]; + const struct ether_addr *vf_mac = vf_dev->data->mac_addrs; + + if (vf_dev == dev) + continue; + + if (is_same_ether_addr(mac, vf_mac)) + return i; + } + return -ENOENT; +} + + +/* + * Attach new PCI VF device and return the port_id + */ +static int hn_vf_attach(struct hn_data *hv, uint16_t port_id) +{ + struct rte_eth_dev_owner owner = { .id = RTE_ETH_DEV_NO_OWNER }; + int ret; + + if (hn_vf_attached(hv)) { + PMD_DRV_LOG(ERR, "VF already attached"); + return -EEXIST; + } + + ret = rte_eth_dev_owner_get(port_id, &owner); + if (ret < 0) { + PMD_DRV_LOG(ERR, "Can not find owner for port %d", port_id); + return ret; + } + + if (owner.id != RTE_ETH_DEV_NO_OWNER) { + PMD_DRV_LOG(ERR, "Port %u already owned by other device %s", + port_id, owner.name); + return -EBUSY; + } + + ret = rte_eth_dev_owner_set(port_id, &hv->owner); + if (ret < 0) { + PMD_DRV_LOG(ERR, "Can set owner for port %d", port_id); + return ret; + } + + PMD_DRV_LOG(DEBUG, "Attach VF device %u", port_id); + hv->vf_port = port_id; + rte_smp_wmb(); + + return 0; +} + +/* Add new VF device to synthetic device */ +int hn_vf_add(struct rte_eth_dev *dev, struct hn_data *hv) +{ + int port, err; + + port = hn_vf_match(dev); + if (port < 0) { + PMD_DRV_LOG(NOTICE, "No matching MAC found"); + return port; + } + + rte_spinlock_lock(&hv->vf_lock); + err = hn_vf_attach(hv, port); + + if (err == 0) { + dev->data->dev_flags |= RTE_ETH_DEV_INTR_LSC; + hv->vf_intr = (struct rte_intr_handle) { + .fd = -1, + .type = RTE_INTR_HANDLE_EXT, + }; + dev->intr_handle = &hv->vf_intr; + hn_nvs_set_datapath(hv, NVS_DATAPATH_VF); + } + rte_spinlock_unlock(&hv->vf_lock); + + return err; +} + +/* Remove new VF device */ +static void hn_vf_remove(struct hn_data *hv) +{ + + rte_spinlock_lock(&hv->vf_lock); + + if (!hn_vf_attached(hv)) { + PMD_DRV_LOG(ERR, "VF path not active"); + } else { + /* Stop incoming packets from arriving on VF */ + hn_nvs_set_datapath(hv, NVS_DATAPATH_SYNTHETIC); + + /* Stop transmission over VF */ + hv->vf_port = HN_INVALID_PORT; + rte_smp_wmb(); + + /* Give back ownership */ + rte_eth_dev_owner_unset(hv->vf_port, hv->owner.id); + } + rte_spinlock_unlock(&hv->vf_lock); +} + +/* Handle VF association message from host */ +void +hn_nvs_handle_vfassoc(struct rte_eth_dev *dev, + const struct vmbus_chanpkt_hdr *hdr, + const void *data) +{ + struct hn_data *hv = dev->data->dev_private; + const struct hn_nvs_vf_association *vf_assoc = data; + + if (unlikely(vmbus_chanpkt_datalen(hdr) < sizeof(*vf_assoc))) { + PMD_DRV_LOG(ERR, "invalid vf association NVS"); + return; + } + + PMD_DRV_LOG(DEBUG, "VF serial %u %s port %u", + vf_assoc->serial, + vf_assoc->allocated ? "add to" : "remove from", + dev->data->port_id); + + hv->vf_present = vf_assoc->allocated; + + if (dev->state != RTE_ETH_DEV_ATTACHED) + return; + + if (vf_assoc->allocated) + hn_vf_add(dev, hv); + else + hn_vf_remove(hv); +} + +/* + * Merge the info from the VF and synthetic path. + * use the default config of the VF + * and the minimum number of queues and buffer sizes. + */ +static void hn_vf_info_merge(struct rte_eth_dev *vf_dev, + struct rte_eth_dev_info *info) +{ + struct rte_eth_dev_info vf_info; + + rte_eth_dev_info_get(vf_dev->data->port_id, &vf_info); + + info->speed_capa = vf_info.speed_capa; + info->default_rxportconf = vf_info.default_rxportconf; + info->default_txportconf = vf_info.default_txportconf; + + info->max_rx_queues = RTE_MIN(vf_info.max_rx_queues, + info->max_rx_queues); + info->rx_offload_capa &= vf_info.rx_offload_capa; + info->rx_queue_offload_capa &= vf_info.rx_queue_offload_capa; + info->flow_type_rss_offloads &= vf_info.flow_type_rss_offloads; + + info->max_tx_queues = RTE_MIN(vf_info.max_tx_queues, + info->max_tx_queues); + info->tx_offload_capa &= vf_info.tx_offload_capa; + info->tx_queue_offload_capa &= vf_info.tx_queue_offload_capa; + + info->min_rx_bufsize = RTE_MAX(vf_info.min_rx_bufsize, + info->min_rx_bufsize); + info->max_rx_pktlen = RTE_MAX(vf_info.max_rx_pktlen, + info->max_rx_pktlen); +} + +void hn_vf_info_get(struct hn_data *hv, struct rte_eth_dev_info *info) +{ + struct rte_eth_dev *vf_dev; + + rte_spinlock_lock(&hv->vf_lock); + vf_dev = hn_get_vf_dev(hv); + if (vf_dev) + hn_vf_info_merge(vf_dev, info); + rte_spinlock_unlock(&hv->vf_lock); +} + +int hn_vf_link_update(struct rte_eth_dev *dev, + int wait_to_complete) +{ + struct hn_data *hv = dev->data->dev_private; + struct rte_eth_dev *vf_dev; + int ret = 0; + + rte_spinlock_lock(&hv->vf_lock); + vf_dev = hn_get_vf_dev(hv); + if (vf_dev && vf_dev->dev_ops->link_update) + ret = (*vf_dev->dev_ops->link_update)(vf_dev, wait_to_complete); + rte_spinlock_unlock(&hv->vf_lock); + + return ret; +} + +/* called when VF has link state interrupts enabled */ +static int hn_vf_lsc_event(uint16_t port_id __rte_unused, + enum rte_eth_event_type event, + void *cb_arg, void *out __rte_unused) +{ + struct rte_eth_dev *dev = cb_arg; + + if (event != RTE_ETH_EVENT_INTR_LSC) + return 0; + + /* if link state has changed pass on */ + if (hn_dev_link_update(dev, 0) == 0) + return 0; /* no change */ + + return _rte_eth_dev_callback_process(dev, + RTE_ETH_EVENT_INTR_LSC, + NULL); +} + +static int _hn_vf_configure(struct rte_eth_dev *dev, + uint16_t vf_port, + const struct rte_eth_conf *dev_conf) +{ + struct rte_eth_conf vf_conf = *dev_conf; + struct rte_eth_dev *vf_dev; + int ret; + + vf_dev = &rte_eth_devices[vf_port]; + if (dev_conf->intr_conf.lsc && + (vf_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) { + PMD_DRV_LOG(DEBUG, "enabling LSC for VF %u", + vf_port); + vf_conf.intr_conf.lsc = 1; + } else { + PMD_DRV_LOG(DEBUG, "disabling LSC for VF %u", + vf_port); + vf_conf.intr_conf.lsc = 0; + } + + ret = rte_eth_dev_configure(vf_port, + dev->data->nb_rx_queues, + dev->data->nb_tx_queues, + &vf_conf); + if (ret) { + PMD_DRV_LOG(ERR, + "VF configuration failed: %d", ret); + } else if (vf_conf.intr_conf.lsc) { + ret = rte_eth_dev_callback_register(vf_port, + RTE_ETH_DEV_INTR_LSC, + hn_vf_lsc_event, dev); + if (ret) + PMD_DRV_LOG(ERR, + "Failed to register LSC callback for VF %u", + vf_port); + } + return ret; +} + +/* + * Configure VF if present. + * Force VF to have same number of queues as synthetic device + */ +int hn_vf_configure(struct rte_eth_dev *dev, + const struct rte_eth_conf *dev_conf) +{ + struct hn_data *hv = dev->data->dev_private; + int ret = 0; + + rte_spinlock_lock(&hv->vf_lock); + if (hv->vf_port != HN_INVALID_PORT) + ret = _hn_vf_configure(dev, hv->vf_port, dev_conf); + rte_spinlock_unlock(&hv->vf_lock); + return ret; +} + +const uint32_t *hn_vf_supported_ptypes(struct rte_eth_dev *dev) +{ + struct hn_data *hv = dev->data->dev_private; + struct rte_eth_dev *vf_dev; + const uint32_t *ptypes = NULL; + + rte_spinlock_lock(&hv->vf_lock); + vf_dev = hn_get_vf_dev(hv); + if (vf_dev && vf_dev->dev_ops->dev_supported_ptypes_get) + ptypes = (*vf_dev->dev_ops->dev_supported_ptypes_get)(vf_dev); + rte_spinlock_unlock(&hv->vf_lock); + + return ptypes; +} + +int hn_vf_start(struct rte_eth_dev *dev) +{ + struct hn_data *hv = dev->data->dev_private; + struct rte_eth_dev *vf_dev; + int ret = 0; + + rte_spinlock_lock(&hv->vf_lock); + vf_dev = hn_get_vf_dev(hv); + if (vf_dev) + ret = rte_eth_dev_start(vf_dev->data->port_id); + rte_spinlock_unlock(&hv->vf_lock); + return ret; +} + +void hn_vf_stop(struct rte_eth_dev *dev) +{ + struct hn_data *hv = dev->data->dev_private; + struct rte_eth_dev *vf_dev; + + rte_spinlock_lock(&hv->vf_lock); + vf_dev = hn_get_vf_dev(hv); + if (vf_dev) + rte_eth_dev_stop(vf_dev->data->port_id); + rte_spinlock_unlock(&hv->vf_lock); +} + +/* If VF is present, then cascade configuration down */ +#define VF_ETHDEV_FUNC(dev, func) \ + { \ + struct hn_data *hv = (dev)->data->dev_private; \ + struct rte_eth_dev *vf_dev; \ + rte_spinlock_lock(&hv->vf_lock); \ + vf_dev = hn_get_vf_dev(hv); \ + if (vf_dev) \ + func(vf_dev->data->port_id); \ + rte_spinlock_unlock(&hv->vf_lock); \ + } + +void hn_vf_reset(struct rte_eth_dev *dev) +{ + VF_ETHDEV_FUNC(dev, rte_eth_dev_reset); +} + +void hn_vf_close(struct rte_eth_dev *dev) +{ + struct hn_data *hv = dev->data->dev_private; + uint16_t vf_port; + + rte_spinlock_lock(&hv->vf_lock); + vf_port = hv->vf_port; + if (vf_port != HN_INVALID_PORT) + rte_eth_dev_close(vf_port); + + hv->vf_port = HN_INVALID_PORT; + rte_spinlock_unlock(&hv->vf_lock); +} + +void hn_vf_stats_reset(struct rte_eth_dev *dev) +{ + VF_ETHDEV_FUNC(dev, rte_eth_stats_reset); +} + +void hn_vf_allmulticast_enable(struct rte_eth_dev *dev) +{ + VF_ETHDEV_FUNC(dev, rte_eth_allmulticast_enable); +} + +void hn_vf_allmulticast_disable(struct rte_eth_dev *dev) +{ + VF_ETHDEV_FUNC(dev, rte_eth_allmulticast_disable); +} + +void hn_vf_promiscuous_enable(struct rte_eth_dev *dev) +{ + VF_ETHDEV_FUNC(dev, rte_eth_promiscuous_enable); +} + +void hn_vf_promiscuous_disable(struct rte_eth_dev *dev) +{ + VF_ETHDEV_FUNC(dev, rte_eth_promiscuous_disable); +} + +int hn_vf_mc_addr_list(struct rte_eth_dev *dev, + struct ether_addr *mc_addr_set, + uint32_t nb_mc_addr) +{ + struct hn_data *hv = dev->data->dev_private; + struct rte_eth_dev *vf_dev; + int ret = 0; + + rte_spinlock_lock(&hv->vf_lock); + vf_dev = hn_get_vf_dev(hv); + if (vf_dev) + ret = rte_eth_dev_set_mc_addr_list(vf_dev->data->port_id, + mc_addr_set, nb_mc_addr); + rte_spinlock_unlock(&hv->vf_lock); + return ret; +} + +int hn_vf_tx_queue_setup(struct rte_eth_dev *dev, + uint16_t queue_idx, uint16_t nb_desc, + unsigned int socket_id, + const struct rte_eth_txconf *tx_conf) +{ + struct hn_data *hv = dev->data->dev_private; + struct rte_eth_dev *vf_dev; + int ret = 0; + + rte_spinlock_lock(&hv->vf_lock); + vf_dev = hn_get_vf_dev(hv); + if (vf_dev) + ret = rte_eth_tx_queue_setup(vf_dev->data->port_id, + queue_idx, nb_desc, + socket_id, tx_conf); + rte_spinlock_unlock(&hv->vf_lock); + return ret; +} + +void hn_vf_tx_queue_release(struct hn_data *hv, uint16_t queue_id) +{ + struct rte_eth_dev *vf_dev; + + rte_spinlock_lock(&hv->vf_lock); + vf_dev = hn_get_vf_dev(hv); + if (vf_dev && vf_dev->dev_ops->tx_queue_release) { + void *subq = vf_dev->data->tx_queues[queue_id]; + + (*vf_dev->dev_ops->tx_queue_release)(subq); + } + + rte_spinlock_unlock(&hv->vf_lock); +} + +int hn_vf_rx_queue_setup(struct rte_eth_dev *dev, + uint16_t queue_idx, uint16_t nb_desc, + unsigned int socket_id, + const struct rte_eth_rxconf *rx_conf, + struct rte_mempool *mp) +{ + struct hn_data *hv = dev->data->dev_private; + struct rte_eth_dev *vf_dev; + int ret = 0; + + rte_spinlock_lock(&hv->vf_lock); + vf_dev = hn_get_vf_dev(hv); + if (vf_dev) + ret = rte_eth_rx_queue_setup(vf_dev->data->port_id, + queue_idx, nb_desc, + socket_id, rx_conf, mp); + rte_spinlock_unlock(&hv->vf_lock); + return ret; +} + +void hn_vf_rx_queue_release(struct hn_data *hv, uint16_t queue_id) +{ + struct rte_eth_dev *vf_dev; + + rte_spinlock_lock(&hv->vf_lock); + vf_dev = hn_get_vf_dev(hv); + if (vf_dev && vf_dev->dev_ops->rx_queue_release) { + void *subq = vf_dev->data->rx_queues[queue_id]; + + (*vf_dev->dev_ops->rx_queue_release)(subq); + } + rte_spinlock_unlock(&hv->vf_lock); +} + +int hn_vf_stats_get(struct rte_eth_dev *dev, + struct rte_eth_stats *stats) +{ + struct hn_data *hv = dev->data->dev_private; + struct rte_eth_dev *vf_dev; + int ret = 0; + + rte_spinlock_lock(&hv->vf_lock); + vf_dev = hn_get_vf_dev(hv); + if (vf_dev) + ret = rte_eth_stats_get(vf_dev->data->port_id, stats); + rte_spinlock_unlock(&hv->vf_lock); + return ret; +} + +int hn_vf_xstats_get_names(struct rte_eth_dev *dev, + struct rte_eth_xstat_name *names, + unsigned int n) +{ + struct hn_data *hv = dev->data->dev_private; + struct rte_eth_dev *vf_dev; + int i, count = 0; + char tmp[RTE_ETH_XSTATS_NAME_SIZE]; + + rte_spinlock_lock(&hv->vf_lock); + vf_dev = hn_get_vf_dev(hv); + if (vf_dev && vf_dev->dev_ops->xstats_get_names) + count = vf_dev->dev_ops->xstats_get_names(vf_dev, names, n); + rte_spinlock_unlock(&hv->vf_lock); + + /* add vf_ prefix to xstat names */ + if (names) { + for (i = 0; i < count; i++) { + snprintf(tmp, sizeof(tmp), "vf_%s", names[i].name); + strlcpy(names[i].name, tmp, sizeof(names[i].name)); + } + } + + return count; +} + +int hn_vf_xstats_get(struct rte_eth_dev *dev, + struct rte_eth_xstat *xstats, + unsigned int n) +{ + struct hn_data *hv = dev->data->dev_private; + struct rte_eth_dev *vf_dev; + int count = 0; + + rte_spinlock_lock(&hv->vf_lock); + vf_dev = hn_get_vf_dev(hv); + if (vf_dev && vf_dev->dev_ops->xstats_get) + count = vf_dev->dev_ops->xstats_get(vf_dev, xstats, n); + rte_spinlock_unlock(&hv->vf_lock); + + return count; +} + +void hn_vf_xstats_reset(struct rte_eth_dev *dev) +{ + struct hn_data *hv = dev->data->dev_private; + struct rte_eth_dev *vf_dev; + + rte_spinlock_lock(&hv->vf_lock); + vf_dev = hn_get_vf_dev(hv); + if (vf_dev && vf_dev->dev_ops->xstats_reset) + vf_dev->dev_ops->xstats_reset(vf_dev); + rte_spinlock_unlock(&hv->vf_lock); +} diff --git a/src/seastar/dpdk/drivers/net/netvsc/meson.build b/src/seastar/dpdk/drivers/net/netvsc/meson.build new file mode 100644 index 000000000..c84269716 --- /dev/null +++ b/src/seastar/dpdk/drivers/net/netvsc/meson.build @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2018 Microsoft Corporation + +build = dpdk_conf.has('RTE_LIBRTE_VMBUS_BUS') +version = 2 +sources = files('hn_ethdev.c', 'hn_rxtx.c', 'hn_rndis.c', 'hn_nvs.c', 'hn_vf.c') + +deps += ['bus_vmbus' ] + +allow_experimental_apis = true diff --git a/src/seastar/dpdk/drivers/net/netvsc/ndis.h b/src/seastar/dpdk/drivers/net/netvsc/ndis.h new file mode 100644 index 000000000..2e7ca99b1 --- /dev/null +++ b/src/seastar/dpdk/drivers/net/netvsc/ndis.h @@ -0,0 +1,378 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2018 Microsoft Corp. + * All rights reserved. + */ + +#ifndef _NET_NDIS_H_ +#define _NET_NDIS_H_ + +#define NDIS_MEDIA_STATE_CONNECTED 0 +#define NDIS_MEDIA_STATE_DISCONNECTED 1 + +#define NDIS_NETCHANGE_TYPE_POSSIBLE 1 +#define NDIS_NETCHANGE_TYPE_DEFINITE 2 +#define NDIS_NETCHANGE_TYPE_FROMMEDIA 3 + +#define NDIS_OFFLOAD_SET_NOCHG 0 +#define NDIS_OFFLOAD_SET_ON 1 +#define NDIS_OFFLOAD_SET_OFF 2 + +/* a.k.a GRE MAC */ +#define NDIS_ENCAP_TYPE_NVGRE 0x00000001 + +#define NDIS_HASH_FUNCTION_MASK 0x000000FF /* see hash function */ +#define NDIS_HASH_TYPE_MASK 0x00FFFF00 /* see hash type */ + +/* hash function */ +#define NDIS_HASH_FUNCTION_TOEPLITZ 0x00000001 + +/* hash type */ +#define NDIS_HASH_IPV4 0x00000100 +#define NDIS_HASH_TCP_IPV4 0x00000200 +#define NDIS_HASH_IPV6 0x00000400 +#define NDIS_HASH_IPV6_EX 0x00000800 +#define NDIS_HASH_TCP_IPV6 0x00001000 +#define NDIS_HASH_TCP_IPV6_EX 0x00002000 + +#define NDIS_HASH_KEYSIZE_TOEPLITZ 40 +#define NDIS_HASH_INDCNT 128 + +#define NDIS_OBJTYPE_DEFAULT 0x80 +#define NDIS_OBJTYPE_RSS_CAPS 0x88 +#define NDIS_OBJTYPE_RSS_PARAMS 0x89 +#define NDIS_OBJTYPE_OFFLOAD 0xa7 + +struct ndis_object_hdr { + uint8_t ndis_type; /* NDIS_OBJTYPE_ */ + uint8_t ndis_rev; /* type specific */ + uint16_t ndis_size; /* incl. this hdr */ +} __rte_packed; + +/* + * OID_TCP_OFFLOAD_PARAMETERS + * ndis_type: NDIS_OBJTYPE_DEFAULT + */ +struct ndis_offload_params { + struct ndis_object_hdr ndis_hdr; + uint8_t ndis_ip4csum; /* NDIS_OFFLOAD_PARAM_ */ + uint8_t ndis_tcp4csum; /* NDIS_OFFLOAD_PARAM_ */ + uint8_t ndis_udp4csum; /* NDIS_OFFLOAD_PARAM_ */ + uint8_t ndis_tcp6csum; /* NDIS_OFFLOAD_PARAM_ */ + uint8_t ndis_udp6csum; /* NDIS_OFFLOAD_PARAM_ */ + uint8_t ndis_lsov1; /* NDIS_OFFLOAD_PARAM_ */ + uint8_t ndis_ipsecv1; /* NDIS_OFFLOAD_IPSECV1_ */ + uint8_t ndis_lsov2_ip4; /* NDIS_OFFLOAD_LSOV2_ */ + uint8_t ndis_lsov2_ip6; /* NDIS_OFFLOAD_LSOV2_ */ + uint8_t ndis_tcp4conn; /* 0 */ + uint8_t ndis_tcp6conn; /* 0 */ + uint32_t ndis_flags; /* 0 */ + /* NDIS >= 6.1 */ + uint8_t ndis_ipsecv2; /* NDIS_OFFLOAD_IPSECV2_ */ + uint8_t ndis_ipsecv2_ip4;/* NDIS_OFFLOAD_IPSECV2_ */ + /* NDIS >= 6.30 */ + uint8_t ndis_rsc_ip4; /* NDIS_OFFLOAD_RSC_ */ + uint8_t ndis_rsc_ip6; /* NDIS_OFFLOAD_RSC_ */ + uint8_t ndis_encap; /* NDIS_OFFLOAD_SET_ */ + uint8_t ndis_encap_types;/* NDIS_ENCAP_TYPE_ */ +}; + +#define NDIS_OFFLOAD_PARAMS_SIZE sizeof(struct ndis_offload_params) +#define NDIS_OFFLOAD_PARAMS_SIZE_6_1 \ + offsetof(struct ndis_offload_params, ndis_rsc_ip4) + +#define NDIS_OFFLOAD_PARAMS_REV_2 2 /* NDIS 6.1 */ +#define NDIS_OFFLOAD_PARAMS_REV_3 3 /* NDIS 6.30 */ + +#define NDIS_OFFLOAD_PARAM_NOCHG 0 /* common */ +#define NDIS_OFFLOAD_PARAM_OFF 1 +#define NDIS_OFFLOAD_PARAM_TX 2 +#define NDIS_OFFLOAD_PARAM_RX 3 +#define NDIS_OFFLOAD_PARAM_TXRX 4 + +/* NDIS_OFFLOAD_PARAM_NOCHG */ +#define NDIS_OFFLOAD_LSOV1_OFF 1 +#define NDIS_OFFLOAD_LSOV1_ON 2 + +/* NDIS_OFFLOAD_PARAM_NOCHG */ +#define NDIS_OFFLOAD_IPSECV1_OFF 1 +#define NDIS_OFFLOAD_IPSECV1_AH 2 +#define NDIS_OFFLOAD_IPSECV1_ESP 3 +#define NDIS_OFFLOAD_IPSECV1_AH_ESP 4 + +/* NDIS_OFFLOAD_PARAM_NOCHG */ +#define NDIS_OFFLOAD_LSOV2_OFF 1 +#define NDIS_OFFLOAD_LSOV2_ON 2 + +/* NDIS_OFFLOAD_PARAM_NOCHG */ +#define NDIS_OFFLOAD_IPSECV2_OFF 1 +#define NDIS_OFFLOAD_IPSECV2_AH 2 +#define NDIS_OFFLOAD_IPSECV2_ESP 3 +#define NDIS_OFFLOAD_IPSECV2_AH_ESP 4 + +/* NDIS_OFFLOAD_PARAM_NOCHG */ +#define NDIS_OFFLOAD_RSC_OFF 1 +#define NDIS_OFFLOAD_RSC_ON 2 + +/* + * OID_GEN_RECEIVE_SCALE_CAPABILITIES + * ndis_type: NDIS_OBJTYPE_RSS_CAPS + */ +struct ndis_rss_caps { + struct ndis_object_hdr ndis_hdr; + uint32_t ndis_caps; /* NDIS_RSS_CAP_ */ + uint32_t ndis_nmsi; /* # of MSIs */ + uint32_t ndis_nrxr; /* # of RX rings */ + /* NDIS >= 6.30 */ + uint16_t ndis_nind; /* # of indtbl ent. */ + uint16_t ndis_pad; +} __rte_packed; + +#define NDIS_RSS_CAPS_SIZE \ + offsetof(struct ndis_rss_caps, ndis_pad) +#define NDIS_RSS_CAPS_SIZE_6_0 \ + offsetof(struct ndis_rss_caps, ndis_nind) + +#define NDIS_RSS_CAPS_REV_1 1 /* NDIS 6.{0,1,20} */ +#define NDIS_RSS_CAPS_REV_2 2 /* NDIS 6.30 */ + +#define NDIS_RSS_CAP_MSI 0x01000000 +#define NDIS_RSS_CAP_CLASSIFY_ISR 0x02000000 +#define NDIS_RSS_CAP_CLASSIFY_DPC 0x04000000 +#define NDIS_RSS_CAP_MSIX 0x08000000 +#define NDIS_RSS_CAP_IPV4 0x00000100 +#define NDIS_RSS_CAP_IPV6 0x00000200 +#define NDIS_RSS_CAP_IPV6_EX 0x00000400 +#define NDIS_RSS_CAP_HASH_TOEPLITZ NDIS_HASH_FUNCTION_TOEPLITZ +#define NDIS_RSS_CAP_HASHFUNC_MASK NDIS_HASH_FUNCTION_MASK + +/* + * OID_GEN_RECEIVE_SCALE_PARAMETERS + * ndis_type: NDIS_OBJTYPE_RSS_PARAMS + */ +struct ndis_rss_params { + struct ndis_object_hdr ndis_hdr; + uint16_t ndis_flags; /* NDIS_RSS_FLAG_ */ + uint16_t ndis_bcpu; /* base cpu 0 */ + uint32_t ndis_hash; /* NDIS_HASH_ */ + uint16_t ndis_indsize; /* indirect table */ + uint32_t ndis_indoffset; + uint16_t ndis_keysize; /* hash key */ + uint32_t ndis_keyoffset; + /* NDIS >= 6.20 */ + uint32_t ndis_cpumaskoffset; + uint32_t ndis_cpumaskcnt; + uint32_t ndis_cpumaskentsz; +}; + +#define NDIS_RSS_PARAMS_SIZE sizeof(struct ndis_rss_params) +#define NDIS_RSS_PARAMS_SIZE_6_0 \ + offsetof(struct ndis_rss_params, ndis_cpumaskoffset) + +#define NDIS_RSS_PARAMS_REV_1 1 /* NDIS 6.0 */ +#define NDIS_RSS_PARAMS_REV_2 2 /* NDIS 6.20 */ + +#define NDIS_RSS_FLAG_NONE 0x0000 +#define NDIS_RSS_FLAG_BCPU_UNCHG 0x0001 +#define NDIS_RSS_FLAG_HASH_UNCHG 0x0002 +#define NDIS_RSS_FLAG_IND_UNCHG 0x0004 +#define NDIS_RSS_FLAG_KEY_UNCHG 0x0008 +#define NDIS_RSS_FLAG_DISABLE 0x0010 + +/* non-standard convenient struct */ +struct ndis_rssprm_toeplitz { + struct ndis_rss_params rss_params; + /* Indirect table */ + uint32_t rss_ind[NDIS_HASH_INDCNT]; + /* Toeplitz hash key */ + uint8_t rss_key[NDIS_HASH_KEYSIZE_TOEPLITZ]; +}; + +#define NDIS_RSSPRM_TOEPLITZ_SIZE(nind) \ + offsetof(struct ndis_rssprm_toeplitz, rss_ind[nind]) + +/* + * OID_TCP_OFFLOAD_HARDWARE_CAPABILITIES + * ndis_type: NDIS_OBJTYPE_OFFLOAD + */ + +#define NDIS_OFFLOAD_ENCAP_NONE 0x0000 +#define NDIS_OFFLOAD_ENCAP_NULL 0x0001 +#define NDIS_OFFLOAD_ENCAP_8023 0x0002 +#define NDIS_OFFLOAD_ENCAP_8023PQ 0x0004 +#define NDIS_OFFLOAD_ENCAP_8023PQ_OOB 0x0008 +#define NDIS_OFFLOAD_ENCAP_RFC1483 0x0010 + +struct ndis_csum_offload { + uint32_t ndis_ip4_txenc; /*NDIS_OFFLOAD_ENCAP_*/ + uint32_t ndis_ip4_txcsum; +#define NDIS_TXCSUM_CAP_IP4OPT 0x001 +#define NDIS_TXCSUM_CAP_TCP4OPT 0x004 +#define NDIS_TXCSUM_CAP_TCP4 0x010 +#define NDIS_TXCSUM_CAP_UDP4 0x040 +#define NDIS_TXCSUM_CAP_IP4 0x100 + uint32_t ndis_ip4_rxenc; /*NDIS_OFFLOAD_ENCAP_*/ + uint32_t ndis_ip4_rxcsum; +#define NDIS_RXCSUM_CAP_IP4OPT 0x001 +#define NDIS_RXCSUM_CAP_TCP4OPT 0x004 +#define NDIS_RXCSUM_CAP_TCP4 0x010 +#define NDIS_RXCSUM_CAP_UDP4 0x040 +#define NDIS_RXCSUM_CAP_IP4 0x100 + uint32_t ndis_ip6_txenc; /*NDIS_OFFLOAD_ENCAP_*/ + uint32_t ndis_ip6_txcsum; +#define NDIS_TXCSUM_CAP_IP6EXT 0x001 +#define NDIS_TXCSUM_CAP_TCP6OPT 0x004 +#define NDIS_TXCSUM_CAP_TCP6 0x010 +#define NDIS_TXCSUM_CAP_UDP6 0x040 + uint32_t ndis_ip6_rxenc; /*NDIS_OFFLOAD_ENCAP_*/ + uint32_t ndis_ip6_rxcsum; +#define NDIS_RXCSUM_CAP_IP6EXT 0x001 +#define NDIS_RXCSUM_CAP_TCP6OPT 0x004 +#define NDIS_RXCSUM_CAP_TCP6 0x010 +#define NDIS_RXCSUM_CAP_UDP6 0x040 +}; + +struct ndis_lsov1_offload { + uint32_t ndis_encap; /*NDIS_OFFLOAD_ENCAP_*/ + uint32_t ndis_maxsize; + uint32_t ndis_minsegs; + uint32_t ndis_opts; +}; + +struct ndis_ipsecv1_offload { + uint32_t ndis_encap; /*NDIS_OFFLOAD_ENCAP_*/ + uint32_t ndis_ah_esp; + uint32_t ndis_xport_tun; + uint32_t ndis_ip4_opts; + uint32_t ndis_flags; + uint32_t ndis_ip4_ah; + uint32_t ndis_ip4_esp; +}; + +struct ndis_lsov2_offload { + uint32_t ndis_ip4_encap; /*NDIS_OFFLOAD_ENCAP_*/ + uint32_t ndis_ip4_maxsz; + uint32_t ndis_ip4_minsg; + uint32_t ndis_ip6_encap; /*NDIS_OFFLOAD_ENCAP_*/ + uint32_t ndis_ip6_maxsz; + uint32_t ndis_ip6_minsg; + uint32_t ndis_ip6_opts; +#define NDIS_LSOV2_CAP_IP6EXT 0x001 +#define NDIS_LSOV2_CAP_TCP6OPT 0x004 +}; + +struct ndis_ipsecv2_offload { + uint32_t ndis_encap; /*NDIS_OFFLOAD_ENCAP_*/ + uint16_t ndis_ip6; + uint16_t ndis_ip4opt; + uint16_t ndis_ip6ext; + uint16_t ndis_ah; + uint16_t ndis_esp; + uint16_t ndis_ah_esp; + uint16_t ndis_xport; + uint16_t ndis_tun; + uint16_t ndis_xport_tun; + uint16_t ndis_lso; + uint16_t ndis_extseq; + uint32_t ndis_udp_esp; + uint32_t ndis_auth; + uint32_t ndis_crypto; + uint32_t ndis_sa_caps; +}; + +struct ndis_rsc_offload { + uint16_t ndis_ip4; + uint16_t ndis_ip6; +}; + +struct ndis_encap_offload { + uint32_t ndis_flags; + uint32_t ndis_maxhdr; +}; + +struct ndis_offload { + struct ndis_object_hdr ndis_hdr; + struct ndis_csum_offload ndis_csum; + struct ndis_lsov1_offload ndis_lsov1; + struct ndis_ipsecv1_offload ndis_ipsecv1; + struct ndis_lsov2_offload ndis_lsov2; + uint32_t ndis_flags; + /* NDIS >= 6.1 */ + struct ndis_ipsecv2_offload ndis_ipsecv2; + /* NDIS >= 6.30 */ + struct ndis_rsc_offload ndis_rsc; + struct ndis_encap_offload ndis_encap_gre; +}; + +#define NDIS_OFFLOAD_SIZE sizeof(struct ndis_offload) +#define NDIS_OFFLOAD_SIZE_6_0 offsetof(struct ndis_offload, ndis_ipsecv2) +#define NDIS_OFFLOAD_SIZE_6_1 offsetof(struct ndis_offload, ndis_rsc) + +#define NDIS_OFFLOAD_REV_1 1 /* NDIS 6.0 */ +#define NDIS_OFFLOAD_REV_2 2 /* NDIS 6.1 */ +#define NDIS_OFFLOAD_REV_3 3 /* NDIS 6.30 */ + +/* + * Per-packet-info + */ + +/* VLAN */ +#define NDIS_VLAN_INFO_SIZE sizeof(uint32_t) +#define NDIS_VLAN_INFO_PRI_MASK 0x0007 +#define NDIS_VLAN_INFO_CFI_MASK 0x0008 +#define NDIS_VLAN_INFO_ID_MASK 0xfff0 +#define NDIS_VLAN_INFO_MAKE(id, pri, cfi) \ + (((pri) & NDIS_VLAN_INFO_PRI_MASK) | \ + (((cfi) & 0x1) << 3) | (((id) & 0xfff) << 4)) +#define NDIS_VLAN_INFO_ID(inf) (((inf) & NDIS_VLAN_INFO_ID_MASK) >> 4) +#define NDIS_VLAN_INFO_CFI(inf) (((inf) & NDIS_VLAN_INFO_CFI_MASK) >> 3) +#define NDIS_VLAN_INFO_PRI(inf) ((inf) & NDIS_VLAN_INFO_PRI_MASK) + +/* Reception checksum */ +#define NDIS_RXCSUM_INFO_SIZE sizeof(uint32_t) +#define NDIS_RXCSUM_INFO_TCPCS_FAILED 0x0001 +#define NDIS_RXCSUM_INFO_UDPCS_FAILED 0x0002 +#define NDIS_RXCSUM_INFO_IPCS_FAILED 0x0004 +#define NDIS_RXCSUM_INFO_TCPCS_OK 0x0008 +#define NDIS_RXCSUM_INFO_UDPCS_OK 0x0010 +#define NDIS_RXCSUM_INFO_IPCS_OK 0x0020 +#define NDIS_RXCSUM_INFO_LOOPBACK 0x0040 +#define NDIS_RXCSUM_INFO_TCPCS_INVAL 0x0080 +#define NDIS_RXCSUM_INFO_IPCS_INVAL 0x0100 + +/* LSOv2 */ +#define NDIS_LSO2_INFO_SIZE sizeof(uint32_t) +#define NDIS_LSO2_INFO_MSS_MASK 0x000fffff +#define NDIS_LSO2_INFO_THOFF_MASK 0x3ff00000 +#define NDIS_LSO2_INFO_ISLSO2 0x40000000 +#define NDIS_LSO2_INFO_ISIPV6 0x80000000 + +#define NDIS_LSO2_INFO_MAKE(thoff, mss) \ + ((((uint32_t)(mss)) & NDIS_LSO2_INFO_MSS_MASK) | \ + ((((uint32_t)(thoff)) & 0x3ff) << 20) | \ + NDIS_LSO2_INFO_ISLSO2) + +#define NDIS_LSO2_INFO_MAKEIPV4(thoff, mss) \ + NDIS_LSO2_INFO_MAKE((thoff), (mss)) + +#define NDIS_LSO2_INFO_MAKEIPV6(thoff, mss) \ + (NDIS_LSO2_INFO_MAKE((thoff), (mss)) | NDIS_LSO2_INFO_ISIPV6) + +/* Transmission checksum */ +#define NDIS_TXCSUM_INFO_SIZE sizeof(uint32_t) +#define NDIS_TXCSUM_INFO_IPV4 0x00000001 +#define NDIS_TXCSUM_INFO_IPV6 0x00000002 +#define NDIS_TXCSUM_INFO_TCPCS 0x00000004 +#define NDIS_TXCSUM_INFO_UDPCS 0x00000008 +#define NDIS_TXCSUM_INFO_IPCS 0x00000010 +#define NDIS_TXCSUM_INFO_THOFF 0x03ff0000 + +#define NDIS_TXCSUM_INFO_MKL4CS(thoff, flag) \ + ((((uint32_t)(thoff)) << 16) | (flag)) + +#define NDIS_TXCSUM_INFO_MKTCPCS(thoff) \ + NDIS_TXCSUM_INFO_MKL4CS((thoff), NDIS_TXCSUM_INFO_TCPCS) + +#define NDIS_TXCSUM_INFO_MKUDPCS(thoff) \ + NDIS_TXCSUM_INFO_MKL4CS((thoff), NDIS_TXCSUM_INFO_UDPCS) + +#endif /* !_NET_NDIS_H_ */ diff --git a/src/seastar/dpdk/drivers/net/netvsc/rndis.h b/src/seastar/dpdk/drivers/net/netvsc/rndis.h new file mode 100644 index 000000000..eac9a99fd --- /dev/null +++ b/src/seastar/dpdk/drivers/net/netvsc/rndis.h @@ -0,0 +1,414 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2018 Microsoft Corp. + * Copyright (c) 2010 Jonathan Armani <armani@openbsd.org> + * Copyright (c) 2010 Fabien Romano <fabien@openbsd.org> + * Copyright (c) 2010 Michael Knudsen <mk@openbsd.org> + * All rights reserved. + */ + +#ifndef _NET_RNDIS_H_ +#define _NET_RNDIS_H_ + +/* Canonical major/minor version as of 22th Aug. 2016. */ +#define RNDIS_VERSION_MAJOR 0x00000001 +#define RNDIS_VERSION_MINOR 0x00000000 + +#define RNDIS_STATUS_SUCCESS 0x00000000 +#define RNDIS_STATUS_PENDING 0x00000103 + +#define RNDIS_STATUS_ONLINE 0x40010003 +#define RNDIS_STATUS_RESET_START 0x40010004 +#define RNDIS_STATUS_RESET_END 0x40010005 +#define RNDIS_STATUS_RING_STATUS 0x40010006 +#define RNDIS_STATUS_CLOSED 0x40010007 +#define RNDIS_STATUS_WAN_LINE_UP 0x40010008 +#define RNDIS_STATUS_WAN_LINE_DOWN 0x40010009 +#define RNDIS_STATUS_WAN_FRAGMENT 0x4001000A +#define RNDIS_STATUS_MEDIA_CONNECT 0x4001000B +#define RNDIS_STATUS_MEDIA_DISCONNECT 0x4001000C +#define RNDIS_STATUS_HARDWARE_LINE_UP 0x4001000D +#define RNDIS_STATUS_HARDWARE_LINE_DOWN 0x4001000E +#define RNDIS_STATUS_INTERFACE_UP 0x4001000F +#define RNDIS_STATUS_INTERFACE_DOWN 0x40010010 +#define RNDIS_STATUS_MEDIA_BUSY 0x40010011 +#define RNDIS_STATUS_MEDIA_SPECIFIC_INDICATION 0x40010012 +#define RNDIS_STATUS_WW_INDICATION RDIA_SPECIFIC_INDICATION +#define RNDIS_STATUS_LINK_SPEED_CHANGE 0x40010013 +#define RNDIS_STATUS_NETWORK_CHANGE 0x40010018 +#define RNDIS_STATUS_TASK_OFFLOAD_CURRENT_CONFIG 0x40020006 + +#define RNDIS_STATUS_FAILURE 0xC0000001 +#define RNDIS_STATUS_RESOURCES 0xC000009A +#define RNDIS_STATUS_NOT_SUPPORTED 0xC00000BB +#define RNDIS_STATUS_CLOSING 0xC0010002 +#define RNDIS_STATUS_BAD_VERSION 0xC0010004 +#define RNDIS_STATUS_BAD_CHARACTERISTICS 0xC0010005 +#define RNDIS_STATUS_ADAPTER_NOT_FOUND 0xC0010006 +#define RNDIS_STATUS_OPEN_FAILED 0xC0010007 +#define RNDIS_STATUS_DEVICE_FAILED 0xC0010008 +#define RNDIS_STATUS_MULTICAST_FULL 0xC0010009 +#define RNDIS_STATUS_MULTICAST_EXISTS 0xC001000A +#define RNDIS_STATUS_MULTICAST_NOT_FOUND 0xC001000B +#define RNDIS_STATUS_REQUEST_ABORTED 0xC001000C +#define RNDIS_STATUS_RESET_IN_PROGRESS 0xC001000D +#define RNDIS_STATUS_CLOSING_INDICATING 0xC001000E +#define RNDIS_STATUS_INVALID_PACKET 0xC001000F +#define RNDIS_STATUS_OPEN_LIST_FULL 0xC0010010 +#define RNDIS_STATUS_ADAPTER_NOT_READY 0xC0010011 +#define RNDIS_STATUS_ADAPTER_NOT_OPEN 0xC0010012 +#define RNDIS_STATUS_NOT_INDICATING 0xC0010013 +#define RNDIS_STATUS_INVALID_LENGTH 0xC0010014 +#define RNDIS_STATUS_INVALID_DATA 0xC0010015 +#define RNDIS_STATUS_BUFFER_TOO_SHORT 0xC0010016 +#define RNDIS_STATUS_INVALID_OID 0xC0010017 +#define RNDIS_STATUS_ADAPTER_REMOVED 0xC0010018 +#define RNDIS_STATUS_UNSUPPORTED_MEDIA 0xC0010019 +#define RNDIS_STATUS_GROUP_ADDRESS_IN_US 0xC001001A +#define RNDIS_STATUS_FILE_NOT_FOUND 0xC001001B +#define RNDIS_STATUS_ERROR_READING_FILE 0xC001001C +#define RNDIS_STATUS_ALREADY_MAPPED 0xC001001D +#define RNDIS_STATUS_RESOURCE_CONFLICT 0xC001001E +#define RNDIS_STATUS_NO_CABLE 0xC001001F + +#define OID_GEN_SUPPORTED_LIST 0x00010101 +#define OID_GEN_HARDWARE_STATUS 0x00010102 +#define OID_GEN_MEDIA_SUPPORTED 0x00010103 +#define OID_GEN_MEDIA_IN_USE 0x00010104 +#define OID_GEN_MAXIMUM_LOOKAHEAD 0x00010105 +#define OID_GEN_MAXIMUM_FRAME_SIZE 0x00010106 +#define OID_GEN_LINK_SPEED 0x00010107 +#define OID_GEN_TRANSMIT_BUFFER_SPACE 0x00010108 +#define OID_GEN_RECEIVE_BUFFER_SPACE 0x00010109 +#define OID_GEN_TRANSMIT_BLOCK_SIZE 0x0001010A +#define OID_GEN_RECEIVE_BLOCK_SIZE 0x0001010B +#define OID_GEN_VENDOR_ID 0x0001010C +#define OID_GEN_VENDOR_DESCRIPTION 0x0001010D +#define OID_GEN_CURRENT_PACKET_FILTER 0x0001010E +#define OID_GEN_CURRENT_LOOKAHEAD 0x0001010F +#define OID_GEN_DRIVER_VERSION 0x00010110 +#define OID_GEN_MAXIMUM_TOTAL_SIZE 0x00010111 +#define OID_GEN_PROTOCOL_OPTIONS 0x00010112 +#define OID_GEN_MAC_OPTIONS 0x00010113 +#define OID_GEN_MEDIA_CONNECT_STATUS 0x00010114 +#define OID_GEN_MAXIMUM_SEND_PACKETS 0x00010115 +#define OID_GEN_VENDOR_DRIVER_VERSION 0x00010116 +#define OID_GEN_SUPPORTED_GUIDS 0x00010117 +#define OID_GEN_NETWORK_LAYER_ADDRESSES 0x00010118 +#define OID_GEN_TRANSPORT_HEADER_OFFSET 0x00010119 +#define OID_GEN_RECEIVE_SCALE_CAPABILITIES 0x00010203 +#define OID_GEN_RECEIVE_SCALE_PARAMETERS 0x00010204 +#define OID_GEN_MACHINE_NAME 0x0001021A +#define OID_GEN_RNDIS_CONFIG_PARAMETER 0x0001021B +#define OID_GEN_VLAN_ID 0x0001021C + +#define OID_802_3_PERMANENT_ADDRESS 0x01010101 +#define OID_802_3_CURRENT_ADDRESS 0x01010102 +#define OID_802_3_MULTICAST_LIST 0x01010103 +#define OID_802_3_MAXIMUM_LIST_SIZE 0x01010104 +#define OID_802_3_MAC_OPTIONS 0x01010105 +#define OID_802_3_RCV_ERROR_ALIGNMENT 0x01020101 +#define OID_802_3_XMIT_ONE_COLLISION 0x01020102 +#define OID_802_3_XMIT_MORE_COLLISIONS 0x01020103 +#define OID_802_3_XMIT_DEFERRED 0x01020201 +#define OID_802_3_XMIT_MAX_COLLISIONS 0x01020202 +#define OID_802_3_RCV_OVERRUN 0x01020203 +#define OID_802_3_XMIT_UNDERRUN 0x01020204 +#define OID_802_3_XMIT_HEARTBEAT_FAILURE 0x01020205 +#define OID_802_3_XMIT_TIMES_CRS_LOST 0x01020206 +#define OID_802_3_XMIT_LATE_COLLISIONS 0x01020207 + +#define OID_TCP_OFFLOAD_PARAMETERS 0xFC01020C +#define OID_TCP_OFFLOAD_HARDWARE_CAPABILITIES 0xFC01020D + +#define RNDIS_MEDIUM_802_3 0x00000000 + +/* Device flags */ +#define RNDIS_DF_CONNECTIONLESS 0x00000001 +#define RNDIS_DF_CONNECTION_ORIENTED 0x00000002 + +/* + * Common RNDIS message header. + */ +struct rndis_msghdr { + uint32_t type; + uint32_t len; +}; + +/* + * RNDIS data message + */ +#define RNDIS_PACKET_MSG 0x00000001 + +struct rndis_packet_msg { + uint32_t type; + uint32_t len; + uint32_t dataoffset; + uint32_t datalen; + uint32_t oobdataoffset; + uint32_t oobdatalen; + uint32_t oobdataelements; + uint32_t pktinfooffset; + uint32_t pktinfolen; + uint32_t vchandle; + uint32_t reserved; +}; + +/* + * Minimum value for dataoffset, oobdataoffset, and + * pktinfooffset. + */ +#define RNDIS_PACKET_MSG_OFFSET_MIN \ + (sizeof(struct rndis_packet_msg) - \ + offsetof(struct rndis_packet_msg, dataoffset)) + +/* Offset from the beginning of rndis_packet_msg. */ +#define RNDIS_PACKET_MSG_OFFSET_ABS(ofs) \ + ((ofs) + offsetof(struct rndis_packet_msg, dataoffset)) + +#define RNDIS_PACKET_MSG_OFFSET_ALIGN 4 +#define RNDIS_PACKET_MSG_OFFSET_ALIGNMASK \ + (RNDIS_PACKET_MSG_OFFSET_ALIGN - 1) + +/* Per-packet-info for RNDIS data message */ +struct rndis_pktinfo { + uint32_t size; + uint32_t type; /* NDIS_PKTINFO_TYPE_ */ + uint32_t offset; + uint8_t data[]; +}; + +#define RNDIS_PKTINFO_OFFSET \ + offsetof(struct rndis_pktinfo, data[0]) +#define RNDIS_PKTINFO_SIZE_ALIGN 4 +#define RNDIS_PKTINFO_SIZE_ALIGNMASK (RNDIS_PKTINFO_SIZE_ALIGN - 1) + +#define NDIS_PKTINFO_TYPE_CSUM 0 +#define NDIS_PKTINFO_TYPE_IPSEC 1 +#define NDIS_PKTINFO_TYPE_LSO 2 +#define NDIS_PKTINFO_TYPE_CLASSIFY 3 +/* reserved 4 */ +#define NDIS_PKTINFO_TYPE_SGLIST 5 +#define NDIS_PKTINFO_TYPE_VLAN 6 +#define NDIS_PKTINFO_TYPE_ORIG 7 +#define NDIS_PKTINFO_TYPE_PKT_CANCELID 8 +#define NDIS_PKTINFO_TYPE_ORIG_NBLIST 9 +#define NDIS_PKTINFO_TYPE_CACHE_NBLIST 10 +#define NDIS_PKTINFO_TYPE_PKT_PAD 11 + +/* RNDIS extension */ + +/* Per-packet hash info */ +#define NDIS_HASH_INFO_SIZE sizeof(uint32_t) +#define NDIS_PKTINFO_TYPE_HASHINF NDIS_PKTINFO_TYPE_ORIG_NBLIST +/* NDIS_HASH_ */ + +/* Per-packet hash value */ +#define NDIS_HASH_VALUE_SIZE sizeof(uint32_t) +#define NDIS_PKTINFO_TYPE_HASHVAL NDIS_PKTINFO_TYPE_PKT_CANCELID + +/* Per-packet-info size */ +#define RNDIS_PKTINFO_SIZE(dlen) offsetof(struct rndis_pktinfo, data[dlen]) + +/* + * RNDIS control messages + */ + +/* + * Common header for RNDIS completion messages. + * + * NOTE: It does not apply to RNDIS_RESET_CMPLT. + */ +struct rndis_comp_hdr { + uint32_t type; + uint32_t len; + uint32_t rid; + uint32_t status; +}; + +/* Initialize the device. */ +#define RNDIS_INITIALIZE_MSG 0x00000002 +#define RNDIS_INITIALIZE_CMPLT 0x80000002 + +struct rndis_init_req { + uint32_t type; + uint32_t len; + uint32_t rid; + uint32_t ver_major; + uint32_t ver_minor; + uint32_t max_xfersz; +}; + +struct rndis_init_comp { + uint32_t type; + uint32_t len; + uint32_t rid; + uint32_t status; + uint32_t ver_major; + uint32_t ver_minor; + uint32_t devflags; + uint32_t medium; + uint32_t pktmaxcnt; + uint32_t pktmaxsz; + uint32_t align; + uint32_t aflistoffset; + uint32_t aflistsz; +}; + +#define RNDIS_INIT_COMP_SIZE_MIN \ + offsetof(struct rndis_init_comp, aflistsz) + +/* Halt the device. No response sent. */ +#define RNDIS_HALT_MSG 0x00000003 + +struct rndis_halt_req { + uint32_t type; + uint32_t len; + uint32_t rid; +}; + +/* Send a query object. */ +#define RNDIS_QUERY_MSG 0x00000004 +#define RNDIS_QUERY_CMPLT 0x80000004 + +struct rndis_query_req { + uint32_t type; + uint32_t len; + uint32_t rid; + uint32_t oid; + uint32_t infobuflen; + uint32_t infobufoffset; + uint32_t devicevchdl; +}; + +#define RNDIS_QUERY_REQ_INFOBUFOFFSET \ + (sizeof(struct rndis_query_req) - \ + offsetof(struct rndis_query_req, rid)) + +struct rndis_query_comp { + uint32_t type; + uint32_t len; + uint32_t rid; + uint32_t status; + uint32_t infobuflen; + uint32_t infobufoffset; +}; + +/* infobuf offset from the beginning of rndis_query_comp. */ +#define RNDIS_QUERY_COMP_INFOBUFOFFSET_ABS(ofs) \ + ((ofs) + offsetof(struct rndis_query_comp, rid)) + +/* Send a set object request. */ +#define RNDIS_SET_MSG 0x00000005 +#define RNDIS_SET_CMPLT 0x80000005 + +struct rndis_set_req { + uint32_t type; + uint32_t len; + uint32_t rid; + uint32_t oid; + uint32_t infobuflen; + uint32_t infobufoffset; + uint32_t devicevchdl; +}; + +#define RNDIS_SET_REQ_INFOBUFOFFSET \ + (sizeof(struct rndis_set_req) - \ + offsetof(struct rndis_set_req, rid)) + +struct rndis_set_comp { + uint32_t type; + uint32_t len; + uint32_t rid; + uint32_t status; +}; + +/* + * Parameter used by OID_GEN_RNDIS_CONFIG_PARAMETER. + */ +#define RNDIS_SET_PARAM_NUMERIC 0x00000000 +#define RNDIS_SET_PARAM_STRING 0x00000002 + +struct rndis_set_parameter { + uint32_t nameoffset; + uint32_t namelen; + uint32_t type; + uint32_t valueoffset; + uint32_t valuelen; +}; + +/* Perform a soft reset on the device. */ +#define RNDIS_RESET_MSG 0x00000006 +#define RNDIS_RESET_CMPLT 0x80000006 + +struct rndis_reset_req { + uint32_t type; + uint32_t len; + uint32_t rid; +}; + +struct rndis_reset_comp { + uint32_t type; + uint32_t len; + uint32_t status; + uint32_t adrreset; +}; + +/* 802.3 link-state or undefined message error. Sent by device. */ +#define RNDIS_INDICATE_STATUS_MSG 0x00000007 + +struct rndis_status_msg { + uint32_t type; + uint32_t len; + uint32_t status; + uint32_t stbuflen; + uint32_t stbufoffset; + /* rndis_diag_info */ +}; + +/* stbuf offset from the beginning of rndis_status_msg. */ +#define RNDIS_STBUFOFFSET_ABS(ofs) \ + ((ofs) + offsetof(struct rndis_status_msg, status)) + +/* + * Immediately after rndis_status_msg.stbufoffset, if a control + * message is malformatted, or a packet message contains inappropriate + * content. + */ +struct rndis_diag_info { + uint32_t diagstatus; + uint32_t erroffset; +}; + +/* Keepalive message. May be sent by device. */ +#define RNDIS_KEEPALIVE_MSG 0x00000008 +#define RNDIS_KEEPALIVE_CMPLT 0x80000008 + +struct rndis_keepalive_req { + uint32_t type; + uint32_t len; + uint32_t rid; +}; + +struct rndis_keepalive_comp { + uint32_t type; + uint32_t len; + uint32_t rid; + uint32_t status; +}; + +/* Packet filter bits used by OID_GEN_CURRENT_PACKET_FILTER */ +#define NDIS_PACKET_TYPE_NONE 0x00000000 +#define NDIS_PACKET_TYPE_DIRECTED 0x00000001 +#define NDIS_PACKET_TYPE_MULTICAST 0x00000002 +#define NDIS_PACKET_TYPE_ALL_MULTICAST 0x00000004 +#define NDIS_PACKET_TYPE_BROADCAST 0x00000008 +#define NDIS_PACKET_TYPE_SOURCE_ROUTING 0x00000010 +#define NDIS_PACKET_TYPE_PROMISCUOUS 0x00000020 +#define NDIS_PACKET_TYPE_SMT 0x00000040 +#define NDIS_PACKET_TYPE_ALL_LOCAL 0x00000080 +#define NDIS_PACKET_TYPE_GROUP 0x00001000 +#define NDIS_PACKET_TYPE_ALL_FUNCTIONAL 0x00002000 +#define NDIS_PACKET_TYPE_FUNCTIONAL 0x00004000 +#define NDIS_PACKET_TYPE_MAC_FRAME 0x00008000 + +#endif /* !_NET_RNDIS_H_ */ diff --git a/src/seastar/dpdk/drivers/net/netvsc/rte_pmd_netvsc_version.map b/src/seastar/dpdk/drivers/net/netvsc/rte_pmd_netvsc_version.map new file mode 100644 index 000000000..d534019a6 --- /dev/null +++ b/src/seastar/dpdk/drivers/net/netvsc/rte_pmd_netvsc_version.map @@ -0,0 +1,5 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ + +DPDK_18.08 { + local: *; +}; |