summary | refs | log | tree | commit | diff | stats
path: root/src/seastar/dpdk/drivers/net/netvsc
diff options
context:
space:
mode:
Diffstat (limited to 'src/seastar/dpdk/drivers/net/netvsc')
-rw-r--r-- src/seastar/dpdk/drivers/net/netvsc/Makefile 24
-rw-r--r-- src/seastar/dpdk/drivers/net/netvsc/hn_ethdev.c 904
-rw-r--r-- src/seastar/dpdk/drivers/net/netvsc/hn_logs.h 36
-rw-r--r-- src/seastar/dpdk/drivers/net/netvsc/hn_nvs.c 550
-rw-r--r-- src/seastar/dpdk/drivers/net/netvsc/hn_nvs.h 238
-rw-r--r-- src/seastar/dpdk/drivers/net/netvsc/hn_rndis.c 1134
-rw-r--r-- src/seastar/dpdk/drivers/net/netvsc/hn_rndis.h 33
-rw-r--r-- src/seastar/dpdk/drivers/net/netvsc/hn_rxtx.c 1470
-rw-r--r-- src/seastar/dpdk/drivers/net/netvsc/hn_var.h 241
-rw-r--r-- src/seastar/dpdk/drivers/net/netvsc/hn_vf.c 555
-rw-r--r-- src/seastar/dpdk/drivers/net/netvsc/meson.build 10
-rw-r--r-- src/seastar/dpdk/drivers/net/netvsc/ndis.h 378
-rw-r--r-- src/seastar/dpdk/drivers/net/netvsc/rndis.h 414
-rw-r--r-- src/seastar/dpdk/drivers/net/netvsc/rte_pmd_netvsc_version.map 5
14 files changed, 5992 insertions, 0 deletions
diff --git a/src/seastar/dpdk/drivers/net/netvsc/Makefile b/src/seastar/dpdk/drivers/net/netvsc/Makefile
new file mode 100644
index 000000000..71482591a
--- /dev/null
+++ b/src/seastar/dpdk/drivers/net/netvsc/Makefile
@@ -0,0 +1,24 @@
+# SPDX-License-Identifier: BSD-3-Clause
+
+# Build rules for the netvsc poll mode driver static library.
+include $(RTE_SDK)/mk/rte.vars.mk
+
+LIB = librte_pmd_netvsc.a
+
+CFLAGS += -O3 $(WERROR_FLAGS)
+# The driver is compiled with experimental DPDK APIs enabled.
+CFLAGS += -DALLOW_EXPERIMENTAL_API
+
+EXPORT_MAP := rte_pmd_netvsc_version.map
+
+LIBABIVER := 1
+
+# Sources are only built when CONFIG_RTE_LIBRTE_NETVSC_PMD is enabled.
+SRCS-$(CONFIG_RTE_LIBRTE_NETVSC_PMD) += hn_ethdev.c
+SRCS-$(CONFIG_RTE_LIBRTE_NETVSC_PMD) += hn_rxtx.c
+SRCS-$(CONFIG_RTE_LIBRTE_NETVSC_PMD) += hn_rndis.c
+SRCS-$(CONFIG_RTE_LIBRTE_NETVSC_PMD) += hn_nvs.c
+SRCS-$(CONFIG_RTE_LIBRTE_NETVSC_PMD) += hn_vf.c
+
+# DPDK libraries the driver links against.
+LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
+LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs
+LDLIBS += -lrte_bus_vmbus
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/src/seastar/dpdk/drivers/net/netvsc/hn_ethdev.c b/src/seastar/dpdk/drivers/net/netvsc/hn_ethdev.c
new file mode 100644
index 000000000..553cb06f6
--- /dev/null
+++ b/src/seastar/dpdk/drivers/net/netvsc/hn_ethdev.c
@@ -0,0 +1,904 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2016-2018 Microsoft Corporation
+ * Copyright(c) 2013-2016 Brocade Communications Systems, Inc.
+ * All rights reserved.
+ */
+
+#include <stdint.h>
+#include <string.h>
+#include <stdio.h>
+#include <errno.h>
+#include <unistd.h>
+
+#include <rte_ethdev.h>
+#include <rte_memcpy.h>
+#include <rte_string_fns.h>
+#include <rte_memzone.h>
+#include <rte_devargs.h>
+#include <rte_malloc.h>
+#include <rte_kvargs.h>
+#include <rte_atomic.h>
+#include <rte_branch_prediction.h>
+#include <rte_ether.h>
+#include <rte_ethdev_driver.h>
+#include <rte_cycles.h>
+#include <rte_errno.h>
+#include <rte_memory.h>
+#include <rte_eal.h>
+#include <rte_dev.h>
+#include <rte_bus_vmbus.h>
+
+#include "hn_logs.h"
+#include "hn_var.h"
+#include "hn_rndis.h"
+#include "hn_nvs.h"
+#include "ndis.h"
+
+/* TX offload flags accepted by hn_dev_configure(); anything else is rejected. */
+#define HN_TX_OFFLOAD_CAPS (DEV_TX_OFFLOAD_IPV4_CKSUM | \
+ DEV_TX_OFFLOAD_TCP_CKSUM | \
+ DEV_TX_OFFLOAD_UDP_CKSUM | \
+ DEV_TX_OFFLOAD_TCP_TSO | \
+ DEV_TX_OFFLOAD_MULTI_SEGS | \
+ DEV_TX_OFFLOAD_VLAN_INSERT)
+
+/* RX offload flags accepted by hn_dev_configure(); anything else is rejected. */
+#define HN_RX_OFFLOAD_CAPS (DEV_RX_OFFLOAD_CHECKSUM | \
+ DEV_RX_OFFLOAD_VLAN_STRIP)
+
+int hn_logtype_init;
+int hn_logtype_driver;
+
+/* Name of one per-queue extended statistic and where its counter lives. */
+struct hn_xstats_name_off {
+ char name[RTE_ETH_XSTATS_NAME_SIZE];
+ unsigned int offset; /* byte offset of the counter inside struct hn_stats */
+};
+
+/*
+ * Per-queue extended statistics. Each entry is exported once per TX queue
+ * (as "tx_q<N>_<name>") and once per RX queue (as "rx_q<N>_<name>"), and
+ * its value is read as a uint64_t at the given offset of the queue stats.
+ * NOTE(review): "ring full" contains a space while every other name uses
+ * underscores; renaming it would change the exported statistic name.
+ */
+static const struct hn_xstats_name_off hn_stat_strings[] = {
+ { "good_packets", offsetof(struct hn_stats, packets) },
+ { "good_bytes", offsetof(struct hn_stats, bytes) },
+ { "errors", offsetof(struct hn_stats, errors) },
+ { "ring full", offsetof(struct hn_stats, ring_full) },
+ { "multicast_packets", offsetof(struct hn_stats, multicast) },
+ { "broadcast_packets", offsetof(struct hn_stats, broadcast) },
+ { "undersize_packets", offsetof(struct hn_stats, size_bins[0]) },
+ { "size_64_packets", offsetof(struct hn_stats, size_bins[1]) },
+ { "size_65_127_packets", offsetof(struct hn_stats, size_bins[2]) },
+ { "size_128_255_packets", offsetof(struct hn_stats, size_bins[3]) },
+ { "size_256_511_packets", offsetof(struct hn_stats, size_bins[4]) },
+ { "size_512_1023_packets", offsetof(struct hn_stats, size_bins[5]) },
+ { "size_1024_1518_packets", offsetof(struct hn_stats, size_bins[6]) },
+ { "size_1519_max_packets", offsetof(struct hn_stats, size_bins[7]) },
+};
+
+/*
+ * Create (primary process) or attach to (secondary process) the ethdev
+ * named after the given VMBus device.
+ *
+ * In the primary process, private_data_size bytes of zeroed driver data
+ * are allocated on the device's NUMA node when the size is non-zero.
+ * Returns the ethdev wired to the VMBus device, or NULL on any failure.
+ */
+static struct rte_eth_dev *
+eth_dev_vmbus_allocate(struct rte_vmbus_device *dev, size_t private_data_size)
+{
+	struct rte_eth_dev *ethdev;
+	const char *devname;
+
+	if (dev == NULL)
+		return NULL;
+
+	devname = dev->device.name;
+
+	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+		ethdev = rte_eth_dev_attach_secondary(devname);
+		if (ethdev == NULL) {
+			PMD_DRV_LOG(NOTICE, "can not attach secondary");
+			return NULL;
+		}
+	} else {
+		ethdev = rte_eth_dev_allocate(devname);
+		if (ethdev == NULL) {
+			PMD_DRV_LOG(NOTICE, "can not allocate rte ethdev");
+			return NULL;
+		}
+
+		if (private_data_size != 0) {
+			ethdev->data->dev_private =
+				rte_zmalloc_socket(devname, private_data_size,
+						   RTE_CACHE_LINE_SIZE,
+						   dev->device.numa_node);
+			if (ethdev->data->dev_private == NULL) {
+				PMD_DRV_LOG(NOTICE,
+					    "can not allocate driver data");
+				rte_eth_dev_release_port(ethdev);
+				return NULL;
+			}
+		}
+	}
+
+	ethdev->device = &dev->device;
+
+	/* interrupt is simulated */
+	dev->intr_handle.type = RTE_INTR_HANDLE_EXT;
+	ethdev->intr_handle = &dev->intr_handle;
+
+	/* report link state changes; allow ethdev to remove on close */
+	ethdev->data->dev_flags |= RTE_ETH_DEV_INTR_LSC;
+	ethdev->data->dev_flags |= RTE_ETH_DEV_CLOSE_REMOVE;
+
+	return ethdev;
+}
+
+/*
+ * Release the ethdev port created by eth_dev_vmbus_allocate() and drop
+ * its references to the VMBus device and interrupt handle.
+ */
+static void
+eth_dev_vmbus_release(struct rte_eth_dev *eth_dev)
+{
+ /* mac_addrs must not be freed alone because part of dev_private */
+ eth_dev->data->mac_addrs = NULL;
+ /* free ether device */
+ rte_eth_dev_release_port(eth_dev);
+
+ /* the port no longer refers to the underlying VMBus device */
+ eth_dev->device = NULL;
+ eth_dev->intr_handle = NULL;
+}
+
+/*
+ * handle "latency=X" from devargs
+ *
+ * kvargs callback: parses X as an unsigned number of microseconds and
+ * stores it in hv->latency converted to nanoseconds.
+ *
+ * Returns 0 on success, -EINVAL when the value is missing or is not a
+ * number, and -ERANGE when the number cannot be converted to nanoseconds
+ * without overflowing.
+ */
+static int hn_set_latency(const char *key, const char *value, void *opaque)
+{
+	struct hn_data *hv = opaque;
+	char *endp = NULL;
+	unsigned long lat;
+
+	if (value == NULL) {
+		PMD_DRV_LOG(ERR, "missing value for parameter %s", key);
+		return -EINVAL;
+	}
+
+	errno = 0;
+	lat = strtoul(value, &endp, 0);
+
+	if (*value == '\0' || *endp != '\0') {
+		PMD_DRV_LOG(ERR, "invalid parameter %s=%s", key, value);
+		return -EINVAL;
+	}
+
+	/*
+	 * Reject values strtoul() could not represent and values whose
+	 * conversion to nanoseconds would wrap (this also catches negative
+	 * input, which strtoul() maps to very large numbers).
+	 * NOTE(review): hv->latency may be narrower than unsigned long;
+	 * confirm against struct hn_data whether a tighter bound is needed.
+	 */
+	if (errno == ERANGE || lat > ~0UL / 1000) {
+		PMD_DRV_LOG(ERR, "parameter %s=%s out of range", key, value);
+		return -ERANGE;
+	}
+
+	PMD_DRV_LOG(DEBUG, "set latency %lu usec", lat);
+
+	hv->latency = lat * 1000; /* usec to nsec */
+	return 0;
+}
+
+/*
+ * Parse device arguments
+ *
+ * Only the "latency" key is accepted. Returns 0 when the device has no
+ * devargs or when parsing succeeds, -EINVAL when the argument string
+ * cannot be parsed, or the error reported while processing "latency".
+ */
+static int hn_parse_args(const struct rte_eth_dev *dev)
+{
+	struct hn_data *hv = dev->data->dev_private;
+	struct rte_devargs *devargs = dev->device->devargs;
+	static const char * const valid_keys[] = {
+		"latency",
+		NULL
+	};
+	struct rte_kvargs *kvlist;
+	int ret;
+
+	if (!devargs)
+		return 0;
+
+	PMD_INIT_LOG(DEBUG, "device args %s %s",
+		     devargs->name, devargs->args);
+
+	kvlist = rte_kvargs_parse(devargs->args, valid_keys);
+	if (!kvlist) {
+		PMD_DRV_LOG(NOTICE, "invalid parameters");
+		return -EINVAL;
+	}
+
+	ret = rte_kvargs_process(kvlist, "latency", hn_set_latency, hv);
+	if (ret) {
+		/* PMD_DRV_LOG() already appends the newline */
+		PMD_DRV_LOG(ERR, "Unable to process latency arg");
+	}
+
+	rte_kvargs_free(kvlist);
+	return ret;
+}
+
+/*
+ * Update link status.
+ *
+ * Queries the link state and speed over RNDIS, lets the VF path update
+ * its own link, and publishes the new link when the up/down state changed.
+ *
+ * Note: the DPDK definition of "wait_to_complete" means block this call
+ * until the link is up, which is not worth supporting here; the flag is
+ * only forwarded to the VF link update.
+ *
+ * Returns the error from the RNDIS link status query, 0 when the link
+ * status is unchanged, or the result of rte_eth_linkstatus_set().
+ */
+int
+hn_dev_link_update(struct rte_eth_dev *dev,
+ int wait_to_complete)
+{
+ struct hn_data *hv = dev->data->dev_private;
+ struct rte_eth_link link, old;
+ int error;
+
+ /* snapshot the current link to detect a state change below */
+ old = dev->data->dev_link;
+
+ error = hn_rndis_get_linkstatus(hv);
+ if (error)
+ return error;
+
+ hn_rndis_get_linkspeed(hv);
+
+ hn_vf_link_update(dev, wait_to_complete);
+
+ /* NOTE(review): the divisor presumably converts hv->link_speed to Mbps - confirm */
+ link = (struct rte_eth_link) {
+ .link_duplex = ETH_LINK_FULL_DUPLEX,
+ .link_autoneg = ETH_LINK_SPEED_FIXED,
+ .link_speed = hv->link_speed / 10000,
+ };
+
+ if (hv->link_status == NDIS_MEDIA_STATE_CONNECTED)
+ link.link_status = ETH_LINK_UP;
+ else
+ link.link_status = ETH_LINK_DOWN;
+
+ /* only the up/down state is compared; a speed-only change is not published */
+ if (old.link_status == link.link_status)
+ return 0;
+
+ PMD_INIT_LOG(DEBUG, "Port %d is %s", dev->data->port_id,
+ (link.link_status == ETH_LINK_UP) ? "up" : "down");
+
+ return rte_eth_linkstatus_set(dev, &link);
+}
+
+/*
+ * Fill in the device capabilities: fixed limits of the synthetic path,
+ * the queue limits negotiated at init time, then whatever the RNDIS
+ * offload query and the VF contribute.
+ */
+static void hn_dev_info_get(struct rte_eth_dev *dev,
+			    struct rte_eth_dev_info *dev_info)
+{
+	struct hn_data *hv = dev->data->dev_private;
+	const uint64_t rss_types = ETH_RSS_IPV4 | ETH_RSS_IPV6 |
+				   ETH_RSS_TCP | ETH_RSS_UDP;
+
+	/* fixed limits */
+	dev_info->speed_capa = ETH_LINK_SPEED_10G;
+	dev_info->min_rx_bufsize = HN_MIN_RX_BUF_SIZE;
+	dev_info->max_rx_pktlen = HN_MAX_XFER_LEN;
+	dev_info->max_mac_addrs = 1;
+
+	/* RSS */
+	dev_info->hash_key_size = NDIS_HASH_KEYSIZE_TOEPLITZ;
+	dev_info->flow_type_rss_offloads = rss_types;
+
+	/* one RX and one TX queue per channel */
+	dev_info->max_rx_queues = hv->max_queues;
+	dev_info->max_tx_queues = hv->max_queues;
+
+	hn_rndis_get_offload(hv, dev_info);
+	hn_vf_info_get(hv, dev_info);
+}
+
+/* Enable promiscuous reception on the synthetic path, then on the VF. */
+static void
+hn_dev_promiscuous_enable(struct rte_eth_dev *dev)
+{
+	struct hn_data *priv = dev->data->dev_private;
+	const uint32_t filter = NDIS_PACKET_TYPE_PROMISCUOUS;
+
+	hn_rndis_set_rxfilter(priv, filter);
+	hn_vf_promiscuous_enable(dev);
+}
+
+/*
+ * Leave promiscuous mode: fall back to directed + broadcast reception,
+ * keeping all-multicast when the port has it enabled, then update the VF.
+ */
+static void
+hn_dev_promiscuous_disable(struct rte_eth_dev *dev)
+{
+	struct hn_data *priv = dev->data->dev_private;
+	uint32_t rxfilter = NDIS_PACKET_TYPE_DIRECTED |
+			    NDIS_PACKET_TYPE_BROADCAST;
+
+	if (dev->data->all_multicast)
+		rxfilter |= NDIS_PACKET_TYPE_ALL_MULTICAST;
+
+	hn_rndis_set_rxfilter(priv, rxfilter);
+	hn_vf_promiscuous_disable(dev);
+}
+
+/* Receive all multicast in addition to directed and broadcast frames. */
+static void
+hn_dev_allmulticast_enable(struct rte_eth_dev *dev)
+{
+	struct hn_data *priv = dev->data->dev_private;
+	const uint32_t rxfilter = NDIS_PACKET_TYPE_DIRECTED |
+				  NDIS_PACKET_TYPE_ALL_MULTICAST |
+				  NDIS_PACKET_TYPE_BROADCAST;
+
+	hn_rndis_set_rxfilter(priv, rxfilter);
+	hn_vf_allmulticast_enable(dev);
+}
+
+/* Go back to receiving only directed and broadcast frames. */
+static void
+hn_dev_allmulticast_disable(struct rte_eth_dev *dev)
+{
+	struct hn_data *priv = dev->data->dev_private;
+	const uint32_t rxfilter = NDIS_PACKET_TYPE_DIRECTED |
+				  NDIS_PACKET_TYPE_BROADCAST;
+
+	hn_rndis_set_rxfilter(priv, rxfilter);
+	hn_vf_allmulticast_disable(dev);
+}
+
+/*
+ * Set the multicast address list. The request is forwarded unchanged to
+ * the VF helper and its return value is returned to the caller.
+ */
+static int
+hn_dev_mc_addr_list(struct rte_eth_dev *dev,
+ struct ether_addr *mc_addr_set,
+ uint32_t nb_mc_addr)
+{
+ /* No filtering on the synthetic path, but can do it on VF */
+ return hn_vf_mc_addr_list(dev, mc_addr_set, nb_mc_addr);
+}
+
+/*
+ * Setup shared rx/tx queue data
+ *
+ * Asks NVS for the requested number of subchannels, then opens each one
+ * offered by the host and records it in hv->channels[] under the index
+ * the host assigned to it.
+ *
+ * Returns 0 on success, the error from the NVS request or from opening
+ * a subchannel, or -EIO when the host reports an unusable channel index.
+ */
+static int hn_subchan_configure(struct hn_data *hv,
+				uint32_t subchan)
+{
+	struct vmbus_channel *primary = hn_primary_chan(hv);
+	int err;
+	unsigned int retry = 0;
+
+	PMD_DRV_LOG(DEBUG,
+		    "open %u subchannels", subchan);
+
+	/* Send create sub channels command */
+	err = hn_nvs_alloc_subchans(hv, &subchan);
+	if (err)
+		return err;
+
+	while (subchan > 0) {
+		struct vmbus_channel *new_sc;
+		uint16_t chn_index;
+
+		err = rte_vmbus_subchan_open(primary, &new_sc);
+		if (err == -ENOENT && ++retry < 1000) {
+			/* This can happen if not ready yet */
+			rte_delay_ms(10);
+			continue;
+		}
+
+		if (err) {
+			PMD_DRV_LOG(ERR,
+				    "open subchannel failed: %d", err);
+			return err;
+		}
+
+		rte_vmbus_set_latency(hv->vmbus, new_sc, hv->latency);
+
+		/* the retry budget applies to each subchannel separately */
+		retry = 0;
+		chn_index = rte_vmbus_sub_channel_index(new_sc);
+		/*
+		 * NOTE(review): an index equal to hv->max_queues is accepted
+		 * here; confirm that hv->channels[] has room for it.
+		 */
+		if (chn_index == 0 || chn_index > hv->max_queues) {
+			PMD_DRV_LOG(ERR,
+				    "Invalid subchannel offermsg channel %u",
+				    chn_index);
+			/* not recorded anywhere, so close it here */
+			rte_vmbus_chan_close(new_sc);
+			return -EIO;
+		}
+
+		PMD_DRV_LOG(DEBUG, "new sub channel %u", chn_index);
+		hv->channels[chn_index] = new_sc;
+		--subchan;
+	}
+
+	return err;
+}
+
+/*
+ * Apply the ethdev configuration to the synthetic device.
+ *
+ * Rejects offloads outside HN_TX_OFFLOAD_CAPS / HN_RX_OFFLOAD_CAPS,
+ * programs the accepted offloads over RNDIS, opens the subchannels and
+ * configures RSS when more than one queue is requested, and finally
+ * configures the VF.
+ *
+ * Returns 0 on success, -EINVAL for unsupported offloads, or the error
+ * of the step that failed.
+ */
+static int hn_dev_configure(struct rte_eth_dev *dev)
+{
+	const struct rte_eth_conf *dev_conf = &dev->data->dev_conf;
+	const struct rte_eth_rxmode *rxmode = &dev_conf->rxmode;
+	const struct rte_eth_txmode *txmode = &dev_conf->txmode;
+
+	const struct rte_eth_rss_conf *rss_conf =
+		&dev_conf->rx_adv_conf.rss_conf;
+	struct hn_data *hv = dev->data->dev_private;
+	uint64_t unsupported;
+	int err, subchan;
+
+	PMD_INIT_FUNC_TRACE();
+
+	unsupported = txmode->offloads & ~HN_TX_OFFLOAD_CAPS;
+	if (unsupported) {
+		PMD_DRV_LOG(NOTICE,
+			    "unsupported TX offload: %#" PRIx64,
+			    unsupported);
+		return -EINVAL;
+	}
+
+	unsupported = rxmode->offloads & ~HN_RX_OFFLOAD_CAPS;
+	if (unsupported) {
+		/* report only the offending bits, as the TX branch does */
+		PMD_DRV_LOG(NOTICE,
+			    "unsupported RX offload: %#" PRIx64,
+			    unsupported);
+		return -EINVAL;
+	}
+
+	hv->vlan_strip = !!(rxmode->offloads & DEV_RX_OFFLOAD_VLAN_STRIP);
+
+	err = hn_rndis_conf_offload(hv, txmode->offloads,
+				    rxmode->offloads);
+	if (err) {
+		PMD_DRV_LOG(NOTICE,
+			    "offload configure failed");
+		return err;
+	}
+
+	/* one channel serves an RX/TX queue pair; the primary is queue 0 */
+	hv->num_queues = RTE_MAX(dev->data->nb_rx_queues,
+				 dev->data->nb_tx_queues);
+	subchan = hv->num_queues - 1;
+	if (subchan > 0) {
+		err = hn_subchan_configure(hv, subchan);
+		if (err) {
+			PMD_DRV_LOG(NOTICE,
+				    "subchannel configuration failed");
+			return err;
+		}
+
+		err = hn_rndis_conf_rss(hv, rss_conf);
+		if (err) {
+			PMD_DRV_LOG(NOTICE,
+				    "rss configuration failed");
+			return err;
+		}
+	}
+
+	return hn_vf_configure(dev, dev_conf);
+}
+
+/*
+ * Collect basic statistics: start from what the VF helper reports, then
+ * add the software counters of every configured TX and RX queue. Per-queue
+ * counters are exported for the first RTE_ETHDEV_QUEUE_STAT_CNTRS queues.
+ * Always returns 0.
+ */
+static int hn_dev_stats_get(struct rte_eth_dev *dev,
+			    struct rte_eth_stats *stats)
+{
+	unsigned int q;
+
+	hn_vf_stats_get(dev, stats);
+
+	/* transmit side */
+	for (q = 0; q < dev->data->nb_tx_queues; q++) {
+		const struct hn_tx_queue *tx = dev->data->tx_queues[q];
+
+		if (tx == NULL)
+			continue;
+
+		stats->opackets += tx->stats.packets;
+		stats->obytes += tx->stats.bytes;
+		stats->oerrors += tx->stats.errors;
+
+		if (q >= RTE_ETHDEV_QUEUE_STAT_CNTRS)
+			continue;
+
+		stats->q_opackets[q] = tx->stats.packets;
+		stats->q_obytes[q] = tx->stats.bytes;
+	}
+
+	/* receive side */
+	for (q = 0; q < dev->data->nb_rx_queues; q++) {
+		const struct hn_rx_queue *rx = dev->data->rx_queues[q];
+
+		if (rx == NULL)
+			continue;
+
+		stats->ipackets += rx->stats.packets;
+		stats->ibytes += rx->stats.bytes;
+		stats->ierrors += rx->stats.errors;
+		stats->imissed += rx->stats.ring_full;
+
+		if (q >= RTE_ETHDEV_QUEUE_STAT_CNTRS)
+			continue;
+
+		stats->q_ipackets[q] = rx->stats.packets;
+		stats->q_ibytes[q] = rx->stats.bytes;
+	}
+
+	stats->rx_nombuf = dev->data->rx_mbuf_alloc_failed;
+	return 0;
+}
+
+/* Zero the software counters of every configured TX and RX queue. */
+static void
+hn_dev_stats_reset(struct rte_eth_dev *dev)
+{
+	unsigned int q;
+
+	PMD_INIT_FUNC_TRACE();
+
+	for (q = 0; q < dev->data->nb_tx_queues; q++) {
+		struct hn_tx_queue *tx = dev->data->tx_queues[q];
+
+		if (tx != NULL)
+			memset(&tx->stats, 0, sizeof(struct hn_stats));
+	}
+
+	for (q = 0; q < dev->data->nb_rx_queues; q++) {
+		struct hn_rx_queue *rx = dev->data->rx_queues[q];
+
+		if (rx != NULL)
+			memset(&rx->stats, 0, sizeof(struct hn_stats));
+	}
+}
+
+/*
+ * Reset extended statistics: the per-queue software counters first,
+ * then the VF's extended statistics.
+ */
+static void
+hn_dev_xstats_reset(struct rte_eth_dev *dev)
+{
+ hn_dev_stats_reset(dev);
+ hn_vf_xstats_reset(dev);
+}
+
+/*
+ * Number of extended statistics: one set of hn_stat_strings per TX queue
+ * and per RX queue, plus whatever the VF reports. A negative VF result
+ * is returned unchanged.
+ */
+static int
+hn_dev_xstats_count(struct rte_eth_dev *dev)
+{
+	int vf_count, own_count;
+
+	own_count = dev->data->nb_tx_queues * RTE_DIM(hn_stat_strings);
+	own_count += dev->data->nb_rx_queues * RTE_DIM(hn_stat_strings);
+
+	vf_count = hn_vf_xstats_get_names(dev, NULL, 0);
+
+	return (vf_count < 0) ? vf_count : own_count + vf_count;
+}
+
+/*
+ * Report the names of the extended statistics.
+ *
+ * With a NULL array only the number of statistics is returned. Otherwise
+ * at most "limit" names are written: the per-queue names of each
+ * configured TX queue, then of each configured RX queue, then the VF's
+ * names in whatever room is left.
+ *
+ * Returns the number of names written, or a negative value from the VF.
+ */
+static int
+hn_dev_xstats_get_names(struct rte_eth_dev *dev,
+			struct rte_eth_xstat_name *xstats_names,
+			unsigned int limit)
+{
+	unsigned int i, t, count = 0;
+	int ret;
+
+	if (!xstats_names)
+		return hn_dev_xstats_count(dev);
+
+	/*
+	 * Note: limit checked in rte_eth_xstats_names(); the bounds below
+	 * additionally keep every write inside the array and keep
+	 * "limit - count" from wrapping when limit is too small.
+	 */
+	for (i = 0; i < dev->data->nb_tx_queues; i++) {
+		const struct hn_tx_queue *txq = dev->data->tx_queues[i];
+
+		if (!txq)
+			continue;
+
+		if (count >= limit)
+			break;
+
+		for (t = 0; t < RTE_DIM(hn_stat_strings) && count < limit; t++)
+			snprintf(xstats_names[count++].name,
+				 RTE_ETH_XSTATS_NAME_SIZE,
+				 "tx_q%u_%s", i, hn_stat_strings[t].name);
+	}
+
+	for (i = 0; i < dev->data->nb_rx_queues; i++) {
+		const struct hn_rx_queue *rxq = dev->data->rx_queues[i];
+
+		if (!rxq)
+			continue;
+
+		if (count >= limit)
+			break;
+
+		for (t = 0; t < RTE_DIM(hn_stat_strings) && count < limit; t++)
+			snprintf(xstats_names[count++].name,
+				 RTE_ETH_XSTATS_NAME_SIZE,
+				 "rx_q%u_%s", i,
+				 hn_stat_strings[t].name);
+	}
+
+	/* count <= limit holds here, so the subtraction cannot wrap */
+	ret = hn_vf_xstats_get_names(dev, xstats_names + count,
+				     limit - count);
+	if (ret < 0)
+		return ret;
+
+	return count + ret;
+}
+
+/*
+ * Report the values of the extended statistics.
+ *
+ * When the array holds fewer than the required number of entries nothing
+ * is written and the required number is returned. Otherwise the per-queue
+ * counters of each configured TX queue, then of each configured RX queue,
+ * are written with their position as id, followed by the VF's statistics.
+ *
+ * Returns the number of entries written, the required number when the
+ * array is too small, or a negative value on error.
+ */
+static int
+hn_dev_xstats_get(struct rte_eth_dev *dev,
+		  struct rte_eth_xstat *xstats,
+		  unsigned int n)
+{
+	unsigned int i, t, count = 0;
+	const int nstats = hn_dev_xstats_count(dev);
+	const char *stats;
+	int ret;
+
+	PMD_INIT_FUNC_TRACE();
+
+	/* propagate a counting error instead of comparing it as unsigned */
+	if (nstats < 0)
+		return nstats;
+
+	if (n < (unsigned int)nstats)
+		return nstats;
+
+	for (i = 0; i < dev->data->nb_tx_queues; i++) {
+		const struct hn_tx_queue *txq = dev->data->tx_queues[i];
+
+		if (!txq)
+			continue;
+
+		stats = (const char *)&txq->stats;
+		for (t = 0; t < RTE_DIM(hn_stat_strings); t++, count++) {
+			/* id must be filled in by the driver */
+			xstats[count].id = count;
+			xstats[count].value = *(const uint64_t *)
+				(stats + hn_stat_strings[t].offset);
+		}
+	}
+
+	for (i = 0; i < dev->data->nb_rx_queues; i++) {
+		const struct hn_rx_queue *rxq = dev->data->rx_queues[i];
+
+		if (!rxq)
+			continue;
+
+		stats = (const char *)&rxq->stats;
+		for (t = 0; t < RTE_DIM(hn_stat_strings); t++, count++) {
+			xstats[count].id = count;
+			xstats[count].value = *(const uint64_t *)
+				(stats + hn_stat_strings[t].offset);
+		}
+	}
+
+	ret = hn_vf_xstats_get(dev, xstats + count, n - count);
+	if (ret < 0)
+		return ret;
+
+	return count + ret;
+}
+
+/*
+ * Start the port: open the receive filter for broadcast, multicast and
+ * directed frames, then start the VF. If the VF fails to start the
+ * receive filter is closed again and the VF error is returned.
+ */
+static int
+hn_dev_start(struct rte_eth_dev *dev)
+{
+	struct hn_data *hv = dev->data->dev_private;
+	const uint32_t rxfilter = NDIS_PACKET_TYPE_BROADCAST |
+				  NDIS_PACKET_TYPE_ALL_MULTICAST |
+				  NDIS_PACKET_TYPE_DIRECTED;
+	int rc;
+
+	PMD_INIT_FUNC_TRACE();
+
+	rc = hn_rndis_set_rxfilter(hv, rxfilter);
+	if (rc != 0)
+		return rc;
+
+	rc = hn_vf_start(dev);
+	if (rc == 0)
+		return 0;
+
+	/* undo the filter change; the VF error is what gets reported */
+	hn_rndis_set_rxfilter(hv, 0);
+	return rc;
+}
+
+/* Stop the port: clear the RNDIS receive filter, then stop the VF. */
+static void
+hn_dev_stop(struct rte_eth_dev *dev)
+{
+ struct hn_data *hv = dev->data->dev_private;
+
+ PMD_INIT_FUNC_TRACE();
+
+ /* a filter of 0 selects no packet types */
+ hn_rndis_set_rxfilter(hv, 0);
+ hn_vf_stop(dev);
+}
+
+/* Close the port: close the VF, then free the queues of this device. */
+static void
+hn_dev_close(struct rte_eth_dev *dev)
+{
+ PMD_INIT_FUNC_TRACE();
+
+ hn_vf_close(dev);
+ hn_dev_free_queues(dev);
+}
+
+/*
+ * ethdev operations of the netvsc PMD; installed on every port by
+ * eth_hn_dev_init().
+ */
+static const struct eth_dev_ops hn_eth_dev_ops = {
+ .dev_configure = hn_dev_configure,
+ .dev_start = hn_dev_start,
+ .dev_stop = hn_dev_stop,
+ .dev_close = hn_dev_close,
+ .dev_infos_get = hn_dev_info_get,
+ .dev_supported_ptypes_get = hn_vf_supported_ptypes,
+ .promiscuous_enable = hn_dev_promiscuous_enable,
+ .promiscuous_disable = hn_dev_promiscuous_disable,
+ .allmulticast_enable = hn_dev_allmulticast_enable,
+ .allmulticast_disable = hn_dev_allmulticast_disable,
+ .set_mc_addr_list = hn_dev_mc_addr_list,
+ .tx_queue_setup = hn_dev_tx_queue_setup,
+ .tx_queue_release = hn_dev_tx_queue_release,
+ .tx_done_cleanup = hn_dev_tx_done_cleanup,
+ .rx_queue_setup = hn_dev_rx_queue_setup,
+ .rx_queue_release = hn_dev_rx_queue_release,
+ .link_update = hn_dev_link_update,
+ .stats_get = hn_dev_stats_get,
+ .stats_reset = hn_dev_stats_reset,
+ .xstats_get = hn_dev_xstats_get,
+ .xstats_get_names = hn_dev_xstats_get_names,
+ .xstats_reset = hn_dev_xstats_reset,
+};
+
+/*
+ * Setup connection between PMD and kernel.
+ *
+ * Attaches NVS with the given MTU and then RNDIS; when RNDIS fails the
+ * NVS attachment is undone. Returns 0 on success or the error of the
+ * step that failed.
+ */
+static int
+hn_attach(struct hn_data *hv, unsigned int mtu)
+{
+	int rc;
+
+	/* Attach NVS */
+	rc = hn_nvs_attach(hv, mtu);
+	if (rc != 0)
+		return rc;
+
+	/* Attach RNDIS */
+	rc = hn_rndis_attach(hv);
+	if (rc != 0) {
+		hn_nvs_detach(hv);
+		return rc;
+	}
+
+	/*
+	 * NOTE:
+	 * Under certain conditions on certain versions of Hyper-V,
+	 * the RNDIS rxfilter is _not_ zero on the hypervisor side
+	 * after the successful RNDIS initialization.
+	 */
+	hn_rndis_set_rxfilter(hv, NDIS_PACKET_TYPE_NONE);
+	return 0;
+}
+
+/*
+ * Tear down the connection set up by hn_attach().
+ * NOTE(review): NVS is detached before RNDIS, the same order in which
+ * hn_attach() attaches them rather than the reverse - confirm intended.
+ */
+static void
+hn_detach(struct hn_data *hv)
+{
+ hn_nvs_detach(hv);
+ hn_rndis_detach(hv);
+}
+
+/*
+ * Initialize a netvsc port.
+ *
+ * Installs the ethdev operations and burst functions; in the primary
+ * process it additionally parses devargs, opens the primary channel,
+ * allocates the primary receive queue, attaches NVS/RNDIS, creates the
+ * transmit pool, reads the MAC address and, on NVS version 5 or later,
+ * works out the maximum number of queues and adds a pending VF.
+ *
+ * Returns 0 on success or a negative errno value. On failure the primary
+ * channel and primary queue acquired here are released again.
+ */
+static int
+eth_hn_dev_init(struct rte_eth_dev *eth_dev)
+{
+	struct hn_data *hv = eth_dev->data->dev_private;
+	struct rte_device *device = eth_dev->device;
+	struct rte_vmbus_device *vmbus;
+	unsigned int rxr_cnt;
+	int err, max_chan;
+
+	PMD_INIT_FUNC_TRACE();
+
+	vmbus = container_of(device, struct rte_vmbus_device, device);
+	eth_dev->dev_ops = &hn_eth_dev_ops;
+	eth_dev->tx_pkt_burst = &hn_xmit_pkts;
+	eth_dev->rx_pkt_burst = &hn_recv_pkts;
+
+	/*
+	 * for secondary processes, we don't initialize any further as primary
+	 * has already done this work.
+	 */
+	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+		return 0;
+
+	/* Since Hyper-V only supports one MAC address, just use local data */
+	eth_dev->data->mac_addrs = &hv->mac_addr;
+
+	hv->vmbus = vmbus;
+	hv->rxbuf_res = &vmbus->resource[HV_RECV_BUF_MAP];
+	hv->chim_res  = &vmbus->resource[HV_SEND_BUF_MAP];
+	hv->port_id = eth_dev->data->port_id;
+	hv->latency = HN_CHAN_LATENCY_NS;
+	hv->max_queues = 1;
+	hv->vf_port = HN_INVALID_PORT;
+
+	err = hn_parse_args(eth_dev);
+	if (err)
+		return err;
+
+	strlcpy(hv->owner.name, eth_dev->device->name,
+		RTE_ETH_MAX_OWNER_NAME_LEN);
+	err = rte_eth_dev_owner_new(&hv->owner.id);
+	if (err) {
+		PMD_INIT_LOG(ERR, "Can not get owner id");
+		return err;
+	}
+
+	/* Initialize primary channel input for control operations */
+	err = rte_vmbus_chan_open(vmbus, &hv->channels[0]);
+	if (err)
+		return err;
+
+	rte_vmbus_set_latency(hv->vmbus, hv->channels[0], hv->latency);
+
+	hv->primary = hn_rx_queue_alloc(hv, 0,
+					eth_dev->device->numa_node);
+
+	if (!hv->primary) {
+		err = -ENOMEM;
+		goto failed_chan;
+	}
+
+	/*
+	 * hn_attach() undoes its own NVS attach when RNDIS fails, so a
+	 * failure here must not go through hn_detach() a second time.
+	 */
+	err = hn_attach(hv, ETHER_MTU);
+	if (err)
+		goto failed_attach;
+
+	err = hn_tx_pool_init(eth_dev);
+	if (err)
+		goto failed;
+
+	err = hn_rndis_get_eaddr(hv, hv->mac_addr.addr_bytes);
+	if (err)
+		goto failed;
+
+	/* Multi queue requires later versions of windows server */
+	if (hv->nvs_ver < NVS_VERSION_5)
+		return 0;
+
+	max_chan = rte_vmbus_max_channels(vmbus);
+	PMD_INIT_LOG(DEBUG, "VMBus max channels %d", max_chan);
+	if (max_chan <= 0) {
+		/* err is still 0 here; do not report success after teardown */
+		err = -ENODEV;
+		goto failed;
+	}
+
+	if (hn_rndis_query_rsscaps(hv, &rxr_cnt) != 0)
+		rxr_cnt = 1;
+
+	hv->max_queues = RTE_MIN(rxr_cnt, (unsigned int)max_chan);
+
+	/* If VF was reported but not added, do it now */
+	if (hv->vf_present && !hn_vf_attached(hv)) {
+		PMD_INIT_LOG(DEBUG, "Adding VF device");
+
+		err = hn_vf_add(eth_dev, hv);
+		if (err)
+			hv->vf_present = 0;
+	}
+
+	return 0;
+
+failed:
+	hn_tx_pool_uninit(eth_dev);
+	hn_detach(hv);
+failed_attach:
+	/* released the same way eth_hn_dev_uninit() releases it */
+	rte_free(hv->primary);
+	hv->primary = NULL;
+failed_chan:
+	rte_vmbus_chan_close(hv->channels[0]);
+	hv->channels[0] = NULL;
+
+	PMD_INIT_LOG(NOTICE, "device init failed");
+	return err;
+}
+
+/*
+ * Undo eth_hn_dev_init(). Does nothing in secondary processes.
+ * Stops and closes the port, detaches NVS/RNDIS, destroys the transmit
+ * pool, closes the primary channel, frees the primary queue and deletes
+ * the owner id. Always returns 0.
+ */
+static int
+eth_hn_dev_uninit(struct rte_eth_dev *eth_dev)
+{
+ struct hn_data *hv = eth_dev->data->dev_private;
+
+ PMD_INIT_FUNC_TRACE();
+
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+ return 0;
+
+ hn_dev_stop(eth_dev);
+ hn_dev_close(eth_dev);
+
+ /* the port must no longer be driven through this PMD */
+ eth_dev->dev_ops = NULL;
+ eth_dev->tx_pkt_burst = NULL;
+ eth_dev->rx_pkt_burst = NULL;
+
+ hn_detach(hv);
+ hn_tx_pool_uninit(eth_dev);
+ /* close the primary channel before freeing the queue that refers to it */
+ rte_vmbus_chan_close(hv->primary->chan);
+ rte_free(hv->primary);
+ rte_eth_dev_owner_delete(hv->owner.id);
+
+ return 0;
+}
+
+/*
+ * VMBus probe callback: allocate an ethdev for the device and initialize
+ * it. Returns 0 on success, -ENOMEM when no ethdev could be allocated,
+ * or the initialization error (the ethdev is released in that case).
+ */
+static int eth_hn_probe(struct rte_vmbus_driver *drv __rte_unused,
+			struct rte_vmbus_device *dev)
+{
+	struct rte_eth_dev *ethdev;
+	int rc;
+
+	PMD_INIT_FUNC_TRACE();
+
+	ethdev = eth_dev_vmbus_allocate(dev, sizeof(struct hn_data));
+	if (ethdev == NULL)
+		return -ENOMEM;
+
+	rc = eth_hn_dev_init(ethdev);
+	if (rc == 0) {
+		rte_eth_dev_probing_finish(ethdev);
+		return 0;
+	}
+
+	eth_dev_vmbus_release(ethdev);
+	return rc;
+}
+
+/*
+ * VMBus remove callback: look up the ethdev by device name, uninitialize
+ * it and release the port. Returns 0 on success, -ENODEV when no ethdev
+ * exists for the device, or the uninitialization error.
+ */
+static int eth_hn_remove(struct rte_vmbus_device *dev)
+{
+	struct rte_eth_dev *ethdev;
+	int rc;
+
+	PMD_INIT_FUNC_TRACE();
+
+	ethdev = rte_eth_dev_allocated(dev->device.name);
+	if (ethdev == NULL)
+		return -ENODEV;
+
+	rc = eth_hn_dev_uninit(ethdev);
+	if (rc == 0)
+		eth_dev_vmbus_release(ethdev);
+
+	return rc;
+}
+
+/* Network device GUID; the table is terminated by an all-zero entry */
+static const rte_uuid_t hn_net_ids[] = {
+ /* f8615163-df3e-46c5-913f-f2d2f965ed0e */
+ RTE_UUID_INIT(0xf8615163, 0xdf3e, 0x46c5, 0x913f, 0xf2d2f965ed0eULL),
+ { 0 }
+};
+
+/* VMBus driver description: which devices to match and how to bind them */
+static struct rte_vmbus_driver rte_netvsc_pmd = {
+ .id_table = hn_net_ids,
+ .probe = eth_hn_probe,
+ .remove = eth_hn_remove,
+};
+
+/* Register the driver on the VMBus and declare its kernel module dependency */
+RTE_PMD_REGISTER_VMBUS(net_netvsc, rte_netvsc_pmd);
+RTE_PMD_REGISTER_KMOD_DEP(net_netvsc, "* uio_hv_generic");
+
+/*
+ * Register the two log types used by PMD_INIT_LOG and PMD_DRV_LOG and
+ * default both to NOTICE level when registration succeeded.
+ */
+RTE_INIT(hn_init_log)
+{
+ hn_logtype_init = rte_log_register("pmd.net.netvsc.init");
+ if (hn_logtype_init >= 0)
+ rte_log_set_level(hn_logtype_init, RTE_LOG_NOTICE);
+ hn_logtype_driver = rte_log_register("pmd.net.netvsc.driver");
+ if (hn_logtype_driver >= 0)
+ rte_log_set_level(hn_logtype_driver, RTE_LOG_NOTICE);
+}
diff --git a/src/seastar/dpdk/drivers/net/netvsc/hn_logs.h b/src/seastar/dpdk/drivers/net/netvsc/hn_logs.h
new file mode 100644
index 000000000..cddadef09
--- /dev/null
+++ b/src/seastar/dpdk/drivers/net/netvsc/hn_logs.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+
+#ifndef _HN_LOGS_H_
+#define _HN_LOGS_H_
+
+#include <rte_log.h>
+
+extern int hn_logtype_init;
+extern int hn_logtype_driver;
+
+/*
+ * Initialization-time logging. The message is prefixed with the calling
+ * function's name and a newline is appended, so format strings passed in
+ * must not end in "\n".
+ */
+#define PMD_INIT_LOG(level, fmt, args...) \
+ rte_log(RTE_LOG_ ## level, hn_logtype_init, "%s(): " fmt "\n",\
+ __func__, ## args)
+/* Trace entry into a function at DEBUG level. */
+#define PMD_INIT_FUNC_TRACE() PMD_INIT_LOG(DEBUG, " >>")
+
+/* Receive path logging; compiled out unless RTE_LIBRTE_NETVSC_DEBUG_RX is defined. */
+#ifdef RTE_LIBRTE_NETVSC_DEBUG_RX
+#define PMD_RX_LOG(level, fmt, args...) \
+ rte_log(RTE_LOG_ ## level, hn_logtype_driver, \
+ "%s() rx: " fmt "\n", __func__, ## args)
+#else
+#define PMD_RX_LOG(level, fmt, args...) do { } while (0)
+#endif
+
+/* Transmit path logging; compiled out unless RTE_LIBRTE_NETVSC_DEBUG_TX is defined. */
+#ifdef RTE_LIBRTE_NETVSC_DEBUG_TX
+#define PMD_TX_LOG(level, fmt, args...) \
+ rte_log(RTE_LOG_ ## level, hn_logtype_driver, \
+ "%s() tx: " fmt "\n", __func__, ## args)
+#else
+#define PMD_TX_LOG(level, fmt, args...) do { } while (0)
+#endif
+
+/*
+ * General driver logging. The message is prefixed with the calling
+ * function's name and a newline is appended.
+ */
+#define PMD_DRV_LOG(level, fmt, args...) \
+ rte_log(RTE_LOG_ ## level, hn_logtype_driver, "%s(): " fmt "\n", \
+ __func__, ## args)
+
+#endif /* _HN_LOGS_H_ */
diff --git a/src/seastar/dpdk/drivers/net/netvsc/hn_nvs.c b/src/seastar/dpdk/drivers/net/netvsc/hn_nvs.c
new file mode 100644
index 000000000..d58770e04
--- /dev/null
+++ b/src/seastar/dpdk/drivers/net/netvsc/hn_nvs.c
@@ -0,0 +1,550 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2018 Microsoft Corp.
+ * Copyright (c) 2010-2012 Citrix Inc.
+ * Copyright (c) 2012 NetApp Inc.
+ * All rights reserved.
+ */
+
+/*
+ * Network Virtualization Service.
+ */
+
+
+#include <stdint.h>
+#include <string.h>
+#include <stdio.h>
+#include <errno.h>
+#include <unistd.h>
+
+#include <rte_ethdev.h>
+#include <rte_string_fns.h>
+#include <rte_memzone.h>
+#include <rte_malloc.h>
+#include <rte_atomic.h>
+#include <rte_branch_prediction.h>
+#include <rte_ether.h>
+#include <rte_common.h>
+#include <rte_errno.h>
+#include <rte_cycles.h>
+#include <rte_memory.h>
+#include <rte_eal.h>
+#include <rte_dev.h>
+#include <rte_bus_vmbus.h>
+
+#include "hn_logs.h"
+#include "hn_var.h"
+#include "hn_nvs.h"
+
+/*
+ * NVS protocol versions offered to the host. hn_nvs_init() tries them in
+ * array order and keeps the first one the host accepts.
+ */
+static const uint32_t hn_nvs_version[] = {
+ NVS_VERSION_61,
+ NVS_VERSION_6,
+ NVS_VERSION_5,
+ NVS_VERSION_4,
+ NVS_VERSION_2,
+ NVS_VERSION_1
+};
+
+/*
+ * Send an NVS request on the primary channel as an in-band packet with
+ * no flags set; used for requests that have no response. Returns the
+ * result of rte_vmbus_chan_send().
+ */
+static int hn_nvs_req_send(struct hn_data *hv,
+ void *req, uint32_t reqlen)
+{
+ return rte_vmbus_chan_send(hn_primary_chan(hv),
+ VMBUS_CHANPKT_TYPE_INBAND,
+ req, reqlen, 0,
+ VMBUS_CHANPKT_FLAG_NONE, NULL);
+}
+
+/*
+ * Send an NVS request on the primary channel and wait for its response.
+ *
+ * The receive is retried for as long as the channel reports -EAGAIN.
+ * The response must be at least resplen bytes long and carry the
+ * expected type; its first resplen bytes are copied to resp.
+ *
+ * Returns 0 on success, the channel error when sending or receiving
+ * fails, or -EINVAL when the response is too short or of another type.
+ */
+static int
+hn_nvs_execute(struct hn_data *hv,
+	       void *req, uint32_t reqlen,
+	       void *resp, uint32_t resplen,
+	       uint32_t type)
+{
+	struct vmbus_channel *chan = hn_primary_chan(hv);
+	char buffer[NVS_RESPSIZE_MAX];
+	const struct hn_nvs_hdr *hdr;
+	uint32_t len;
+	int ret;
+
+	/* Send request to ring buffer */
+	ret = rte_vmbus_chan_send(chan, VMBUS_CHANPKT_TYPE_INBAND,
+				  req, reqlen, 0,
+				  VMBUS_CHANPKT_FLAG_RC, NULL);
+
+	if (ret) {
+		PMD_DRV_LOG(ERR, "send request failed: %d", ret);
+		return ret;
+	}
+
+ retry:
+	len = sizeof(buffer);
+	ret = rte_vmbus_chan_recv(chan, buffer, &len, NULL);
+	if (ret == -EAGAIN) {
+		rte_delay_us(HN_CHAN_INTERVAL_US);
+		goto retry;
+	}
+
+	if (ret < 0) {
+		PMD_DRV_LOG(ERR, "recv response failed: %d", ret);
+		return ret;
+	}
+
+	/*
+	 * Validate the length first: the header must have been received
+	 * in full before its type field can be trusted.
+	 */
+	if (len < sizeof(*hdr) || len < resplen) {
+		PMD_DRV_LOG(ERR,
+			    "invalid NVS resp len %u (expect %u)",
+			    len, resplen);
+		return -EINVAL;
+	}
+
+	hdr = (struct hn_nvs_hdr *)buffer;
+	if (hdr->type != type) {
+		PMD_DRV_LOG(ERR, "unexpected NVS resp %#x, expect %#x",
+			    hdr->type, type);
+		return -EINVAL;
+	}
+
+	memcpy(resp, buffer, resplen);
+
+	/* All pass! */
+	return 0;
+}
+
+/*
+ * Offer exactly one NVS protocol version to the host.
+ * Returns 0 when the host accepts it, the transport error when the
+ * exchange fails, or -EINVAL when the host refuses the version.
+ */
+static int
+hn_nvs_doinit(struct hn_data *hv, uint32_t nvs_ver)
+{
+	struct hn_nvs_init_resp reply;
+	struct hn_nvs_init request;
+	int rc;
+
+	memset(&request, 0, sizeof(request));
+	request.type = NVS_TYPE_INIT;
+	request.ver_min = nvs_ver;
+	request.ver_max = nvs_ver;
+
+	rc = hn_nvs_execute(hv, &request, sizeof(request),
+			    &reply, sizeof(reply),
+			    NVS_TYPE_INIT_RESP);
+	if (rc != 0)
+		return rc;
+
+	if (reply.status == NVS_STATUS_OK)
+		return 0;
+
+	/* Not fatal, try other versions */
+	PMD_INIT_LOG(DEBUG, "nvs init failed for ver 0x%x",
+		     nvs_ver);
+	return -EINVAL;
+}
+
+/*
+ * Connect the receive buffer to NVS and allocate the per-section
+ * bookkeeping array hv->rxbuf_info.
+ *
+ * Returns 0 on success, the transport error when the exchange fails,
+ * -EIO when the host refuses or reports other than one section, or
+ * -ENOMEM when the bookkeeping array cannot be allocated.
+ */
+static int
+hn_nvs_conn_rxbuf(struct hn_data *hv)
+{
+	struct hn_nvs_rxbuf_conn conn;
+	struct hn_nvs_rxbuf_connresp resp;
+	uint32_t status;
+	int error;
+
+	/* Kernel has already setup RXBUF on primary channel. */
+
+	/*
+	 * Connect RXBUF to NVS.
+	 * The whole request is sent to the host, so clear it first like
+	 * the other NVS requests do; otherwise any member or padding not
+	 * set below would carry stack contents.
+	 */
+	memset(&conn, 0, sizeof(conn));
+	conn.type = NVS_TYPE_RXBUF_CONN;
+	conn.gpadl = hv->rxbuf_res->phys_addr;
+	conn.sig = NVS_RXBUF_SIG;
+	PMD_DRV_LOG(DEBUG, "connect rxbuff va=%p gpad=%#" PRIx64,
+		    hv->rxbuf_res->addr,
+		    hv->rxbuf_res->phys_addr);
+
+	error = hn_nvs_execute(hv, &conn, sizeof(conn),
+			       &resp, sizeof(resp),
+			       NVS_TYPE_RXBUF_CONNRESP);
+	if (error) {
+		PMD_DRV_LOG(ERR,
+			    "exec nvs rxbuf conn failed: %d",
+			    error);
+		return error;
+	}
+
+	status = resp.status;
+	if (status != NVS_STATUS_OK) {
+		PMD_DRV_LOG(ERR,
+			    "nvs rxbuf conn failed: %x", status);
+		return -EIO;
+	}
+	if (resp.nsect != 1) {
+		PMD_DRV_LOG(ERR,
+			    "nvs rxbuf response num sections %u != 1",
+			    resp.nsect);
+		return -EIO;
+	}
+
+	PMD_DRV_LOG(INFO,
+		    "receive buffer size %u count %u",
+		    resp.nvs_sect[0].slotsz,
+		    resp.nvs_sect[0].slotcnt);
+	hv->rxbuf_section_cnt = resp.nvs_sect[0].slotcnt;
+
+	hv->rxbuf_info = rte_calloc("HN_RXBUF_INFO", hv->rxbuf_section_cnt,
+				    sizeof(*hv->rxbuf_info), RTE_CACHE_LINE_SIZE);
+	if (!hv->rxbuf_info) {
+		PMD_DRV_LOG(ERR,
+			    "could not allocate rxbuf info");
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/*
+ * Disconnect the receive buffer from NVS and free the bookkeeping array
+ * allocated by hn_nvs_conn_rxbuf(). A send failure is only logged.
+ */
+static void
+hn_nvs_disconn_rxbuf(struct hn_data *hv)
+{
+	struct hn_nvs_rxbuf_disconn disconn;
+	int error;
+
+	/*
+	 * Disconnect RXBUF from NVS.
+	 */
+	memset(&disconn, 0, sizeof(disconn));
+	disconn.type = NVS_TYPE_RXBUF_DISCONN;
+	disconn.sig = NVS_RXBUF_SIG;
+
+	/* NOTE: No response. */
+	error = hn_nvs_req_send(hv, &disconn, sizeof(disconn));
+	if (error) {
+		PMD_DRV_LOG(ERR,
+			    "send nvs rxbuf disconn failed: %d",
+			    error);
+	}
+
+	rte_free(hv->rxbuf_info);
+	/* do not leave a dangling pointer for a repeated detach to free */
+	hv->rxbuf_info = NULL;
+	/*
+	 * Linger long enough for NVS to disconnect RXBUF.
+	 */
+	rte_delay_ms(200);
+}
+
+/*
+ * Disconnect the chimney sending buffer from NVS. Does nothing when no
+ * sections are in use (hv->chim_cnt == 0); a send failure is only logged.
+ */
+static void
+hn_nvs_disconn_chim(struct hn_data *hv)
+{
+	struct hn_nvs_chim_disconn req;
+	int rc;
+
+	if (hv->chim_cnt == 0)
+		return;
+
+	/* Disconnect chimney sending buffer from NVS. */
+	memset(&req, 0, sizeof(req));
+	req.type = NVS_TYPE_CHIM_DISCONN;
+	req.sig = NVS_CHIM_SIG;
+
+	/* NOTE: No response. */
+	rc = hn_nvs_req_send(hv, &req, sizeof(req));
+	if (rc != 0)
+		PMD_DRV_LOG(ERR,
+			    "send nvs chim disconn failed: %d", rc);
+
+	hv->chim_cnt = 0;
+
+	/*
+	 * Linger long enough for NVS to disconnect chimney
+	 * sending buffer.
+	 */
+	rte_delay_ms(200);
+}
+
+/*
+ * Connect the chimney sending buffer to NVS and record the section size
+ * and section count reported for it in hv->chim_szmax / hv->chim_cnt.
+ *
+ * Returns 0 on success, the transport error when the exchange fails,
+ * -EIO when the host refuses, or -EINVAL when the reported section size
+ * is zero or not a multiple of sizeof(uint32_t).
+ */
+static int
+hn_nvs_conn_chim(struct hn_data *hv)
+{
+ struct hn_nvs_chim_conn chim;
+ struct hn_nvs_chim_connresp resp;
+ uint32_t sectsz;
+ unsigned long len = hv->chim_res->len;
+ int error;
+
+ /* Connect chimney sending buffer to NVS */
+ memset(&chim, 0, sizeof(chim));
+ chim.type = NVS_TYPE_CHIM_CONN;
+ chim.gpadl = hv->chim_res->phys_addr;
+ chim.sig = NVS_CHIM_SIG;
+ PMD_DRV_LOG(DEBUG, "connect send buf va=%p gpad=%#" PRIx64,
+ hv->chim_res->addr,
+ hv->chim_res->phys_addr);
+
+ error = hn_nvs_execute(hv, &chim, sizeof(chim),
+ &resp, sizeof(resp),
+ NVS_TYPE_CHIM_CONNRESP);
+ if (error) {
+ PMD_DRV_LOG(ERR, "exec nvs chim conn failed");
+ return error;
+ }
+
+ if (resp.status != NVS_STATUS_OK) {
+ PMD_DRV_LOG(ERR, "nvs chim conn failed: %x",
+ resp.status);
+ return -EIO;
+ }
+
+ sectsz = resp.sectsz;
+ if (sectsz == 0 || sectsz & (sizeof(uint32_t) - 1)) {
+ /* Can't use chimney sending buffer; done! */
+ PMD_DRV_LOG(NOTICE,
+ "invalid chimney sending buffer section size: %u",
+ sectsz);
+ error = -EINVAL;
+ goto cleanup;
+ }
+
+ /* number of whole sections that fit in the send buffer */
+ hv->chim_szmax = sectsz;
+ hv->chim_cnt = len / sectsz;
+
+ PMD_DRV_LOG(INFO, "send buffer %lu section size:%u, count:%u",
+ len, hv->chim_szmax, hv->chim_cnt);
+
+ /* Done! */
+ return 0;
+
+cleanup:
+ /*
+ * NOTE(review): hn_nvs_disconn_chim() only sends a disconnect when
+ * hv->chim_cnt is non-zero, and chim_cnt is not assigned before this
+ * point in this function - confirm the buffer really gets disconnected.
+ */
+ hn_nvs_disconn_chim(hv);
+ return error;
+}
+
+/*
+ * Send the NDIS configuration message: the MTU (with the Ethernet
+ * header length added) and the capability flags. VLAN is always
+ * requested; SRIOV is requested from NVS version 5 onwards.
+ *
+ * Returns 0 on success or the hn_nvs_req_send() error.
+ */
+static int
+hn_nvs_conf_ndis(struct hn_data *hv, unsigned int mtu)
+{
+	struct hn_nvs_ndis_conf msg;
+	int rc;
+
+	memset(&msg, 0, sizeof(msg));
+	msg.type = NVS_TYPE_NDIS_CONF;
+	msg.mtu = mtu + ETHER_HDR_LEN;
+	msg.caps = NVS_NDIS_CONF_VLAN;
+	if (hv->nvs_ver >= NVS_VERSION_5)
+		msg.caps |= NVS_NDIS_CONF_SRIOV;
+
+	/* The host does not answer this request. */
+	rc = hn_nvs_req_send(hv, &msg, sizeof(msg));
+	if (rc == 0)
+		return 0;
+
+	PMD_DRV_LOG(ERR,
+		    "send nvs ndis conf failed: %d", rc);
+	return rc;
+}
+
+/*
+ * Send the NDIS init message carrying the major and minor parts of
+ * hv->ndis_ver.
+ *
+ * Returns the hn_nvs_req_send() result (0 on success).
+ */
+static int
+hn_nvs_init_ndis(struct hn_data *hv)
+{
+	struct hn_nvs_ndis_init msg;
+	int rc;
+
+	memset(&msg, 0, sizeof(msg));
+	msg.type = NVS_TYPE_NDIS_INIT;
+	msg.ndis_major = NDIS_VERSION_MAJOR(hv->ndis_ver);
+	msg.ndis_minor = NDIS_VERSION_MINOR(hv->ndis_ver);
+
+	/* The host does not answer this request. */
+	rc = hn_nvs_req_send(hv, &msg, sizeof(msg));
+	if (rc != 0) {
+		PMD_DRV_LOG(ERR,
+			    "send nvs ndis init failed: %d", rc);
+	}
+
+	return rc;
+}
+
+/*
+ * Negotiate the NVS version: try each entry of hn_nvs_version in array
+ * order and keep the first one hn_nvs_doinit() accepts. The NDIS
+ * version is then derived from the NVS version.
+ *
+ * Returns 0 on success or -ENXIO when no version is accepted.
+ */
+static int
+hn_nvs_init(struct hn_data *hv)
+{
+	unsigned int idx;
+
+	for (idx = 0; idx < RTE_DIM(hn_nvs_version); idx++) {
+		int rc = hn_nvs_doinit(hv, hn_nvs_version[idx]);
+
+		if (rc == 0) {
+			hv->nvs_ver = hn_nvs_version[idx];
+
+			/* NVS 4 and older get NDIS 6.1, newer get 6.30. */
+			hv->ndis_ver = (hv->nvs_ver <= NVS_VERSION_4) ?
+				NDIS_VERSION_6_1 : NDIS_VERSION_6_30;
+
+			PMD_INIT_LOG(DEBUG,
+				     "NVS version %#x, NDIS version %u.%u",
+				     hv->nvs_ver,
+				     NDIS_VERSION_MAJOR(hv->ndis_ver),
+				     NDIS_VERSION_MINOR(hv->ndis_ver));
+			return 0;
+		}
+
+		PMD_INIT_LOG(DEBUG, "version %#x error %d",
+			     hn_nvs_version[idx], rc);
+	}
+
+	PMD_DRV_LOG(ERR,
+		    "no NVS compatible version available");
+	return -ENXIO;
+}
+
+/*
+ * Bring up the NVS side of the device: negotiate the NVS version,
+ * configure NDIS (NVS version 2 and later only), initialize NDIS,
+ * then connect the receive buffer and the chimney sending buffer.
+ *
+ * Returns 0 on success or the error of the first step that fails.
+ * When the chimney connection fails the receive buffer is
+ * disconnected again before returning.
+ */
+int
+hn_nvs_attach(struct hn_data *hv, unsigned int mtu)
+{
+	int rc;
+
+	rc = hn_nvs_init(hv);
+	if (rc != 0)
+		return rc;
+
+	/* NDIS has to be configured before it is initialized. */
+	if (hv->nvs_ver >= NVS_VERSION_2) {
+		rc = hn_nvs_conf_ndis(hv, mtu);
+		if (rc != 0)
+			return rc;
+	}
+
+	rc = hn_nvs_init_ndis(hv);
+	if (rc != 0)
+		return rc;
+
+	rc = hn_nvs_conn_rxbuf(hv);
+	if (rc != 0)
+		return rc;
+
+	rc = hn_nvs_conn_chim(hv);
+	if (rc != 0)
+		hn_nvs_disconn_rxbuf(hv);
+
+	return rc;
+}
+
+/*
+ * Detach from NVS by disconnecting the receive buffer and then the
+ * chimney sending buffer.
+ *
+ * hv is used by both calls below, so it is not marked __rte_unused.
+ */
+void
+hn_nvs_detach(struct hn_data *hv)
+{
+	PMD_INIT_FUNC_TRACE();
+
+	/* NOTE: there are no requests to stop the NVS. */
+	hn_nvs_disconn_rxbuf(hv);
+	hn_nvs_disconn_chim(hv);
+}
+
+/*
+ * Acknowledge the RXBUF consumed for channel packet tid so that the
+ * hypervisor can recycle it.
+ *
+ * An -EAGAIN result from the send is retried with a 1 ms pause between
+ * attempts, for at most 10 attempts in total. Any other error, or
+ * running out of attempts, is logged and the acknowledgment is
+ * dropped.
+ */
+void
+hn_nvs_ack_rxbuf(struct vmbus_channel *chan, uint64_t tid)
+{
+	struct hn_nvs_rndis_ack msg = {
+		.type = NVS_TYPE_RNDIS_ACK,
+		.status = NVS_STATUS_OK,
+	};
+	unsigned int attempts = 0;
+	int rc;
+
+	PMD_RX_LOG(DEBUG, "ack RX id %" PRIu64, tid);
+
+	for (;;) {
+		rc = rte_vmbus_chan_send(chan, VMBUS_CHANPKT_TYPE_COMP,
+					 &msg, sizeof(msg), tid,
+					 VMBUS_CHANPKT_FLAG_NONE, NULL);
+		if (rc == 0)
+			return;
+
+		if (rc != -EAGAIN)
+			break;
+
+		/*
+		 * NOTE:
+		 * This should _not_ happen in real world, since the
+		 * consumption of the TX bufring from the TX path is
+		 * controlled.
+		 */
+		PMD_RX_LOG(NOTICE, "RXBUF ack retry");
+		if (++attempts >= 10)
+			break;
+
+		rte_delay_ms(1);
+	}
+
+	/* RXBUF leaks! */
+	PMD_DRV_LOG(ERR, "RXBUF ack failed");
+}
+
+/*
+ * Ask NVS to allocate sub-channels.
+ *
+ * On entry *nsubch is the number requested; on success it is replaced
+ * by the number the host reports. A notice is logged when the host
+ * reports more than were requested.
+ *
+ * Returns 0 on success, the hn_nvs_execute() error when the exchange
+ * fails, or -EIO when the host answers with a non-OK status.
+ */
+int
+hn_nvs_alloc_subchans(struct hn_data *hv, uint32_t *nsubch)
+{
+	struct hn_nvs_subch_resp reply;
+	struct hn_nvs_subch_req ask;
+	int rc;
+
+	memset(&ask, 0, sizeof(ask));
+	ask.type = NVS_TYPE_SUBCH_REQ;
+	ask.op = NVS_SUBCH_OP_ALLOC;
+	ask.nsubch = *nsubch;
+
+	rc = hn_nvs_execute(hv, &ask, sizeof(ask),
+			    &reply, sizeof(reply),
+			    NVS_TYPE_SUBCH_RESP);
+	if (rc != 0)
+		return rc;
+
+	if (reply.status != NVS_STATUS_OK) {
+		PMD_INIT_LOG(ERR,
+			     "nvs subch alloc failed: %#x",
+			     reply.status);
+		return -EIO;
+	}
+
+	if (reply.nsubch > *nsubch)
+		PMD_INIT_LOG(NOTICE,
+			     "%u subchans are allocated, requested %u",
+			     reply.nsubch, *nsubch);
+
+	*nsubch = reply.nsubch;
+	return 0;
+}
+
+/*
+ * Tell NVS which data path is active. A non-zero path is logged as
+ * "VF", zero as "Synthetic". A send failure is logged only; nothing is
+ * reported back to the caller.
+ */
+void
+hn_nvs_set_datapath(struct hn_data *hv, uint32_t path)
+{
+	struct hn_nvs_datapath msg;
+	int rc;
+
+	PMD_DRV_LOG(DEBUG, "set datapath %s",
+		    path ? "VF" : "Synthetic");
+
+	memset(&msg, 0, sizeof(msg));
+	msg.type = NVS_TYPE_SET_DATAPATH;
+	msg.active_path = path;
+
+	rc = hn_nvs_req_send(hv, &msg, sizeof(msg));
+	if (rc != 0)
+		PMD_DRV_LOG(ERR,
+			    "send set datapath failed: %d",
+			    rc);
+}
diff --git a/src/seastar/dpdk/drivers/net/netvsc/hn_nvs.h b/src/seastar/dpdk/drivers/net/netvsc/hn_nvs.h
new file mode 100644
index 000000000..2563fd8d8
--- /dev/null
+++ b/src/seastar/dpdk/drivers/net/netvsc/hn_nvs.h
@@ -0,0 +1,238 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2018 Microsoft Corp.
+ * All rights reserved.
+ */
+
+/*
+ * The indirection table message is the largest message
+ * received from host, and that is 112 bytes.
+ */
+#define NVS_RESPSIZE_MAX 256
+
+/*
+ * NDIS protocol version numbers
+ */
+#define NDIS_VERSION_6_1 0x00060001
+#define NDIS_VERSION_6_20 0x00060014
+#define NDIS_VERSION_6_30 0x0006001e
+#define NDIS_VERSION_MAJOR(ver) (((ver) & 0xffff0000) >> 16)
+#define NDIS_VERSION_MINOR(ver) ((ver) & 0xffff)
+
+/*
+ * NVS versions.
+ */
+#define NVS_VERSION_1 0x00002
+#define NVS_VERSION_2 0x30002
+#define NVS_VERSION_4 0x40000
+#define NVS_VERSION_5 0x50000
+#define NVS_VERSION_6 0x60000
+#define NVS_VERSION_61 0x60001
+
+#define NVS_RXBUF_SIG 0xcafe
+#define NVS_CHIM_SIG 0xface
+
+#define NVS_CHIM_IDX_INVALID 0xffffffff
+
+#define NVS_RNDIS_MTYPE_DATA 0
+#define NVS_RNDIS_MTYPE_CTRL 1
+
+/*
+ * NVS message transaction status codes.
+ */
+#define NVS_STATUS_OK 1
+#define NVS_STATUS_FAILED 2
+
+/*
+ * NVS request/response message types.
+ */
+#define NVS_TYPE_INIT 1
+#define NVS_TYPE_INIT_RESP 2
+
+#define NVS_TYPE_NDIS_INIT 100
+#define NVS_TYPE_RXBUF_CONN 101
+#define NVS_TYPE_RXBUF_CONNRESP 102
+#define NVS_TYPE_RXBUF_DISCONN 103
+#define NVS_TYPE_CHIM_CONN 104
+#define NVS_TYPE_CHIM_CONNRESP 105
+#define NVS_TYPE_CHIM_DISCONN 106
+#define NVS_TYPE_RNDIS 107
+#define NVS_TYPE_RNDIS_ACK 108
+
+#define NVS_TYPE_NDIS_CONF 125
+#define NVS_TYPE_VFASSOC_NOTE 128 /* notification */
+#define NVS_TYPE_SET_DATAPATH 129
+#define NVS_TYPE_SUBCH_REQ 133
+#define NVS_TYPE_SUBCH_RESP 133 /* same as SUBCH_REQ */
+#define NVS_TYPE_TXTBL_NOTE 134 /* notification */
+
+
+/* NVS message common header */
+struct hn_nvs_hdr {
+ uint32_t type;
+} __rte_packed;
+
+struct hn_nvs_init {
+ uint32_t type; /* NVS_TYPE_INIT */
+ uint32_t ver_min;
+ uint32_t ver_max;
+ uint8_t rsvd[28];
+} __rte_packed;
+
+struct hn_nvs_init_resp {
+ uint32_t type; /* NVS_TYPE_INIT_RESP */
+ uint32_t ver; /* deprecated */
+ uint32_t rsvd;
+ uint32_t status; /* NVS_STATUS_ */
+} __rte_packed;
+
+/* No response */
+struct hn_nvs_ndis_conf {
+ uint32_t type; /* NVS_TYPE_NDIS_CONF */
+ uint32_t mtu; /* hn_nvs_conf_ndis() sends MTU + ETHER_HDR_LEN */
+ uint32_t rsvd;
+ uint64_t caps; /* NVS_NDIS_CONF_ */
+ uint8_t rsvd1[20];
+} __rte_packed;
+
+#define NVS_NDIS_CONF_SRIOV 0x0004
+#define NVS_NDIS_CONF_VLAN 0x0008
+
+/* No response */
+struct hn_nvs_ndis_init {
+ uint32_t type; /* NVS_TYPE_NDIS_INIT */
+ uint32_t ndis_major; /* NDIS_VERSION_MAJOR_ */
+ uint32_t ndis_minor; /* NDIS_VERSION_MINOR_ */
+ uint8_t rsvd[28];
+} __rte_packed;
+
+struct hn_nvs_vf_association {
+ uint32_t type; /* NVS_TYPE_VFASSOC_NOTE */
+ uint32_t allocated;
+ uint32_t serial;
+} __rte_packed;
+
+#define NVS_DATAPATH_SYNTHETIC 0
+#define NVS_DATAPATH_VF 1
+
+/* No response */
+struct hn_nvs_datapath {
+ uint32_t type; /* NVS_TYPE_SET_DATAPATH */
+ uint32_t active_path;/* NVS_DATAPATH_* */
+ uint8_t rsvd[32];
+} __rte_packed;
+
+struct hn_nvs_rxbuf_conn {
+ uint32_t type; /* NVS_TYPE_RXBUF_CONN */
+ uint32_t gpadl; /* RXBUF vmbus GPADL */
+ uint16_t sig; /* NVS_RXBUF_SIG */
+ uint8_t rsvd[30];
+} __rte_packed;
+
+struct hn_nvs_rxbuf_sect {
+ uint32_t start;
+ uint32_t slotsz;
+ uint32_t slotcnt;
+ uint32_t end;
+} __rte_packed;
+
+struct hn_nvs_rxbuf_connresp {
+ uint32_t type; /* NVS_TYPE_RXBUF_CONNRESP */
+ uint32_t status; /* NVS_STATUS_ */
+ uint32_t nsect; /* # of elem in nvs_sect */
+ struct hn_nvs_rxbuf_sect nvs_sect[1];
+} __rte_packed;
+
+/* No response */
+struct hn_nvs_rxbuf_disconn {
+ uint32_t type; /* NVS_TYPE_RXBUF_DISCONN */
+ uint16_t sig; /* NVS_RXBUF_SIG */
+ uint8_t rsvd[34];
+} __rte_packed;
+
+struct hn_nvs_chim_conn {
+ uint32_t type; /* NVS_TYPE_CHIM_CONN */
+ uint32_t gpadl; /* chimney buf vmbus GPADL */
+ uint16_t sig; /* NVS_CHIM_SIG */
+ uint8_t rsvd[30];
+} __rte_packed;
+
+struct hn_nvs_chim_connresp {
+ uint32_t type; /* NVS_TYPE_CHIM_CONNRESP */
+ uint32_t status; /* NVS_STATUS_ */
+ uint32_t sectsz; /* section size; hn_nvs_conn_chim() rejects 0 or a value that is not a multiple of sizeof(uint32_t) */
+} __rte_packed;
+
+/* No response */
+struct hn_nvs_chim_disconn {
+ uint32_t type; /* NVS_TYPE_CHIM_DISCONN */
+ uint16_t sig; /* NVS_CHIM_SIG */
+ uint8_t rsvd[34];
+} __rte_packed;
+
+#define NVS_SUBCH_OP_ALLOC 1
+
+struct hn_nvs_subch_req {
+ uint32_t type; /* NVS_TYPE_SUBCH_REQ */
+ uint32_t op; /* NVS_SUBCH_OP_ */
+ uint32_t nsubch;
+ uint8_t rsvd[28];
+} __rte_packed;
+
+struct hn_nvs_subch_resp {
+ uint32_t type; /* NVS_TYPE_SUBCH_RESP */
+ uint32_t status; /* NVS_STATUS_ */
+ uint32_t nsubch;
+ uint8_t rsvd[28];
+} __rte_packed;
+
+struct hn_nvs_rndis {
+ uint32_t type; /* NVS_TYPE_RNDIS */
+ uint32_t rndis_mtype;/* NVS_RNDIS_MTYPE_ */
+ /*
+ * Chimney sending buffer index and size.
+ *
+ * NOTE:
+ * If chim_idx is set to NVS_CHIM_IDX_INVALID
+ * and chim_sz is set to 0, then chimney sending
+ * buffer is _not_ used by this RNDIS message.
+ */
+ uint32_t chim_idx;
+ uint32_t chim_sz;
+ uint8_t rsvd[24];
+} __rte_packed;
+
+struct hn_nvs_rndis_ack {
+ uint32_t type; /* NVS_TYPE_RNDIS_ACK */
+ uint32_t status; /* NVS_STATUS_ */
+ uint8_t rsvd[32];
+} __rte_packed;
+
+
+int hn_nvs_attach(struct hn_data *hv, unsigned int mtu);
+void hn_nvs_detach(struct hn_data *hv);
+void hn_nvs_ack_rxbuf(struct vmbus_channel *chan, uint64_t tid);
+int hn_nvs_alloc_subchans(struct hn_data *hv, uint32_t *nsubch);
+void hn_nvs_set_datapath(struct hn_data *hv, uint32_t path);
+void hn_nvs_handle_vfassoc(struct rte_eth_dev *dev,
+ const struct vmbus_chanpkt_hdr *hdr,
+ const void *data);
+
+/*
+ * Send an NVS message as an in-band VMBus channel packet.
+ *
+ * sndc is widened to 64 bits and handed to rte_vmbus_chan_send()
+ * together with flags and need_sig. Returns whatever
+ * rte_vmbus_chan_send() returns.
+ */
+static inline int
+hn_nvs_send(struct vmbus_channel *chan, uint16_t flags,
+	    void *nvs_msg, int nvs_msglen, uintptr_t sndc,
+	    bool *need_sig)
+{
+	const uint64_t xact = (uint64_t)sndc;
+
+	return rte_vmbus_chan_send(chan, VMBUS_CHANPKT_TYPE_INBAND,
+				   nvs_msg, nvs_msglen, xact,
+				   flags, need_sig);
+}
+
+/*
+ * Send an NVS message together with a scatter/gather list of sglen
+ * entries.
+ *
+ * sndc is widened to 64 bits before being handed to
+ * rte_vmbus_chan_send_sglist(). Returns whatever that call returns.
+ */
+static inline int
+hn_nvs_send_sglist(struct vmbus_channel *chan,
+		   struct vmbus_gpa sg[], unsigned int sglen,
+		   void *nvs_msg, int nvs_msglen,
+		   uintptr_t sndc, bool *need_sig)
+{
+	const uint64_t xact = (uint64_t)sndc;
+
+	return rte_vmbus_chan_send_sglist(chan, sg, sglen,
+					  nvs_msg, nvs_msglen,
+					  xact, need_sig);
+}
diff --git a/src/seastar/dpdk/drivers/net/netvsc/hn_rndis.c b/src/seastar/dpdk/drivers/net/netvsc/hn_rndis.c
new file mode 100644
index 000000000..0134ecb67
--- /dev/null
+++ b/src/seastar/dpdk/drivers/net/netvsc/hn_rndis.c
@@ -0,0 +1,1134 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2009-2018 Microsoft Corp.
+ * Copyright (c) 2010-2012 Citrix Inc.
+ * Copyright (c) 2012 NetApp Inc.
+ * All rights reserved.
+ */
+
+#include <stdint.h>
+#include <string.h>
+#include <stdio.h>
+#include <errno.h>
+#include <unistd.h>
+
+#include <rte_ethdev_driver.h>
+#include <rte_ethdev.h>
+#include <rte_string_fns.h>
+#include <rte_memzone.h>
+#include <rte_malloc.h>
+#include <rte_atomic.h>
+#include <rte_branch_prediction.h>
+#include <rte_ether.h>
+#include <rte_common.h>
+#include <rte_errno.h>
+#include <rte_cycles.h>
+#include <rte_memory.h>
+#include <rte_eal.h>
+#include <rte_dev.h>
+#include <rte_bus_vmbus.h>
+
+#include "hn_logs.h"
+#include "hn_var.h"
+#include "hn_nvs.h"
+#include "hn_rndis.h"
+#include "ndis.h"
+
+#define HN_RNDIS_XFER_SIZE 0x4000
+
+#define HN_NDIS_TXCSUM_CAP_IP4 \
+ (NDIS_TXCSUM_CAP_IP4 | NDIS_TXCSUM_CAP_IP4OPT)
+#define HN_NDIS_TXCSUM_CAP_TCP4 \
+ (NDIS_TXCSUM_CAP_TCP4 | NDIS_TXCSUM_CAP_TCP4OPT)
+#define HN_NDIS_TXCSUM_CAP_TCP6 \
+ (NDIS_TXCSUM_CAP_TCP6 | NDIS_TXCSUM_CAP_TCP6OPT | \
+ NDIS_TXCSUM_CAP_IP6EXT)
+#define HN_NDIS_TXCSUM_CAP_UDP6 \
+ (NDIS_TXCSUM_CAP_UDP6 | NDIS_TXCSUM_CAP_IP6EXT)
+#define HN_NDIS_LSOV2_CAP_IP6 \
+ (NDIS_LSOV2_CAP_IP6EXT | NDIS_LSOV2_CAP_TCP6OPT)
+
+/*
+ * Produce the next RNDIS request id by atomically incrementing
+ * hv->rndis_req_id. Zero is skipped, so the result is never 0.
+ */
+static inline uint32_t
+hn_rndis_rid(struct hn_data *hv)
+{
+	uint32_t id = rte_atomic32_add_return(&hv->rndis_req_id, 1);
+
+	while (id == 0)
+		id = rte_atomic32_add_return(&hv->rndis_req_id, 1);
+
+	return id;
+}
+
+/*
+ * Allocate a zeroed, PAGE_SIZE-aligned buffer of the given size on the
+ * NUMA node of the VMBus device. Returns NULL when the allocation
+ * fails; the caller releases the buffer with rte_free().
+ */
+static void *hn_rndis_alloc(struct hn_data *hv, size_t size)
+{
+	const int node = hv->vmbus->device.numa_node;
+
+	return rte_zmalloc_socket("RNDIS", size, PAGE_SIZE, node);
+}
+
+#ifdef RTE_LIBRTE_NETVSC_DEBUG_DUMP
+/*
+ * Log a human-readable summary of one RNDIS message at DEBUG level.
+ *
+ * buf points at the start of the message; the message type in its
+ * header selects which fields are printed. For packet messages the
+ * per-packet-info (PPI) records are listed as well.
+ *
+ * The function trusts the header fields of the message: the caller
+ * must make sure the buffer really holds a complete message.
+ */
+void hn_rndis_dump(const void *buf)
+{
+	const union {
+		struct rndis_msghdr hdr;
+		struct rndis_packet_msg pkt;
+		struct rndis_init_req init_request;
+		struct rndis_init_comp init_complete;
+		struct rndis_halt_req halt;
+		struct rndis_query_req query_request;
+		struct rndis_query_comp query_complete;
+		struct rndis_set_req set_request;
+		struct rndis_set_comp set_complete;
+		struct rndis_reset_req reset_request;
+		struct rndis_reset_comp reset_complete;
+		struct rndis_keepalive_req keepalive_request;
+		struct rndis_keepalive_comp keepalive_complete;
+		struct rndis_status_msg indicate_status;
+	} *rndis_msg = buf;
+
+	switch (rndis_msg->hdr.type) {
+	case RNDIS_PACKET_MSG: {
+		const struct rndis_pktinfo *ppi;
+		unsigned int ppi_len;
+
+		rte_log(RTE_LOG_DEBUG, hn_logtype_driver,
+			"RNDIS_MSG_PACKET (len %u, data %u:%u, # oob %u %u:%u, pkt %u:%u)\n",
+			rndis_msg->pkt.len,
+			rndis_msg->pkt.dataoffset,
+			rndis_msg->pkt.datalen,
+			rndis_msg->pkt.oobdataelements,
+			rndis_msg->pkt.oobdataoffset,
+			rndis_msg->pkt.oobdatalen,
+			rndis_msg->pkt.pktinfooffset,
+			rndis_msg->pkt.pktinfolen);
+
+		ppi = (const struct rndis_pktinfo *)
+			((const char *)buf
+			 + RNDIS_PACKET_MSG_OFFSET_ABS(rndis_msg->pkt.pktinfooffset));
+
+		ppi_len = rndis_msg->pkt.pktinfolen;
+		while (ppi_len > 0) {
+			const void *ppi_data;
+
+			ppi_data = ppi->data;
+
+			rte_log(RTE_LOG_DEBUG, hn_logtype_driver,
+				" PPI (size %u, type %u, offs %u data %#x)\n",
+				ppi->size, ppi->type, ppi->offset,
+				*(const uint32_t *)ppi_data);
+
+			/*
+			 * Stop on a zero-sized record (it would loop
+			 * forever) and on a record larger than what is
+			 * left: subtracting it would wrap the unsigned
+			 * ppi_len and the walk would run past the PPI
+			 * area.
+			 */
+			if (ppi->size == 0 || ppi->size > ppi_len)
+				break;
+			ppi_len -= ppi->size;
+			ppi = (const struct rndis_pktinfo *)
+				((const char *)ppi + ppi->size);
+		}
+		break;
+	}
+	case RNDIS_INITIALIZE_MSG:
+		rte_log(RTE_LOG_DEBUG, hn_logtype_driver,
+			"RNDIS_MSG_INIT (len %u id %#x, ver %u.%u max xfer %u)\n",
+			rndis_msg->init_request.len,
+			rndis_msg->init_request.rid,
+			rndis_msg->init_request.ver_major,
+			rndis_msg->init_request.ver_minor,
+			rndis_msg->init_request.max_xfersz);
+		break;
+
+	case RNDIS_INITIALIZE_CMPLT:
+		rte_log(RTE_LOG_DEBUG, hn_logtype_driver,
+			"RNDIS_MSG_INIT_C (len %u, id %#x, status 0x%x, vers %u.%u, "
+			"flags %d, max xfer %u, max pkts %u, aligned %u)\n",
+			rndis_msg->init_complete.len,
+			rndis_msg->init_complete.rid,
+			rndis_msg->init_complete.status,
+			rndis_msg->init_complete.ver_major,
+			rndis_msg->init_complete.ver_minor,
+			rndis_msg->init_complete.devflags,
+			rndis_msg->init_complete.pktmaxsz,
+			rndis_msg->init_complete.pktmaxcnt,
+			rndis_msg->init_complete.align);
+		break;
+
+	case RNDIS_HALT_MSG:
+		rte_log(RTE_LOG_DEBUG, hn_logtype_driver,
+			"RNDIS_HALT (len %u id %#x)\n",
+			rndis_msg->halt.len, rndis_msg->halt.rid);
+		break;
+
+	case RNDIS_QUERY_MSG:
+		rte_log(RTE_LOG_DEBUG, hn_logtype_driver,
+			"RNDIS_QUERY (len %u, id %#x, oid %#x, info %u:%u)\n",
+			rndis_msg->query_request.len,
+			rndis_msg->query_request.rid,
+			rndis_msg->query_request.oid,
+			rndis_msg->query_request.infobuflen,
+			rndis_msg->query_request.infobufoffset);
+		break;
+
+	case RNDIS_QUERY_CMPLT:
+		rte_log(RTE_LOG_DEBUG, hn_logtype_driver,
+			"RNDIS_MSG_QUERY_C (len %u, id %#x, status 0x%x, buf %u:%u)\n",
+			rndis_msg->query_complete.len,
+			rndis_msg->query_complete.rid,
+			rndis_msg->query_complete.status,
+			rndis_msg->query_complete.infobuflen,
+			rndis_msg->query_complete.infobufoffset);
+		break;
+
+	case RNDIS_SET_MSG:
+		rte_log(RTE_LOG_DEBUG, hn_logtype_driver,
+			"RNDIS_SET (len %u, id %#x, oid %#x, info %u:%u)\n",
+			rndis_msg->set_request.len,
+			rndis_msg->set_request.rid,
+			rndis_msg->set_request.oid,
+			rndis_msg->set_request.infobuflen,
+			rndis_msg->set_request.infobufoffset);
+		break;
+
+	case RNDIS_SET_CMPLT:
+		rte_log(RTE_LOG_DEBUG, hn_logtype_driver,
+			"RNDIS_MSG_SET_C (len %u, id 0x%x, status 0x%x)\n",
+			rndis_msg->set_complete.len,
+			rndis_msg->set_complete.rid,
+			rndis_msg->set_complete.status);
+		break;
+
+	case RNDIS_INDICATE_STATUS_MSG:
+		rte_log(RTE_LOG_DEBUG, hn_logtype_driver,
+			"RNDIS_MSG_INDICATE (len %u, status %#x, buf len %u, buf offset %u)\n",
+			rndis_msg->indicate_status.len,
+			rndis_msg->indicate_status.status,
+			rndis_msg->indicate_status.stbuflen,
+			rndis_msg->indicate_status.stbufoffset);
+		break;
+
+	case RNDIS_RESET_MSG:
+		rte_log(RTE_LOG_DEBUG, hn_logtype_driver,
+			"RNDIS_RESET (len %u, id %#x)\n",
+			rndis_msg->reset_request.len,
+			rndis_msg->reset_request.rid);
+		break;
+
+	case RNDIS_RESET_CMPLT:
+		rte_log(RTE_LOG_DEBUG, hn_logtype_driver,
+			"RNDIS_RESET_C (len %u, status %#x address %#x)\n",
+			rndis_msg->reset_complete.len,
+			rndis_msg->reset_complete.status,
+			rndis_msg->reset_complete.adrreset);
+		break;
+
+	case RNDIS_KEEPALIVE_MSG:
+		rte_log(RTE_LOG_DEBUG, hn_logtype_driver,
+			"RNDIS_KEEPALIVE (len %u, id %#x)\n",
+			rndis_msg->keepalive_request.len,
+			rndis_msg->keepalive_request.rid);
+		break;
+
+	case RNDIS_KEEPALIVE_CMPLT:
+		/* The third value printed is the status field. */
+		rte_log(RTE_LOG_DEBUG, hn_logtype_driver,
+			"RNDIS_KEEPALIVE_C (len %u, id %#x status %#x)\n",
+			rndis_msg->keepalive_complete.len,
+			rndis_msg->keepalive_complete.rid,
+			rndis_msg->keepalive_complete.status);
+		break;
+
+	default:
+		rte_log(RTE_LOG_DEBUG, hn_logtype_driver,
+			"RNDIS type %#x len %u\n",
+			rndis_msg->hdr.type,
+			rndis_msg->hdr.len);
+		break;
+	}
+}
+#endif
+
+/*
+ * Send an RNDIS control message over the given channel.
+ *
+ * The request is described to the host by a single scatter/gather
+ * entry, so req must translate to an IOVA through
+ * rte_malloc_virt2iova(), be at most PAGE_SIZE long and must not cross
+ * a page boundary. The chimney sending buffer is not used.
+ *
+ * Returns -EINVAL when any of those conditions fails, otherwise the
+ * result of hn_nvs_send_sglist().
+ */
+static int hn_nvs_send_rndis_ctrl(struct vmbus_channel *chan,
+				  const void *req, uint32_t reqlen)
+{
+	struct hn_nvs_rndis ctrl = {
+		.type = NVS_TYPE_RNDIS,
+		.rndis_mtype = NVS_RNDIS_MTYPE_CTRL,
+		.chim_idx = NVS_CHIM_IDX_INVALID,
+		.chim_sz = 0
+	};
+	struct vmbus_gpa gpa;
+	rte_iova_t iova = rte_malloc_virt2iova(req);
+
+	if (unlikely(iova == RTE_BAD_IOVA)) {
+		PMD_DRV_LOG(ERR, "RNDIS send request can not get iova");
+		return -EINVAL;
+	}
+
+	if (unlikely(reqlen > PAGE_SIZE)) {
+		PMD_DRV_LOG(ERR, "RNDIS request %u greater than page size",
+			    reqlen);
+		return -EINVAL;
+	}
+
+	/* Split the address into a page number and an in-page offset. */
+	gpa.page = iova / PAGE_SIZE;
+	gpa.ofs = iova & PAGE_MASK;
+	gpa.len = reqlen;
+
+	if (gpa.ofs + reqlen > PAGE_SIZE) {
+		PMD_DRV_LOG(ERR, "RNDIS request crosses page bounary");
+		return -EINVAL;
+	}
+
+	hn_rndis_dump(req);
+
+	return hn_nvs_send_sglist(chan, &gpa, 1,
+				  &ctrl, sizeof(ctrl), 0U, NULL);
+}
+
+/*
+ * Handle an RNDIS status indication.
+ *
+ * Link speed changes and media connect/disconnect trigger a link
+ * update; the LSC event callbacks run only when link state change
+ * interrupts are enabled in the device configuration and
+ * hn_dev_link_update() returns 0. Network change and task offload
+ * indications are ignored. Any other status is logged as unknown.
+ */
+void hn_rndis_link_status(struct rte_eth_dev *dev, const void *msg)
+{
+	const struct rndis_status_msg *ind = msg;
+
+	hn_rndis_dump(msg);
+
+	PMD_DRV_LOG(DEBUG, "link status %#x", ind->status);
+
+	switch (ind->status) {
+	case RNDIS_STATUS_LINK_SPEED_CHANGE:
+	case RNDIS_STATUS_MEDIA_CONNECT:
+	case RNDIS_STATUS_MEDIA_DISCONNECT:
+		if (!dev->data->dev_conf.intr_conf.lsc)
+			break;
+		if (hn_dev_link_update(dev, 0) != 0)
+			break;
+		_rte_eth_dev_callback_process(dev,
+					      RTE_ETH_EVENT_INTR_LSC,
+					      NULL);
+		break;
+
+	case RNDIS_STATUS_NETWORK_CHANGE:
+	case RNDIS_STATUS_TASK_OFFLOAD_CURRENT_CONFIG:
+		/* ignore not in DPDK API */
+		break;
+
+	default:
+		PMD_DRV_LOG(NOTICE, "unknown RNDIS indication: %#x",
+			    ind->status);
+		break;
+	}
+}
+
+/*
+ * Callback from hn_process_events when response is visible.
+ *
+ * Copies the response into hv->rndis_resp (truncated to the size of
+ * that buffer) and then clears hv->rndis_pending if it holds the
+ * request id carried by the response, which releases the waiter in
+ * hn_rndis_exec1().
+ *
+ * data/len describe the received RNDIS message. Messages too short to
+ * hold the first three 32-bit header fields, or shorter than the
+ * length announced in their own header, are dropped.
+ */
+void hn_rndis_receive_response(struct hn_data *hv,
+			       const void *data, uint32_t len)
+{
+	const struct rndis_init_comp *hdr = data;
+
+	hn_rndis_dump(data);
+
+	/*
+	 * The first three 32-bit header fields must be readable. The
+	 * size is spelled out directly: sizeof(3 * sizeof(uint32_t))
+	 * would yield sizeof(size_t) instead of 12.
+	 */
+	if (len < 3 * sizeof(uint32_t)) {
+		PMD_DRV_LOG(ERR,
+			    "missing RNDIS header %u", len);
+		return;
+	}
+
+	if (len < hdr->len) {
+		PMD_DRV_LOG(ERR,
+			    "truncated RNDIS response %u", len);
+		return;
+	}
+
+	if (len > sizeof(hv->rndis_resp)) {
+		PMD_DRV_LOG(NOTICE,
+			    "RNDIS response exceeds buffer");
+		len = sizeof(hv->rndis_resp);
+	}
+
+	if (hdr->rid == 0) {
+		PMD_DRV_LOG(NOTICE,
+			    "RNDIS response id zero!");
+	}
+
+	memcpy(hv->rndis_resp, data, len);
+
+	/* make sure response copied before update */
+	rte_smp_wmb();
+
+	if (rte_atomic32_cmpset(&hv->rndis_pending, hdr->rid, 0) == 0) {
+		PMD_DRV_LOG(ERR,
+			    "received id %#x pending id %#x",
+			    hdr->rid, (uint32_t)hv->rndis_pending);
+	}
+}
+
+/*
+ * Do request/response transaction.
+ *
+ * Sends the RNDIS control request req (reqlen bytes) on the primary
+ * channel. When comp is not NULL the request id is first recorded in
+ * hv->rndis_pending, the primary channel is polled until that field
+ * no longer holds the id, and comp_len bytes of hv->rndis_resp are
+ * copied to comp. When comp is NULL no completion is awaited.
+ *
+ * Returns 0 on success, -EIO when comp_len exceeds the response
+ * buffer, -EBUSY when another request is already pending, or the send
+ * error. There is no timeout on the wait for the completion.
+ */
+static int hn_rndis_exec1(struct hn_data *hv,
+			  const void *req, uint32_t reqlen,
+			  void *comp, uint32_t comp_len)
+{
+	const struct rndis_halt_req *hdr = req;
+	uint32_t rid = hdr->rid;
+	struct vmbus_channel *chan = hn_primary_chan(hv);
+	int error;
+
+	if (comp_len > sizeof(hv->rndis_resp)) {
+		PMD_DRV_LOG(ERR,
+			    "Expected completion size %u exceeds buffer %zu",
+			    comp_len, sizeof(hv->rndis_resp));
+		return -EIO;
+	}
+
+	if (comp != NULL &&
+	    rte_atomic32_cmpset(&hv->rndis_pending, 0, rid) == 0) {
+		PMD_DRV_LOG(ERR,
+			    "Request already pending");
+		return -EBUSY;
+	}
+
+	error = hn_nvs_send_rndis_ctrl(chan, req, reqlen);
+	if (error) {
+		PMD_DRV_LOG(ERR, "RNDIS ctrl send failed: %d", error);
+		/*
+		 * Nothing was sent, so no completion will ever clear
+		 * the pending id. Release it here, otherwise every
+		 * later request fails with -EBUSY.
+		 */
+		if (comp != NULL)
+			rte_atomic32_cmpset(&hv->rndis_pending, rid, 0);
+		return error;
+	}
+
+	if (comp) {
+		/* Poll primary channel until response received */
+		while (hv->rndis_pending == rid)
+			hn_process_events(hv, 0, 1);
+
+		memcpy(comp, hv->rndis_resp, comp_len);
+	}
+
+	return 0;
+}
+
+/*
+ * Run one RNDIS request/completion exchange and check the completion.
+ *
+ * comp is zeroed first and then filled by hn_rndis_exec1(). The
+ * completion must carry the message type comp_type and the request id
+ * rid.
+ *
+ * Returns 0 on success, the (negative) hn_rndis_exec1() error,
+ * -ENXIO on a type mismatch, or -EINVAL on a request id mismatch.
+ */
+static int hn_rndis_execute(struct hn_data *hv, uint32_t rid,
+			    const void *req, uint32_t reqlen,
+			    void *comp, uint32_t comp_len, uint32_t comp_type)
+{
+	const struct rndis_comp_hdr *reply = comp;
+	int rc;
+
+	memset(comp, 0, comp_len);
+
+	rc = hn_rndis_exec1(hv, req, reqlen, comp, comp_len);
+	if (rc < 0)
+		return rc;
+
+	/* Is this the kind of completion we asked for? */
+	if (unlikely(reply->type != comp_type)) {
+		PMD_DRV_LOG(ERR,
+			    "unexpected RNDIS response complete %#x expect %#x",
+			    reply->type, comp_type);
+
+		return -ENXIO;
+	}
+
+	/* Does it answer this particular request? */
+	if (unlikely(reply->rid != rid)) {
+		PMD_DRV_LOG(ERR,
+			    "RNDIS comp rid mismatch %#x, expect %#x",
+			    reply->rid, rid);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Issue an RNDIS query for the given OID.
+ *
+ * idata/idlen: input data appended to the query (idlen may be 0).
+ * odata/odlen: buffer that receives the information returned by the
+ * host; at most odlen bytes are copied.
+ *
+ * Returns 0 on success, also when the completion carries no
+ * information buffer (odata is left untouched in that case). Returns
+ * -ENOMEM on allocation failure, the hn_rndis_execute() error when the
+ * exchange fails, and -EINVAL when the host reports a non-success
+ * status or an information buffer that lies outside the completion.
+ */
+static int
+hn_rndis_query(struct hn_data *hv, uint32_t oid,
+	       const void *idata, uint32_t idlen,
+	       void *odata, uint32_t odlen)
+{
+	struct rndis_query_req *req;
+	struct rndis_query_comp *comp;
+	uint32_t reqlen, comp_len;
+	int error = -EIO;
+	unsigned int ofs;
+	uint32_t rid;
+
+	reqlen = sizeof(*req) + idlen;
+	req = hn_rndis_alloc(hv, reqlen);
+	if (req == NULL)
+		return -ENOMEM;
+
+	comp_len = sizeof(*comp) + odlen;
+	comp = rte_zmalloc("QUERY", comp_len, PAGE_SIZE);
+	if (!comp) {
+		error = -ENOMEM;
+		goto done;
+	}
+	comp->status = RNDIS_STATUS_PENDING;
+
+	rid = hn_rndis_rid(hv);
+
+	req->type = RNDIS_QUERY_MSG;
+	req->len = reqlen;
+	req->rid = rid;
+	req->oid = oid;
+	req->infobufoffset = RNDIS_QUERY_REQ_INFOBUFOFFSET;
+	req->infobuflen = idlen;
+
+	/*
+	 * Input data immediately follows RNDIS query. Skip the copy for
+	 * an empty input so memcpy() never sees a NULL source.
+	 */
+	if (idlen != 0)
+		memcpy(req + 1, idata, idlen);
+
+	error = hn_rndis_execute(hv, rid, req, reqlen,
+				 comp, comp_len, RNDIS_QUERY_CMPLT);
+
+	if (error)
+		goto done;
+
+	if (comp->status != RNDIS_STATUS_SUCCESS) {
+		PMD_DRV_LOG(ERR, "RNDIS query 0x%08x failed: status 0x%08x",
+			    oid, comp->status);
+		error = -EINVAL;
+		goto done;
+	}
+
+	if (comp->infobuflen == 0 || comp->infobufoffset == 0) {
+		/* No output data! */
+		PMD_DRV_LOG(ERR, "RNDIS query 0x%08x, no data", oid);
+		error = 0;
+		goto done;
+	}
+
+	/*
+	 * Check output data length and offset.
+	 */
+	/* ofs is the offset from the beginning of comp. */
+	ofs = RNDIS_QUERY_COMP_INFOBUFOFFSET_ABS(comp->infobufoffset);
+	/*
+	 * The length is compared against the space left after ofs
+	 * rather than adding ofs and the length, because that sum can
+	 * wrap around for host-supplied values and pass the check.
+	 */
+	if (ofs < sizeof(*comp) || ofs > comp_len ||
+	    comp->infobuflen > comp_len - ofs) {
+		PMD_DRV_LOG(ERR, "RNDIS query invalid comp ib off/len, %u/%u",
+			    comp->infobufoffset, comp->infobuflen);
+		error = -EINVAL;
+		goto done;
+	}
+
+	/* Save output data. */
+	if (comp->infobuflen < odlen)
+		odlen = comp->infobuflen;
+
+	/* ofs is the offset from the beginning of comp. */
+	memcpy(odata, (const char *)comp + ofs, odlen);
+
+	error = 0;
+done:
+	rte_free(comp);
+	rte_free(req);
+	return error;
+}
+
+/*
+ * Send an RNDIS halt message.
+ *
+ * No completion is requested and the result of the send is not
+ * inspected. Returns -ENOMEM when the request cannot be allocated,
+ * otherwise 0.
+ */
+static int
+hn_rndis_halt(struct hn_data *hv)
+{
+	struct rndis_halt_req *msg = hn_rndis_alloc(hv, sizeof(*msg));
+
+	if (msg == NULL)
+		return -ENOMEM;
+
+	msg->type = RNDIS_HALT_MSG;
+	msg->len = sizeof(*msg);
+	msg->rid = hn_rndis_rid(hv);
+
+	/* No RNDIS completion; rely on NVS message send completion */
+	hn_rndis_exec1(hv, msg, sizeof(*msg), NULL, 0);
+
+	rte_free(msg);
+
+	PMD_INIT_LOG(DEBUG, "RNDIS halt done");
+	return 0;
+}
+
+/*
+ * Query the hardware offload capabilities into *caps.
+ *
+ * The revision and size of the query header follow hv->ndis_ver.
+ * *caps is zeroed before the query. The returned object header is
+ * checked for type, revision and size.
+ *
+ * Returns 0 on success, the hn_rndis_query() error, or -EINVAL when
+ * the returned header fails a check.
+ */
+static int
+hn_rndis_query_hwcaps(struct hn_data *hv, struct ndis_offload *caps)
+{
+	const uint32_t caps_len = NDIS_OFFLOAD_SIZE;
+	struct ndis_offload query;
+	uint32_t query_size;
+	int rc;
+
+	memset(caps, 0, sizeof(*caps));
+	memset(&query, 0, sizeof(query));
+	query.ndis_hdr.ndis_type = NDIS_OBJTYPE_OFFLOAD;
+
+	if (hv->ndis_ver >= NDIS_VERSION_6_30) {
+		query.ndis_hdr.ndis_rev = NDIS_OFFLOAD_REV_3;
+		query_size = NDIS_OFFLOAD_SIZE;
+	} else if (hv->ndis_ver >= NDIS_VERSION_6_1) {
+		query.ndis_hdr.ndis_rev = NDIS_OFFLOAD_REV_2;
+		query_size = NDIS_OFFLOAD_SIZE_6_1;
+	} else {
+		query.ndis_hdr.ndis_rev = NDIS_OFFLOAD_REV_1;
+		query_size = NDIS_OFFLOAD_SIZE_6_0;
+	}
+	query.ndis_hdr.ndis_size = query_size;
+
+	rc = hn_rndis_query(hv, OID_TCP_OFFLOAD_HARDWARE_CAPABILITIES,
+			    &query, query_size, caps, caps_len);
+	if (rc != 0)
+		return rc;
+
+	/* Sanity-check the header of what came back. */
+	if (caps->ndis_hdr.ndis_type != NDIS_OBJTYPE_OFFLOAD) {
+		PMD_DRV_LOG(NOTICE, "invalid NDIS objtype 0x%02x",
+			    caps->ndis_hdr.ndis_type);
+		return -EINVAL;
+	}
+
+	if (caps->ndis_hdr.ndis_rev < NDIS_OFFLOAD_REV_1) {
+		PMD_DRV_LOG(NOTICE, "invalid NDIS objrev 0x%02x",
+			    caps->ndis_hdr.ndis_rev);
+		return -EINVAL;
+	}
+
+	if (caps->ndis_hdr.ndis_size > caps_len) {
+		PMD_DRV_LOG(NOTICE, "invalid NDIS objsize %u, data size %u",
+			    caps->ndis_hdr.ndis_size, caps_len);
+		return -EINVAL;
+	}
+
+	if (caps->ndis_hdr.ndis_size < NDIS_OFFLOAD_SIZE_6_0) {
+		PMD_DRV_LOG(NOTICE, "invalid NDIS objsize %u",
+			    caps->ndis_hdr.ndis_size);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Query the RSS capabilities of the host.
+ *
+ * On success *rxr_cnt0 receives the number of RX rings, capped at the
+ * RSS indirection table size, and hv->rss_offloads is rebuilt from the
+ * reported capability bits. *rxr_cnt0 is 0 on every failure path.
+ *
+ * Returns 0 on success, -EOPNOTSUPP when the NDIS version is below
+ * 6.20 or the indirection table is larger than NDIS_HASH_INDCNT, the
+ * hn_rndis_query() error, or -EINVAL when the returned data fails
+ * validation.
+ */
+int
+hn_rndis_query_rsscaps(struct hn_data *hv,
+		       unsigned int *rxr_cnt0)
+{
+	const uint32_t caps_len = NDIS_RSS_CAPS_SIZE;
+	struct ndis_rss_caps query, caps;
+	unsigned int ind_entries, rings;
+	int rc;
+
+	*rxr_cnt0 = 0;
+
+	if (hv->ndis_ver < NDIS_VERSION_6_20) {
+		PMD_DRV_LOG(DEBUG, "RSS not supported on this host");
+		return -EOPNOTSUPP;
+	}
+
+	memset(&query, 0, sizeof(query));
+	query.ndis_hdr.ndis_type = NDIS_OBJTYPE_RSS_CAPS;
+	query.ndis_hdr.ndis_rev = NDIS_RSS_CAPS_REV_2;
+	query.ndis_hdr.ndis_size = NDIS_RSS_CAPS_SIZE;
+
+	rc = hn_rndis_query(hv, OID_GEN_RECEIVE_SCALE_CAPABILITIES,
+			    &query, NDIS_RSS_CAPS_SIZE,
+			    &caps, caps_len);
+	if (rc != 0)
+		return rc;
+
+	PMD_INIT_LOG(DEBUG, "RX rings %u indirect %u caps %#x",
+		     caps.ndis_nrxr, caps.ndis_nind, caps.ndis_caps);
+
+	/* Sanity-check the header of what came back. */
+	if (caps.ndis_hdr.ndis_type != NDIS_OBJTYPE_RSS_CAPS) {
+		PMD_DRV_LOG(ERR, "invalid NDIS objtype 0x%02x",
+			    caps.ndis_hdr.ndis_type);
+		return -EINVAL;
+	}
+
+	if (caps.ndis_hdr.ndis_rev < NDIS_RSS_CAPS_REV_1) {
+		PMD_DRV_LOG(ERR, "invalid NDIS objrev 0x%02x",
+			    caps.ndis_hdr.ndis_rev);
+		return -EINVAL;
+	}
+
+	if (caps.ndis_hdr.ndis_size > caps_len) {
+		PMD_DRV_LOG(ERR,
+			    "invalid NDIS objsize %u, data size %u",
+			    caps.ndis_hdr.ndis_size, caps_len);
+		return -EINVAL;
+	}
+
+	if (caps.ndis_hdr.ndis_size < NDIS_RSS_CAPS_SIZE_6_0) {
+		PMD_DRV_LOG(ERR, "invalid NDIS objsize %u",
+			    caps.ndis_hdr.ndis_size);
+		return -EINVAL;
+	}
+
+	/* Collect what the later RSS configuration needs. */
+	if (caps.ndis_nrxr == 0) {
+		PMD_DRV_LOG(ERR, "0 RX rings!?");
+		return -EINVAL;
+	}
+	rings = caps.ndis_nrxr;
+
+	/*
+	 * The reported table size is used only for a full-size,
+	 * revision 2 (or later) object; otherwise fall back to
+	 * NDIS_HASH_INDCNT.
+	 */
+	if (caps.ndis_hdr.ndis_size != NDIS_RSS_CAPS_SIZE ||
+	    caps.ndis_hdr.ndis_rev < NDIS_RSS_CAPS_REV_2) {
+		ind_entries = NDIS_HASH_INDCNT;
+	} else {
+		if (caps.ndis_nind > NDIS_HASH_INDCNT) {
+			PMD_DRV_LOG(ERR,
+				    "too many RSS indirect table entries %u",
+				    caps.ndis_nind);
+			return -EOPNOTSUPP;
+		}
+
+		/* A non power-of-2 size is reported but still accepted. */
+		if (!rte_is_power_of_2(caps.ndis_nind)) {
+			PMD_DRV_LOG(ERR,
+				    "RSS indirect table size is not power-of-2 %u",
+				    caps.ndis_nind);
+		}
+
+		ind_entries = caps.ndis_nind;
+	}
+
+	if (ind_entries < rings) {
+		PMD_DRV_LOG(NOTICE,
+			    "# of RX rings (%d) > RSS indirect table size %d",
+			    rings, ind_entries);
+		rings = ind_entries;
+	}
+
+	hv->rss_offloads = 0;
+	if (caps.ndis_caps & NDIS_RSS_CAP_IPV4)
+		hv->rss_offloads |= ETH_RSS_IPV4
+			| ETH_RSS_NONFRAG_IPV4_TCP
+			| ETH_RSS_NONFRAG_IPV4_UDP;
+	if (caps.ndis_caps & NDIS_RSS_CAP_IPV6)
+		hv->rss_offloads |= ETH_RSS_IPV6
+			| ETH_RSS_NONFRAG_IPV6_TCP;
+	if (caps.ndis_caps & NDIS_RSS_CAP_IPV6_EX)
+		hv->rss_offloads |= ETH_RSS_IPV6_EX
+			| ETH_RSS_IPV6_TCP_EX;
+
+	/* Everything checked out: publish the ring count. */
+	*rxr_cnt0 = rings;
+
+	return 0;
+}
+
+/*
+ * Issue an RNDIS set request for the given OID with dlen bytes of
+ * data.
+ *
+ * Returns 0 on success, -ENOMEM when the request cannot be allocated,
+ * or -EIO when the exchange fails or the host reports a non-success
+ * status. Errors are negative errno values, matching the other
+ * functions in this file.
+ */
+static int
+hn_rndis_set(struct hn_data *hv, uint32_t oid, const void *data, uint32_t dlen)
+{
+	struct rndis_set_req *req;
+	struct rndis_set_comp comp;
+	uint32_t reqlen, comp_len;
+	uint32_t rid;
+	int error;
+
+	reqlen = sizeof(*req) + dlen;
+	req = rte_zmalloc("RNDIS_SET", reqlen, PAGE_SIZE);
+	if (!req)
+		return -ENOMEM;
+
+	rid = hn_rndis_rid(hv);
+	req->type = RNDIS_SET_MSG;
+	req->len = reqlen;
+	req->rid = rid;
+	req->oid = oid;
+	req->infobuflen = dlen;
+	req->infobufoffset = RNDIS_SET_REQ_INFOBUFOFFSET;
+
+	/* Data immediately follows RNDIS set. */
+	memcpy(req + 1, data, dlen);
+
+	comp_len = sizeof(comp);
+	error = hn_rndis_execute(hv, rid, req, reqlen,
+				 &comp, comp_len,
+				 RNDIS_SET_CMPLT);
+	if (error) {
+		PMD_DRV_LOG(ERR, "exec RNDIS set %#" PRIx32 " failed",
+			    oid);
+		error = -EIO;
+		goto done;
+	}
+
+	if (comp.status != RNDIS_STATUS_SUCCESS) {
+		PMD_DRV_LOG(ERR,
+			    "RNDIS set %#" PRIx32 " failed: status %#" PRIx32,
+			    oid, comp.status);
+		error = -EIO;
+		goto done;
+	}
+
+done:
+	rte_free(req);
+	return error;
+}
+
+/*
+ * Configure the checksum and TSO offloads requested by the
+ * application.
+ *
+ * tx_offloads/rx_offloads are DEV_TX_OFFLOAD_ and DEV_RX_OFFLOAD_ flag
+ * sets. Each requested offload is checked against the hardware
+ * capabilities reported by the host; the first one that is not
+ * supported aborts the whole configuration.
+ *
+ * Returns 0 on success, the error of the capability query or of the
+ * set request, or -EINVAL when a requested offload is not supported.
+ */
+int hn_rndis_conf_offload(struct hn_data *hv,
+			  uint64_t tx_offloads, uint64_t rx_offloads)
+{
+	struct ndis_offload_params params;
+	struct ndis_offload hwcaps;
+	int error;
+
+	error = hn_rndis_query_hwcaps(hv, &hwcaps);
+	if (error) {
+		PMD_DRV_LOG(ERR, "hwcaps query failed: %d", error);
+		return error;
+	}
+
+	/* NOTE: 0 means "no change" */
+	memset(&params, 0, sizeof(params));
+
+	params.ndis_hdr.ndis_type = NDIS_OBJTYPE_DEFAULT;
+	if (hv->ndis_ver < NDIS_VERSION_6_30) {
+		params.ndis_hdr.ndis_rev = NDIS_OFFLOAD_PARAMS_REV_2;
+		params.ndis_hdr.ndis_size = NDIS_OFFLOAD_PARAMS_SIZE_6_1;
+	} else {
+		params.ndis_hdr.ndis_rev = NDIS_OFFLOAD_PARAMS_REV_3;
+		params.ndis_hdr.ndis_size = NDIS_OFFLOAD_PARAMS_SIZE;
+	}
+
+	if (tx_offloads & DEV_TX_OFFLOAD_TCP_CKSUM) {
+		if (hwcaps.ndis_csum.ndis_ip4_txcsum & NDIS_TXCSUM_CAP_TCP4)
+			params.ndis_tcp4csum = NDIS_OFFLOAD_PARAM_TX;
+		else
+			goto unsupported;
+
+		if (hwcaps.ndis_csum.ndis_ip6_txcsum & NDIS_TXCSUM_CAP_TCP6)
+			params.ndis_tcp6csum = NDIS_OFFLOAD_PARAM_TX;
+		else
+			goto unsupported;
+	}
+
+	if (rx_offloads & DEV_RX_OFFLOAD_TCP_CKSUM) {
+		if ((hwcaps.ndis_csum.ndis_ip4_rxcsum & NDIS_RXCSUM_CAP_TCP4)
+		    == NDIS_RXCSUM_CAP_TCP4)
+			params.ndis_tcp4csum |= NDIS_OFFLOAD_PARAM_RX;
+		else
+			goto unsupported;
+
+		if ((hwcaps.ndis_csum.ndis_ip6_rxcsum & NDIS_RXCSUM_CAP_TCP6)
+		    == NDIS_RXCSUM_CAP_TCP6)
+			params.ndis_tcp6csum |= NDIS_OFFLOAD_PARAM_RX;
+		else
+			goto unsupported;
+	}
+
+	if (tx_offloads & DEV_TX_OFFLOAD_UDP_CKSUM) {
+		if (hwcaps.ndis_csum.ndis_ip4_txcsum & NDIS_TXCSUM_CAP_UDP4)
+			params.ndis_udp4csum = NDIS_OFFLOAD_PARAM_TX;
+		else
+			goto unsupported;
+
+		if ((hwcaps.ndis_csum.ndis_ip6_txcsum & NDIS_TXCSUM_CAP_UDP6)
+		    == NDIS_TXCSUM_CAP_UDP6)
+			params.ndis_udp6csum = NDIS_OFFLOAD_PARAM_TX;
+		else
+			goto unsupported;
+	}
+
+	/* RX flags are tested with the RX constant, not the TX one. */
+	if (rx_offloads & DEV_RX_OFFLOAD_UDP_CKSUM) {
+		if (hwcaps.ndis_csum.ndis_ip4_rxcsum & NDIS_RXCSUM_CAP_UDP4)
+			params.ndis_udp4csum |= NDIS_OFFLOAD_PARAM_RX;
+		else
+			goto unsupported;
+
+		if (hwcaps.ndis_csum.ndis_ip6_rxcsum & NDIS_RXCSUM_CAP_UDP6)
+			params.ndis_udp6csum |= NDIS_OFFLOAD_PARAM_RX;
+		else
+			goto unsupported;
+	}
+
+	if (tx_offloads & DEV_TX_OFFLOAD_IPV4_CKSUM) {
+		if ((hwcaps.ndis_csum.ndis_ip4_txcsum & NDIS_TXCSUM_CAP_IP4)
+		    == NDIS_TXCSUM_CAP_IP4)
+			params.ndis_ip4csum = NDIS_OFFLOAD_PARAM_TX;
+		else
+			goto unsupported;
+	}
+	if (rx_offloads & DEV_RX_OFFLOAD_IPV4_CKSUM) {
+		if (hwcaps.ndis_csum.ndis_ip4_rxcsum & NDIS_RXCSUM_CAP_IP4)
+			params.ndis_ip4csum |= NDIS_OFFLOAD_PARAM_RX;
+		else
+			goto unsupported;
+	}
+
+	if (tx_offloads & DEV_TX_OFFLOAD_TCP_TSO) {
+		if (hwcaps.ndis_lsov2.ndis_ip4_encap & NDIS_OFFLOAD_ENCAP_8023)
+			params.ndis_lsov2_ip4 = NDIS_OFFLOAD_LSOV2_ON;
+		else
+			goto unsupported;
+
+		if ((hwcaps.ndis_lsov2.ndis_ip6_opts & HN_NDIS_LSOV2_CAP_IP6)
+		    == HN_NDIS_LSOV2_CAP_IP6)
+			params.ndis_lsov2_ip6 = NDIS_OFFLOAD_LSOV2_ON;
+		else
+			goto unsupported;
+	}
+
+	error = hn_rndis_set(hv, OID_TCP_OFFLOAD_PARAMETERS, &params,
+			     params.ndis_hdr.ndis_size);
+	if (error) {
+		PMD_DRV_LOG(ERR, "offload config failed");
+		return error;
+	}
+
+	return 0;
+ unsupported:
+	PMD_DRV_LOG(NOTICE,
+		    "offload tx:%" PRIx64 " rx:%" PRIx64 " not supported by this version",
+		    tx_offloads, rx_offloads);
+	return -EINVAL;
+}
+
+/*
+ * Fill in the Tx/Rx offload capability masks of dev_info from the
+ * capabilities reported by the host.
+ *
+ * Multi-segment Tx, VLAN insert and VLAN strip are always reported;
+ * checksum and TSO bits are added only when the host advertises the
+ * matching capability.  dev_info is left untouched when the capability
+ * query fails, in which case the query's error is returned.
+ */
+int hn_rndis_get_offload(struct hn_data *hv,
+			 struct rte_eth_dev_info *dev_info)
+{
+	struct ndis_offload caps;
+	uint64_t rx_capa = DEV_RX_OFFLOAD_VLAN_STRIP;
+	uint64_t tx_capa = DEV_TX_OFFLOAD_MULTI_SEGS |
+			   DEV_TX_OFFLOAD_VLAN_INSERT;
+	int rc;
+
+	memset(&caps, 0, sizeof(caps));
+
+	rc = hn_rndis_query_hwcaps(hv, &caps);
+	if (rc) {
+		PMD_DRV_LOG(ERR, "hwcaps query failed: %d", rc);
+		return rc;
+	}
+
+	/* Transmit side */
+	if ((caps.ndis_csum.ndis_ip4_txcsum & HN_NDIS_TXCSUM_CAP_IP4)
+	    == HN_NDIS_TXCSUM_CAP_IP4)
+		tx_capa |= DEV_TX_OFFLOAD_IPV4_CKSUM;
+
+	if ((caps.ndis_csum.ndis_ip4_txcsum & HN_NDIS_TXCSUM_CAP_TCP4)
+	    == HN_NDIS_TXCSUM_CAP_TCP4 &&
+	    (caps.ndis_csum.ndis_ip6_txcsum & HN_NDIS_TXCSUM_CAP_TCP6)
+	    == HN_NDIS_TXCSUM_CAP_TCP6)
+		tx_capa |= DEV_TX_OFFLOAD_TCP_CKSUM;
+
+	if ((caps.ndis_csum.ndis_ip4_txcsum & NDIS_TXCSUM_CAP_UDP4) &&
+	    (caps.ndis_csum.ndis_ip6_txcsum & NDIS_TXCSUM_CAP_UDP6))
+		tx_capa |= DEV_TX_OFFLOAD_UDP_CKSUM;
+
+	if ((caps.ndis_lsov2.ndis_ip4_encap & NDIS_OFFLOAD_ENCAP_8023) &&
+	    (caps.ndis_lsov2.ndis_ip6_opts & HN_NDIS_LSOV2_CAP_IP6)
+	    == HN_NDIS_LSOV2_CAP_IP6)
+		tx_capa |= DEV_TX_OFFLOAD_TCP_TSO;
+
+	/* Receive side */
+	if (caps.ndis_csum.ndis_ip4_rxcsum & NDIS_RXCSUM_CAP_IP4)
+		rx_capa |= DEV_RX_OFFLOAD_IPV4_CKSUM;
+
+	if ((caps.ndis_csum.ndis_ip4_rxcsum & NDIS_RXCSUM_CAP_TCP4) &&
+	    (caps.ndis_csum.ndis_ip6_rxcsum & NDIS_RXCSUM_CAP_TCP6))
+		rx_capa |= DEV_RX_OFFLOAD_TCP_CKSUM;
+
+	if ((caps.ndis_csum.ndis_ip4_rxcsum & NDIS_RXCSUM_CAP_UDP4) &&
+	    (caps.ndis_csum.ndis_ip6_rxcsum & NDIS_RXCSUM_CAP_UDP6))
+		rx_capa |= DEV_RX_OFFLOAD_UDP_CKSUM;
+
+	dev_info->tx_offload_capa = tx_capa;
+	dev_info->rx_offload_capa = rx_capa;
+	return 0;
+}
+
+/*
+ * Derive the RTE_PTYPE_* mask for this port from the host's receive
+ * checksum capabilities.  RTE_PTYPE_L2_ETHER is always included and is
+ * the only bit returned when the capability query fails.
+ */
+uint32_t
+hn_rndis_get_ptypes(struct hn_data *hv)
+{
+	struct ndis_offload caps;
+	uint32_t supported = RTE_PTYPE_L2_ETHER;
+	int rc;
+
+	memset(&caps, 0, sizeof(caps));
+
+	rc = hn_rndis_query_hwcaps(hv, &caps);
+	if (rc) {
+		PMD_DRV_LOG(ERR, "hwcaps query failed: %d", rc);
+		return supported;
+	}
+
+	if (caps.ndis_csum.ndis_ip4_rxcsum & NDIS_RXCSUM_CAP_IP4)
+		supported |= RTE_PTYPE_L3_IPV4;
+
+	/* L4 types are reported if either address family is covered */
+	if ((caps.ndis_csum.ndis_ip4_rxcsum & NDIS_RXCSUM_CAP_TCP4) ||
+	    (caps.ndis_csum.ndis_ip6_rxcsum & NDIS_RXCSUM_CAP_TCP6))
+		supported |= RTE_PTYPE_L4_TCP;
+
+	if ((caps.ndis_csum.ndis_ip4_rxcsum & NDIS_RXCSUM_CAP_UDP4) ||
+	    (caps.ndis_csum.ndis_ip6_rxcsum & NDIS_RXCSUM_CAP_UDP6))
+		supported |= RTE_PTYPE_L4_UDP;
+
+	return supported;
+}
+
+/*
+ * Set the RNDIS packet filter (OID_GEN_CURRENT_PACKET_FILTER) and log
+ * the outcome.  Returns whatever hn_rndis_set() returned.
+ */
+int
+hn_rndis_set_rxfilter(struct hn_data *hv, uint32_t filter)
+{
+	int rc = hn_rndis_set(hv, OID_GEN_CURRENT_PACKET_FILTER,
+			      &filter, sizeof(filter));
+
+	if (rc == 0) {
+		PMD_DRV_LOG(DEBUG, "set RX filter %#" PRIx32 " done", filter);
+		return rc;
+	}
+
+	PMD_DRV_LOG(ERR, "set RX filter %#" PRIx32 " failed: %d",
+		    filter, rc);
+	return rc;
+}
+
+/* The default RSS key.
+ * This value is the same as MLX5 so that flows will be
+ * received on same path for both VF and synthetic NIC.
+ * Used by hn_rndis_conf_rss() when the caller supplies no key.
+ */
+static const uint8_t rss_default_key[NDIS_HASH_KEYSIZE_TOEPLITZ] = {
+ 0x2c, 0xc6, 0x81, 0xd1, 0x5b, 0xdb, 0xf4, 0xf7,
+ 0xfc, 0xa2, 0x83, 0x19, 0xdb, 0x1a, 0x3e, 0x94,
+ 0x6b, 0x9e, 0x38, 0xd9, 0x2c, 0x9c, 0x03, 0xd1,
+ 0xad, 0x99, 0x44, 0xa7, 0xd9, 0x56, 0x3d, 0x59,
+ 0x06, 0x3c, 0x25, 0xf3, 0xfc, 0x1f, 0xdc, 0x2a,
+};
+
+/*
+ * Send the RSS configuration (Toeplitz hash) to the host.
+ *
+ * The hash types are taken from rss_conf->rss_hf, the key from
+ * rss_conf->rss_key (or rss_default_key when that pointer is NULL),
+ * and the indirection table spreads entries round-robin over
+ * hv->num_queues.  Returns the result of hn_rndis_set().
+ */
+int hn_rndis_conf_rss(struct hn_data *hv,
+		      const struct rte_eth_rss_conf *rss_conf)
+{
+	struct ndis_rssprm_toeplitz rssp;
+	struct ndis_rss_params *prm = &rssp.rss_params;
+	const uint8_t *key;
+	uint32_t hash_types;
+	unsigned int slot;
+	int rc;
+
+	PMD_INIT_FUNC_TRACE();
+
+	/* Caller-supplied key if present, otherwise the built-in default */
+	key = rss_conf->rss_key;
+	if (!key)
+		key = rss_default_key;
+
+	/* Translate the ethdev hash selection into NDIS hash bits */
+	hash_types = NDIS_HASH_FUNCTION_TOEPLITZ;
+	if (rss_conf->rss_hf & ETH_RSS_IPV4)
+		hash_types |= NDIS_HASH_IPV4;
+	if (rss_conf->rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
+		hash_types |= NDIS_HASH_TCP_IPV4;
+	if (rss_conf->rss_hf & ETH_RSS_IPV6)
+		hash_types |= NDIS_HASH_IPV6;
+	if (rss_conf->rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
+		hash_types |= NDIS_HASH_TCP_IPV6;
+
+	memset(&rssp, 0, sizeof(rssp));
+
+	prm->ndis_hdr.ndis_type = NDIS_OBJTYPE_RSS_PARAMS;
+	prm->ndis_hdr.ndis_rev = NDIS_RSS_PARAMS_REV_2;
+	prm->ndis_hdr.ndis_size = sizeof(*prm);
+	prm->ndis_flags = 0;
+	prm->ndis_hash = hash_types;
+
+	/* Indirection table and key are located by offset within rssp */
+	prm->ndis_indsize = sizeof(rssp.rss_ind[0]) * NDIS_HASH_INDCNT;
+	prm->ndis_indoffset = offsetof(struct ndis_rssprm_toeplitz, rss_ind[0]);
+	prm->ndis_keysize = NDIS_HASH_KEYSIZE_TOEPLITZ;
+	prm->ndis_keyoffset = offsetof(struct ndis_rssprm_toeplitz, rss_key[0]);
+
+	for (slot = 0; slot < NDIS_HASH_INDCNT; slot++)
+		rssp.rss_ind[slot] = slot % hv->num_queues;
+
+	/* Set hash key values */
+	memcpy(&rssp.rss_key, key, NDIS_HASH_KEYSIZE_TOEPLITZ);
+
+	rc = hn_rndis_set(hv, OID_GEN_RECEIVE_SCALE_PARAMETERS,
+			  &rssp, sizeof(rssp));
+	if (rc) {
+		PMD_DRV_LOG(ERR,
+			    "RSS config num queues=%u failed: %d",
+			    hv->num_queues, rc);
+	}
+	return rc;
+}
+
+/*
+ * Send the RNDIS initialize request and record the aggregation limits
+ * returned in the completion (max size, max packet count, alignment)
+ * in hv.
+ *
+ * Returns 0 on success, -ENXIO if the request buffer cannot be
+ * allocated, the error from hn_rndis_execute(), or -EIO when the
+ * completion status is not RNDIS_STATUS_SUCCESS.
+ */
+static int hn_rndis_init(struct hn_data *hv)
+{
+ struct rndis_init_req *req;
+ struct rndis_init_comp comp;
+ uint32_t comp_len, rid;
+ int error;
+
+ req = hn_rndis_alloc(hv, sizeof(*req));
+ if (!req) {
+ PMD_DRV_LOG(ERR, "no memory for RNDIS init");
+ return -ENXIO;
+ }
+
+ /* The same request id is placed in the message and passed to execute */
+ rid = hn_rndis_rid(hv);
+ req->type = RNDIS_INITIALIZE_MSG;
+ req->len = sizeof(*req);
+ req->rid = rid;
+ req->ver_major = RNDIS_VERSION_MAJOR;
+ req->ver_minor = RNDIS_VERSION_MINOR;
+ req->max_xfersz = HN_RNDIS_XFER_SIZE;
+
+ comp_len = RNDIS_INIT_COMP_SIZE_MIN;
+ error = hn_rndis_execute(hv, rid, req, sizeof(*req),
+ &comp, comp_len,
+ RNDIS_INITIALIZE_CMPLT);
+ if (error)
+ goto done;
+
+ if (comp.status != RNDIS_STATUS_SUCCESS) {
+ PMD_DRV_LOG(ERR, "RNDIS init failed: status 0x%08x",
+ comp.status);
+ error = -EIO;
+ goto done;
+ }
+
+ /* comp.align is a power-of-two exponent, converted to bytes here */
+ hv->rndis_agg_size = comp.pktmaxsz;
+ hv->rndis_agg_pkts = comp.pktmaxcnt;
+ hv->rndis_agg_align = 1U << comp.align;
+
+ if (hv->rndis_agg_align < sizeof(uint32_t)) {
+ /*
+ * The RNDIS packet message encap assumes that the RNDIS
+ * packet message is at least 4 bytes aligned. Fix up the
+ * alignment here, if the remote side sets the alignment
+ * too low.
+ */
+ PMD_DRV_LOG(NOTICE,
+ "fixup RNDIS aggpkt align: %u -> %zu",
+ hv->rndis_agg_align, sizeof(uint32_t));
+ hv->rndis_agg_align = sizeof(uint32_t);
+ }
+
+ PMD_INIT_LOG(INFO,
+ "RNDIS ver %u.%u, aggpkt size %u, aggpkt cnt %u, aggpkt align %u",
+ comp.ver_major, comp.ver_minor,
+ hv->rndis_agg_size, hv->rndis_agg_pkts,
+ hv->rndis_agg_align);
+ error = 0;
+done:
+ /* Request buffer is released on every path once it was allocated */
+ rte_free(req);
+ return error;
+}
+
+/*
+ * Query the permanent MAC address (OID_802_3_PERMANENT_ADDRESS) into
+ * eaddr, which must have room for ETHER_ADDR_LEN bytes, and log it.
+ * Returns 0 on success or the error from hn_rndis_query().
+ */
+int
+hn_rndis_get_eaddr(struct hn_data *hv, uint8_t *eaddr)
+{
+	int rc;
+
+	rc = hn_rndis_query(hv, OID_802_3_PERMANENT_ADDRESS, NULL, 0,
+			    eaddr, ETHER_ADDR_LEN);
+	if (rc != 0)
+		return rc;
+
+	PMD_DRV_LOG(INFO, "MAC address %02x:%02x:%02x:%02x:%02x:%02x",
+		    eaddr[0], eaddr[1], eaddr[2],
+		    eaddr[3], eaddr[4], eaddr[5]);
+	return 0;
+}
+
+/*
+ * Query OID_GEN_MEDIA_CONNECT_STATUS and store the 32-bit answer in
+ * hv->link_status.  Returns the result of hn_rndis_query().
+ */
+int
+hn_rndis_get_linkstatus(struct hn_data *hv)
+{
+	int rc;
+
+	rc = hn_rndis_query(hv, OID_GEN_MEDIA_CONNECT_STATUS, NULL, 0,
+			    &hv->link_status, sizeof(uint32_t));
+	return rc;
+}
+
+/*
+ * Query OID_GEN_LINK_SPEED and store the 32-bit answer in
+ * hv->link_speed.  Returns the result of hn_rndis_query().
+ */
+int
+hn_rndis_get_linkspeed(struct hn_data *hv)
+{
+	int rc;
+
+	rc = hn_rndis_query(hv, OID_GEN_LINK_SPEED, NULL, 0,
+			    &hv->link_speed, sizeof(uint32_t));
+	return rc;
+}
+
+/*
+ * Attach the RNDIS layer: currently just the RNDIS initialize
+ * handshake.  Returns the result of hn_rndis_init().
+ */
+int
+hn_rndis_attach(struct hn_data *hv)
+{
+	int rc = hn_rndis_init(hv);
+
+	return rc;
+}
+
+/*
+ * Detach the RNDIS layer by halting RNDIS on this device.
+ */
+void
+hn_rndis_detach(struct hn_data *hv)
+{
+	hn_rndis_halt(hv);
+}
diff --git a/src/seastar/dpdk/drivers/net/netvsc/hn_rndis.h b/src/seastar/dpdk/drivers/net/netvsc/hn_rndis.h
new file mode 100644
index 000000000..319b497a7
--- /dev/null
+++ b/src/seastar/dpdk/drivers/net/netvsc/hn_rndis.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+
+#include "rndis.h"
+
+/* Forward declaration: only pointers to struct hn_data are used below */
+struct hn_data;
+
+/* RNDIS control-plane entry points implemented in hn_rndis.c */
+void hn_rndis_receive_response(struct hn_data *hv,
+ const void *data, uint32_t len);
+void hn_rndis_link_status(struct rte_eth_dev *dev, const void *msg);
+int hn_rndis_attach(struct hn_data *hv);
+void hn_rndis_detach(struct hn_data *hv);
+int hn_rndis_get_eaddr(struct hn_data *hv, uint8_t *eaddr);
+int hn_rndis_get_linkstatus(struct hn_data *hv);
+int hn_rndis_get_linkspeed(struct hn_data *hv);
+int hn_rndis_set_rxfilter(struct hn_data *hv, uint32_t filter);
+void hn_rndis_rx_ctrl(struct hn_data *hv, const void *data,
+ int dlen);
+int hn_rndis_get_offload(struct hn_data *hv,
+ struct rte_eth_dev_info *dev_info);
+int hn_rndis_conf_offload(struct hn_data *hv,
+ uint64_t tx_offloads,
+ uint64_t rx_offloads);
+int hn_rndis_query_rsscaps(struct hn_data *hv,
+ unsigned int *rxr_cnt0);
+int hn_rndis_conf_rss(struct hn_data *hv,
+ const struct rte_eth_rss_conf *rss_conf);
+uint32_t hn_rndis_get_ptypes(struct hn_data *hv);
+
+/*
+ * hn_rndis_dump() is a real function only when
+ * RTE_LIBRTE_NETVSC_DEBUG_DUMP is defined; otherwise calls to it
+ * expand to nothing.
+ */
+#ifdef RTE_LIBRTE_NETVSC_DEBUG_DUMP
+void hn_rndis_dump(const void *buf);
+#else
+#define hn_rndis_dump(buf)
+#endif
diff --git a/src/seastar/dpdk/drivers/net/netvsc/hn_rxtx.c b/src/seastar/dpdk/drivers/net/netvsc/hn_rxtx.c
new file mode 100644
index 000000000..7d7b55778
--- /dev/null
+++ b/src/seastar/dpdk/drivers/net/netvsc/hn_rxtx.c
@@ -0,0 +1,1470 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2016-2018 Microsoft Corporation
+ * Copyright(c) 2013-2016 Brocade Communications Systems, Inc.
+ * All rights reserved.
+ */
+
+#include <stdint.h>
+#include <string.h>
+#include <stdio.h>
+#include <errno.h>
+#include <unistd.h>
+#include <strings.h>
+#include <malloc.h>
+
+#include <rte_ethdev.h>
+#include <rte_memcpy.h>
+#include <rte_string_fns.h>
+#include <rte_memzone.h>
+#include <rte_malloc.h>
+#include <rte_atomic.h>
+#include <rte_branch_prediction.h>
+#include <rte_ether.h>
+#include <rte_common.h>
+#include <rte_errno.h>
+#include <rte_memory.h>
+#include <rte_eal.h>
+#include <rte_dev.h>
+#include <rte_net.h>
+#include <rte_bus_vmbus.h>
+#include <rte_spinlock.h>
+
+#include "hn_logs.h"
+#include "hn_var.h"
+#include "hn_rndis.h"
+#include "hn_nvs.h"
+#include "ndis.h"
+
+/* Size of a channel packet header followed by an NVS RNDIS message */
+#define HN_NVS_SEND_MSG_SIZE \
+ (sizeof(struct vmbus_chanpkt_hdr) + sizeof(struct hn_nvs_rndis))
+
+#define HN_TXD_CACHE_SIZE 32 /* per cpu tx_descriptor pool cache */
+/* NOTE(review): presumably the Tx copy-vs-direct cutoff in bytes; user not in view */
+#define HN_TXCOPY_THRESHOLD 512
+
+/* Received packets at least this long may be attached without copying */
+#define HN_RXCOPY_THRESHOLD 256
+/* Initial size in bytes of each Rx queue's channel event buffer */
+#define HN_RXQ_EVENT_DEFAULT 2048
+
+/*
+ * Per-packet receive metadata extracted from the RNDIS per-packet-info
+ * records by hn_rndis_rxinfo().
+ */
+struct hn_rxinfo {
+ uint32_t vlan_info;
+ uint32_t csum_info;
+ uint32_t hash_info;
+ uint32_t hash_value;
+};
+
+/* Bits tracking which hn_rxinfo fields have been found while parsing */
+#define HN_RXINFO_VLAN 0x0001
+#define HN_RXINFO_CSUM 0x0002
+#define HN_RXINFO_HASHINF 0x0004
+#define HN_RXINFO_HASHVAL 0x0008
+#define HN_RXINFO_ALL \
+ (HN_RXINFO_VLAN | \
+ HN_RXINFO_CSUM | \
+ HN_RXINFO_HASHINF | \
+ HN_RXINFO_HASHVAL)
+
+/* Sentinel values meaning "this field was not supplied by the host" */
+#define HN_NDIS_VLAN_INFO_INVALID 0xffffffff
+#define HN_NDIS_RXCSUM_INFO_INVALID 0
+#define HN_NDIS_HASH_INFO_INVALID 0
+
+/*
+ * Per-transmit book keeping.
+ * A slot in transmit ring (chim_index) is reserved for each transmit.
+ *
+ * There are two types of transmit:
+ * - buffered transmit where chimney buffer is used and RNDIS header
+ * is in the buffer. mbuf == NULL for this case.
+ *
+ * - direct transmit where RNDIS header is in rndis_pkt and the
+ * mbuf is freed after transmit.
+ *
+ * Descriptors come from per-port pool which is used
+ * to limit number of outstanding requests per device.
+ */
+struct hn_txdesc {
+ struct rte_mbuf *m;
+
+ uint16_t queue_id;
+ uint16_t chim_index; /* set from the pool object index in hn_txd_init() */
+ uint32_t chim_size;
+ uint32_t data_size; /* added to the queue byte counter on completion */
+ uint32_t packets; /* added to the queue packet counter on completion */
+
+ struct rndis_packet_msg *rndis_pkt; /* allocated once in hn_txd_init() */
+};
+
+/*
+ * RNDIS packet message header plus room for the hash value, VLAN,
+ * LSO2 and Tx checksum per-packet-info records.
+ */
+#define HN_RNDIS_PKT_LEN \
+ (sizeof(struct rndis_packet_msg) + \
+ RNDIS_PKTINFO_SIZE(NDIS_HASH_VALUE_SIZE) + \
+ RNDIS_PKTINFO_SIZE(NDIS_VLAN_INFO_SIZE) + \
+ RNDIS_PKTINFO_SIZE(NDIS_LSO2_INFO_SIZE) + \
+ RNDIS_PKTINFO_SIZE(NDIS_TXCSUM_INFO_SIZE))
+
+/* Minimum space required for a packet */
+#define HN_PKTSIZE_MIN(align) \
+ RTE_ALIGN(ETHER_MIN_LEN + HN_RNDIS_PKT_LEN, align)
+
+/* Upper bound on the Tx free threshold chosen when the caller passes 0 */
+#define DEFAULT_TX_FREE_THRESH 32U
+
+/*
+ * Account one packet in the size histogram and in the broadcast or
+ * multicast counters of stats.
+ *
+ * Histogram bins: 0 for < 64 bytes, 1 for exactly 64, 2..5 for the
+ * power-of-two ranges between 65 and 1023, 6 for 1024..1518 and 7 for
+ * anything larger.
+ */
+static void
+hn_update_packet_stats(struct hn_stats *stats, const struct rte_mbuf *m)
+{
+	const uint32_t len = m->pkt_len;
+	const struct ether_addr *dst;
+	uint32_t bin;
+
+	if (len < 64)
+		bin = 0;
+	else if (len == 64)
+		bin = 1;
+	else if (len < 1024)
+		/* count zeros, and offset into correct bin */
+		bin = (sizeof(len) * 8) - __builtin_clz(len) - 5;
+	else if (len < 1519)
+		bin = 6;
+	else
+		bin = 7;
+
+	stats->size_bins[bin]++;
+
+	/* Destination address is the first field of the frame */
+	dst = rte_pktmbuf_mtod(m, const struct ether_addr *);
+	if (!is_multicast_ether_addr(dst))
+		return;
+
+	if (is_broadcast_ether_addr(dst))
+		stats->broadcast++;
+	else
+		stats->multicast++;
+}
+
+/* Sum of the per-packet-info offset and length fields of pkt */
+static inline unsigned int hn_rndis_pktlen(const struct rndis_packet_msg *pkt)
+{
+	unsigned int info_start = pkt->pktinfooffset;
+	unsigned int info_bytes = pkt->pktinfolen;
+
+	return info_start + info_bytes;
+}
+
+/*
+ * Convert an offset counted from the start of the RNDIS packet message
+ * into one counted from its dataoffset field.
+ */
+static inline uint32_t
+hn_rndis_pktmsg_offset(uint32_t ofs)
+{
+	const size_t base = offsetof(struct rndis_packet_msg, dataoffset);
+
+	return ofs - base;
+}
+
+/*
+ * Mempool object constructor for transmit descriptors.
+ *
+ * Zeroes the descriptor, records the object index as its chim_index
+ * and allocates the RNDIS header buffer on the device's NUMA node.
+ * Exits the application if that allocation fails.
+ */
+static void hn_txd_init(struct rte_mempool *mp __rte_unused,
+			void *opaque, void *obj, unsigned int idx)
+{
+	struct rte_eth_dev *dev = opaque;
+	struct hn_txdesc *desc = obj;
+	struct rndis_packet_msg *hdr;
+
+	hdr = rte_malloc_socket("RNDIS_TX", HN_RNDIS_PKT_LEN,
+				rte_align32pow2(HN_RNDIS_PKT_LEN),
+				dev->device->numa_node);
+
+	memset(desc, 0, sizeof(*desc));
+	desc->chim_index = idx;
+
+	if (!hdr)
+		rte_exit(EXIT_FAILURE, "can not allocate RNDIS header");
+
+	desc->rndis_pkt = hdr;
+}
+
+/*
+ * Create the per-port transmit descriptor pool.
+ *
+ * Unlike Linux and FreeBSD, this driver uses a mempool to limit
+ * outstanding transmits and reserve buffers.  The pool holds
+ * hv->chim_cnt descriptors, each initialised by hn_txd_init().
+ *
+ * Returns 0 on success or -rte_errno when the pool cannot be created.
+ */
+int
+hn_tx_pool_init(struct rte_eth_dev *dev)
+{
+	struct hn_data *hv = dev->data->dev_private;
+	char pool_name[RTE_MEMPOOL_NAMESIZE];
+	struct rte_mempool *pool;
+
+	snprintf(pool_name, sizeof(pool_name),
+		 "hn_txd_%u", dev->data->port_id);
+
+	PMD_INIT_LOG(DEBUG, "create a TX send pool %s n=%u size=%zu socket=%d",
+		     pool_name, hv->chim_cnt, sizeof(struct hn_txdesc),
+		     dev->device->numa_node);
+
+	pool = rte_mempool_create(pool_name, hv->chim_cnt,
+				  sizeof(struct hn_txdesc),
+				  HN_TXD_CACHE_SIZE, 0,
+				  NULL, NULL,
+				  hn_txd_init, dev,
+				  dev->device->numa_node, 0);
+	if (pool) {
+		hv->tx_pool = pool;
+		return 0;
+	}
+
+	PMD_DRV_LOG(ERR,
+		    "mempool %s create failed: %d", pool_name, rte_errno);
+	return -rte_errno;
+}
+
+/* Free the transmit descriptor pool, if one was created */
+void
+hn_tx_pool_uninit(struct rte_eth_dev *dev)
+{
+	struct hn_data *hv = dev->data->dev_private;
+
+	if (!hv->tx_pool)
+		return;
+
+	rte_mempool_free(hv->tx_pool);
+	hv->tx_pool = NULL;
+}
+
+/*
+ * Forget any aggregation in progress and restore the remaining size
+ * and packet budgets to the queue maxima.
+ */
+static void hn_reset_txagg(struct hn_tx_queue *txq)
+{
+	txq->agg_txd = NULL;
+	txq->agg_prevpkt = NULL;
+	txq->agg_pktleft = txq->agg_pktmax;
+	txq->agg_szleft = txq->agg_szmax;
+}
+
+/*
+ * ethdev tx_queue_setup callback.
+ *
+ * Allocates the queue structure on socket_id, binds it to the channel
+ * with the same index, derives the free threshold and aggregation
+ * limits from hv, then sets up the matching VF queue.
+ *
+ * Returns 0 on success, -ENOMEM if the queue cannot be allocated, or
+ * the error from hn_vf_tx_queue_setup().
+ */
+int
+hn_dev_tx_queue_setup(struct rte_eth_dev *dev,
+		      uint16_t queue_idx, uint16_t nb_desc __rte_unused,
+		      unsigned int socket_id,
+		      const struct rte_eth_txconf *tx_conf)
+{
+	struct hn_data *hv = dev->data->dev_private;
+	struct hn_tx_queue *q;
+	uint32_t thresh, limit;
+	int rc;
+
+	PMD_INIT_FUNC_TRACE();
+
+	q = rte_zmalloc_socket("HN_TXQ", sizeof(*q), RTE_CACHE_LINE_SIZE,
+			       socket_id);
+	if (!q)
+		return -ENOMEM;
+
+	q->hv = hv;
+	q->chan = hv->channels[queue_idx];
+	q->port_id = dev->data->port_id;
+	q->queue_id = queue_idx;
+
+	/* 0 means "pick a default"; the result is capped at chim_cnt - 3 */
+	thresh = tx_conf->tx_free_thresh;
+	if (thresh == 0)
+		thresh = RTE_MIN(hv->chim_cnt / 4,
+				 DEFAULT_TX_FREE_THRESH);
+
+	limit = hv->chim_cnt - 3;
+	if (thresh >= limit)
+		thresh = limit;
+
+	q->free_thresh = thresh;
+
+	q->agg_szmax = RTE_MIN(hv->chim_szmax, hv->rndis_agg_size);
+	q->agg_pktmax = hv->rndis_agg_pkts;
+	q->agg_align = hv->rndis_agg_align;
+
+	hn_reset_txagg(q);
+
+	rc = hn_vf_tx_queue_setup(dev, queue_idx, nb_desc,
+				  socket_id, tx_conf);
+	if (rc) {
+		rte_free(q);
+		return rc;
+	}
+
+	dev->data->tx_queues[queue_idx] = q;
+	return 0;
+}
+
+/*
+ * ethdev tx_queue_release callback.  Returns any half-built
+ * aggregation descriptor to the pool and frees the queue.  A NULL
+ * queue is ignored.
+ */
+void
+hn_dev_tx_queue_release(void *arg)
+{
+	struct hn_tx_queue *q = arg;
+
+	PMD_INIT_FUNC_TRACE();
+
+	if (q == NULL)
+		return;
+
+	/* If any pending data is still present just drop it */
+	if (q->agg_txd != NULL)
+		rte_mempool_put(q->hv->tx_pool, q->agg_txd);
+
+	rte_free(q);
+}
+
+/*
+ * Handle the host's acknowledgement of one transmit.
+ *
+ * xactid carries the hn_txdesc pointer given to the host at send time.
+ * Queue statistics are updated from the descriptor, the attached mbuf
+ * is freed and the descriptor goes back to the pool.
+ */
+static void
+hn_nvs_send_completed(struct rte_eth_dev *dev, uint16_t queue_id,
+		      unsigned long xactid, const struct hn_nvs_rndis_ack *ack)
+{
+	struct hn_txdesc *desc = (struct hn_txdesc *)xactid;
+	struct hn_tx_queue *q;
+
+	/* Control packets are sent with xactid == 0 */
+	if (desc == NULL)
+		return;
+
+	q = dev->data->tx_queues[queue_id];
+
+	if (unlikely(ack->status != NVS_STATUS_OK)) {
+		PMD_TX_LOG(NOTICE, "port %u:%u complete tx %u failed status %u",
+			   q->port_id, q->queue_id, desc->chim_index, ack->status);
+		++q->stats.errors;
+	} else {
+		PMD_TX_LOG(DEBUG, "port %u:%u complete tx %u packets %u bytes %u",
+			   q->port_id, q->queue_id, desc->chim_index,
+			   desc->packets, desc->data_size);
+		q->stats.bytes += desc->data_size;
+		q->stats.packets += desc->packets;
+	}
+
+	rte_pktmbuf_free(desc->m);
+
+	rte_mempool_put(q->hv->tx_pool, desc);
+}
+
+/*
+ * Handle a completion packet from the channel.  Only RNDIS
+ * acknowledgements are expected; anything else is logged and ignored.
+ */
+static void
+hn_nvs_handle_comp(struct rte_eth_dev *dev, uint16_t queue_id,
+		   const struct vmbus_chanpkt_hdr *pkt,
+		   const void *data)
+{
+	const struct hn_nvs_hdr *nvs = data;
+
+	if (nvs->type == NVS_TYPE_RNDIS_ACK) {
+		hn_nvs_send_completed(dev, queue_id, pkt->xactid, data);
+		return;
+	}
+
+	PMD_TX_LOG(NOTICE,
+		   "unexpected send completion type %u",
+		   nvs->type);
+}
+
+/*
+ * Parse per-packet info (meta data).
+ *
+ * Walks the sequence of rndis_pktinfo records in info_data
+ * (info_dlen bytes) and copies the VLAN, checksum, hash value and hash
+ * info words into info.  Unknown record types are skipped.  Parsing
+ * stops early once all four items have been seen.
+ *
+ * Returns 0 on success or -EINVAL if a record is malformed (truncated,
+ * misaligned, shorter than its own header, or with a payload too small
+ * for its type).
+ */
+static int
+hn_rndis_rxinfo(const void *info_data, unsigned int info_dlen,
+		struct hn_rxinfo *info)
+{
+	const struct rndis_pktinfo *pi = info_data;
+	uint32_t mask = 0;
+
+	while (info_dlen != 0) {
+		const void *data;
+		uint32_t dlen;
+
+		if (unlikely(info_dlen < sizeof(*pi)))
+			return -EINVAL;
+
+		/*
+		 * A record cannot be smaller than its own header.  Without
+		 * this check a record with size 0 and an unknown type would
+		 * advance neither pi nor info_dlen and the loop would never
+		 * terminate.
+		 */
+		if (unlikely(pi->size < sizeof(*pi)))
+			return -EINVAL;
+
+		if (unlikely(info_dlen < pi->size))
+			return -EINVAL;
+		info_dlen -= pi->size;
+
+		if (unlikely(pi->size & RNDIS_PKTINFO_SIZE_ALIGNMASK))
+			return -EINVAL;
+		if (unlikely(pi->size < pi->offset))
+			return -EINVAL;
+
+		/* Payload length is whatever follows the declared offset */
+		dlen = pi->size - pi->offset;
+		data = pi->data;
+
+		switch (pi->type) {
+		case NDIS_PKTINFO_TYPE_VLAN:
+			if (unlikely(dlen < NDIS_VLAN_INFO_SIZE))
+				return -EINVAL;
+			info->vlan_info = *((const uint32_t *)data);
+			mask |= HN_RXINFO_VLAN;
+			break;
+
+		case NDIS_PKTINFO_TYPE_CSUM:
+			if (unlikely(dlen < NDIS_RXCSUM_INFO_SIZE))
+				return -EINVAL;
+			info->csum_info = *((const uint32_t *)data);
+			mask |= HN_RXINFO_CSUM;
+			break;
+
+		case NDIS_PKTINFO_TYPE_HASHVAL:
+			if (unlikely(dlen < NDIS_HASH_VALUE_SIZE))
+				return -EINVAL;
+			info->hash_value = *((const uint32_t *)data);
+			mask |= HN_RXINFO_HASHVAL;
+			break;
+
+		case NDIS_PKTINFO_TYPE_HASHINF:
+			if (unlikely(dlen < NDIS_HASH_INFO_SIZE))
+				return -EINVAL;
+			info->hash_info = *((const uint32_t *)data);
+			mask |= HN_RXINFO_HASHINF;
+			break;
+
+		default:
+			goto next;
+		}
+
+		if (mask == HN_RXINFO_ALL)
+			break; /* All found; done */
+next:
+		pi = (const struct rndis_pktinfo *)
+			((const uint8_t *)pi + pi->size);
+	}
+
+	/*
+	 * Final fixup.
+	 * - If there is no hash value, invalidate the hash info.
+	 */
+	if (!(mask & HN_RXINFO_HASHVAL))
+		info->hash_info = HN_NDIS_HASH_INFO_INVALID;
+	return 0;
+}
+
+/*
+ * Drop one reference on a receive buffer section.  When the last
+ * reference goes away the section is acknowledged to the host, so
+ * that this RXBUF can be recycled by the hypervisor, and the count of
+ * outstanding sections is decremented.
+ */
+static void hn_rx_buf_release(struct hn_rx_bufinfo *rxb)
+{
+	struct hn_data *owner = rxb->hv;
+
+	if (rte_mbuf_ext_refcnt_update(&rxb->shinfo, -1) != 0)
+		return;
+
+	hn_nvs_ack_rxbuf(rxb->chan, rxb->xactid);
+	--owner->rxbuf_outstanding;
+}
+
+/*
+ * External-buffer free callback installed by hn_rx_buf_init(); opaque
+ * is the hn_rx_bufinfo the buffer belongs to.
+ */
+static void hn_rx_buf_free_cb(void *buf __rte_unused, void *opaque)
+{
+	struct hn_rx_bufinfo *rxb = opaque;
+
+	hn_rx_buf_release(rxb);
+}
+
+/*
+ * Prepare the bookkeeping entry for the receive buffer section named
+ * by the packet's transaction id.  The entry is taken from
+ * hv->rxbuf_info, bound to the queue's channel and given an initial
+ * reference count of 1 with hn_rx_buf_free_cb() as its free callback.
+ */
+static struct hn_rx_bufinfo *hn_rx_buf_init(const struct hn_rx_queue *rxq,
+					    const struct vmbus_chanpkt_rxbuf *pkt)
+{
+	struct hn_data *hv = rxq->hv;
+	struct hn_rx_bufinfo *entry = &hv->rxbuf_info[pkt->hdr.xactid];
+
+	entry->hv = hv;
+	entry->chan = rxq->chan;
+	entry->xactid = pkt->hdr.xactid;
+
+	entry->shinfo.fcb_opaque = entry;
+	entry->shinfo.free_cb = hn_rx_buf_free_cb;
+	rte_mbuf_ext_refcnt_set(&entry->shinfo, 1);
+
+	return entry;
+}
+
+/*
+ * Turn one received Ethernet frame into an mbuf and queue it on the
+ * Rx staging ring.
+ *
+ * data points at the RNDIS message, headroom is the offset of the
+ * frame inside it and dlen the frame length.  Depending on size and
+ * receive-area pressure the frame is either attached as an external
+ * buffer or copied into the mbuf.  VLAN, checksum and RSS hash results
+ * from info are translated into mbuf fields and offload flags.
+ * Nothing is returned; failures are counted in the queue or device
+ * statistics.
+ */
+static void hn_rxpkt(struct hn_rx_queue *rxq, struct hn_rx_bufinfo *rxb,
+ uint8_t *data, unsigned int headroom, unsigned int dlen,
+ const struct hn_rxinfo *info)
+{
+ struct hn_data *hv = rxq->hv;
+ struct rte_mbuf *m;
+
+ m = rte_pktmbuf_alloc(rxq->mb_pool);
+ if (unlikely(!m)) {
+ struct rte_eth_dev *dev =
+ &rte_eth_devices[rxq->port_id];
+
+ dev->data->rx_mbuf_alloc_failed++;
+ return;
+ }
+
+ /*
+ * For large packets, avoid copy if possible but need to keep
+ * some space available in receive area for later packets.
+ */
+ if (dlen >= HN_RXCOPY_THRESHOLD &&
+ hv->rxbuf_outstanding < hv->rxbuf_section_cnt / 2) {
+ struct rte_mbuf_ext_shared_info *shinfo;
+ const void *rxbuf;
+ rte_iova_t iova;
+
+ /*
+ * Build an external mbuf that points to receive area.
+ * Use refcount to handle multiple packets in same
+ * receive buffer section.
+ */
+ rxbuf = hv->rxbuf_res->addr;
+ iova = rte_mem_virt2iova(rxbuf) + RTE_PTR_DIFF(data, rxbuf);
+ shinfo = &rxb->shinfo;
+
+ /* First extra reference on this section: count it as outstanding */
+ if (rte_mbuf_ext_refcnt_update(shinfo, 1) == 1)
+ ++hv->rxbuf_outstanding;
+
+ rte_pktmbuf_attach_extbuf(m, data, iova,
+ dlen + headroom, shinfo);
+ m->data_off = headroom;
+ } else {
+ /* Mbuf's in pool must be large enough to hold small packets */
+ if (unlikely(rte_pktmbuf_tailroom(m) < dlen)) {
+ rte_pktmbuf_free_seg(m);
+ ++rxq->stats.errors;
+ return;
+ }
+ rte_memcpy(rte_pktmbuf_mtod(m, void *),
+ data + headroom, dlen);
+ }
+
+ m->port = rxq->port_id;
+ m->pkt_len = dlen;
+ m->data_len = dlen;
+ m->packet_type = rte_net_get_ptype(m, NULL,
+ RTE_PTYPE_L2_MASK |
+ RTE_PTYPE_L3_MASK |
+ RTE_PTYPE_L4_MASK);
+
+ if (info->vlan_info != HN_NDIS_VLAN_INFO_INVALID) {
+ m->vlan_tci = info->vlan_info;
+ m->ol_flags |= PKT_RX_VLAN_STRIPPED | PKT_RX_VLAN;
+
+ /* NDIS always strips tag, put it back if necessary */
+ if (!hv->vlan_strip && rte_vlan_insert(&m)) {
+ PMD_DRV_LOG(DEBUG, "vlan insert failed");
+ ++rxq->stats.errors;
+ rte_pktmbuf_free(m);
+ return;
+ }
+ }
+
+ if (info->csum_info != HN_NDIS_RXCSUM_INFO_INVALID) {
+ if (info->csum_info & NDIS_RXCSUM_INFO_IPCS_OK)
+ m->ol_flags |= PKT_RX_IP_CKSUM_GOOD;
+
+ /* An OK bit wins; BAD is only reported when no OK bit is set */
+ if (info->csum_info & (NDIS_RXCSUM_INFO_UDPCS_OK
+ | NDIS_RXCSUM_INFO_TCPCS_OK))
+ m->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
+ else if (info->csum_info & (NDIS_RXCSUM_INFO_TCPCS_FAILED
+ | NDIS_RXCSUM_INFO_UDPCS_FAILED))
+ m->ol_flags |= PKT_RX_L4_CKSUM_BAD;
+ }
+
+ if (info->hash_info != HN_NDIS_HASH_INFO_INVALID) {
+ m->ol_flags |= PKT_RX_RSS_HASH;
+ m->hash.rss = info->hash_value;
+ }
+
+ PMD_RX_LOG(DEBUG,
+ "port %u:%u RX id %"PRIu64" size %u type %#x ol_flags %#"PRIx64,
+ rxq->port_id, rxq->queue_id, rxb->xactid,
+ m->pkt_len, m->packet_type, m->ol_flags);
+
+ /* Statistics are updated before the enqueue attempt below */
+ ++rxq->stats.packets;
+ rxq->stats.bytes += m->pkt_len;
+ hn_update_packet_stats(&rxq->stats, m);
+
+ if (unlikely(rte_ring_sp_enqueue(rxq->rx_ring, m) != 0)) {
+ ++rxq->stats.ring_full;
+ rte_pktmbuf_free(m);
+ }
+}
+
+/*
+ * Validate one RNDIS packet message and hand the Ethernet frame it
+ * carries to hn_rxpkt().
+ *
+ * data/dlen describe the message as found in the receive buffer.  The
+ * header lengths and offsets are checked against dlen and against each
+ * other, the per-packet-info records are parsed, and the frame must be
+ * at least an Ethernet header long.  Any failed check counts as a
+ * receive error on rxq and the message is dropped.
+ */
+static void hn_rndis_rx_data(struct hn_rx_queue *rxq,
+			     struct hn_rx_bufinfo *rxb,
+			     void *data, uint32_t dlen)
+{
+	unsigned int data_off, data_len, pktinfo_off, pktinfo_len;
+	const struct rndis_packet_msg *pkt = data;
+	struct hn_rxinfo info = {
+		.vlan_info = HN_NDIS_VLAN_INFO_INVALID,
+		.csum_info = HN_NDIS_RXCSUM_INFO_INVALID,
+		.hash_info = HN_NDIS_HASH_INFO_INVALID,
+	};
+	int err;
+
+	hn_rndis_dump(pkt);
+
+	if (unlikely(dlen < sizeof(*pkt)))
+		goto error;
+
+	if (unlikely(dlen < pkt->len))
+		goto error; /* truncated RNDIS from host */
+
+	if (unlikely(pkt->len < pkt->datalen
+		     + pkt->oobdatalen + pkt->pktinfolen))
+		goto error;
+
+	if (unlikely(pkt->datalen == 0))
+		goto error;
+
+	/* Check offsets. */
+	if (unlikely(pkt->dataoffset < RNDIS_PACKET_MSG_OFFSET_MIN))
+		goto error;
+
+	if (likely(pkt->pktinfooffset > 0) &&
+	    unlikely(pkt->pktinfooffset < RNDIS_PACKET_MSG_OFFSET_MIN ||
+		     (pkt->pktinfooffset & RNDIS_PACKET_MSG_OFFSET_ALIGNMASK)))
+		goto error;
+
+	data_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->dataoffset);
+	data_len = pkt->datalen;
+	pktinfo_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->pktinfooffset);
+	pktinfo_len = pkt->pktinfolen;
+
+	if (likely(pktinfo_len > 0)) {
+		/*
+		 * The per-packet-info area must lie inside the message,
+		 * otherwise the parser would read past it.  The sum is
+		 * formed in 64 bits so it cannot wrap.
+		 */
+		if (unlikely((uint64_t)pktinfo_off + pktinfo_len > pkt->len))
+			goto error;
+
+		err = hn_rndis_rxinfo((const uint8_t *)pkt + pktinfo_off,
+				      pktinfo_len, &info);
+		if (err)
+			goto error;
+	}
+
+	/* Same bounds rule for the frame itself, again without wrap-around */
+	if (unlikely((uint64_t)data_off + data_len > pkt->len))
+		goto error;
+
+	if (unlikely(data_len < ETHER_HDR_LEN))
+		goto error;
+
+	hn_rxpkt(rxq, rxb, data, data_off, data_len, &info);
+	return;
+error:
+	++rxq->stats.errors;
+}
+
+/*
+ * Dispatch one RNDIS message taken from the receive buffer by type:
+ * data packets (only while the port is started), link status
+ * indications, and completions of control requests.  Other types are
+ * logged and dropped.
+ */
+static void
+hn_rndis_receive(struct rte_eth_dev *dev, struct hn_rx_queue *rxq,
+		 struct hn_rx_bufinfo *rxb, void *buf, uint32_t len)
+{
+	const struct rndis_msghdr *msg = buf;
+	const uint32_t type = msg->type;
+
+	if (type == RNDIS_PACKET_MSG) {
+		if (dev->data->dev_started)
+			hn_rndis_rx_data(rxq, rxb, buf, len);
+	} else if (type == RNDIS_INDICATE_STATUS_MSG) {
+		hn_rndis_link_status(dev, buf);
+	} else if (type == RNDIS_INITIALIZE_CMPLT ||
+		   type == RNDIS_QUERY_CMPLT ||
+		   type == RNDIS_SET_CMPLT) {
+		hn_rndis_receive_response(rxq->hv, buf, len);
+	} else {
+		PMD_DRV_LOG(NOTICE,
+			    "unexpected RNDIS message (type %#x len %u)",
+			    msg->type, len);
+	}
+}
+
+/*
+ * Handle a VMBUS_CHANPKT_TYPE_RXBUF channel packet.
+ *
+ * The packet lists ranges (offset, length) inside the shared receive
+ * buffer; each range holds one RNDIS message.  After validating the
+ * channel packet, every in-bounds, non-empty range is passed to
+ * hn_rndis_receive().  The initial reference on the buffer section is
+ * dropped at the end, which acknowledges the section to the host
+ * unless mbufs still point into it.
+ */
+static void
+hn_nvs_handle_rxbuf(struct rte_eth_dev *dev,
+		    struct hn_data *hv,
+		    struct hn_rx_queue *rxq,
+		    const struct vmbus_chanpkt_hdr *hdr,
+		    const void *buf)
+{
+	const struct vmbus_chanpkt_rxbuf *pkt;
+	const struct hn_nvs_hdr *nvs_hdr = buf;
+	uint32_t rxbuf_sz = hv->rxbuf_res->len;
+	char *rxbuf = hv->rxbuf_res->addr;
+	unsigned int i, hlen, count;
+	struct hn_rx_bufinfo *rxb;
+
+	/* At minimum we need type header */
+	if (unlikely(vmbus_chanpkt_datalen(hdr) < sizeof(*nvs_hdr))) {
+		PMD_RX_LOG(ERR, "invalid receive nvs RNDIS");
+		return;
+	}
+
+	/* Make sure that this is a RNDIS message. */
+	if (unlikely(nvs_hdr->type != NVS_TYPE_RNDIS)) {
+		PMD_RX_LOG(ERR, "nvs type %u, not RNDIS",
+			   nvs_hdr->type);
+		return;
+	}
+
+	hlen = vmbus_chanpkt_getlen(hdr->hlen);
+	if (unlikely(hlen < sizeof(*pkt))) {
+		PMD_RX_LOG(ERR, "invalid rxbuf chanpkt");
+		return;
+	}
+
+	pkt = container_of(hdr, const struct vmbus_chanpkt_rxbuf, hdr);
+	if (unlikely(pkt->rxbuf_id != NVS_RXBUF_SIG)) {
+		PMD_RX_LOG(ERR, "invalid rxbuf_id 0x%08x",
+			   pkt->rxbuf_id);
+		return;
+	}
+
+	/* The header must be long enough to hold all advertised ranges */
+	count = pkt->rxbuf_cnt;
+	if (unlikely(hlen < offsetof(struct vmbus_chanpkt_rxbuf,
+				     rxbuf[count]))) {
+		PMD_RX_LOG(ERR, "invalid rxbuf_cnt %u", count);
+		return;
+	}
+
+	/*
+	 * The id indexes hv->rxbuf_info in hn_rx_buf_init().  Assuming that
+	 * table holds rxbuf_section_cnt entries (allocation is outside this
+	 * file - TODO confirm), an id equal to the count is already out of
+	 * range, so the comparison must be >= rather than >.
+	 */
+	if (pkt->hdr.xactid >= hv->rxbuf_section_cnt) {
+		PMD_RX_LOG(ERR, "invalid rxbuf section id %" PRIx64,
+			   pkt->hdr.xactid);
+		return;
+	}
+
+	/* Setup receive buffer info to allow for callback */
+	rxb = hn_rx_buf_init(rxq, pkt);
+
+	/* Each range represents 1 RNDIS pkt that contains 1 Ethernet frame */
+	for (i = 0; i < count; ++i) {
+		unsigned int ofs, len;
+
+		ofs = pkt->rxbuf[i].ofs;
+		len = pkt->rxbuf[i].len;
+
+		/* 64-bit sum so that ofs + len cannot wrap past the check */
+		if (unlikely((uint64_t)ofs + len > rxbuf_sz)) {
+			PMD_RX_LOG(ERR,
+				   "%uth RNDIS msg overflow ofs %u, len %u",
+				   i, ofs, len);
+			continue;
+		}
+
+		if (unlikely(len == 0)) {
+			PMD_RX_LOG(ERR, "%uth RNDIS msg len %u", i, len);
+			continue;
+		}
+
+		hn_rndis_receive(dev, rxq, rxb,
+				 rxbuf + ofs, len);
+	}
+
+	/* Send ACK now if external mbuf not used */
+	hn_rx_buf_release(rxb);
+}
+
+/*
+ * Called when NVS inband events are received.
+ * VF association notes are forwarded to hn_nvs_handle_vfassoc();
+ * transmit indirection table notes and any other type are only logged.
+ */
+static void hn_nvs_handle_notify(struct rte_eth_dev *dev,
+				 const struct vmbus_chanpkt_hdr *pkt,
+				 const void *data)
+{
+	const struct hn_nvs_hdr *nvs = data;
+
+	if (nvs->type == NVS_TYPE_TXTBL_NOTE) {
+		/* Transmit indirection table has locking problems
+		 * in DPDK and therefore not implemented
+		 */
+		PMD_DRV_LOG(DEBUG, "host notify of transmit indirection table");
+		return;
+	}
+
+	if (nvs->type == NVS_TYPE_VFASSOC_NOTE) {
+		hn_nvs_handle_vfassoc(dev, pkt, data);
+		return;
+	}
+
+	PMD_DRV_LOG(INFO,
+		    "got notify, nvs type %u", nvs->type);
+}
+
+/*
+ * Allocate a receive queue and its channel event buffer on socket_id
+ * and bind the queue to channel queue_id of hv.
+ *
+ * Returns the new queue, or NULL if either allocation fails (nothing
+ * is leaked in that case).
+ */
+struct hn_rx_queue *hn_rx_queue_alloc(struct hn_data *hv,
+				      uint16_t queue_id,
+				      unsigned int socket_id)
+{
+	struct hn_rx_queue *q;
+	void *events;
+
+	q = rte_zmalloc_socket("HN_RXQ", sizeof(*q),
+			       RTE_CACHE_LINE_SIZE, socket_id);
+	if (q == NULL)
+		return NULL;
+
+	events = rte_malloc_socket("HN_EVENTS", HN_RXQ_EVENT_DEFAULT,
+				   RTE_CACHE_LINE_SIZE, socket_id);
+	if (events == NULL) {
+		rte_free(q);
+		return NULL;
+	}
+
+	q->hv = hv;
+	q->chan = hv->channels[queue_id];
+	rte_spinlock_init(&q->ring_lock);
+	q->port_id = hv->port_id;
+	q->queue_id = queue_id;
+	q->event_sz = HN_RXQ_EVENT_DEFAULT;
+	q->event_buf = events;
+
+	return q;
+}
+
+/*
+ * ethdev rx_queue_setup callback.
+ *
+ * Queue 0 reuses the primary queue already stored in hv; other
+ * queues are allocated here.  A staging ring sized from nb_desc
+ * (bounded by the per-queue share of the mempool) is created and the
+ * matching VF queue is set up.
+ *
+ * Returns 0 on success, -ENOMEM if the queue or ring cannot be
+ * allocated, or the error from hn_vf_rx_queue_setup().  On failure
+ * only resources created by this call are released: the primary queue
+ * stays allocated because hv->primary still points at it.
+ */
+int
+hn_dev_rx_queue_setup(struct rte_eth_dev *dev,
+		      uint16_t queue_idx, uint16_t nb_desc,
+		      unsigned int socket_id,
+		      const struct rte_eth_rxconf *rx_conf,
+		      struct rte_mempool *mp)
+{
+	struct hn_data *hv = dev->data->dev_private;
+	char ring_name[RTE_RING_NAMESIZE];
+	struct hn_rx_queue *rxq;
+	unsigned int count;
+	int error = -ENOMEM;
+
+	PMD_INIT_FUNC_TRACE();
+
+	if (queue_idx == 0) {
+		rxq = hv->primary;
+	} else {
+		rxq = hn_rx_queue_alloc(hv, queue_idx, socket_id);
+		if (!rxq)
+			return -ENOMEM;
+	}
+
+	rxq->mb_pool = mp;
+	count = rte_mempool_avail_count(mp) / dev->data->nb_rx_queues;
+	if (nb_desc == 0 || nb_desc > count)
+		nb_desc = count;
+
+	/*
+	 * Staging ring from receive event logic to rx_pkts.
+	 * rx_pkts assumes caller is handling multi-thread issue.
+	 * event logic has locking.
+	 */
+	snprintf(ring_name, sizeof(ring_name),
+		 "hn_rx_%u_%u", dev->data->port_id, queue_idx);
+	rxq->rx_ring = rte_ring_create(ring_name,
+				       rte_align32pow2(nb_desc),
+				       socket_id, 0);
+	if (!rxq->rx_ring)
+		goto fail;
+
+	error = hn_vf_rx_queue_setup(dev, queue_idx, nb_desc,
+				     socket_id, rx_conf, mp);
+	if (error)
+		goto fail;
+
+	dev->data->rx_queues[queue_idx] = rxq;
+	return 0;
+
+fail:
+	/* Undo what this call attached to the queue */
+	rte_ring_free(rxq->rx_ring);
+	rxq->rx_ring = NULL;
+	rxq->mb_pool = NULL;
+
+	/*
+	 * Only free the queue if it was created in this function.  The
+	 * primary queue is still referenced by hv->primary; freeing it
+	 * here would leave that pointer dangling.
+	 */
+	if (queue_idx != 0) {
+		rte_free(rxq->event_buf);
+		rte_free(rxq);
+	}
+	return error;
+}
+
+/*
+ * Tear down a receive queue: free the staging ring, detach the mbuf
+ * pool and release the matching VF queue.  The queue memory itself is
+ * freed too, except for the primary queue when keep_primary is set.
+ * A NULL queue is ignored.
+ */
+static void
+hn_rx_queue_free(struct hn_rx_queue *rxq, bool keep_primary)
+{
+	if (rxq == NULL)
+		return;
+
+	rte_ring_free(rxq->rx_ring);
+	rxq->rx_ring = NULL;
+	rxq->mb_pool = NULL;
+
+	hn_vf_rx_queue_release(rxq->hv, rxq->queue_id);
+
+	/* Keep primary queue to allow for control operations */
+	if (!keep_primary || rxq != rxq->hv->primary) {
+		rte_free(rxq->event_buf);
+		rte_free(rxq);
+	}
+}
+
+/*
+ * ethdev rx_queue_release callback: frees the queue but keeps the
+ * primary queue's memory.
+ */
+void
+hn_dev_rx_queue_release(void *arg)
+{
+	PMD_INIT_FUNC_TRACE();
+
+	hn_rx_queue_free((struct hn_rx_queue *)arg, true);
+}
+
+/*
+ * ethdev tx_done_cleanup callback: processes pending channel events
+ * for the queue, passing free_cnt to hn_process_events() as its
+ * tx_limit, and returns that function's result.
+ */
+int
+hn_dev_tx_done_cleanup(void *arg, uint32_t free_cnt)
+{
+	struct hn_tx_queue *q = arg;
+	struct hn_data *hv = q->hv;
+
+	return hn_process_events(hv, q->queue_id, free_cnt);
+}
+
+/*
+ * Process pending events on the channel.
+ * Called from both Rx queue poll and Tx cleanup
+ *
+ * Reads packets from the queue's VMBus channel and dispatches them by
+ * type until the ring is empty, tx_limit completions have been handled
+ * (a tx_limit of 0 means no limit), or the Rx staging ring is full.
+ *
+ * Returns the number of completion packets handled; 0 when the channel
+ * has nothing pending or the queue lock is held elsewhere.
+ */
+uint32_t hn_process_events(struct hn_data *hv, uint16_t queue_id,
+			   uint32_t tx_limit)
+{
+	struct rte_eth_dev *dev = &rte_eth_devices[hv->port_id];
+	struct hn_rx_queue *rxq;
+	uint32_t bytes_read = 0;
+	uint32_t tx_done = 0;
+	int ret = 0;
+
+	rxq = queue_id == 0 ? hv->primary : dev->data->rx_queues[queue_id];
+
+	/* If no pending data then nothing to do */
+	if (rte_vmbus_chan_rx_empty(rxq->chan))
+		return 0;
+
+	/*
+	 * Since channel is shared between Rx and TX queue need to have a lock
+	 * since DPDK does not force same CPU to be used for Rx/Tx.
+	 */
+	if (unlikely(!rte_spinlock_trylock(&rxq->ring_lock)))
+		return 0;
+
+	for (;;) {
+		const struct vmbus_chanpkt_hdr *pkt;
+		uint32_t len = rxq->event_sz;
+		const void *data;
+
+		ret = rte_vmbus_chan_recv_raw(rxq->chan, rxq->event_buf, &len);
+		if (ret == -EAGAIN)
+			break;	/* ring is empty */
+
+		if (unlikely(ret == -ENOBUFS)) {
+			/* event buffer not large enough to read ring */
+			uint32_t new_sz = len + len / 4;
+			void *new_buf;
+
+			PMD_DRV_LOG(DEBUG,
+				    "event buffer expansion (need %u)", len);
+
+			/*
+			 * Resize through a temporary so the current buffer is
+			 * not lost (leaked) when the allocation fails.
+			 */
+			new_buf = rte_realloc(rxq->event_buf, new_sz,
+					      RTE_CACHE_LINE_SIZE);
+			if (new_buf == NULL) {
+				/*
+				 * out of memory, no more events now; the old
+				 * buffer stays in place and the resize is
+				 * attempted again on a later call.
+				 */
+				break;
+			}
+
+			rxq->event_buf = new_buf;
+			rxq->event_sz = new_sz;
+			continue;	/* read again with the larger buffer */
+		}
+
+		if (unlikely(ret <= 0)) {
+			/* This indicates a failure to communicate (or worse) */
+			rte_exit(EXIT_FAILURE,
+				 "vmbus ring buffer error: %d", ret);
+		}
+
+		bytes_read += ret;
+		pkt = (const struct vmbus_chanpkt_hdr *)rxq->event_buf;
+		data = (char *)rxq->event_buf + vmbus_chanpkt_getlen(pkt->hlen);
+
+		switch (pkt->type) {
+		case VMBUS_CHANPKT_TYPE_COMP:
+			++tx_done;
+			hn_nvs_handle_comp(dev, queue_id, pkt, data);
+			break;
+
+		case VMBUS_CHANPKT_TYPE_RXBUF:
+			hn_nvs_handle_rxbuf(dev, hv, rxq, pkt, data);
+			break;
+
+		case VMBUS_CHANPKT_TYPE_INBAND:
+			hn_nvs_handle_notify(dev, pkt, data);
+			break;
+
+		default:
+			PMD_DRV_LOG(ERR, "unknown chan pkt %u", pkt->type);
+			break;
+		}
+
+		if (tx_limit && tx_done >= tx_limit)
+			break;
+
+		/* stop once the staging ring cannot take more packets */
+		if (rxq->rx_ring && rte_ring_full(rxq->rx_ring))
+			break;
+	}
+
+	/* tell the host how much of the ring was consumed */
+	if (bytes_read > 0)
+		rte_vmbus_chan_signal_read(rxq->chan, bytes_read);
+
+	rte_spinlock_unlock(&rxq->ring_lock);
+
+	return tx_done;
+}
+
+/*
+ * Copy the data of mbuf chain m into the buffer holding RNDIS message
+ * pkt, starting at the message's absolute data offset, and account for
+ * the packet in the aggregating descriptor and the queue statistics.
+ */
+static void hn_append_to_chim(struct hn_tx_queue *txq,
+			      struct rndis_packet_msg *pkt,
+			      const struct rte_mbuf *m)
+{
+	struct hn_txdesc *agg = txq->agg_txd;
+	const struct rte_mbuf *seg;
+	uint8_t *dst;
+
+	hn_rndis_dump(pkt);
+
+	dst = (uint8_t *)pkt + RNDIS_PACKET_MSG_OFFSET_ABS(pkt->dataoffset);
+
+	agg->chim_size += pkt->len;
+	agg->data_size += m->pkt_len;
+	agg->packets++;
+	hn_update_packet_stats(&txq->stats, m);
+
+	/* flatten every segment of the chain, one after the other */
+	for (seg = m; seg != NULL; seg = seg->next) {
+		const uint16_t seg_len = rte_pktmbuf_data_len(seg);
+
+		rte_memcpy(dst, rte_pktmbuf_mtod(seg, const char *), seg_len);
+		dst += seg_len;
+	}
+}
+
+/*
+ * Send the data aggregated in the chimney buffer, if there is any.
+ * Returns 0 when nothing is pending or the send succeeded, otherwise
+ * the error from hn_nvs_send() (e.g. the channel ring buffer was full);
+ * on error the aggregation state is left untouched.
+ */
+static int hn_flush_txagg(struct hn_tx_queue *txq, bool *need_sig)
+{
+	struct hn_txdesc *agg = txq->agg_txd;
+	struct hn_nvs_rndis msg;
+	int rc;
+
+	if (agg == NULL)
+		return 0;
+
+	msg = (struct hn_nvs_rndis) {
+		.type = NVS_TYPE_RNDIS,
+		.rndis_mtype = NVS_RNDIS_MTYPE_DATA,
+		.chim_idx = agg->chim_index,
+		.chim_sz = agg->chim_size,
+	};
+
+	PMD_TX_LOG(DEBUG, "port %u:%u tx %u size %u",
+		   txq->port_id, txq->queue_id, agg->chim_index, agg->chim_size);
+
+	rc = hn_nvs_send(txq->chan, VMBUS_CHANPKT_FLAG_RC,
+			 &msg, sizeof(msg), (uintptr_t)agg, need_sig);
+	if (unlikely(rc != 0)) {
+		PMD_TX_LOG(NOTICE, "port %u:%u send failed: %d",
+			   txq->port_id, txq->queue_id, rc);
+		return rc;
+	}
+
+	hn_reset_txagg(txq);
+	return rc;
+}
+
+/*
+ * Take a Tx descriptor from the device's Tx pool and reset its
+ * per-send fields. Returns NULL (and counts a ring_full event on the
+ * queue) when the pool has no descriptor available.
+ */
+static struct hn_txdesc *hn_new_txd(struct hn_data *hv,
+				    struct hn_tx_queue *txq)
+{
+	struct hn_txdesc *desc;
+
+	if (rte_mempool_get(hv->tx_pool, (void **)&desc) != 0) {
+		++txq->stats.ring_full;
+		PMD_TX_LOG(DEBUG, "tx pool exhausted!");
+		return NULL;
+	}
+
+	desc->queue_id = txq->queue_id;
+	desc->m = NULL;
+	desc->chim_size = 0;
+	desc->data_size = 0;
+	desc->packets = 0;
+
+	return desc;
+}
+
+/*
+ * Find the place in the chimney buffer where the next RNDIS message of
+ * pktsize bytes is to be built.
+ *
+ * If an aggregation is in progress, the previous message is padded up
+ * to agg_align and the new one follows it; otherwise a new descriptor
+ * is taken and its chimney section becomes the start of a new
+ * aggregation. The remaining packet/size budget of the queue is
+ * updated in both cases.
+ *
+ * Returns the address for the new message, or NULL when no descriptor
+ * could be obtained.
+ */
+static void *
+hn_try_txagg(struct hn_data *hv, struct hn_tx_queue *txq, uint32_t pktsize)
+{
+ struct hn_txdesc *agg_txd = txq->agg_txd;
+ struct rndis_packet_msg *pkt;
+ void *chim;
+
+ if (agg_txd) {
+ unsigned int padding, olen;
+
+ /*
+ * Update the previous RNDIS packet's total length,
+ * it can be increased due to the mandatory alignment
+ * padding for this RNDIS packet. And update the
+ * aggregating txdesc's chimney sending buffer size
+ * accordingly.
+ *
+ * Zero-out the padding, as required by the RNDIS spec.
+ */
+ pkt = txq->agg_prevpkt;
+ olen = pkt->len;
+ padding = RTE_ALIGN(olen, txq->agg_align) - olen;
+ if (padding > 0) {
+ agg_txd->chim_size += padding;
+ pkt->len += padding;
+ memset((uint8_t *)pkt + olen, 0, padding);
+ }
+
+ /* the new message starts right after the (padded) previous one */
+ chim = (uint8_t *)pkt + pkt->len;
+
+ txq->agg_pktleft--;
+ txq->agg_szleft -= pktsize;
+ if (txq->agg_szleft < HN_PKTSIZE_MIN(txq->agg_align)) {
+ /*
+ * Probably can't aggregate more packets,
+ * flush this aggregating txdesc proactively.
+ */
+ txq->agg_pktleft = 0;
+ }
+ } else {
+ agg_txd = hn_new_txd(hv, txq);
+ if (!agg_txd)
+ return NULL;
+
+ /* each descriptor owns the chimney section selected by chim_index */
+ chim = (uint8_t *)hv->chim_res->addr
+ + agg_txd->chim_index * hv->chim_szmax;
+
+ txq->agg_txd = agg_txd;
+ txq->agg_pktleft = txq->agg_pktmax - 1;
+ txq->agg_szleft = txq->agg_szmax - pktsize;
+ }
+ /* remembered so the next call can pad this message */
+ txq->agg_prevpkt = chim;
+
+ return chim;
+}
+
+/*
+ * Append one per-packet-info record of type pi_type, with pi_dlen bytes
+ * of data, to the RNDIS message and return a pointer to the record's
+ * data area for the caller to fill in.
+ */
+static inline void *
+hn_rndis_pktinfo_append(struct rndis_packet_msg *pkt,
+			uint32_t pi_dlen, uint32_t pi_type)
+{
+	struct rndis_pktinfo *entry;
+	uint32_t entry_size;
+
+	/*
+	 * Per-packet-info does not move; it only grows.
+	 *
+	 * NOTE:
+	 * pktinfooffset in this phase counts from the beginning
+	 * of rndis_packet_msg.
+	 */
+	entry = (struct rndis_pktinfo *)((uint8_t *)pkt + hn_rndis_pktlen(pkt));
+	entry_size = RNDIS_PKTINFO_SIZE(pi_dlen);
+
+	pkt->pktinfolen += entry_size;
+
+	entry->size = entry_size;
+	entry->type = pi_type;
+	entry->offset = RNDIS_PKTINFO_OFFSET;
+
+	return entry->data;
+}
+
+/*
+ * Put RNDIS header and packet info on packet
+ *
+ * Fills the RNDIS packet message at pkt for mbuf m: a hash-value
+ * record carrying queue_id is always added, followed by VLAN, LSO or
+ * checksum records depending on m->ol_flags. The message length and
+ * offsets are fixed up once all records are in place.
+ */
+static void hn_encap(struct rndis_packet_msg *pkt,
+ uint16_t queue_id,
+ const struct rte_mbuf *m)
+{
+ unsigned int hlen = m->l2_len + m->l3_len;
+ uint32_t *pi_data;
+ uint32_t pkt_hlen;
+
+ pkt->type = RNDIS_PACKET_MSG;
+ pkt->len = m->pkt_len;
+ pkt->dataoffset = 0;
+ pkt->datalen = m->pkt_len;
+ pkt->oobdataoffset = 0;
+ pkt->oobdatalen = 0;
+ pkt->oobdataelements = 0;
+ /* counted from the start of the message until converted below */
+ pkt->pktinfooffset = sizeof(*pkt);
+ pkt->pktinfolen = 0;
+ pkt->vchandle = 0;
+ pkt->reserved = 0;
+
+ /*
+ * Set the hash value for this packet, to the queue_id to cause
+ * TX done event for this packet on the right channel.
+ */
+ pi_data = hn_rndis_pktinfo_append(pkt, NDIS_HASH_VALUE_SIZE,
+ NDIS_PKTINFO_TYPE_HASHVAL);
+ *pi_data = queue_id;
+
+ if (m->ol_flags & PKT_TX_VLAN_PKT) {
+ pi_data = hn_rndis_pktinfo_append(pkt, NDIS_VLAN_INFO_SIZE,
+ NDIS_PKTINFO_TYPE_VLAN);
+ *pi_data = m->vlan_tci;
+ }
+
+ /* an LSO record takes precedence over a checksum record */
+ if (m->ol_flags & PKT_TX_TCP_SEG) {
+ pi_data = hn_rndis_pktinfo_append(pkt, NDIS_LSO2_INFO_SIZE,
+ NDIS_PKTINFO_TYPE_LSO);
+
+ if (m->ol_flags & PKT_TX_IPV6) {
+ *pi_data = NDIS_LSO2_INFO_MAKEIPV6(hlen,
+ m->tso_segsz);
+ } else {
+ *pi_data = NDIS_LSO2_INFO_MAKEIPV4(hlen,
+ m->tso_segsz);
+ }
+ } else if (m->ol_flags &
+ (PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM | PKT_TX_IP_CKSUM)) {
+ pi_data = hn_rndis_pktinfo_append(pkt, NDIS_TXCSUM_INFO_SIZE,
+ NDIS_PKTINFO_TYPE_CSUM);
+ *pi_data = 0;
+
+ if (m->ol_flags & PKT_TX_IPV6)
+ *pi_data |= NDIS_TXCSUM_INFO_IPV6;
+ if (m->ol_flags & PKT_TX_IPV4) {
+ *pi_data |= NDIS_TXCSUM_INFO_IPV4;
+
+ if (m->ol_flags & PKT_TX_IP_CKSUM)
+ *pi_data |= NDIS_TXCSUM_INFO_IPCS;
+ }
+
+ if (m->ol_flags & PKT_TX_TCP_CKSUM)
+ *pi_data |= NDIS_TXCSUM_INFO_MKTCPCS(hlen);
+ else if (m->ol_flags & PKT_TX_UDP_CKSUM)
+ *pi_data |= NDIS_TXCSUM_INFO_MKUDPCS(hlen);
+ }
+
+ /* header plus all per-packet-info records appended above */
+ pkt_hlen = pkt->pktinfooffset + pkt->pktinfolen;
+ /* Fixup RNDIS packet message total length */
+ pkt->len += pkt_hlen;
+
+ /* Convert RNDIS packet message offsets */
+ pkt->dataoffset = hn_rndis_pktmsg_offset(pkt_hlen);
+ pkt->pktinfooffset = hn_rndis_pktmsg_offset(pkt->pktinfooffset);
+}
+
+/*
+ * How many scatter gather list elements are needed for mbuf chain m:
+ * one for the RNDIS header plus one per page touched by each segment.
+ */
+static unsigned int hn_get_slots(const struct rte_mbuf *m)
+{
+	unsigned int slots;
+
+	/* the count starts at 1 to reserve the slot for the RNDIS header */
+	for (slots = 1; m != NULL; m = m->next) {
+		unsigned int offs = rte_mbuf_data_iova(m) & PAGE_MASK;
+		unsigned int size = rte_pktmbuf_data_len(m);
+
+		slots += (offs + size + PAGE_SIZE - 1) / PAGE_SIZE;
+	}
+
+	return slots;
+}
+
+/*
+ * Build scatter gather list from chained mbuf
+ *
+ * Every segment of m is split at page boundaries, one sg element per
+ * page touched. Returns the number of elements written to sg.
+ */
+static unsigned int hn_fill_sg(struct vmbus_gpa *sg,
+			       const struct rte_mbuf *m)
+{
+	unsigned int segs = 0;
+
+	while (m) {
+		rte_iova_t addr = rte_mbuf_data_iova(m);
+		/*
+		 * Keep the page frame number 64 bits wide: narrowing it to
+		 * unsigned int would truncate it for addresses at or above
+		 * 2^44 (with 4K pages).
+		 */
+		uint64_t page = addr / PAGE_SIZE;
+		unsigned int offset = addr & PAGE_MASK;
+		unsigned int len = rte_pktmbuf_data_len(m);
+
+		while (len > 0) {
+			unsigned int bytes = RTE_MIN(len, PAGE_SIZE - offset);
+
+			sg[segs].page = page;
+			sg[segs].ofs = offset;
+			sg[segs].len = bytes;
+			segs++;
+
+			/* the rest of the segment starts on the next page */
+			++page;
+			offset = 0;
+			len -= bytes;
+		}
+		m = m->next;
+	}
+
+	return segs;
+}
+
+/*
+ * Transmit directly from mbuf
+ *
+ * Sends the RNDIS header held in txd followed by the data of mbuf
+ * chain m as a scatter gather list; chimney data already accumulated
+ * in txd (chim_size > 0) goes out with the same message.
+ *
+ * Returns the result of hn_nvs_send_sglist(), or -EINVAL when the IOVA
+ * of the RNDIS header cannot be obtained.
+ */
+static int hn_xmit_sg(struct hn_tx_queue *txq,
+ const struct hn_txdesc *txd, const struct rte_mbuf *m,
+ bool *need_sig)
+{
+ /* variable length array: header slot plus one per page of data */
+ struct vmbus_gpa sg[hn_get_slots(m)];
+ struct hn_nvs_rndis nvs_rndis = {
+ .type = NVS_TYPE_RNDIS,
+ .rndis_mtype = NVS_RNDIS_MTYPE_DATA,
+ .chim_sz = txd->chim_size,
+ };
+ rte_iova_t addr;
+ unsigned int segs;
+
+ /* attach aggregation data if present */
+ if (txd->chim_size > 0)
+ nvs_rndis.chim_idx = txd->chim_index;
+ else
+ nvs_rndis.chim_idx = NVS_CHIM_IDX_INVALID;
+
+ hn_rndis_dump(txd->rndis_pkt);
+
+ /* pass IOVA of rndis header in first segment */
+ addr = rte_malloc_virt2iova(txd->rndis_pkt);
+ if (unlikely(addr == RTE_BAD_IOVA)) {
+ PMD_DRV_LOG(ERR, "RNDIS transmit can not get iova");
+ return -EINVAL;
+ }
+
+ sg[0].page = addr / PAGE_SIZE;
+ sg[0].ofs = addr & PAGE_MASK;
+ sg[0].len = RNDIS_PACKET_MSG_OFFSET_ABS(hn_rndis_pktlen(txd->rndis_pkt));
+ segs = 1;
+
+ hn_update_packet_stats(&txq->stats, m);
+
+ /* data elements follow the header element */
+ segs += hn_fill_sg(sg + 1, m);
+
+ PMD_TX_LOG(DEBUG, "port %u:%u tx %u segs %u size %u",
+ txq->port_id, txq->queue_id, txd->chim_index,
+ segs, nvs_rndis.chim_sz);
+
+ return hn_nvs_send_sglist(txq->chan, sg, segs,
+ &nvs_rndis, sizeof(nvs_rndis),
+ (uintptr_t)txd, need_sig);
+}
+
+/*
+ * Transmit burst function.
+ *
+ * Packets go out over the VF when one is attached and started.
+ * Otherwise small packets are copied into the chimney buffer and
+ * aggregated, while larger ones are sent directly from the mbuf as a
+ * scatter gather list.
+ *
+ * Returns the number of packets from tx_pkts that were consumed; the
+ * remaining ones still belong to the caller.
+ */
+uint16_t
+hn_xmit_pkts(void *ptxq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+	struct hn_tx_queue *txq = ptxq;
+	uint16_t queue_id = txq->queue_id;
+	struct hn_data *hv = txq->hv;
+	struct rte_eth_dev *vf_dev;
+	bool need_sig = false;
+	uint16_t nb_tx;
+	int ret;
+
+	if (unlikely(hv->closed))
+		return 0;
+
+	/* Transmit over VF if present and up */
+	vf_dev = hn_get_vf_dev(hv);
+
+	if (vf_dev && vf_dev->data->dev_started) {
+		void *sub_q = vf_dev->data->tx_queues[queue_id];
+
+		return (*vf_dev->tx_pkt_burst)(sub_q, tx_pkts, nb_pkts);
+	}
+
+	/* reclaim descriptors when the pool is running low */
+	if (rte_mempool_avail_count(hv->tx_pool) <= txq->free_thresh)
+		hn_process_events(hv, txq->queue_id, 0);
+
+	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
+		struct rte_mbuf *m = tx_pkts[nb_tx];
+		uint32_t pkt_size = m->pkt_len + HN_RNDIS_PKT_LEN;
+		struct rndis_packet_msg *pkt;
+
+		/* For small packets aggregate them in chimney buffer */
+		if (m->pkt_len < HN_TXCOPY_THRESHOLD && pkt_size <= txq->agg_szmax) {
+			/* If this packet will not fit, then flush */
+			if (txq->agg_pktleft == 0 ||
+			    RTE_ALIGN(pkt_size, txq->agg_align) > txq->agg_szleft) {
+				if (hn_flush_txagg(txq, &need_sig))
+					goto fail;
+			}
+
+			pkt = hn_try_txagg(hv, txq, pkt_size);
+			if (unlikely(!pkt))
+				break;
+
+			hn_encap(pkt, queue_id, m);
+			hn_append_to_chim(txq, pkt, m);
+
+			rte_pktmbuf_free(m);
+
+			/* if buffer is full, flush */
+			if (txq->agg_pktleft == 0 &&
+			    hn_flush_txagg(txq, &need_sig)) {
+				/*
+				 * The mbuf was already copied to the chimney
+				 * buffer and freed above, so it must be
+				 * reported as consumed; otherwise the caller
+				 * would still treat the freed mbuf as its own.
+				 * The pending data is flushed on a later call.
+				 */
+				++nb_tx;
+				goto fail;
+			}
+		} else {
+			struct hn_txdesc *txd;
+
+			/* can send chimney data and large packet at once */
+			txd = txq->agg_txd;
+			if (txd) {
+				hn_reset_txagg(txq);
+			} else {
+				txd = hn_new_txd(hv, txq);
+				if (unlikely(!txd))
+					break;
+			}
+
+			pkt = txd->rndis_pkt;
+			txd->m = m;
+			txd->data_size += m->pkt_len;
+			++txd->packets;
+
+			hn_encap(pkt, queue_id, m);
+
+			ret = hn_xmit_sg(txq, txd, m, &need_sig);
+			if (unlikely(ret != 0)) {
+				PMD_TX_LOG(NOTICE, "sg send failed: %d", ret);
+				++txq->stats.errors;
+				rte_mempool_put(hv->tx_pool, txd);
+				goto fail;
+			}
+		}
+	}
+
+	/* If partial buffer left, then try and send it.
+	 * if that fails, then reuse it on next send.
+	 */
+	hn_flush_txagg(txq, &need_sig);
+
+fail:
+	if (need_sig)
+		rte_vmbus_chan_signal_tx(txq->chan);
+
+	return nb_tx;
+}
+
+/*
+ * Receive a burst from the same queue index of the VF port and relabel
+ * the mbufs with the port id stored in rxq. Returns the number of
+ * packets stored in rx_pkts.
+ */
+static uint16_t
+hn_recv_vf(uint16_t vf_port, const struct hn_rx_queue *rxq,
+	   struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
+{
+	uint16_t nb_rx, idx;
+
+	if (unlikely(nb_pkts == 0))
+		return 0;
+
+	nb_rx = rte_eth_rx_burst(vf_port, rxq->queue_id, rx_pkts, nb_pkts);
+
+	/* relabel the received mbufs */
+	for (idx = 0; idx != nb_rx; ++idx)
+		rx_pkts[idx]->port = rxq->port_id;
+
+	return nb_rx;
+}
+
+/*
+ * Receive burst function: take packets from the queue's staging ring
+ * (refilled from channel events when it holds fewer than nb_pkts) and,
+ * when a VF is attached and started, fill the remaining room from the
+ * VF. Returns the number of packets stored in rx_pkts.
+ */
+uint16_t
+hn_recv_pkts(void *prxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
+{
+	struct hn_rx_queue *rxq = prxq;
+	struct hn_data *hv = rxq->hv;
+	struct rte_eth_dev *vf;
+	uint16_t total;
+
+	if (unlikely(hv->closed))
+		return 0;
+
+	/* Look up the VF first; it is polled last, if present and up */
+	vf = hn_get_vf_dev(hv);
+
+	/* Check for new completions */
+	if (likely(rte_ring_count(rxq->rx_ring) < nb_pkts))
+		hn_process_events(hv, rxq->queue_id, 0);
+
+	/* Always check the vmbus path for multicast and new flows */
+	total = rte_ring_sc_dequeue_burst(rxq->rx_ring,
+					  (void **)rx_pkts, nb_pkts, NULL);
+
+	if (vf == NULL || !vf->data->dev_started)
+		return total;
+
+	/* VF is available, check that as well */
+	total += hn_recv_vf(vf->data->port_id, rxq,
+			    rx_pkts + total, nb_pkts - total);
+
+	return total;
+}
+
+/*
+ * Free every Rx queue (the primary one included) and release every Tx
+ * queue of the device, then reset the queue counts to zero.
+ */
+void
+hn_dev_free_queues(struct rte_eth_dev *dev)
+{
+	unsigned int q;
+
+	for (q = 0; q < dev->data->nb_rx_queues; q++) {
+		hn_rx_queue_free(dev->data->rx_queues[q], false);
+		dev->data->rx_queues[q] = NULL;
+	}
+	dev->data->nb_rx_queues = 0;
+
+	for (q = 0; q < dev->data->nb_tx_queues; q++) {
+		void *txq = dev->data->tx_queues[q];
+
+		hn_dev_tx_queue_release(txq);
+		dev->data->tx_queues[q] = NULL;
+	}
+	dev->data->nb_tx_queues = 0;
+}
diff --git a/src/seastar/dpdk/drivers/net/netvsc/hn_var.h b/src/seastar/dpdk/drivers/net/netvsc/hn_var.h
new file mode 100644
index 000000000..de885d898
--- /dev/null
+++ b/src/seastar/dpdk/drivers/net/netvsc/hn_var.h
@@ -0,0 +1,241 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2009-2018 Microsoft Corp.
+ * Copyright (c) 2016 Brocade Communications Systems, Inc.
+ * Copyright (c) 2012 NetApp Inc.
+ * Copyright (c) 2012 Citrix Inc.
+ * All rights reserved.
+ */
+
+/*
+ * Tunable ethdev params
+ */
+#define HN_MIN_RX_BUF_SIZE 1024
+#define HN_MAX_XFER_LEN 2048
+#define HN_MAX_MAC_ADDRS 1
+#define HN_MAX_CHANNELS 64
+
+/* Claimed to be 12232B */
+#define HN_MTU_MAX (9 * 1024)
+
+/* Retry interval */
+#define HN_CHAN_INTERVAL_US 100
+
+/* Host monitor interval */
+#define HN_CHAN_LATENCY_NS 50000
+
+/* Buffers need to be aligned */
+#ifndef PAGE_SIZE
+#define PAGE_SIZE 4096
+#endif
+
+#ifndef PAGE_MASK
+#define PAGE_MASK (PAGE_SIZE - 1)
+#endif
+
+struct hn_data;
+struct hn_txdesc;
+
+/* Software counters, embedded in both the Tx and the Rx queue structures */
+struct hn_stats {
+ uint64_t packets;
+ uint64_t bytes;
+ uint64_t errors;
+ /* on Tx: number of times no Tx descriptor could be taken from the pool */
+ uint64_t ring_full;
+ uint64_t multicast;
+ uint64_t broadcast;
+ /* Size bins in array as RFC 2819, undersized [0], 64 [1], etc */
+ uint64_t size_bins[8];
+};
+
+/* Per-queue transmit state */
+struct hn_tx_queue {
+ struct hn_data *hv;
+ struct vmbus_channel *chan;
+ uint16_t port_id;
+ uint16_t queue_id;
+ /* events are processed before sending once the Tx pool has this few entries left */
+ uint32_t free_thresh;
+
+ /* Applied packet transmission aggregation limits. */
+ uint32_t agg_szmax;
+ uint32_t agg_pktmax;
+ uint32_t agg_align;
+
+ /* Packet transmission aggregation states */
+ /* descriptor currently aggregating, NULL when none is in progress */
+ struct hn_txdesc *agg_txd;
+ /* packets / bytes that can still be added to agg_txd */
+ uint32_t agg_pktleft;
+ uint32_t agg_szleft;
+ /* last message placed in the chimney buffer, padded when the next one is added */
+ struct rndis_packet_msg *agg_prevpkt;
+
+ struct hn_stats stats;
+};
+
+/* Per-queue receive state */
+struct hn_rx_queue {
+ struct hn_data *hv;
+ struct vmbus_channel *chan;
+ struct rte_mempool *mb_pool;
+ /* staging ring between channel event processing and the Rx burst */
+ struct rte_ring *rx_ring;
+
+ /* serializes event processing, which runs from both Rx poll and Tx cleanup */
+ rte_spinlock_t ring_lock;
+ /* size in bytes of event_buf */
+ uint32_t event_sz;
+ uint16_t port_id;
+ uint16_t queue_id;
+ struct hn_stats stats;
+
+ /* buffer that channel packets are read into; grown on demand */
+ void *event_buf;
+};
+
+
+/*
+ * multi-packet data from host
+ * Each entry is cache line aligned (__rte_cache_aligned).
+ */
+struct hn_rx_bufinfo {
+ struct vmbus_channel *chan;
+ struct hn_data *hv;
+ uint64_t xactid;
+ struct rte_mbuf_ext_shared_info shinfo;
+} __rte_cache_aligned;
+
+#define HN_INVALID_PORT UINT16_MAX
+
+/* Private data of one netvsc device */
+struct hn_data {
+ struct rte_vmbus_device *vmbus;
+ /* Rx queue used for queue index 0 */
+ struct hn_rx_queue *primary;
+ /* held while the VF is attached, removed or called into */
+ rte_spinlock_t vf_lock;
+ /* index of this device in rte_eth_devices */
+ uint16_t port_id;
+ /* port id of the attached VF, HN_INVALID_PORT when there is none */
+ uint16_t vf_port;
+
+ /* last VF association state reported by the host */
+ uint8_t vf_present;
+ /* when set, the Rx and Tx burst functions return immediately */
+ uint8_t closed;
+ uint8_t vlan_strip;
+
+ uint32_t link_status;
+ uint32_t link_speed;
+
+ struct rte_mem_resource *rxbuf_res; /* UIO resource for Rx */
+ struct hn_rx_bufinfo *rxbuf_info;
+ uint32_t rxbuf_section_cnt; /* # of Rx sections */
+ volatile uint32_t rxbuf_outstanding;
+ uint16_t max_queues; /* Max available queues */
+ uint16_t num_queues;
+ uint64_t rss_offloads;
+
+ struct rte_mem_resource *chim_res; /* UIO resource for Tx */
+ struct rte_mempool *tx_pool; /* Tx descriptors */
+ uint32_t chim_szmax; /* Max size per buffer */
+ uint32_t chim_cnt; /* Max packets per buffer */
+
+ uint32_t latency;
+ uint32_t nvs_ver;
+ uint32_t ndis_ver;
+ uint32_t rndis_agg_size;
+ uint32_t rndis_agg_pkts;
+ uint32_t rndis_agg_align;
+
+ volatile uint32_t rndis_pending;
+ rte_atomic32_t rndis_req_id;
+ uint8_t rndis_resp[256];
+
+ struct ether_addr mac_addr;
+
+ /* owner identity set on the VF port when it is attached */
+ struct rte_eth_dev_owner owner;
+ /* installed as the device interrupt handle when a VF is added */
+ struct rte_intr_handle vf_intr;
+
+ struct vmbus_channel *channels[HN_MAX_CHANNELS];
+};
+
+/* The primary channel is the first entry of the channel table */
+static inline struct vmbus_channel *
+hn_primary_chan(const struct hn_data *hv)
+{
+ return hv->channels[0];
+}
+
+uint32_t hn_process_events(struct hn_data *hv, uint16_t queue_id,
+ uint32_t tx_limit);
+
+uint16_t hn_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
+ uint16_t nb_pkts);
+uint16_t hn_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
+ uint16_t nb_pkts);
+
+int hn_tx_pool_init(struct rte_eth_dev *dev);
+void hn_tx_pool_uninit(struct rte_eth_dev *dev);
+int hn_dev_link_update(struct rte_eth_dev *dev, int wait);
+int hn_dev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
+ uint16_t nb_desc, unsigned int socket_id,
+ const struct rte_eth_txconf *tx_conf);
+void hn_dev_tx_queue_release(void *arg);
+void hn_dev_tx_queue_info(struct rte_eth_dev *dev, uint16_t queue_idx,
+ struct rte_eth_txq_info *qinfo);
+int hn_dev_tx_done_cleanup(void *arg, uint32_t free_cnt);
+
+struct hn_rx_queue *hn_rx_queue_alloc(struct hn_data *hv,
+ uint16_t queue_id,
+ unsigned int socket_id);
+int hn_dev_rx_queue_setup(struct rte_eth_dev *dev,
+ uint16_t queue_idx, uint16_t nb_desc,
+ unsigned int socket_id,
+ const struct rte_eth_rxconf *rx_conf,
+ struct rte_mempool *mp);
+void hn_dev_rx_queue_release(void *arg);
+void hn_dev_free_queues(struct rte_eth_dev *dev);
+
+/* Check if VF is attached, i.e. vf_port holds a valid port id */
+static inline bool
+hn_vf_attached(const struct hn_data *hv)
+{
+ return hv->vf_port != HN_INVALID_PORT;
+}
+
+/*
+ * Get VF device for existing netvsc device.
+ * Returns NULL when no VF is attached.
+ */
+static inline struct rte_eth_dev *
+hn_get_vf_dev(const struct hn_data *hv)
+{
+	const uint16_t port = hv->vf_port;
+
+	/* make sure vf_port is loaded */
+	rte_smp_rmb();
+
+	return (port != HN_INVALID_PORT) ? &rte_eth_devices[port] : NULL;
+}
+
+void hn_vf_info_get(struct hn_data *hv,
+ struct rte_eth_dev_info *info);
+int hn_vf_add(struct rte_eth_dev *dev, struct hn_data *hv);
+int hn_vf_configure(struct rte_eth_dev *dev,
+ const struct rte_eth_conf *dev_conf);
+const uint32_t *hn_vf_supported_ptypes(struct rte_eth_dev *dev);
+int hn_vf_start(struct rte_eth_dev *dev);
+void hn_vf_reset(struct rte_eth_dev *dev);
+void hn_vf_stop(struct rte_eth_dev *dev);
+void hn_vf_close(struct rte_eth_dev *dev);
+
+void hn_vf_allmulticast_enable(struct rte_eth_dev *dev);
+void hn_vf_allmulticast_disable(struct rte_eth_dev *dev);
+void hn_vf_promiscuous_enable(struct rte_eth_dev *dev);
+void hn_vf_promiscuous_disable(struct rte_eth_dev *dev);
+int hn_vf_mc_addr_list(struct rte_eth_dev *dev,
+ struct ether_addr *mc_addr_set,
+ uint32_t nb_mc_addr);
+
+int hn_vf_link_update(struct rte_eth_dev *dev,
+ int wait_to_complete);
+int hn_vf_tx_queue_setup(struct rte_eth_dev *dev,
+ uint16_t queue_idx, uint16_t nb_desc,
+ unsigned int socket_id,
+ const struct rte_eth_txconf *tx_conf);
+void hn_vf_tx_queue_release(struct hn_data *hv, uint16_t queue_id);
+int hn_vf_rx_queue_setup(struct rte_eth_dev *dev,
+ uint16_t queue_idx, uint16_t nb_desc,
+ unsigned int socket_id,
+ const struct rte_eth_rxconf *rx_conf,
+ struct rte_mempool *mp);
+void hn_vf_rx_queue_release(struct hn_data *hv, uint16_t queue_id);
+
+int hn_vf_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats);
+void hn_vf_stats_reset(struct rte_eth_dev *dev);
+int hn_vf_xstats_get_names(struct rte_eth_dev *dev,
+ struct rte_eth_xstat_name *xstats_names,
+ unsigned int size);
+int hn_vf_xstats_get(struct rte_eth_dev *dev,
+ struct rte_eth_xstat *xstats,
+ unsigned int n);
+void hn_vf_xstats_reset(struct rte_eth_dev *dev);
diff --git a/src/seastar/dpdk/drivers/net/netvsc/hn_vf.c b/src/seastar/dpdk/drivers/net/netvsc/hn_vf.c
new file mode 100644
index 000000000..b980bb8a4
--- /dev/null
+++ b/src/seastar/dpdk/drivers/net/netvsc/hn_vf.c
@@ -0,0 +1,555 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2018 Microsoft Corp.
+ * All rights reserved.
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+#include <stdbool.h>
+#include <errno.h>
+#include <unistd.h>
+#include <dirent.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+
+#include <rte_ether.h>
+#include <rte_ethdev.h>
+#include <rte_ethdev_driver.h>
+#include <rte_lcore.h>
+#include <rte_memory.h>
+#include <rte_bus_vmbus.h>
+#include <rte_pci.h>
+#include <rte_bus_pci.h>
+#include <rte_log.h>
+#include <rte_string_fns.h>
+
+#include "hn_logs.h"
+#include "hn_var.h"
+#include "hn_nvs.h"
+
+/*
+ * Search for VF with matching MAC address, return port id.
+ * The device itself is never a match; -ENOENT when nothing is found.
+ */
+static int hn_vf_match(const struct rte_eth_dev *dev)
+{
+	const struct ether_addr *own_mac = dev->data->mac_addrs;
+	int port;
+
+	RTE_ETH_FOREACH_DEV(port) {
+		const struct rte_eth_dev *cand = &rte_eth_devices[port];
+
+		if (cand != dev &&
+		    is_same_ether_addr(own_mac, cand->data->mac_addrs))
+			return port;
+	}
+
+	return -ENOENT;
+}
+
+
+/*
+ * Attach new PCI VF device and return the port_id
+ *
+ * Takes ownership of port_id on behalf of this device and records it
+ * as the VF port. Returns 0 on success, -EEXIST when a VF is already
+ * attached, -EBUSY when the port has another owner, or the error from
+ * the ethdev ownership calls.
+ */
+static int hn_vf_attach(struct hn_data *hv, uint16_t port_id)
+{
+	struct rte_eth_dev_owner owner = { .id = RTE_ETH_DEV_NO_OWNER };
+	int ret;
+
+	if (hn_vf_attached(hv)) {
+		PMD_DRV_LOG(ERR, "VF already attached");
+		return -EEXIST;
+	}
+
+	ret = rte_eth_dev_owner_get(port_id, &owner);
+	if (ret < 0) {
+		PMD_DRV_LOG(ERR, "Can not find owner for port %d", port_id);
+		return ret;
+	}
+
+	if (owner.id != RTE_ETH_DEV_NO_OWNER) {
+		PMD_DRV_LOG(ERR, "Port %u already owned by other device %s",
+			    port_id, owner.name);
+		return -EBUSY;
+	}
+
+	ret = rte_eth_dev_owner_set(port_id, &hv->owner);
+	if (ret < 0) {
+		/* message fixed: it used to read "Can set owner" */
+		PMD_DRV_LOG(ERR, "Can not set owner for port %d", port_id);
+		return ret;
+	}
+
+	PMD_DRV_LOG(DEBUG, "Attach VF device %u", port_id);
+	hv->vf_port = port_id;
+	/* pairs with the read barrier in hn_get_vf_dev() */
+	rte_smp_wmb();
+
+	return 0;
+}
+
+/*
+ * Add new VF device to synthetic device.
+ * Looks for a port with the same MAC address, attaches it and switches
+ * the data path to the VF. Returns 0 on success or a negative errno.
+ */
+int hn_vf_add(struct rte_eth_dev *dev, struct hn_data *hv)
+{
+	int vf_port, rc;
+
+	vf_port = hn_vf_match(dev);
+	if (vf_port < 0) {
+		PMD_DRV_LOG(NOTICE, "No matching MAC found");
+		return vf_port;
+	}
+
+	rte_spinlock_lock(&hv->vf_lock);
+
+	rc = hn_vf_attach(hv, vf_port);
+	if (rc != 0)
+		goto unlock;
+
+	dev->data->dev_flags |= RTE_ETH_DEV_INTR_LSC;
+	hv->vf_intr = (struct rte_intr_handle) {
+		.fd = -1,
+		.type = RTE_INTR_HANDLE_EXT,
+	};
+	dev->intr_handle = &hv->vf_intr;
+	hn_nvs_set_datapath(hv, NVS_DATAPATH_VF);
+
+unlock:
+	rte_spinlock_unlock(&hv->vf_lock);
+
+	return rc;
+}
+
+/*
+ * Remove VF device: switch the data path back to the synthetic device,
+ * stop using the VF port and give its ownership back.
+ */
+static void hn_vf_remove(struct hn_data *hv)
+{
+
+	rte_spinlock_lock(&hv->vf_lock);
+
+	if (!hn_vf_attached(hv)) {
+		PMD_DRV_LOG(ERR, "VF path not active");
+	} else {
+		/*
+		 * Remember the port before it is invalidated below;
+		 * ownership has to be released on the real VF port, not
+		 * on HN_INVALID_PORT.
+		 */
+		uint16_t vf_port = hv->vf_port;
+
+		/* Stop incoming packets from arriving on VF */
+		hn_nvs_set_datapath(hv, NVS_DATAPATH_SYNTHETIC);
+
+		/* Stop transmission over VF */
+		hv->vf_port = HN_INVALID_PORT;
+		rte_smp_wmb();
+
+		/* Give back ownership */
+		rte_eth_dev_owner_unset(vf_port, hv->owner.id);
+	}
+	rte_spinlock_unlock(&hv->vf_lock);
+}
+
+/*
+ * Handle VF association message from host.
+ * Records whether a VF is present and, when the device is attached,
+ * adds or removes the VF accordingly.
+ */
+void
+hn_nvs_handle_vfassoc(struct rte_eth_dev *dev,
+		      const struct vmbus_chanpkt_hdr *hdr,
+		      const void *data)
+{
+	const struct hn_nvs_vf_association *assoc = data;
+	struct hn_data *priv = dev->data->dev_private;
+
+	if (unlikely(vmbus_chanpkt_datalen(hdr) < sizeof(*assoc))) {
+		PMD_DRV_LOG(ERR, "invalid vf association NVS");
+		return;
+	}
+
+	PMD_DRV_LOG(DEBUG, "VF serial %u %s port %u",
+		    assoc->serial,
+		    assoc->allocated ? "add to" : "remove from",
+		    dev->data->port_id);
+
+	priv->vf_present = assoc->allocated;
+
+	/* nothing more to do unless the device is attached */
+	if (dev->state != RTE_ETH_DEV_ATTACHED)
+		return;
+
+	if (!assoc->allocated) {
+		hn_vf_remove(priv);
+		return;
+	}
+
+	hn_vf_add(dev, priv);
+}
+
+/*
+ * Merge the info from the VF and synthetic path.
+ * use the default config of the VF
+ * and the minimum number of queues and buffer sizes.
+ *
+ * On entry info holds the capabilities of the synthetic device; on
+ * return speed capabilities and default port configuration are those
+ * of the VF, queue counts are the smaller of the two, offload and RSS
+ * capabilities are the intersection, and min_rx_bufsize and
+ * max_rx_pktlen are the larger of the two.
+ */
+static void hn_vf_info_merge(struct rte_eth_dev *vf_dev,
+ struct rte_eth_dev_info *info)
+{
+ struct rte_eth_dev_info vf_info;
+
+ rte_eth_dev_info_get(vf_dev->data->port_id, &vf_info);
+
+ info->speed_capa = vf_info.speed_capa;
+ info->default_rxportconf = vf_info.default_rxportconf;
+ info->default_txportconf = vf_info.default_txportconf;
+
+ info->max_rx_queues = RTE_MIN(vf_info.max_rx_queues,
+ info->max_rx_queues);
+ /* only offloads supported by both paths can be advertised */
+ info->rx_offload_capa &= vf_info.rx_offload_capa;
+ info->rx_queue_offload_capa &= vf_info.rx_queue_offload_capa;
+ info->flow_type_rss_offloads &= vf_info.flow_type_rss_offloads;
+
+ info->max_tx_queues = RTE_MIN(vf_info.max_tx_queues,
+ info->max_tx_queues);
+ info->tx_offload_capa &= vf_info.tx_offload_capa;
+ info->tx_queue_offload_capa &= vf_info.tx_queue_offload_capa;
+
+ info->min_rx_bufsize = RTE_MAX(vf_info.min_rx_bufsize,
+ info->min_rx_bufsize);
+ /* NOTE(review): takes the larger max_rx_pktlen of the two - confirm this is intended */
+ info->max_rx_pktlen = RTE_MAX(vf_info.max_rx_pktlen,
+ info->max_rx_pktlen);
+}
+
+/* Fold the VF's device info into info; no-op when no VF is attached */
+void hn_vf_info_get(struct hn_data *hv, struct rte_eth_dev_info *info)
+{
+	struct rte_eth_dev *vf;
+
+	rte_spinlock_lock(&hv->vf_lock);
+
+	vf = hn_get_vf_dev(hv);
+	if (vf != NULL)
+		hn_vf_info_merge(vf, info);
+
+	rte_spinlock_unlock(&hv->vf_lock);
+}
+
+/*
+ * Run the VF driver's link_update operation, if a VF is attached and
+ * provides one. Returns its result, or 0 when it was not called.
+ */
+int hn_vf_link_update(struct rte_eth_dev *dev,
+		      int wait_to_complete)
+{
+	struct hn_data *priv = dev->data->dev_private;
+	struct rte_eth_dev *vf;
+	int rc = 0;
+
+	rte_spinlock_lock(&priv->vf_lock);
+
+	vf = hn_get_vf_dev(priv);
+	if (vf != NULL && vf->dev_ops->link_update != NULL)
+		rc = vf->dev_ops->link_update(vf, wait_to_complete);
+
+	rte_spinlock_unlock(&priv->vf_lock);
+
+	return rc;
+}
+
+/*
+ * called when VF has link state interrupts enabled.
+ * cb_arg is the netvsc device; its link state is refreshed and, when
+ * it changed, the LSC event is passed on to that device's callbacks.
+ */
+static int hn_vf_lsc_event(uint16_t port_id __rte_unused,
+			   enum rte_eth_event_type event,
+			   void *cb_arg, void *out __rte_unused)
+{
+	struct rte_eth_dev *dev = cb_arg;
+
+	/* ignore other events, and link updates that report no change */
+	if (event != RTE_ETH_EVENT_INTR_LSC ||
+	    hn_dev_link_update(dev, 0) == 0)
+		return 0;
+
+	return _rte_eth_dev_callback_process(dev,
+					     RTE_ETH_EVENT_INTR_LSC,
+					     NULL);
+}
+
+/*
+ * Configure the VF port with a copy of dev_conf and the same number of
+ * Rx/Tx queues as dev. Link state change interrupts are enabled on the
+ * VF only when requested in dev_conf and supported by the VF; in that
+ * case hn_vf_lsc_event() is registered for the VF's LSC events.
+ *
+ * Returns 0 on success or the error from the ethdev calls.
+ */
+static int _hn_vf_configure(struct rte_eth_dev *dev,
+			    uint16_t vf_port,
+			    const struct rte_eth_conf *dev_conf)
+{
+	struct rte_eth_conf vf_conf = *dev_conf;
+	struct rte_eth_dev *vf_dev;
+	int ret;
+
+	vf_dev = &rte_eth_devices[vf_port];
+	if (dev_conf->intr_conf.lsc &&
+	    (vf_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
+		PMD_DRV_LOG(DEBUG, "enabling LSC for VF %u",
+			    vf_port);
+		vf_conf.intr_conf.lsc = 1;
+	} else {
+		PMD_DRV_LOG(DEBUG, "disabling LSC for VF %u",
+			    vf_port);
+		vf_conf.intr_conf.lsc = 0;
+	}
+
+	ret = rte_eth_dev_configure(vf_port,
+				    dev->data->nb_rx_queues,
+				    dev->data->nb_tx_queues,
+				    &vf_conf);
+	if (ret) {
+		PMD_DRV_LOG(ERR,
+			    "VF configuration failed: %d", ret);
+	} else if (vf_conf.intr_conf.lsc) {
+		/*
+		 * Register for the LSC *event*: the callback filters on
+		 * RTE_ETH_EVENT_INTR_LSC, whereas RTE_ETH_DEV_INTR_LSC is
+		 * a dev_flags bit and not an event type.
+		 */
+		ret = rte_eth_dev_callback_register(vf_port,
+						    RTE_ETH_EVENT_INTR_LSC,
+						    hn_vf_lsc_event, dev);
+		if (ret)
+			PMD_DRV_LOG(ERR,
+				    "Failed to register LSC callback for VF %u",
+				    vf_port);
+	}
+	return ret;
+}
+
+/*
+ * Configure VF if present.
+ * Force VF to have same number of queues as synthetic device.
+ * Returns 0 when no VF is attached.
+ */
+int hn_vf_configure(struct rte_eth_dev *dev,
+		    const struct rte_eth_conf *dev_conf)
+{
+	struct hn_data *priv = dev->data->dev_private;
+	int rc = 0;
+
+	rte_spinlock_lock(&priv->vf_lock);
+
+	if (hn_vf_attached(priv))
+		rc = _hn_vf_configure(dev, priv->vf_port, dev_conf);
+
+	rte_spinlock_unlock(&priv->vf_lock);
+
+	return rc;
+}
+
+/*
+ * Packet types reported by the VF driver, or NULL when no VF is
+ * attached or it has no dev_supported_ptypes_get operation.
+ */
+const uint32_t *hn_vf_supported_ptypes(struct rte_eth_dev *dev)
+{
+	struct hn_data *priv = dev->data->dev_private;
+	const uint32_t *result = NULL;
+	struct rte_eth_dev *vf;
+
+	rte_spinlock_lock(&priv->vf_lock);
+
+	vf = hn_get_vf_dev(priv);
+	if (vf != NULL && vf->dev_ops->dev_supported_ptypes_get != NULL)
+		result = vf->dev_ops->dev_supported_ptypes_get(vf);
+
+	rte_spinlock_unlock(&priv->vf_lock);
+
+	return result;
+}
+
+/* Start the VF port if one is attached; returns 0 when there is none */
+int hn_vf_start(struct rte_eth_dev *dev)
+{
+	struct hn_data *priv = dev->data->dev_private;
+	struct rte_eth_dev *vf;
+	int rc;
+
+	rte_spinlock_lock(&priv->vf_lock);
+	vf = hn_get_vf_dev(priv);
+	rc = (vf != NULL) ? rte_eth_dev_start(vf->data->port_id) : 0;
+	rte_spinlock_unlock(&priv->vf_lock);
+
+	return rc;
+}
+
+/* Stop the VF port if one is attached */
+void hn_vf_stop(struct rte_eth_dev *dev)
+{
+	struct hn_data *priv = dev->data->dev_private;
+	struct rte_eth_dev *vf;
+
+	rte_spinlock_lock(&priv->vf_lock);
+
+	vf = hn_get_vf_dev(priv);
+	if (vf != NULL)
+		rte_eth_dev_stop(vf->data->port_id);
+
+	rte_spinlock_unlock(&priv->vf_lock);
+}
+
+/* If VF is present, then cascade configuration down */
+#define VF_ETHDEV_FUNC(dev, func) \
+ { \
+ struct hn_data *hv = (dev)->data->dev_private; \
+ struct rte_eth_dev *vf_dev; \
+ rte_spinlock_lock(&hv->vf_lock); \
+ vf_dev = hn_get_vf_dev(hv); \
+ if (vf_dev) \
+ func(vf_dev->data->port_id); \
+ rte_spinlock_unlock(&hv->vf_lock); \
+ }
+
+/* Reset the VF port if one is attached; the result of the reset is not reported */
+void hn_vf_reset(struct rte_eth_dev *dev)
+{
+ VF_ETHDEV_FUNC(dev, rte_eth_dev_reset);
+}
+
+/* Close the VF port if one is attached, and forget about it */
+void hn_vf_close(struct rte_eth_dev *dev)
+{
+	struct hn_data *priv = dev->data->dev_private;
+
+	rte_spinlock_lock(&priv->vf_lock);
+
+	if (hn_vf_attached(priv))
+		rte_eth_dev_close(priv->vf_port);
+	priv->vf_port = HN_INVALID_PORT;
+
+	rte_spinlock_unlock(&priv->vf_lock);
+}
+
+/* Reset the statistics of the VF port if one is attached */
+void hn_vf_stats_reset(struct rte_eth_dev *dev)
+{
+ VF_ETHDEV_FUNC(dev, rte_eth_stats_reset);
+}
+
+/* Enable all-multicast mode on the VF port if one is attached */
+void hn_vf_allmulticast_enable(struct rte_eth_dev *dev)
+{
+ VF_ETHDEV_FUNC(dev, rte_eth_allmulticast_enable);
+}
+
+/* Disable all-multicast mode on the VF port if one is attached */
+void hn_vf_allmulticast_disable(struct rte_eth_dev *dev)
+{
+ VF_ETHDEV_FUNC(dev, rte_eth_allmulticast_disable);
+}
+
+/* Enable promiscuous mode on the VF port if one is attached */
+void hn_vf_promiscuous_enable(struct rte_eth_dev *dev)
+{
+ VF_ETHDEV_FUNC(dev, rte_eth_promiscuous_enable);
+}
+
+/* Disable promiscuous mode on the VF port if one is attached */
+void hn_vf_promiscuous_disable(struct rte_eth_dev *dev)
+{
+ VF_ETHDEV_FUNC(dev, rte_eth_promiscuous_disable);
+}
+
+/*
+ * Set the multicast address list on the VF port if one is attached.
+ * Returns the ethdev result, or 0 when there is no VF.
+ */
+int hn_vf_mc_addr_list(struct rte_eth_dev *dev,
+		       struct ether_addr *mc_addr_set,
+		       uint32_t nb_mc_addr)
+{
+	struct hn_data *priv = dev->data->dev_private;
+	struct rte_eth_dev *vf;
+	int rc;
+
+	rte_spinlock_lock(&priv->vf_lock);
+	vf = hn_get_vf_dev(priv);
+	rc = (vf != NULL) ?
+		rte_eth_dev_set_mc_addr_list(vf->data->port_id,
+					     mc_addr_set, nb_mc_addr) : 0;
+	rte_spinlock_unlock(&priv->vf_lock);
+
+	return rc;
+}
+
+/*
+ * Set up Tx queue queue_idx on the VF port if one is attached.
+ * Returns the ethdev result, or 0 when there is no VF.
+ */
+int hn_vf_tx_queue_setup(struct rte_eth_dev *dev,
+			 uint16_t queue_idx, uint16_t nb_desc,
+			 unsigned int socket_id,
+			 const struct rte_eth_txconf *tx_conf)
+{
+	struct hn_data *priv = dev->data->dev_private;
+	struct rte_eth_dev *vf;
+	int rc;
+
+	rte_spinlock_lock(&priv->vf_lock);
+	vf = hn_get_vf_dev(priv);
+	rc = (vf != NULL) ?
+		rte_eth_tx_queue_setup(vf->data->port_id,
+				       queue_idx, nb_desc,
+				       socket_id, tx_conf) : 0;
+	rte_spinlock_unlock(&priv->vf_lock);
+
+	return rc;
+}
+
+/*
+ * Hand Tx queue queue_id of the VF to the VF driver's tx_queue_release
+ * operation, when a VF is attached and provides one.
+ */
+void hn_vf_tx_queue_release(struct hn_data *hv, uint16_t queue_id)
+{
+	struct rte_eth_dev *vf;
+
+	rte_spinlock_lock(&hv->vf_lock);
+
+	vf = hn_get_vf_dev(hv);
+	if (vf != NULL && vf->dev_ops->tx_queue_release != NULL)
+		vf->dev_ops->tx_queue_release(vf->data->tx_queues[queue_id]);
+
+	rte_spinlock_unlock(&hv->vf_lock);
+}
+
+/*
+ * Set up Rx queue queue_idx on the VF port if one is attached.
+ * Returns the ethdev result, or 0 when there is no VF.
+ */
+int hn_vf_rx_queue_setup(struct rte_eth_dev *dev,
+			 uint16_t queue_idx, uint16_t nb_desc,
+			 unsigned int socket_id,
+			 const struct rte_eth_rxconf *rx_conf,
+			 struct rte_mempool *mp)
+{
+	struct hn_data *priv = dev->data->dev_private;
+	struct rte_eth_dev *vf;
+	int rc;
+
+	rte_spinlock_lock(&priv->vf_lock);
+	vf = hn_get_vf_dev(priv);
+	rc = (vf != NULL) ?
+		rte_eth_rx_queue_setup(vf->data->port_id,
+				       queue_idx, nb_desc,
+				       socket_id, rx_conf, mp) : 0;
+	rte_spinlock_unlock(&priv->vf_lock);
+
+	return rc;
+}
+
+/*
+ * Hand Rx queue queue_id of the VF to the VF driver's rx_queue_release
+ * operation, when a VF is attached and provides one.
+ */
+void hn_vf_rx_queue_release(struct hn_data *hv, uint16_t queue_id)
+{
+	struct rte_eth_dev *vf;
+
+	rte_spinlock_lock(&hv->vf_lock);
+
+	vf = hn_get_vf_dev(hv);
+	if (vf != NULL && vf->dev_ops->rx_queue_release != NULL)
+		vf->dev_ops->rx_queue_release(vf->data->rx_queues[queue_id]);
+
+	rte_spinlock_unlock(&hv->vf_lock);
+}
+
+/*
+ * Read the statistics of the VF port into stats if a VF is attached.
+ * Returns the ethdev result, or 0 (stats untouched) when there is none.
+ */
+int hn_vf_stats_get(struct rte_eth_dev *dev,
+		    struct rte_eth_stats *stats)
+{
+	struct hn_data *priv = dev->data->dev_private;
+	struct rte_eth_dev *vf;
+	int rc;
+
+	rte_spinlock_lock(&priv->vf_lock);
+	vf = hn_get_vf_dev(priv);
+	rc = (vf != NULL) ? rte_eth_stats_get(vf->data->port_id, stats) : 0;
+	rte_spinlock_unlock(&priv->vf_lock);
+
+	return rc;
+}
+
+/*
+ * Get the extended statistic names of the VF, each prefixed with
+ * "vf_".
+ *
+ * names may be NULL. Returns what the VF driver's xstats_get_names
+ * operation returned, or 0 when no VF is attached or it has no such
+ * operation.
+ */
+int hn_vf_xstats_get_names(struct rte_eth_dev *dev,
+			   struct rte_eth_xstat_name *names,
+			   unsigned int n)
+{
+	struct hn_data *hv = dev->data->dev_private;
+	struct rte_eth_dev *vf_dev;
+	int i, count = 0;
+	char tmp[RTE_ETH_XSTATS_NAME_SIZE];
+
+	rte_spinlock_lock(&hv->vf_lock);
+	vf_dev = hn_get_vf_dev(hv);
+	if (vf_dev && vf_dev->dev_ops->xstats_get_names)
+		count = vf_dev->dev_ops->xstats_get_names(vf_dev, names, n);
+	rte_spinlock_unlock(&hv->vf_lock);
+
+	/*
+	 * add vf_ prefix to xstat names
+	 *
+	 * Only when the whole result fits in the caller's array: the
+	 * driver may report more names than the n entries provided, and
+	 * names[] must not be accessed beyond them.
+	 */
+	if (names && count > 0 && (unsigned int)count <= n) {
+		for (i = 0; i < count; i++) {
+			snprintf(tmp, sizeof(tmp), "vf_%s", names[i].name);
+			strlcpy(names[i].name, tmp, sizeof(names[i].name));
+		}
+	}
+
+	return count;
+}
+
+/*
+ * Get the extended statistics of the VF through its driver's
+ * xstats_get operation. Returns that operation's result, or 0 when no
+ * VF is attached or the operation is missing.
+ */
+int hn_vf_xstats_get(struct rte_eth_dev *dev,
+		     struct rte_eth_xstat *xstats,
+		     unsigned int n)
+{
+	struct hn_data *priv = dev->data->dev_private;
+	struct rte_eth_dev *vf;
+	int nb_stats = 0;
+
+	rte_spinlock_lock(&priv->vf_lock);
+
+	vf = hn_get_vf_dev(priv);
+	if (vf != NULL && vf->dev_ops->xstats_get != NULL)
+		nb_stats = vf->dev_ops->xstats_get(vf, xstats, n);
+
+	rte_spinlock_unlock(&priv->vf_lock);
+
+	return nb_stats;
+}
+
+/*
+ * Reset the extended statistics of the VF through its driver's
+ * xstats_reset operation, when a VF is attached and provides one.
+ */
+void hn_vf_xstats_reset(struct rte_eth_dev *dev)
+{
+	struct hn_data *priv = dev->data->dev_private;
+	struct rte_eth_dev *vf;
+
+	rte_spinlock_lock(&priv->vf_lock);
+
+	vf = hn_get_vf_dev(priv);
+	if (vf != NULL && vf->dev_ops->xstats_reset != NULL)
+		vf->dev_ops->xstats_reset(vf);
+
+	rte_spinlock_unlock(&priv->vf_lock);
+}
diff --git a/src/seastar/dpdk/drivers/net/netvsc/meson.build b/src/seastar/dpdk/drivers/net/netvsc/meson.build
new file mode 100644
index 000000000..c84269716
--- /dev/null
+++ b/src/seastar/dpdk/drivers/net/netvsc/meson.build
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Microsoft Corporation
+
+# Enable this driver only when RTE_LIBRTE_VMBUS_BUS is set in the build
+# configuration.
+build = dpdk_conf.has('RTE_LIBRTE_VMBUS_BUS')
+# NOTE(review): the Makefile for this driver sets LIBABIVER := 1 while
+# this file uses 2 -- confirm which library version is intended.
+version = 2
+sources = files('hn_ethdev.c', 'hn_rxtx.c', 'hn_rndis.c', 'hn_nvs.c', 'hn_vf.c')
+
+# The Makefile likewise links against -lrte_bus_vmbus.
+deps += ['bus_vmbus' ]
+
+# Counterpart of -DALLOW_EXPERIMENTAL_API in the Makefile.
+allow_experimental_apis = true
diff --git a/src/seastar/dpdk/drivers/net/netvsc/ndis.h b/src/seastar/dpdk/drivers/net/netvsc/ndis.h
new file mode 100644
index 000000000..2e7ca99b1
--- /dev/null
+++ b/src/seastar/dpdk/drivers/net/netvsc/ndis.h
@@ -0,0 +1,378 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2018 Microsoft Corp.
+ * All rights reserved.
+ */
+
+#ifndef _NET_NDIS_H_
+#define _NET_NDIS_H_
+
+#define NDIS_MEDIA_STATE_CONNECTED 0
+#define NDIS_MEDIA_STATE_DISCONNECTED 1
+
+#define NDIS_NETCHANGE_TYPE_POSSIBLE 1
+#define NDIS_NETCHANGE_TYPE_DEFINITE 2
+#define NDIS_NETCHANGE_TYPE_FROMMEDIA 3
+
+#define NDIS_OFFLOAD_SET_NOCHG 0
+#define NDIS_OFFLOAD_SET_ON 1
+#define NDIS_OFFLOAD_SET_OFF 2
+
+/* a.k.a GRE MAC */
+#define NDIS_ENCAP_TYPE_NVGRE 0x00000001
+
+#define NDIS_HASH_FUNCTION_MASK 0x000000FF /* see hash function */
+#define NDIS_HASH_TYPE_MASK 0x00FFFF00 /* see hash type */
+
+/* hash function */
+#define NDIS_HASH_FUNCTION_TOEPLITZ 0x00000001
+
+/* hash type */
+#define NDIS_HASH_IPV4 0x00000100
+#define NDIS_HASH_TCP_IPV4 0x00000200
+#define NDIS_HASH_IPV6 0x00000400
+#define NDIS_HASH_IPV6_EX 0x00000800
+#define NDIS_HASH_TCP_IPV6 0x00001000
+#define NDIS_HASH_TCP_IPV6_EX 0x00002000
+
+#define NDIS_HASH_KEYSIZE_TOEPLITZ 40
+#define NDIS_HASH_INDCNT 128
+
+#define NDIS_OBJTYPE_DEFAULT 0x80
+#define NDIS_OBJTYPE_RSS_CAPS 0x88
+#define NDIS_OBJTYPE_RSS_PARAMS 0x89
+#define NDIS_OBJTYPE_OFFLOAD 0xa7
+
+/*
+ * Common header found at the start of the NDIS objects defined in this
+ * file (ndis_offload_params, ndis_rss_caps, ndis_rss_params and
+ * ndis_offload all begin with it).
+ */
+struct ndis_object_hdr {
+ uint8_t ndis_type; /* NDIS_OBJTYPE_ */
+ uint8_t ndis_rev; /* type specific */
+ uint16_t ndis_size; /* incl. this hdr */
+} __rte_packed;
+
+/*
+ * OID_TCP_OFFLOAD_PARAMETERS
+ * ndis_type: NDIS_OBJTYPE_DEFAULT
+ *
+ * Members are grouped by the NDIS version that introduced them; the
+ * NDIS_OFFLOAD_PARAMS_SIZE* macros below give the matching object sizes.
+ */
+struct ndis_offload_params {
+ struct ndis_object_hdr ndis_hdr;
+ uint8_t ndis_ip4csum; /* NDIS_OFFLOAD_PARAM_ */
+ uint8_t ndis_tcp4csum; /* NDIS_OFFLOAD_PARAM_ */
+ uint8_t ndis_udp4csum; /* NDIS_OFFLOAD_PARAM_ */
+ uint8_t ndis_tcp6csum; /* NDIS_OFFLOAD_PARAM_ */
+ uint8_t ndis_udp6csum; /* NDIS_OFFLOAD_PARAM_ */
+ uint8_t ndis_lsov1; /* NDIS_OFFLOAD_PARAM_ */
+ uint8_t ndis_ipsecv1; /* NDIS_OFFLOAD_IPSECV1_ */
+ uint8_t ndis_lsov2_ip4; /* NDIS_OFFLOAD_LSOV2_ */
+ uint8_t ndis_lsov2_ip6; /* NDIS_OFFLOAD_LSOV2_ */
+ uint8_t ndis_tcp4conn; /* 0 */
+ uint8_t ndis_tcp6conn; /* 0 */
+ uint32_t ndis_flags; /* 0 */
+ /* NDIS >= 6.1 */
+ uint8_t ndis_ipsecv2; /* NDIS_OFFLOAD_IPSECV2_ */
+ uint8_t ndis_ipsecv2_ip4;/* NDIS_OFFLOAD_IPSECV2_ */
+ /* NDIS >= 6.30 */
+ uint8_t ndis_rsc_ip4; /* NDIS_OFFLOAD_RSC_ */
+ uint8_t ndis_rsc_ip6; /* NDIS_OFFLOAD_RSC_ */
+ uint8_t ndis_encap; /* NDIS_OFFLOAD_SET_ */
+ uint8_t ndis_encap_types;/* NDIS_ENCAP_TYPE_ */
+};
+
+/* Size of the complete structure, NDIS >= 6.30 members included. */
+#define NDIS_OFFLOAD_PARAMS_SIZE sizeof(struct ndis_offload_params)
+/* Size up to, but not including, the NDIS >= 6.30 members. */
+#define NDIS_OFFLOAD_PARAMS_SIZE_6_1 \
+ offsetof(struct ndis_offload_params, ndis_rsc_ip4)
+
+#define NDIS_OFFLOAD_PARAMS_REV_2 2 /* NDIS 6.1 */
+#define NDIS_OFFLOAD_PARAMS_REV_3 3 /* NDIS 6.30 */
+
+#define NDIS_OFFLOAD_PARAM_NOCHG 0 /* common */
+#define NDIS_OFFLOAD_PARAM_OFF 1
+#define NDIS_OFFLOAD_PARAM_TX 2
+#define NDIS_OFFLOAD_PARAM_RX 3
+#define NDIS_OFFLOAD_PARAM_TXRX 4
+
+/* NDIS_OFFLOAD_PARAM_NOCHG */
+#define NDIS_OFFLOAD_LSOV1_OFF 1
+#define NDIS_OFFLOAD_LSOV1_ON 2
+
+/* NDIS_OFFLOAD_PARAM_NOCHG */
+#define NDIS_OFFLOAD_IPSECV1_OFF 1
+#define NDIS_OFFLOAD_IPSECV1_AH 2
+#define NDIS_OFFLOAD_IPSECV1_ESP 3
+#define NDIS_OFFLOAD_IPSECV1_AH_ESP 4
+
+/* NDIS_OFFLOAD_PARAM_NOCHG */
+#define NDIS_OFFLOAD_LSOV2_OFF 1
+#define NDIS_OFFLOAD_LSOV2_ON 2
+
+/* NDIS_OFFLOAD_PARAM_NOCHG */
+#define NDIS_OFFLOAD_IPSECV2_OFF 1
+#define NDIS_OFFLOAD_IPSECV2_AH 2
+#define NDIS_OFFLOAD_IPSECV2_ESP 3
+#define NDIS_OFFLOAD_IPSECV2_AH_ESP 4
+
+/* NDIS_OFFLOAD_PARAM_NOCHG */
+#define NDIS_OFFLOAD_RSC_OFF 1
+#define NDIS_OFFLOAD_RSC_ON 2
+
+/*
+ * OID_GEN_RECEIVE_SCALE_CAPABILITIES
+ * ndis_type: NDIS_OBJTYPE_RSS_CAPS
+ *
+ * The NDIS_RSS_CAPS_SIZE* macros below give the object size with and
+ * without the NDIS >= 6.30 members.
+ */
+struct ndis_rss_caps {
+ struct ndis_object_hdr ndis_hdr;
+ uint32_t ndis_caps; /* NDIS_RSS_CAP_ */
+ uint32_t ndis_nmsi; /* # of MSIs */
+ uint32_t ndis_nrxr; /* # of RX rings */
+ /* NDIS >= 6.30 */
+ uint16_t ndis_nind; /* # of indtbl ent. */
+ uint16_t ndis_pad;
+} __rte_packed;
+
+/* Size up to, but not including, the trailing ndis_pad member. */
+#define NDIS_RSS_CAPS_SIZE \
+ offsetof(struct ndis_rss_caps, ndis_pad)
+/* Size without the NDIS >= 6.30 members (ndis_nind onwards). */
+#define NDIS_RSS_CAPS_SIZE_6_0 \
+ offsetof(struct ndis_rss_caps, ndis_nind)
+
+#define NDIS_RSS_CAPS_REV_1 1 /* NDIS 6.{0,1,20} */
+#define NDIS_RSS_CAPS_REV_2 2 /* NDIS 6.30 */
+
+#define NDIS_RSS_CAP_MSI 0x01000000
+#define NDIS_RSS_CAP_CLASSIFY_ISR 0x02000000
+#define NDIS_RSS_CAP_CLASSIFY_DPC 0x04000000
+#define NDIS_RSS_CAP_MSIX 0x08000000
+#define NDIS_RSS_CAP_IPV4 0x00000100
+#define NDIS_RSS_CAP_IPV6 0x00000200
+#define NDIS_RSS_CAP_IPV6_EX 0x00000400
+#define NDIS_RSS_CAP_HASH_TOEPLITZ NDIS_HASH_FUNCTION_TOEPLITZ
+#define NDIS_RSS_CAP_HASHFUNC_MASK NDIS_HASH_FUNCTION_MASK
+
+/*
+ * OID_GEN_RECEIVE_SCALE_PARAMETERS
+ * ndis_type: NDIS_OBJTYPE_RSS_PARAMS
+ *
+ * The indirect table and the hash key are each described by a
+ * size/offset pair rather than being embedded in this structure.
+ */
+struct ndis_rss_params {
+ struct ndis_object_hdr ndis_hdr;
+ uint16_t ndis_flags; /* NDIS_RSS_FLAG_ */
+ uint16_t ndis_bcpu; /* base cpu 0 */
+ uint32_t ndis_hash; /* NDIS_HASH_ */
+ uint16_t ndis_indsize; /* indirect table */
+ uint32_t ndis_indoffset;
+ uint16_t ndis_keysize; /* hash key */
+ uint32_t ndis_keyoffset;
+ /* NDIS >= 6.20 */
+ uint32_t ndis_cpumaskoffset;
+ uint32_t ndis_cpumaskcnt;
+ uint32_t ndis_cpumaskentsz;
+};
+
+/* Size of the complete structure, NDIS >= 6.20 members included. */
+#define NDIS_RSS_PARAMS_SIZE sizeof(struct ndis_rss_params)
+/* Size up to, but not including, the NDIS >= 6.20 members. */
+#define NDIS_RSS_PARAMS_SIZE_6_0 \
+ offsetof(struct ndis_rss_params, ndis_cpumaskoffset)
+
+#define NDIS_RSS_PARAMS_REV_1 1 /* NDIS 6.0 */
+#define NDIS_RSS_PARAMS_REV_2 2 /* NDIS 6.20 */
+
+#define NDIS_RSS_FLAG_NONE 0x0000
+#define NDIS_RSS_FLAG_BCPU_UNCHG 0x0001
+#define NDIS_RSS_FLAG_HASH_UNCHG 0x0002
+#define NDIS_RSS_FLAG_IND_UNCHG 0x0004
+#define NDIS_RSS_FLAG_KEY_UNCHG 0x0008
+#define NDIS_RSS_FLAG_DISABLE 0x0010
+
+/*
+ * Non-standard convenience struct: the RSS parameters followed by the
+ * Toeplitz hash key and the indirect table in a single buffer.
+ *
+ * The key has to come before the indirect table: the size macro below
+ * measures up to rss_ind[nind], so only with this member order does it
+ * cover the whole key plus the first nind table entries.
+ */
+struct ndis_rssprm_toeplitz {
+	struct ndis_rss_params rss_params;
+	/* Toeplitz hash key */
+	uint8_t rss_key[NDIS_HASH_KEYSIZE_TOEPLITZ];
+	/* Indirect table */
+	uint32_t rss_ind[NDIS_HASH_INDCNT];
+};
+
+/* Bytes taken by the parameters, the hash key and nind table entries. */
+#define NDIS_RSSPRM_TOEPLITZ_SIZE(nind) \
+	offsetof(struct ndis_rssprm_toeplitz, rss_ind[nind])
+
+/*
+ * OID_TCP_OFFLOAD_HARDWARE_CAPABILITIES
+ * ndis_type: NDIS_OBJTYPE_OFFLOAD
+ */
+
+#define NDIS_OFFLOAD_ENCAP_NONE 0x0000
+#define NDIS_OFFLOAD_ENCAP_NULL 0x0001
+#define NDIS_OFFLOAD_ENCAP_8023 0x0002
+#define NDIS_OFFLOAD_ENCAP_8023PQ 0x0004
+#define NDIS_OFFLOAD_ENCAP_8023PQ_OOB 0x0008
+#define NDIS_OFFLOAD_ENCAP_RFC1483 0x0010
+
+/*
+ * Checksum offload section of struct ndis_offload (member ndis_csum).
+ * Each *_txcsum / *_rxcsum member is followed by the NDIS_?XCSUM_CAP_
+ * bit definitions that apply to it.
+ */
+struct ndis_csum_offload {
+ uint32_t ndis_ip4_txenc; /*NDIS_OFFLOAD_ENCAP_*/
+ uint32_t ndis_ip4_txcsum;
+#define NDIS_TXCSUM_CAP_IP4OPT 0x001
+#define NDIS_TXCSUM_CAP_TCP4OPT 0x004
+#define NDIS_TXCSUM_CAP_TCP4 0x010
+#define NDIS_TXCSUM_CAP_UDP4 0x040
+#define NDIS_TXCSUM_CAP_IP4 0x100
+ uint32_t ndis_ip4_rxenc; /*NDIS_OFFLOAD_ENCAP_*/
+ uint32_t ndis_ip4_rxcsum;
+#define NDIS_RXCSUM_CAP_IP4OPT 0x001
+#define NDIS_RXCSUM_CAP_TCP4OPT 0x004
+#define NDIS_RXCSUM_CAP_TCP4 0x010
+#define NDIS_RXCSUM_CAP_UDP4 0x040
+#define NDIS_RXCSUM_CAP_IP4 0x100
+ uint32_t ndis_ip6_txenc; /*NDIS_OFFLOAD_ENCAP_*/
+ uint32_t ndis_ip6_txcsum;
+#define NDIS_TXCSUM_CAP_IP6EXT 0x001
+#define NDIS_TXCSUM_CAP_TCP6OPT 0x004
+#define NDIS_TXCSUM_CAP_TCP6 0x010
+#define NDIS_TXCSUM_CAP_UDP6 0x040
+ uint32_t ndis_ip6_rxenc; /*NDIS_OFFLOAD_ENCAP_*/
+ uint32_t ndis_ip6_rxcsum;
+#define NDIS_RXCSUM_CAP_IP6EXT 0x001
+#define NDIS_RXCSUM_CAP_TCP6OPT 0x004
+#define NDIS_RXCSUM_CAP_TCP6 0x010
+#define NDIS_RXCSUM_CAP_UDP6 0x040
+};
+
+/* LSOv1 section of struct ndis_offload (member ndis_lsov1). */
+struct ndis_lsov1_offload {
+ uint32_t ndis_encap; /*NDIS_OFFLOAD_ENCAP_*/
+ uint32_t ndis_maxsize;
+ uint32_t ndis_minsegs;
+ uint32_t ndis_opts;
+};
+
+/* IPsec v1 section of struct ndis_offload (member ndis_ipsecv1). */
+struct ndis_ipsecv1_offload {
+ uint32_t ndis_encap; /*NDIS_OFFLOAD_ENCAP_*/
+ uint32_t ndis_ah_esp;
+ uint32_t ndis_xport_tun;
+ uint32_t ndis_ip4_opts;
+ uint32_t ndis_flags;
+ uint32_t ndis_ip4_ah;
+ uint32_t ndis_ip4_esp;
+};
+
+/*
+ * LSOv2 section of struct ndis_offload (member ndis_lsov2), with
+ * separate IPv4 and IPv6 members.  The NDIS_LSOV2_CAP_ bits are defined
+ * right after ndis_ip6_opts.
+ */
+struct ndis_lsov2_offload {
+ uint32_t ndis_ip4_encap; /*NDIS_OFFLOAD_ENCAP_*/
+ uint32_t ndis_ip4_maxsz;
+ uint32_t ndis_ip4_minsg;
+ uint32_t ndis_ip6_encap; /*NDIS_OFFLOAD_ENCAP_*/
+ uint32_t ndis_ip6_maxsz;
+ uint32_t ndis_ip6_minsg;
+ uint32_t ndis_ip6_opts;
+#define NDIS_LSOV2_CAP_IP6EXT 0x001
+#define NDIS_LSOV2_CAP_TCP6OPT 0x004
+};
+
+/*
+ * IPsec v2 section of struct ndis_offload (member ndis_ipsecv2); that
+ * member is marked "NDIS >= 6.1" in struct ndis_offload.
+ */
+struct ndis_ipsecv2_offload {
+ uint32_t ndis_encap; /*NDIS_OFFLOAD_ENCAP_*/
+ uint16_t ndis_ip6;
+ uint16_t ndis_ip4opt;
+ uint16_t ndis_ip6ext;
+ uint16_t ndis_ah;
+ uint16_t ndis_esp;
+ uint16_t ndis_ah_esp;
+ uint16_t ndis_xport;
+ uint16_t ndis_tun;
+ uint16_t ndis_xport_tun;
+ uint16_t ndis_lso;
+ uint16_t ndis_extseq;
+ uint32_t ndis_udp_esp;
+ uint32_t ndis_auth;
+ uint32_t ndis_crypto;
+ uint32_t ndis_sa_caps;
+};
+
+/*
+ * RSC section of struct ndis_offload (member ndis_rsc); that member is
+ * marked "NDIS >= 6.30" in struct ndis_offload.
+ */
+struct ndis_rsc_offload {
+ uint16_t ndis_ip4;
+ uint16_t ndis_ip6;
+};
+
+/*
+ * Encapsulation section of struct ndis_offload (member ndis_encap_gre);
+ * that member is marked "NDIS >= 6.30" in struct ndis_offload.
+ */
+struct ndis_encap_offload {
+ uint32_t ndis_flags;
+ uint32_t ndis_maxhdr;
+};
+
+/*
+ * Offload object made of the per-feature sections defined above.
+ * Members are grouped by the NDIS version that introduced them; the
+ * NDIS_OFFLOAD_SIZE* macros below give the matching object sizes.
+ */
+struct ndis_offload {
+ struct ndis_object_hdr ndis_hdr;
+ struct ndis_csum_offload ndis_csum;
+ struct ndis_lsov1_offload ndis_lsov1;
+ struct ndis_ipsecv1_offload ndis_ipsecv1;
+ struct ndis_lsov2_offload ndis_lsov2;
+ uint32_t ndis_flags;
+ /* NDIS >= 6.1 */
+ struct ndis_ipsecv2_offload ndis_ipsecv2;
+ /* NDIS >= 6.30 */
+ struct ndis_rsc_offload ndis_rsc;
+ struct ndis_encap_offload ndis_encap_gre;
+};
+
+/* Size of the complete structure. */
+#define NDIS_OFFLOAD_SIZE sizeof(struct ndis_offload)
+/* Size without the NDIS >= 6.1 members (ndis_ipsecv2 onwards). */
+#define NDIS_OFFLOAD_SIZE_6_0 offsetof(struct ndis_offload, ndis_ipsecv2)
+/* Size without the NDIS >= 6.30 members (ndis_rsc onwards). */
+#define NDIS_OFFLOAD_SIZE_6_1 offsetof(struct ndis_offload, ndis_rsc)
+
+#define NDIS_OFFLOAD_REV_1 1 /* NDIS 6.0 */
+#define NDIS_OFFLOAD_REV_2 2 /* NDIS 6.1 */
+#define NDIS_OFFLOAD_REV_3 3 /* NDIS 6.30 */
+
+/*
+ * Per-packet-info
+ */
+
+/*
+ * VLAN
+ *
+ * Value layout, as given by the masks and NDIS_VLAN_INFO_MAKE below:
+ * bits 0-2 priority, bit 3 CFI, bits 4-15 VLAN ID.
+ */
+#define NDIS_VLAN_INFO_SIZE sizeof(uint32_t)
+#define NDIS_VLAN_INFO_PRI_MASK 0x0007
+#define NDIS_VLAN_INFO_CFI_MASK 0x0008
+#define NDIS_VLAN_INFO_ID_MASK 0xfff0
+/* Build a VLAN info value; each argument is masked to its field width. */
+#define NDIS_VLAN_INFO_MAKE(id, pri, cfi) \
+ (((pri) & NDIS_VLAN_INFO_PRI_MASK) | \
+ (((cfi) & 0x1) << 3) | (((id) & 0xfff) << 4))
+/* Field extractors, the inverse of NDIS_VLAN_INFO_MAKE. */
+#define NDIS_VLAN_INFO_ID(inf) (((inf) & NDIS_VLAN_INFO_ID_MASK) >> 4)
+#define NDIS_VLAN_INFO_CFI(inf) (((inf) & NDIS_VLAN_INFO_CFI_MASK) >> 3)
+#define NDIS_VLAN_INFO_PRI(inf) ((inf) & NDIS_VLAN_INFO_PRI_MASK)
+
+/* Reception checksum */
+#define NDIS_RXCSUM_INFO_SIZE sizeof(uint32_t)
+#define NDIS_RXCSUM_INFO_TCPCS_FAILED 0x0001
+#define NDIS_RXCSUM_INFO_UDPCS_FAILED 0x0002
+#define NDIS_RXCSUM_INFO_IPCS_FAILED 0x0004
+#define NDIS_RXCSUM_INFO_TCPCS_OK 0x0008
+#define NDIS_RXCSUM_INFO_UDPCS_OK 0x0010
+#define NDIS_RXCSUM_INFO_IPCS_OK 0x0020
+#define NDIS_RXCSUM_INFO_LOOPBACK 0x0040
+#define NDIS_RXCSUM_INFO_TCPCS_INVAL 0x0080
+#define NDIS_RXCSUM_INFO_IPCS_INVAL 0x0100
+
+/*
+ * LSOv2
+ *
+ * Value layout, as given by the masks below: bits 0-19 MSS,
+ * bits 20-29 thoff, bit 30 ISLSO2 flag, bit 31 ISIPV6 flag.
+ */
+#define NDIS_LSO2_INFO_SIZE sizeof(uint32_t)
+#define NDIS_LSO2_INFO_MSS_MASK 0x000fffff
+#define NDIS_LSO2_INFO_THOFF_MASK 0x3ff00000
+#define NDIS_LSO2_INFO_ISLSO2 0x40000000
+#define NDIS_LSO2_INFO_ISIPV6 0x80000000
+
+/*
+ * Build an LSOv2 info value: both arguments are masked to their field
+ * width and the ISLSO2 flag is always set.
+ */
+#define NDIS_LSO2_INFO_MAKE(thoff, mss) \
+ ((((uint32_t)(mss)) & NDIS_LSO2_INFO_MSS_MASK) | \
+ ((((uint32_t)(thoff)) & 0x3ff) << 20) | \
+ NDIS_LSO2_INFO_ISLSO2)
+
+/* IPv4 variant: identical to NDIS_LSO2_INFO_MAKE (ISIPV6 left clear). */
+#define NDIS_LSO2_INFO_MAKEIPV4(thoff, mss) \
+ NDIS_LSO2_INFO_MAKE((thoff), (mss))
+
+/* IPv6 variant: NDIS_LSO2_INFO_MAKE with the ISIPV6 flag added. */
+#define NDIS_LSO2_INFO_MAKEIPV6(thoff, mss) \
+ (NDIS_LSO2_INFO_MAKE((thoff), (mss)) | NDIS_LSO2_INFO_ISIPV6)
+
+/*
+ * Transmission checksum
+ *
+ * Value layout, as given by the constants below: bits 0-4 are flag bits
+ * and bits 16-25 (NDIS_TXCSUM_INFO_THOFF) hold thoff.
+ */
+#define NDIS_TXCSUM_INFO_SIZE sizeof(uint32_t)
+#define NDIS_TXCSUM_INFO_IPV4 0x00000001
+#define NDIS_TXCSUM_INFO_IPV6 0x00000002
+#define NDIS_TXCSUM_INFO_TCPCS 0x00000004
+#define NDIS_TXCSUM_INFO_UDPCS 0x00000008
+#define NDIS_TXCSUM_INFO_IPCS 0x00000010
+#define NDIS_TXCSUM_INFO_THOFF 0x03ff0000
+
+/*
+ * Combine thoff with the given flag bits.  thoff is masked to the THOFF
+ * field (as NDIS_LSO2_INFO_MAKE does for its fields) so an out-of-range
+ * offset cannot set bits outside that field.
+ */
+#define NDIS_TXCSUM_INFO_MKL4CS(thoff, flag) \
+	(((((uint32_t)(thoff)) << 16) & NDIS_TXCSUM_INFO_THOFF) | (flag))
+
+/* TCP checksum request with the given thoff. */
+#define NDIS_TXCSUM_INFO_MKTCPCS(thoff) \
+	NDIS_TXCSUM_INFO_MKL4CS((thoff), NDIS_TXCSUM_INFO_TCPCS)
+
+/* UDP checksum request with the given thoff. */
+#define NDIS_TXCSUM_INFO_MKUDPCS(thoff) \
+	NDIS_TXCSUM_INFO_MKL4CS((thoff), NDIS_TXCSUM_INFO_UDPCS)
+
+#endif /* !_NET_NDIS_H_ */
diff --git a/src/seastar/dpdk/drivers/net/netvsc/rndis.h b/src/seastar/dpdk/drivers/net/netvsc/rndis.h
new file mode 100644
index 000000000..eac9a99fd
--- /dev/null
+++ b/src/seastar/dpdk/drivers/net/netvsc/rndis.h
@@ -0,0 +1,414 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2018 Microsoft Corp.
+ * Copyright (c) 2010 Jonathan Armani <armani@openbsd.org>
+ * Copyright (c) 2010 Fabien Romano <fabien@openbsd.org>
+ * Copyright (c) 2010 Michael Knudsen <mk@openbsd.org>
+ * All rights reserved.
+ */
+
+#ifndef _NET_RNDIS_H_
+#define _NET_RNDIS_H_
+
+/* Canonical major/minor version as of 22nd Aug. 2016. */
+#define RNDIS_VERSION_MAJOR 0x00000001
+#define RNDIS_VERSION_MINOR 0x00000000
+
+#define RNDIS_STATUS_SUCCESS 0x00000000
+#define RNDIS_STATUS_PENDING 0x00000103
+
+#define RNDIS_STATUS_ONLINE 0x40010003
+#define RNDIS_STATUS_RESET_START 0x40010004
+#define RNDIS_STATUS_RESET_END 0x40010005
+#define RNDIS_STATUS_RING_STATUS 0x40010006
+#define RNDIS_STATUS_CLOSED 0x40010007
+#define RNDIS_STATUS_WAN_LINE_UP 0x40010008
+#define RNDIS_STATUS_WAN_LINE_DOWN 0x40010009
+#define RNDIS_STATUS_WAN_FRAGMENT 0x4001000A
+#define RNDIS_STATUS_MEDIA_CONNECT 0x4001000B
+#define RNDIS_STATUS_MEDIA_DISCONNECT 0x4001000C
+#define RNDIS_STATUS_HARDWARE_LINE_UP 0x4001000D
+#define RNDIS_STATUS_HARDWARE_LINE_DOWN 0x4001000E
+#define RNDIS_STATUS_INTERFACE_UP 0x4001000F
+#define RNDIS_STATUS_INTERFACE_DOWN 0x40010010
+#define RNDIS_STATUS_MEDIA_BUSY 0x40010011
+#define RNDIS_STATUS_MEDIA_SPECIFIC_INDICATION 0x40010012
+/* Alias: same status code as RNDIS_STATUS_MEDIA_SPECIFIC_INDICATION. */
+#define RNDIS_STATUS_WW_INDICATION RNDIS_STATUS_MEDIA_SPECIFIC_INDICATION
+#define RNDIS_STATUS_LINK_SPEED_CHANGE 0x40010013
+#define RNDIS_STATUS_NETWORK_CHANGE 0x40010018
+#define RNDIS_STATUS_TASK_OFFLOAD_CURRENT_CONFIG 0x40020006
+
+#define RNDIS_STATUS_FAILURE 0xC0000001
+#define RNDIS_STATUS_RESOURCES 0xC000009A
+#define RNDIS_STATUS_NOT_SUPPORTED 0xC00000BB
+#define RNDIS_STATUS_CLOSING 0xC0010002
+#define RNDIS_STATUS_BAD_VERSION 0xC0010004
+#define RNDIS_STATUS_BAD_CHARACTERISTICS 0xC0010005
+#define RNDIS_STATUS_ADAPTER_NOT_FOUND 0xC0010006
+#define RNDIS_STATUS_OPEN_FAILED 0xC0010007
+#define RNDIS_STATUS_DEVICE_FAILED 0xC0010008
+#define RNDIS_STATUS_MULTICAST_FULL 0xC0010009
+#define RNDIS_STATUS_MULTICAST_EXISTS 0xC001000A
+#define RNDIS_STATUS_MULTICAST_NOT_FOUND 0xC001000B
+#define RNDIS_STATUS_REQUEST_ABORTED 0xC001000C
+#define RNDIS_STATUS_RESET_IN_PROGRESS 0xC001000D
+#define RNDIS_STATUS_CLOSING_INDICATING 0xC001000E
+#define RNDIS_STATUS_INVALID_PACKET 0xC001000F
+#define RNDIS_STATUS_OPEN_LIST_FULL 0xC0010010
+#define RNDIS_STATUS_ADAPTER_NOT_READY 0xC0010011
+#define RNDIS_STATUS_ADAPTER_NOT_OPEN 0xC0010012
+#define RNDIS_STATUS_NOT_INDICATING 0xC0010013
+#define RNDIS_STATUS_INVALID_LENGTH 0xC0010014
+#define RNDIS_STATUS_INVALID_DATA 0xC0010015
+#define RNDIS_STATUS_BUFFER_TOO_SHORT 0xC0010016
+#define RNDIS_STATUS_INVALID_OID 0xC0010017
+#define RNDIS_STATUS_ADAPTER_REMOVED 0xC0010018
+#define RNDIS_STATUS_UNSUPPORTED_MEDIA 0xC0010019
+#define RNDIS_STATUS_GROUP_ADDRESS_IN_US 0xC001001A
+#define RNDIS_STATUS_FILE_NOT_FOUND 0xC001001B
+#define RNDIS_STATUS_ERROR_READING_FILE 0xC001001C
+#define RNDIS_STATUS_ALREADY_MAPPED 0xC001001D
+#define RNDIS_STATUS_RESOURCE_CONFLICT 0xC001001E
+#define RNDIS_STATUS_NO_CABLE 0xC001001F
+
+#define OID_GEN_SUPPORTED_LIST 0x00010101
+#define OID_GEN_HARDWARE_STATUS 0x00010102
+#define OID_GEN_MEDIA_SUPPORTED 0x00010103
+#define OID_GEN_MEDIA_IN_USE 0x00010104
+#define OID_GEN_MAXIMUM_LOOKAHEAD 0x00010105
+#define OID_GEN_MAXIMUM_FRAME_SIZE 0x00010106
+#define OID_GEN_LINK_SPEED 0x00010107
+#define OID_GEN_TRANSMIT_BUFFER_SPACE 0x00010108
+#define OID_GEN_RECEIVE_BUFFER_SPACE 0x00010109
+#define OID_GEN_TRANSMIT_BLOCK_SIZE 0x0001010A
+#define OID_GEN_RECEIVE_BLOCK_SIZE 0x0001010B
+#define OID_GEN_VENDOR_ID 0x0001010C
+#define OID_GEN_VENDOR_DESCRIPTION 0x0001010D
+#define OID_GEN_CURRENT_PACKET_FILTER 0x0001010E
+#define OID_GEN_CURRENT_LOOKAHEAD 0x0001010F
+#define OID_GEN_DRIVER_VERSION 0x00010110
+#define OID_GEN_MAXIMUM_TOTAL_SIZE 0x00010111
+#define OID_GEN_PROTOCOL_OPTIONS 0x00010112
+#define OID_GEN_MAC_OPTIONS 0x00010113
+#define OID_GEN_MEDIA_CONNECT_STATUS 0x00010114
+#define OID_GEN_MAXIMUM_SEND_PACKETS 0x00010115
+#define OID_GEN_VENDOR_DRIVER_VERSION 0x00010116
+#define OID_GEN_SUPPORTED_GUIDS 0x00010117
+#define OID_GEN_NETWORK_LAYER_ADDRESSES 0x00010118
+#define OID_GEN_TRANSPORT_HEADER_OFFSET 0x00010119
+#define OID_GEN_RECEIVE_SCALE_CAPABILITIES 0x00010203
+#define OID_GEN_RECEIVE_SCALE_PARAMETERS 0x00010204
+#define OID_GEN_MACHINE_NAME 0x0001021A
+#define OID_GEN_RNDIS_CONFIG_PARAMETER 0x0001021B
+#define OID_GEN_VLAN_ID 0x0001021C
+
+#define OID_802_3_PERMANENT_ADDRESS 0x01010101
+#define OID_802_3_CURRENT_ADDRESS 0x01010102
+#define OID_802_3_MULTICAST_LIST 0x01010103
+#define OID_802_3_MAXIMUM_LIST_SIZE 0x01010104
+#define OID_802_3_MAC_OPTIONS 0x01010105
+#define OID_802_3_RCV_ERROR_ALIGNMENT 0x01020101
+#define OID_802_3_XMIT_ONE_COLLISION 0x01020102
+#define OID_802_3_XMIT_MORE_COLLISIONS 0x01020103
+#define OID_802_3_XMIT_DEFERRED 0x01020201
+#define OID_802_3_XMIT_MAX_COLLISIONS 0x01020202
+#define OID_802_3_RCV_OVERRUN 0x01020203
+#define OID_802_3_XMIT_UNDERRUN 0x01020204
+#define OID_802_3_XMIT_HEARTBEAT_FAILURE 0x01020205
+#define OID_802_3_XMIT_TIMES_CRS_LOST 0x01020206
+#define OID_802_3_XMIT_LATE_COLLISIONS 0x01020207
+
+#define OID_TCP_OFFLOAD_PARAMETERS 0xFC01020C
+#define OID_TCP_OFFLOAD_HARDWARE_CAPABILITIES 0xFC01020D
+
+#define RNDIS_MEDIUM_802_3 0x00000000
+
+/* Device flags */
+#define RNDIS_DF_CONNECTIONLESS 0x00000001
+#define RNDIS_DF_CONNECTION_ORIENTED 0x00000002
+
+/*
+ * Common RNDIS message header.
+ *
+ * The rndis_*_req, rndis_*_comp, rndis_packet_msg and rndis_status_msg
+ * structures below all begin with these two fields.
+ */
+struct rndis_msghdr {
+ uint32_t type;
+ uint32_t len;
+};
+
+/*
+ * RNDIS data message
+ */
+#define RNDIS_PACKET_MSG 0x00000001
+
+/*
+ * Header of an RNDIS data message (type RNDIS_PACKET_MSG).
+ *
+ * dataoffset, oobdataoffset and pktinfooffset are counted from the
+ * dataoffset field rather than from the start of the message; use
+ * RNDIS_PACKET_MSG_OFFSET_ABS() to convert them.
+ */
+struct rndis_packet_msg {
+ uint32_t type;
+ uint32_t len;
+ uint32_t dataoffset;
+ uint32_t datalen;
+ uint32_t oobdataoffset;
+ uint32_t oobdatalen;
+ uint32_t oobdataelements;
+ uint32_t pktinfooffset;
+ uint32_t pktinfolen;
+ uint32_t vchandle;
+ uint32_t reserved;
+};
+
+/*
+ * Minimum value for dataoffset, oobdataoffset, and
+ * pktinfooffset: the distance from the dataoffset field to the end of
+ * struct rndis_packet_msg.
+ */
+#define RNDIS_PACKET_MSG_OFFSET_MIN \
+ (sizeof(struct rndis_packet_msg) - \
+ offsetof(struct rndis_packet_msg, dataoffset))
+
+/* Offset from the beginning of rndis_packet_msg. */
+#define RNDIS_PACKET_MSG_OFFSET_ABS(ofs) \
+ ((ofs) + offsetof(struct rndis_packet_msg, dataoffset))
+
+/* Alignment for the offsets above, and the matching low-bit mask. */
+#define RNDIS_PACKET_MSG_OFFSET_ALIGN 4
+#define RNDIS_PACKET_MSG_OFFSET_ALIGNMASK \
+ (RNDIS_PACKET_MSG_OFFSET_ALIGN - 1)
+
+/*
+ * Per-packet-info for RNDIS data message: a fixed header followed by a
+ * variable-length payload.
+ */
+struct rndis_pktinfo {
+ uint32_t size;
+ uint32_t type; /* NDIS_PKTINFO_TYPE_ */
+ uint32_t offset;
+ uint8_t data[]; /* payload (flexible array member) */
+};
+
+/* Offset of the payload (data[]) from the start of rndis_pktinfo. */
+#define RNDIS_PKTINFO_OFFSET \
+ offsetof(struct rndis_pktinfo, data[0])
+/* Alignment for per-packet-info sizes, and the matching low-bit mask. */
+#define RNDIS_PKTINFO_SIZE_ALIGN 4
+#define RNDIS_PKTINFO_SIZE_ALIGNMASK (RNDIS_PKTINFO_SIZE_ALIGN - 1)
+
+#define NDIS_PKTINFO_TYPE_CSUM 0
+#define NDIS_PKTINFO_TYPE_IPSEC 1
+#define NDIS_PKTINFO_TYPE_LSO 2
+#define NDIS_PKTINFO_TYPE_CLASSIFY 3
+/* reserved 4 */
+#define NDIS_PKTINFO_TYPE_SGLIST 5
+#define NDIS_PKTINFO_TYPE_VLAN 6
+#define NDIS_PKTINFO_TYPE_ORIG 7
+#define NDIS_PKTINFO_TYPE_PKT_CANCELID 8
+#define NDIS_PKTINFO_TYPE_ORIG_NBLIST 9
+#define NDIS_PKTINFO_TYPE_CACHE_NBLIST 10
+#define NDIS_PKTINFO_TYPE_PKT_PAD 11
+
+/* RNDIS extension */
+
+/* Per-packet hash info */
+#define NDIS_HASH_INFO_SIZE sizeof(uint32_t)
+#define NDIS_PKTINFO_TYPE_HASHINF NDIS_PKTINFO_TYPE_ORIG_NBLIST
+/* NDIS_HASH_ */
+
+/* Per-packet hash value */
+#define NDIS_HASH_VALUE_SIZE sizeof(uint32_t)
+#define NDIS_PKTINFO_TYPE_HASHVAL NDIS_PKTINFO_TYPE_PKT_CANCELID
+
+/* Per-packet-info size */
+#define RNDIS_PKTINFO_SIZE(dlen) offsetof(struct rndis_pktinfo, data[dlen])
+
+/*
+ * RNDIS control messages
+ */
+
+/*
+ * Common header for RNDIS completion messages.
+ *
+ * NOTE: It does not apply to RNDIS_RESET_CMPLT.
+ */
+struct rndis_comp_hdr {
+ uint32_t type;
+ uint32_t len;
+ uint32_t rid;
+ uint32_t status;
+};
+
+/* Initialize the device. */
+#define RNDIS_INITIALIZE_MSG 0x00000002
+#define RNDIS_INITIALIZE_CMPLT 0x80000002
+
+struct rndis_init_req {
+ uint32_t type;
+ uint32_t len;
+ uint32_t rid;
+ uint32_t ver_major;
+ uint32_t ver_minor;
+ uint32_t max_xfersz;
+};
+
+/*
+ * Completion message for RNDIS_INITIALIZE_MSG (type
+ * RNDIS_INITIALIZE_CMPLT).  The first four fields match
+ * struct rndis_comp_hdr.
+ */
+struct rndis_init_comp {
+ uint32_t type;
+ uint32_t len;
+ uint32_t rid;
+ uint32_t status;
+ uint32_t ver_major;
+ uint32_t ver_minor;
+ uint32_t devflags;
+ uint32_t medium;
+ uint32_t pktmaxcnt;
+ uint32_t pktmaxsz;
+ uint32_t align;
+ uint32_t aflistoffset;
+ uint32_t aflistsz;
+};
+
+/* Minimum size: everything up to, but not including, aflistsz. */
+#define RNDIS_INIT_COMP_SIZE_MIN \
+ offsetof(struct rndis_init_comp, aflistsz)
+
+/* Halt the device. No response sent. */
+#define RNDIS_HALT_MSG 0x00000003
+
+struct rndis_halt_req {
+ uint32_t type;
+ uint32_t len;
+ uint32_t rid;
+};
+
+/* Send a query object. */
+#define RNDIS_QUERY_MSG 0x00000004
+#define RNDIS_QUERY_CMPLT 0x80000004
+
+/* Request message for RNDIS_QUERY_MSG. */
+struct rndis_query_req {
+ uint32_t type;
+ uint32_t len;
+ uint32_t rid;
+ uint32_t oid;
+ uint32_t infobuflen;
+ uint32_t infobufoffset;
+ uint32_t devicevchdl;
+};
+
+/*
+ * Distance from the rid field to the end of struct rndis_query_req.
+ */
+#define RNDIS_QUERY_REQ_INFOBUFOFFSET \
+ (sizeof(struct rndis_query_req) - \
+ offsetof(struct rndis_query_req, rid))
+
+/*
+ * Completion message for RNDIS_QUERY_MSG (type RNDIS_QUERY_CMPLT).
+ * The first four fields match struct rndis_comp_hdr.
+ */
+struct rndis_query_comp {
+ uint32_t type;
+ uint32_t len;
+ uint32_t rid;
+ uint32_t status;
+ uint32_t infobuflen;
+ uint32_t infobufoffset;
+};
+
+/*
+ * infobuf offset from the beginning of rndis_query_comp.
+ * The ofs argument is counted from the rid field.
+ */
+#define RNDIS_QUERY_COMP_INFOBUFOFFSET_ABS(ofs) \
+ ((ofs) + offsetof(struct rndis_query_comp, rid))
+
+/* Send a set object request. */
+#define RNDIS_SET_MSG 0x00000005
+#define RNDIS_SET_CMPLT 0x80000005
+
+/*
+ * Request message for RNDIS_SET_MSG; same field layout as
+ * struct rndis_query_req.
+ */
+struct rndis_set_req {
+ uint32_t type;
+ uint32_t len;
+ uint32_t rid;
+ uint32_t oid;
+ uint32_t infobuflen;
+ uint32_t infobufoffset;
+ uint32_t devicevchdl;
+};
+
+/*
+ * Distance from the rid field to the end of struct rndis_set_req.
+ */
+#define RNDIS_SET_REQ_INFOBUFOFFSET \
+ (sizeof(struct rndis_set_req) - \
+ offsetof(struct rndis_set_req, rid))
+
+struct rndis_set_comp {
+ uint32_t type;
+ uint32_t len;
+ uint32_t rid;
+ uint32_t status;
+};
+
+/*
+ * Parameter used by OID_GEN_RNDIS_CONFIG_PARAMETER.
+ */
+#define RNDIS_SET_PARAM_NUMERIC 0x00000000
+#define RNDIS_SET_PARAM_STRING 0x00000002
+
+struct rndis_set_parameter {
+ uint32_t nameoffset;
+ uint32_t namelen;
+ uint32_t type;
+ uint32_t valueoffset;
+ uint32_t valuelen;
+};
+
+/* Perform a soft reset on the device. */
+#define RNDIS_RESET_MSG 0x00000006
+#define RNDIS_RESET_CMPLT 0x80000006
+
+struct rndis_reset_req {
+ uint32_t type;
+ uint32_t len;
+ uint32_t rid;
+};
+
+struct rndis_reset_comp {
+ uint32_t type;
+ uint32_t len;
+ uint32_t status;
+ uint32_t adrreset;
+};
+
+/* 802.3 link-state or undefined message error. Sent by device. */
+#define RNDIS_INDICATE_STATUS_MSG 0x00000007
+
+/*
+ * Status indication message (type RNDIS_INDICATE_STATUS_MSG).
+ * Unlike the completion messages it has no rid field; stbufoffset is
+ * counted from the status field (see RNDIS_STBUFOFFSET_ABS).
+ */
+struct rndis_status_msg {
+ uint32_t type;
+ uint32_t len;
+ uint32_t status;
+ uint32_t stbuflen;
+ uint32_t stbufoffset;
+ /* rndis_diag_info */
+};
+
+/* stbuf offset from the beginning of rndis_status_msg. */
+#define RNDIS_STBUFOFFSET_ABS(ofs) \
+ ((ofs) + offsetof(struct rndis_status_msg, status))
+
+/*
+ * Diagnostic information that follows immediately after
+ * rndis_status_msg.stbufoffset when a control message is malformed,
+ * or a packet message contains inappropriate content.
+ */
+struct rndis_diag_info {
+ uint32_t diagstatus;
+ uint32_t erroffset;
+};
+
+/* Keepalive message. May be sent by device. */
+#define RNDIS_KEEPALIVE_MSG 0x00000008
+#define RNDIS_KEEPALIVE_CMPLT 0x80000008
+
+struct rndis_keepalive_req {
+ uint32_t type;
+ uint32_t len;
+ uint32_t rid;
+};
+
+struct rndis_keepalive_comp {
+ uint32_t type;
+ uint32_t len;
+ uint32_t rid;
+ uint32_t status;
+};
+
+/* Packet filter bits used by OID_GEN_CURRENT_PACKET_FILTER */
+#define NDIS_PACKET_TYPE_NONE 0x00000000
+#define NDIS_PACKET_TYPE_DIRECTED 0x00000001
+#define NDIS_PACKET_TYPE_MULTICAST 0x00000002
+#define NDIS_PACKET_TYPE_ALL_MULTICAST 0x00000004
+#define NDIS_PACKET_TYPE_BROADCAST 0x00000008
+#define NDIS_PACKET_TYPE_SOURCE_ROUTING 0x00000010
+#define NDIS_PACKET_TYPE_PROMISCUOUS 0x00000020
+#define NDIS_PACKET_TYPE_SMT 0x00000040
+#define NDIS_PACKET_TYPE_ALL_LOCAL 0x00000080
+#define NDIS_PACKET_TYPE_GROUP 0x00001000
+#define NDIS_PACKET_TYPE_ALL_FUNCTIONAL 0x00002000
+#define NDIS_PACKET_TYPE_FUNCTIONAL 0x00004000
+#define NDIS_PACKET_TYPE_MAC_FRAME 0x00008000
+
+#endif /* !_NET_RNDIS_H_ */
diff --git a/src/seastar/dpdk/drivers/net/netvsc/rte_pmd_netvsc_version.map b/src/seastar/dpdk/drivers/net/netvsc/rte_pmd_netvsc_version.map
new file mode 100644
index 000000000..d534019a6
--- /dev/null
+++ b/src/seastar/dpdk/drivers/net/netvsc/rte_pmd_netvsc_version.map
@@ -0,0 +1,5 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+
+DPDK_18.08 {
+ local: *;
+};